1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2017, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(hdt, 36 [ hdt_open/2, % -HDT, +Path 37 hdt_open/3, % -HDT, +Path, +Options 38 hdt_close/1, % +HDT 39 hdt_search/4, % +HDT, ?S,?P,?O 40 hdt_header/4, % +HDT, ?S,?P,?O 41 42 hdt_subject/2, % +HDT, ?Subject 43 hdt_predicate/2, % +HDT, ?Predicate 44 hdt_shared/2, % +HDT, ?Shared 45 hdt_object/2, % +HDT, ?Object 46 hdt_node/2, % +HDT, ?Node 47 48 hdt_suggestions/5, % +HDT, +Base, +Role, +MaxCount, -List 49 hdt_property/2, % +HTD, -Property 50 51 hdt_subject_id/3, % +HDT, ?Subject, ?Id 52 hdt_predicate_id/3, % +HDT, ?Predicate, ?Id 53 hdt_object_id/3, % +HDT, ?Object, ?Id 54 hdt_pre_triple/3, % +HDT, ?StringTriple, -IdTriple 55 hdt_post_triple/3, % +HDT, ?StringTriple, +IdTriple 56 hdt_search_id/4, % +HDT, ?S,?P,?O 57 hdt_search_cost/5, % +HDT, ?S,?P,?O, -Cost 58 59 hdt_create_from_file/3, % +HDTFile, +RDFFile, +Options 60 61 op(110, xfx, @), % must be above . 62 op(650, xfx, ^^) % must be above : 63 ]). 64:- use_module(library(semweb/rdf11)). 65:- use_module(library(sgml)). 66:- use_module(library(lists)). 67 68:- use_foreign_library(foreign(hdt4pl)). 69 70/** <module> Access HDT (Header Dictionary Triples) files 71*/ 72 73:- rdf_meta 74 hdt_search(+,r,r,o), hdt_subject(+,r), hdt_predicate(+,r), hdt_shared(+,r), hdt_object(+,o), hdt_subject_id(+, r, ?), hdt_node(+, o), hdt_predicate_id(+, r, ?), hdt_object_id(+, o, ?), hdt_search_cost(+, r, r, o, -).
File is expanded by absolute_file_name/3, with the default extension `.hdt`.
Options:
map
(map the file
into memory, default) or load
(load the content of the
file).true
. Such an index
is needed for partially instantiated calls to hdt_search/4.
The index is maintained in a file with extension `.index.v1-1`
in the same directory as the HDT file. (When the index is
created, some statistics are printed to standard error.)
An index is not needed if you only want to extract all
triples.111hdt_open(HDT, File) :- 112 hdt_open(HDT, File, []). 113 114hdt_open(HDT, File, Options) :- 115 absolute_file_name(File, FileAbs, [extensions([hdt]), expand(true), access(read)]), 116 hdt_open_(HDT, FileAbs, Options).
122hdt_search(HDT, S, P, O) :-
123 pre_object(HDT, O, OHDT),
124 hdt_search(HDT, content, S, P, OHDT),
125 post_object(O, OHDT).
131hdt_header(HDT, S, P, O) :- 132 hdt_search(HDT, header, S, P, O0), 133 header_object(O0, O). 134 135header_object(O0, O) :- 136 string(O0), !, 137 header_untyped_object(O0, O). 138header_object(O, O). 139 140header_untyped_object(O0, O) :- 141 catch(xsd_number_string(N, O0), 142 error(syntax_error(xsd_number), _), 143 fail), !, 144 ( integer(N) 145 -> rdf_equal(O, N^^xsd:integer) 146 ; rdf_equal(O, N^^xsd:float) 147 ). 148header_untyped_object(O0, O) :- 149 catch(xsd_time_string(Term, Type, O0), 150 error(_,_), fail), !, 151 O = Term^^Type. 152header_untyped_object(S, O) :- 153 rdf_equal(O, S^^xsd:string).
169hdt_subject(HDT, Subject) :- 170 ( var(Subject) 171 -> ( hdt_column_(HDT, shared, Var) 172 ; hdt_column_(HDT, subject, Var) 173 ), 174 Var = Subject 175 ; hdt_search(HDT, Subject, _, _) 176 -> true 177 ). 178 179hdt_predicate(HDT, Predicate) :- 180 ( var(Predicate) 181 -> hdt_column_(HDT, predicate, Var), 182 Var = Predicate 183 ; hdt_search(HDT, _, Predicate, _) 184 -> true 185 ). 186 187hdt_shared(HDT, Shared) :- 188 ( var(Shared) 189 -> hdt_column_(HDT, shared, Var), 190 Var = Shared 191 ; rdf_is_subject(Shared), 192 hdt_subject(HDT, Shared), 193 hdt_object(HDT, Shared) 194 -> true 195 ). 196 197hdt_object(HDT, Object) :- 198 ( var(Object) 199 -> ( hdt_column_(HDT, shared, Var), 200 Var = Object 201 ; hdt_object_(HDT, OHDT), 202 post_object(Object, OHDT) 203 ) 204 ; hdt_search(HDT, _, _, Object) 205 -> true 206 ). 207 208hdt_node(HDT, Node) :- 209 ( var(Node) 210 -> ( hdt_column_(HDT, shared, Var), 211 Var = Node 212 ; hdt_column_(HDT, subject, Var), 213 Var = Node 214 ; hdt_object_(HDT, OHDT), 215 post_object(Node, OHDT) 216 ) 217 ; hdt_search(HDT, Node, _, _) 218 -> true 219 ; hdt_search(HDT, _, _, Node) 220 -> true 221 ).
230pre_object(_HDT, O, OHDT) :- 231 atom(O), \+ boolean(O), !, 232 OHDT = O. 233pre_object(_HDT, O, OHDT) :- 234 ground(O), !, 235 rdf_lexical_form(O, Lexical), 236 canonical_string(Lexical, OHDT). 237pre_object(HDT, O, OHDT) :- 238 nonvar(O), 239 O = String@Lang, 240 ground(String), 241 atomics_to_string(["\"", String, "\"@"], Prefix), 242 hdt_suggestions(HDT, Prefix, object, 1000, List), 243 length(List, Found), 244 Found < 1000, !, % we got them all 245 member(_@Lang, List), 246 canonical_string(String@Lang, OHDT). 247pre_object(_, _, _). 248 249canonical_string(Lexical^^Type, HDT) :- 250 atomics_to_string(["\"", Lexical, "\"^^<", Type, ">"], HDT). 251canonical_string(Lexical@Lang, HDT) :- 252 atomics_to_string(["\"", Lexical, "\"@", Lang], HDT). 253 254boolean(false). 255boolean(true).
259post_object(O, _HDT) :- 260 ground(O), !. 261post_object(O, IRI) :- 262 atom(IRI), !, 263 O = IRI. 264post_object(O, HDT) :- 265 rdf_canonical_literal(HDT, O).
mapping(-Mapping)
max_id(-ID)
)max_object_id(-ID)
)max_predicate_id(-ID)
)max_subject_id(-ID)
)objects(-Count)
)predicates(-Count)
)shared(-Count)
)subjects(-Count)
)elements(-Count)
)295hdt_property(HDT, Property) :- 296 hdt_property(Property), 297 hdt_property_(HDT, Property). 298 299hdt_property(mapping(_)). 300hdt_property(max_id(_)). 301hdt_property(max_object_id(_)). 302hdt_property(max_predicate_id(_)). 303hdt_property(max_subject_id(_)). 304hdt_property(objects(_)). 305hdt_property(predicates(_)). 306hdt_property(shared(_)). 307hdt_property(subjects(_)). 308hdt_property(elements(_)). 309 310 311 /******************************* 312 * IDENTIFIERS * 313 *******************************/
324hdt_subject_id(HDT, String, Id) :- 325 hdt_string_id(HDT, subject, String, Id). 326hdt_predicate_id(HDT, String, Id) :- 327 hdt_string_id(HDT, predicate, String, Id). 328hdt_object_id(HDT, Object, Id) :- 329 pre_object(HDT, Object, String), 330 hdt_string_id(HDT, object, String, Id), 331 post_object(Object, String).
hdt_search(HDT, S, P, O) :- Triple = t(S,P,O), TripleID = t(SID,PID,OID), hdt_pre_triple(HDT, Triple, TripleID), hdt_search_id(HDT,SID,PID,OID), hdt_post_triple(HDT, Triple, TripleID).
350hdt_pre_triple(HDT, t(S0,P0,O0), t(S,P,O)) :- 351 pre_iri_id(HDT, subject, S0, S), 352 pre_iri_id(HDT, predicate, P0, P), 353 ( ground(O0) 354 -> pre_object(HDT, O0, String), 355 hdt_string_id(HDT, object, String, O) 356 ; true 357 ). 358 359hdt_post_triple(HDT, t(S0,P0,O0), t(S,P,O)) :- 360 post_iri_id(HDT, subject, S0, S), 361 post_iri_id(HDT, predicate, P0, P), 362 ( ground(O0) 363 -> true 364 ; hdt_string_id(HDT, object, String, O), 365 post_object(O0, String) 366 ). 367 368pre_iri_id(_, _, In, _) :- 369 var(In), !. 370pre_iri_id(HDT, Role, In, Id) :- 371 hdt_string_id(HDT, Role, In, Id). 372 373post_iri_id(_, _, S0, _) :- 374 atom(S0), !. 375post_iri_id(HDT, Role, In, Id) :- 376 hdt_string_id(HDT, Role, In, Id).
385hdt_search_cost(HDT, S, P, O, Cost) :- 386 Triple = t(S,P,O), 387 TripleID = t(SID,PID,OID), 388 hdt_pre_triple(HDT, Triple, TripleID), 389 hdt_search_cost_id(HDT, SID, PID, OID, Cost), !. 390hdt_search_cost(_, _, _, _, 0). 391 392 393 /******************************* 394 * CREATE * 395 *******************************/
ntriples
format. The file names are expanded using
absolute_file_name/3, but without any default extension (this
is different from hdt_open/3 works). Options:
ntriples
, nt
, turtle
, ttl
, nquads
, nq
, trig
(also upper/lower case variants such as "TriG" or "TTL")
defaults to ntriples
.413 /******************************* 414 * MESSAGES * 415 *******************************/ 416 417:- multifile prolog:error_message//1. 418 419prologerror_message(hdt_error(Message)) --> 420 [ 'HDT: ~w'-[Message] ]