1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2017, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(hdt,
   36	  [ hdt_open/2,			% -HDT, +Path
   37	    hdt_open/3,			% -HDT, +Path, +Options
   38	    hdt_close/1,		% +HDT
   39	    hdt_search/4,		% +HDT, ?S,?P,?O
   40	    hdt_header/4,		% +HDT, ?S,?P,?O
   41
   42	    hdt_subject/2,		% +HDT, ?Subject
   43	    hdt_predicate/2,		% +HDT, ?Predicate
   44	    hdt_shared/2,		% +HDT, ?Shared
   45	    hdt_object/2,		% +HDT, ?Object
   46	    hdt_node/2,     % +HDT, ?Node
   47
   48	    hdt_suggestions/5,		% +HDT, +Base, +Role, +MaxCount, -List
   49	    hdt_property/2,		% +HTD, -Property
   50
   51	    hdt_subject_id/3,		% +HDT, ?Subject, ?Id
   52	    hdt_predicate_id/3,		% +HDT, ?Predicate, ?Id
   53	    hdt_object_id/3,		% +HDT, ?Object, ?Id
   54	    hdt_pre_triple/3,		% +HDT, ?StringTriple, -IdTriple
   55	    hdt_post_triple/3,		% +HDT, ?StringTriple, +IdTriple
   56	    hdt_search_id/4,		% +HDT, ?S,?P,?O
   57	    hdt_search_cost/5,		% +HDT, ?S,?P,?O, -Cost
   58
   59	    hdt_create_from_file/3,	% +HDTFile, +RDFFile, +Options
   60
   61	    op(110, xfx, @),		% must be above .
   62	    op(650, xfx, ^^)		% must be above :
   63	  ]).   64:- use_module(library(semweb/rdf11)).   65:- use_module(library(sgml)).   66:- use_module(library(lists)).   67
   68:- use_foreign_library(foreign(hdt4pl)).   69
   70/** <module> Access HDT (Header Dictionary Triples) files
   71*/
   72
   73:- rdf_meta
   74	hdt_search(+,r,r,o),	hdt_subject(+,r),	hdt_predicate(+,r),	hdt_shared(+,r),	hdt_object(+,o),	hdt_subject_id(+, r, ?),	hdt_node(+, o),	hdt_predicate_id(+, r, ?),	hdt_object_id(+, o, ?),	hdt_search_cost(+, r, r, o, -).
 hdt_open(-HDT, +File) is det
 hdt_open(-HDT, +File, +Options) is det
Open an existing HDT file and unify HDT with a handle to it. The handle is an opaque symbol that is subject to (atom) garbage collection.

File is expanded by absolute_file_name/3, with the default extension `.hdt`.

Options:

access(+Access)
How the file is accessed. One of map (map the file into memory, default) or load (load the content of the file).
indexed(+Boolean)
Whether an index is created. Default is true. Such an index is needed for partially instantiated calls to hdt_search/4. The index is maintained in a file with extension `.index.v1-1` in the same directory as the HDT file. (When the index is created, some statistics are printed to standard error.) An index is not needed if you only want to extract all triples.
  111hdt_open(HDT, File) :-
  112	hdt_open(HDT, File, []).
  113
  114hdt_open(HDT, File, Options) :-
  115	absolute_file_name(File, FileAbs, [extensions([hdt]), expand(true), access(read)]),
  116	hdt_open_(HDT, FileAbs, Options).
 hdt_search(+HDT, ?S, ?P, ?O)
True if <S,P,O> is a triple in HDT.
  122hdt_search(HDT, S, P, O) :-
  123	pre_object(HDT, O, OHDT),
  124	hdt_search(HDT, content, S, P, OHDT),
  125	post_object(O, OHDT).
 hdt_header(+HDT, ?S, ?P, ?O)
True if <S,P,O> is a triple in the header of HDT.
  131hdt_header(HDT, S, P, O) :-
  132	hdt_search(HDT, header, S, P, O0),
  133	header_object(O0, O).
  134
  135header_object(O0, O) :-
  136	string(O0), !,
  137	header_untyped_object(O0, O).
  138header_object(O, O).
  139
  140header_untyped_object(O0, O) :-
  141	catch(xsd_number_string(N, O0),
  142	      error(syntax_error(xsd_number), _),
  143	      fail), !,
  144	(   integer(N)
  145	->  rdf_equal(O, N^^xsd:integer)
  146	;   rdf_equal(O, N^^xsd:float)
  147	).
  148header_untyped_object(O0, O) :-
  149	catch(xsd_time_string(Term, Type, O0),
  150	      error(_,_), fail), !,
  151	O = Term^^Type.
  152header_untyped_object(S, O) :-
  153	rdf_equal(O, S^^xsd:string).
 hdt_subject(+HDT, ?S) is nondet
 hdt_predicate(+HDT, ?P) is nondet
 hdt_object(+HDT, ?O) is nondet
 hdt_shared(+HDT, ?SO) is nondet
 hdt_node(+HDT, ?Node) is nondet
Enumerate possible values for the individual components of the triples represented in the HDT. Note that these enumarators do not enumerate blank nodes. The predicate hdt_shared/2 enumerates resources that exist in the dataset both as subject and object. If the second argument is instantiated hdt_search/4 is used to perform an indexed search and the predicates are semidet.
  169hdt_subject(HDT, Subject) :-
  170	(   var(Subject)
  171	->  (   hdt_column_(HDT, shared, Var)
  172	    ;	hdt_column_(HDT, subject, Var)
  173	    ),
  174	    Var = Subject
  175	;   hdt_search(HDT, Subject, _, _)
  176	->  true
  177	).
  178
  179hdt_predicate(HDT, Predicate) :-
  180	(   var(Predicate)
  181	->  hdt_column_(HDT, predicate, Var),
  182	    Var = Predicate
  183	;   hdt_search(HDT, _, Predicate, _)
  184	->  true
  185	).
  186
  187hdt_shared(HDT, Shared) :-
  188	(   var(Shared)
  189	->  hdt_column_(HDT, shared, Var),
  190	    Var = Shared
  191	;   rdf_is_subject(Shared),
  192	    hdt_subject(HDT, Shared),
  193	    hdt_object(HDT, Shared)
  194	->  true
  195	).
  196
  197hdt_object(HDT, Object) :-
  198	(   var(Object)
  199	->  (   hdt_column_(HDT, shared, Var),
  200	        Var = Object
  201	    ;	hdt_object_(HDT, OHDT),
  202		post_object(Object, OHDT)
  203	    )
  204	;   hdt_search(HDT, _, _, Object)
  205	->  true
  206	).
  207
  208hdt_node(HDT, Node) :-
  209	(   var(Node)
  210	->  (   hdt_column_(HDT, shared, Var),
  211	        Var = Node
  212	    ;   hdt_column_(HDT, subject, Var),
  213	        Var = Node
  214	    ;   hdt_object_(HDT, OHDT),
  215	        post_object(Node, OHDT)
  216	    )
  217	;   hdt_search(HDT, Node, _, _)
  218	->  true
  219	;   hdt_search(HDT, _, _, Node)
  220	->  true
  221	).
 pre_object(+HDT, ?O, -OHDT) is det
 post_object(?O, +OHDT) is det
Pre/post object processing. The HDT library itself is purely string based.
  230pre_object(_HDT, O, OHDT) :-
  231	atom(O), \+ boolean(O), !,
  232	OHDT = O.
  233pre_object(_HDT, O, OHDT) :-
  234	ground(O), !,
  235	rdf_lexical_form(O, Lexical),
  236	canonical_string(Lexical, OHDT).
  237pre_object(HDT, O, OHDT) :-
  238	nonvar(O),
  239	O = String@Lang,
  240	ground(String),
  241	atomics_to_string(["\"", String, "\"@"], Prefix),
  242	hdt_suggestions(HDT, Prefix, object, 1000, List),
  243	length(List, Found),
  244	Found < 1000, !,		% we got them all
  245	member(_@Lang, List),
  246	canonical_string(String@Lang, OHDT).
  247pre_object(_, _, _).
  248
  249canonical_string(Lexical^^Type, HDT) :-
  250	atomics_to_string(["\"", Lexical, "\"^^<", Type, ">"], HDT).
  251canonical_string(Lexical@Lang, HDT) :-
  252	atomics_to_string(["\"", Lexical, "\"@", Lang], HDT).
  253
  254boolean(false).
  255boolean(true).
 post_object(?PrologObj, ?HDTObjectString) is semidet
  259post_object(O, _HDT) :-
  260	ground(O), !.
  261post_object(O, IRI) :-
  262	atom(IRI), !,
  263	O = IRI.
  264post_object(O, HDT) :-
  265	rdf_canonical_literal(HDT, O).
 hdt_suggestions(+HDT, +Base, +Role, +MaxResults, -Results:list) is det
True when Results is a list of suggestions for Base in the triple role Role. Some experimentation suggests it performs a prefix match on the internal string representation. This implies that literals are only found if the first character of Base is `"`.
Arguments:
Base- is a string or atom
Role- is one of subject, predicate or object
 hdt_property(+HDT, ?Property) is nondet
True if Property is a property of HTD. Defined properties are
  295hdt_property(HDT, Property) :-
  296	hdt_property(Property),
  297	hdt_property_(HDT, Property).
  298
  299hdt_property(mapping(_)).
  300hdt_property(max_id(_)).
  301hdt_property(max_object_id(_)).
  302hdt_property(max_predicate_id(_)).
  303hdt_property(max_subject_id(_)).
  304hdt_property(objects(_)).
  305hdt_property(predicates(_)).
  306hdt_property(shared(_)).
  307hdt_property(subjects(_)).
  308hdt_property(elements(_)).
  309
  310
  311		 /*******************************
  312		 *	    IDENTIFIERS		*
  313		 *******************************/
 hdt_subject_id(+HDT, ?Subject:atom, ?Id:integer) is semidet
 hdt_predicate_id(+HDT, ?Predicate:atom, ?Id:integer) is semidet
 hdt_object_id(+HDT, ?Object:any, ?Id:integer) is semidet
True if String is mapped to Id in the given role. Fails if the requested String or Id is not known for the given role in HDT.
Arguments:
Role- is one of subject, predicate or object
  324hdt_subject_id(HDT, String, Id) :-
  325	hdt_string_id(HDT, subject, String, Id).
  326hdt_predicate_id(HDT, String, Id) :-
  327	hdt_string_id(HDT, predicate, String, Id).
  328hdt_object_id(HDT, Object, Id) :-
  329	pre_object(HDT, Object, String),
  330	hdt_string_id(HDT, object, String, Id),
  331	post_object(Object, String).
 hdt_pre_triple(+HDT, ?TripleIn, -TripleID) is det
 hdt_post_triple(+HDT, ?TripleIn, +TripleID) is det
Perform term->id and id->term translation for triples. The predicate hdt_search/4 could be defined as:
hdt_search(HDT, S, P, O) :-
    Triple   = t(S,P,O),
    TripleID = t(SID,PID,OID),
    hdt_pre_triple(HDT, Triple, TripleID),
    hdt_search_id(HDT,SID,PID,OID),
    hdt_post_triple(HDT, Triple, TripleID).
See also
- hdt_search_id/4.
  350hdt_pre_triple(HDT, t(S0,P0,O0), t(S,P,O)) :-
  351	pre_iri_id(HDT, subject, S0, S),
  352	pre_iri_id(HDT, predicate, P0, P),
  353	(   ground(O0)
  354	->  pre_object(HDT, O0, String),
  355	    hdt_string_id(HDT, object, String, O)
  356	;   true
  357	).
  358
  359hdt_post_triple(HDT, t(S0,P0,O0), t(S,P,O)) :-
  360	post_iri_id(HDT, subject, S0, S),
  361	post_iri_id(HDT, predicate, P0, P),
  362	(   ground(O0)
  363	->  true
  364	;   hdt_string_id(HDT, object, String, O),
  365	    post_object(O0, String)
  366	).
  367
  368pre_iri_id(_, _, In, _) :-
  369	var(In), !.
  370pre_iri_id(HDT, Role, In, Id) :-
  371	hdt_string_id(HDT, Role, In, Id).
  372
  373post_iri_id(_, _, S0, _) :-
  374	atom(S0), !.
  375post_iri_id(HDT, Role, In, Id) :-
  376	hdt_string_id(HDT, Role, In, Id).
 hdt_search_id(+HDT, ?S:integer, ?P:integer, ?O:integer) is nondet
True if a triple with the indicated identifiers exists.
 hdt_search_cost(HDT, ?S, ?P, ?O, -Cost:nonneg) is det
  385hdt_search_cost(HDT, S, P, O, Cost) :-
  386	Triple   = t(S,P,O),
  387	TripleID = t(SID,PID,OID),
  388	hdt_pre_triple(HDT, Triple, TripleID),
  389	hdt_search_cost_id(HDT, SID, PID, OID, Cost), !.
  390hdt_search_cost(_, _, _, _, 0).
  391
  392
  393		 /*******************************
  394		 *	       CREATE		*
  395		 *******************************/
 hdt_create_from_file(+HDTFile, +RDFFile, +Options)
Create a HDT file from an RDF file. The format of RDFFile defaults to ntriples format. The file names are expanded using absolute_file_name/3, but without any default extension (this is different from hdt_open/3 works). Options:
base_uri(+URI)
URI is used for generating the header properties (see http_header/4.
format(+Notation)
Notation is one of: ntriples, nt, turtle, ttl, nquads, nq, trig (also upper/lower case variants such as "TriG" or "TTL") defaults to ntriples.
  413		 /*******************************
  414		 *	      MESSAGES		*
  415		 *******************************/
  416
  417:- multifile prolog:error_message//1.  418
  419prolog:error_message(hdt_error(Message)) -->
  420	[ 'HDT: ~w'-[Message] ]