1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    2%    Authors:       Nicos Angelopoulos
    3%    E-mail:        Nicos Angelopoulos http://stoics.org.uk/~nicos/sware/contact.html
    4%    Copyright (C): Nicos Angelopoulos, 2015-2025
    5%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    6/*
    7   This program is free software; you can redistribute it and/or
    8    modify it under the terms of the MIT license
    9
   10    This program is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
   13
   14*/
   15:- module( bio_db, [
   16                % This interface has now being split according to 
   17                % biological organisms, see files in cell/
   18                % 1. housekeeping:
   19                % bio_db/0,
   20                bio_db_close/1,
   21                bio_db_db_predicate/1,
   22                bio_db_data_predicate/4,
   23                bio_db_info/2,
   24                bio_db_info/3,
   25                bio_db_info/4,
   26                bio_db_interface/1,
   27                bio_db_interface/2,
   28                bio_db_install/2, bio_db_install/3,
   29                bio_db_organism/1, bio_db_organism/2, bio_db_organism/3,
   30                bio_db_organism_alias/2,
   31                bio_db_org_in_opts/2,
   32                bio_db_paths/0,
   33                bio_db_source/2,
   34                bio_db_version/2,
   35                bio_db_citation/2,
   36                bio_db_close_connections/0,
   37                % 2 derived
   38                % A.symbols
   39                is_symbol/2,
   40                ncbi_symb/3,
   41                % B. gene ontology
   42                go_id/2,          % +/-Go, -/+Int
   43                go_id/3,          % +GoOrInt, -Go, -Int
   44                % C. string edges
   45                org_edge_strg_symb/4  % ?Org, ?Symb1, ?Symb2, -W
   46             ] ).   47
   48:- dynamic( bio_db_handle/5 ).   49
   50:- dynamic( '$bio_db_handle'/2 ). % this is needed for the asserted server preds 
   51
   52
   53% auto-load (& other system) libraries
   54:- use_module(library(lists)).   55:- use_module(library(apply)).   56:- use_module(library(debug)).     % /1,3.
   57:- use_module(library(archive)).   % archive_extract/3.
   58:- use_module(library(filesex)).   59
   60:- use_module(library(lib)).   61
   62:- ensure_loaded('../src/bio_db_data_predicate').   63
   64:- lib(source(bio_db), homonyms(true)).   65
   66:- lib(stoics_lib:date_two_digit_dotted/1).   67:- lib(go_id/2).   68:- lib(is_symbol/2).   69:- lib(ncbi_symb/3).   70:- lib(org_edge_strg_symb/4).   71:- lib(bio_db_org_in_opts/2).   72:- lib(end(bio_db)).   73
   74% :- initialization( lib(& bio_db, load_main(false)), after_load ).
   75:- initialization( lib(@(bio_db)), after_load ).
 bio_db_organism(?Org)
Colloquial name for organisms supported by bio_db.

Human is considered the default organism and returned first.

?- bio_db_organism(Org).
Org = human ;
Org = chicken ;
Org = mouse ;
Org = multi ;
Org = pig.
author
- nicos angelopoulos
version
- 0:2 2019/4/8
- 0:3 2022/12/29, changed to colloquials and added chicken, were hs and mouse.
- 0:4 2023/6/3, added pig

*/

   99bio_db_organism(human).      % defaulty
  100bio_db_organism(chicken).    % 2022/12/21
  101bio_db_organism(mouse).
  102bio_db_organism(multi).      % 2023/9/15
  103bio_db_organism(pig).        % 2023/6/2
 bio_db_organism(?KnownAs, ?Canon)
 bio_db_organism(?KnownAs, ?Token, ?Canon)
Canon is the canonical, colloquial, representation of organism KnownAs and Token is a 4 letter bio_db representation of that organism.

KnownAs is either a known colloquial name tabled in bio_db_organism/1, an alias to an organism or an organism token. Token is the token used in bio_db predicate, file and directory names for this organism.

?- bio_db_organism(KnownAs,Org), write(KnownAs:Org), nl, fail.
hs:human
gallus:chicken
gallus_gallus:chicken
gg6a:chicken
human:human
chicken:chicken
mouse:mouse
galg:chicken
homs:human
musm:mouse
suss:pig
mult:multi

?- bio_db_organism(hs, Org).
Org = human.

?- bio_db_organism(KnownAs, Token, human).
KnownAs = hs,
Token = homs ;
KnownAs = human,
Token = homs ;
KnownAs = Token, Token = homs.

?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
Hgnc = 19295.
author
- nicos angelopoulos
version
- 0.2 2019/5/2
- 0.3 2022/12/25, added /3 version, and added many aliases

*/

  148bio_db_organism( Alias, Org ) :-
  149     bio_db_organism( Alias, _Token, Org ).
  150
  151bio_db_organism( Alias, Token, Org ) :-
  152    ( ground(Alias) -> Backtrack = false; Backtrack = true ),
  153    bio_db_organism_alias( Alias, Org ),
  154    ( Backtrack == false -> !; true ),
  155    bio_db_organism_token( Org, Token ).
  156bio_db_organism( Org, Token, Canon ) :-
  157    ( ground(Org) -> Backtrack = false; Backtrack = true ),
  158    bio_db_organism( Org ),
  159    bio_db_organism_token( Org, Token ),
  160    ( Backtrack == false -> !; true ),
  161    Canon = Org.
  162bio_db_organism( TokenIs, Token, Canon ) :-
  163    ( ground(TokenIs) -> Backtrack = false; Backtrack = true ),
  164    bio_db_organism_token( Canon, TokenIs ),
  165    ( Backtrack == false -> !; true ),
  166    Token = TokenIs.
  167
  168bio_db_organism_known( A, T, O ) :-
  169     bio_db_organism( A, T, O ),
  170     !.
  171bio_db_organism_known( A, T, O ) :-
  172     throw( un_known(bio_db_organism(A,T,O)) ).
  173     
  174
  175bio_db_organism_token(chicken, galg).
  176bio_db_organism_token(  human, homs).
  177bio_db_organism_token(  mouse, musm).
  178bio_db_organism_token(  multi, mult).
  179bio_db_organism_token(    pig, suss).
 bio_db_organism_alias(?Alias, -Org)
Alias is a known and supported alternative name for the canonical Org name for an organism.
?- bio_db_organism_alias( human, hs ).
true.

Note this used to be bio_db_organism/2 which has now (19.05.02) changed.

author
- nicos angelopoulos
version
- 0:1 2019/5/2
- 0:2 2022/12/20, gallus also known as chicken and gallus_gallus */
  197bio_db_organism_alias( hs, human ).
  198bio_db_organism_alias( gallus, chicken ).
  199bio_db_organism_alias( gallus_gallus, chicken ).
  200bio_db_organism_alias( gg6a, chicken ).
  201bio_db_organism_alias( gg7w, chicken ).
  202
  203% this search path can be added to requires
  204% bio_db_map/2,
  205% ncbi_homs_ensp_unip/2,
  206% ncbi_homs_ensp_ensg/2,
  207
  208/* was:
  209bio_db_interface_atom( prolog ).
  210bio_db_interface_atom( prosqlite ).
  211bio_db_interface_atom( berkeley ).
  212*/
  213bio_db_interface_atom( Iface ) :-
  214    bio_db_interface_extensions( Iface, _ ).
  215
  216bio_db_interface_initialisation( null ). % so it exists, fixme: should nt this be prolog ?
  217bio_db_interface_initialisation( prosqlite ) :-
  218    use_module( library(prosqlite) ).
  219bio_db_interface_initialisation( berkeley ) :-
  220    use_module( library(bdb) ).
  221bio_db_interface_initialisation( rocks ) :-
  222    use_module( library(rocksdb) ).
  223
  224bio_db_default_interface( prolog ).
  225
  226:- Opts = [access(read_write),type(atom),keep(true)],
  227   bio_db_default_interface( Def ),
  228   create_prolog_flag( bio_db_interface, Def, Opts ).  229
  230:- Opts = [access(read_write),type(atom),keep(true)],
  231   create_prolog_flag( bio_db_pl_from_zip, user, Opts ).  % true/false/user
  232
  233:- Opts = [access(read_write),type(atom),keep(true)],
  234   create_prolog_flag( bio_db_del_zip, user, Opts ).  % true/false/user, only asked for pl files
  235
  236:- use_module( library(lib) ).  237:- lib( source(bio_db), homonyms(true) ).  238
  239:- lib(options).  240:- lib(pack_errors).  241
  242:- lib(stoics_lib:at_con/3).  243:- lib(stoics_lib:portray_clauses/2).  244:- lib(stoics_lib:url_file/3).  245:- lib(stoics_lib:message_report/3).  246
  247:- lib(ui_yes_no/5).  248:- lib(bio_db_map/2).  249:- ensure_loaded('../auxil/build_repo/lib/bio_db_pl_info').   % /2.
  250:- lib(end(bio_db)).  251
  252stoics( 'https://stoics.org.uk/~nicos/sware/packs/bio_db_repo/data' ).

Access, use and manage big, biological datasets.

Bio_db gives access to pre-packed biological databases and simplifies management and translation of biological data to Prolog friendly formats.

There are currently 2 major types of data supported: maps, and graphs. Maps define product mappings, translations and memberships, while graphs define interactions which can be visualised as weighed graphs (see bio_db_data_predicate/4 for a full list of statically generated list of bio_db data predicates).

There are 2 prolog flags (see current_prolog_flag/2) that can control the behaviour of the library: bio_db_qcompile (def: true) and bio_db_interface (def: prolog). When the first one is set to false, it can disable the compilation to

Bio_db itself does include any of the datasets. You can either download the separate pack(bio_db_repo) which contains all of the Prolog datasets or let pack(bio_db) download the data file one at the time- as needed. As of version v4.4 there are 144 associated data predicates serving 76398976 records.

This pack can be installed as per usual via

?- pack(bio_db_repo).

However, please note this will download all available tables (zipped) with a total download of 477Mb (v4.4). The first time a table is interrogated it is unzipped ot the .pl version and the interpreter automatically also create a .qlf. When the all the tables have been access at least once, the pack will take around 6.3Gb (v4.4).

If you do not want to install all datasets, you should not install the pack as above. Instead pack(bio_db) will download individual data tables the first time you try to access some of its data. Auto-downloading works transparently to the user, where a data set is downloaded by simply calling the predicate.

For example

?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
% prolog DB:table hgnc:hgnc_homs_symb_hgnc/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_symb_hgnc.pl,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_symb_hgnc.pl)
% Loading prolog db: /usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_symb_hgnc.pl
Hgnc = 19295.

?- bio_db_interface( prosqlite ).
% Setting bio_db_interface prolog_flag, to: prosqlite
true.

?- hgnc_homs_prev_symb( Prv, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite)
false.

?- hgnc_homs_prev_symb( Prv, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Trying to get: url_file(http://www.stoics.org.uk/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite,/usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite)
% Loading prosqlite db: /usr/local/users/nicos/local/git/test_bio_db/data/maps/hgnc/hgnc_homs_prev_symb.sqlite
Prv = 'A1BG-AS',
Symb = 'A1BG-AS1' .

See bio_db_data_predicate/4 for a way to enumerate all data predicates. The source of which is in src/bio_db_data_predicate.pl which also includes in the comments the cell structure.

As of version 2.0 bio_db is formed of a number of hierarchically organised cells that can be loaded independently. This is because there now too many predicates and is also a devise for better supporting organism specific data. There are currently two main cells, hs (human) and mouse. Each sub-celled by data source of origin.

?- use_module(library(bio_db)).

Loads the whole interface (all cells), without the user needing to be aware of anything. The only difference is that the user will not be able to see all the module predicates at the first line of file pack(bio_db/prolog/bio_db.pl)).

?- lib(bio_db).

Also loads everything.

?- lib(& bio_db).

Loads the skeleton of the module (cells usually laod the module dependencies like this).

?- lib(& bio_db(hs)).

Loads hs cell (and skeleton). hs comprises of a number of sub-cells.

?- lib(& bio_db(hs(hgnc))).

Loads the hs/hgnc primary cell (and the skeleton).

In both the above loads, the following becomes available, however, the former load also loads additional predicates for human, but non hgnc based.

?- hgnc_homs_hgnc_symb( Hgnc, 'LMTK3' ).
Hgnc = 19295.

The following

?- use_module( pack('bio_db/cell/hs/hgnc') ).

also loads just the HGNC part of the human section of bio_db, but it is not a recommended way to do so.

Organisms

galg
Gallus gallus (red junglefowl), colloquial: chicken
homs
Homo sapiens, colloquial: human
mult
covers multiple organisms, longer form: multi
musm
Mus musculus, colloquial: mouse
suss
Sus scrofa (wild boar or Eurasian boar) colloquial: pig

Databases

Ensembl=ense
Homo sapiens genes and proteins. Genes and trascripts mappings along with mapping to genomic location (latter not included in release yet)
HGNC=hgnc
Hugo Gene Nomenclature Committee, http://www.genenames.org/
NCBI=ncbi
NCBI
Uniprot=unip
Protein database.
String=strg
Protein-Protein interactions data base
MGI=mgim
Mouse Genome Informatics, mouse specific datasets (last M for marker, their identifier)
Reactome=reac
Pathway database

For each database, a relation token with the same name, maps the field is the unique identifier of that database.

Other relation tokens

symb
HGNC gene symbol (short, unique name for genes)
name
(HGNC) gene name (long, less standarised version of gene name)
prev
HGNC previous gene symbol
syno
HGNC gene symbol synonym
ensg
ensembl gene
enst
ensembl transcript
ensp
ensembl protein
gonm
GO name of a term
pros
Prosite protein family information
rnuc
RNA nucleic sequence ID to HGNC symbol.
unig
uniprotein gene id
sprt
Swiss-Prot part of Uniprot (high quality, curated)
trem
TrEMBL part of Uniprot (non curated)
mgim
MGI Marker (identifier for Mouse Genome Informatics Markers)
cgnc
Chicken gene nomenclature committee
taxo
taxonomy id (NCBI)
scnm
scientific names for species (NCBI)
gbnm
genbank common name (NCBI)

The name convention for map predicates is

   ?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
   Hgnc = 1,
   Symb = 'A12M1~withdrawn' ;
   Hgnc = 2,
   Symb = 'A12M2~withdrawn' .

   ?- hgnc_homs_hgnc_symb( 19295, Symb ).
   Symb = 'LMTK3'.

   ?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
  Hgnc = 19295.

Where the first hgnc corresponds to the source database, the second token, homs, identifies the organism, the third and fourth tokens are the fields of the map. Above, the second hgnc

The last part of the predicate name corresponds to the second (or all other) argument(s), which here is the unique Symbol assigned to a gene by HGNC. In the current version of bio_db, all tokens in map filenames are 4 characters long. Map data for predicate Pname from database DB are looked for in DB(Pname.Ext) (see bio_db_paths/0). Extension, Ext, depends on the current bio_db database interface (see bio_db_interface/1), and it is sqlite if the interface is prosqlite and pl otherwise.

The name convention for graphs is

  ?- strg_homs_edge_symb( Symb1, Symb2, W ).
  S1 = 'A1BG',
  S2 = 'ABAT',
  W = 360 ;
  S1 = 'A1BG',
  S2 = 'ABCC6',
  W = 158 .

The first part indicates the database and the second one the organism/species. Graph data for predicate Pname from database DB are looked for in bio_db_data(graphs/DB/Pname.Ext) (see bio_db_paths/1). Extension, Ext, depends on the current bio_db database interface (see bio_db_interface/1), and it is sqlite if the interface is prosqlite and pl otherwise.

Bio_db supports four db interfaces: prolog, prosqlite, berkeley and rocks. The first one is via Prolog fact bases, which is the default. The second is an interface to SQLite via pack(prosqlite) while the third and fourth work with the SWI-Prolog packs bdb and rocksdb. The underlying mechanisms are entirely transparent to the user. In order to use the sqlite data sources pack(prosqlite) needs to be installed via the pack manager

 ?- pack_install( prosqlite ).

The user can control which interface is in use with the bio_db_interface/1 predicate.

 ?- bio_db_interface( Curr ).
 Curr = prolog.

 ?- bio_db_interface( prosqlite ).

 ?- bio_db_interface( Curr ).
 Curr = prosqlite.

The type of the interface of a bio_db data predicate is determined by the interface at the time of first call.

Once the user has initiated the serving of a predicate via calling a goal to it, it is then possible to have access to information about the dataset such as download date and sourle url.

?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 1,
Symb = 'A12M1~withdrawn' .

?- bio_db_info( hgnc_homs_hgnc_symb/2, Key, Value ), write( Key-Value ), nl, fail.
interface-prolog
source_url-ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
datetime-datetime(2018,11,27,12,32,11)
data_types-data_types(integer,atom)
unique_lengths-unique_lengths(46023,46023,46023)
relation_type-relation_type(1,1)
header-row(HGNC ID,Approved Symbol)
false

As of version 2.0 there are two flags that can automate some of the interactions.

:- set_prolog_flag(bio_db_pl_from_zip, user).
:- set_prolog_flag(bio_db_del_zip, user).

In both cases the recognised values for the flags are: [user,true,false]. User is for prompting the user and true is progressing with an implicit yes answer. The first flag automates conversion from .pl.zip to .pl (which will be the case for the first time you access any dataset if you have installed bio_db_repo), and the second controls the deletion of the zip file once the .pl file has been created.

As of version 4.0 there are 91 associated data predicates serving 55444729 records.

Thanks to Jan Wielemaker for a retractall fix and for code for fast loading of precompiled fact bases (and indeed for the changes in SWI that made this possible).

author
- nicos angelopoulos
version
- 0.5 2016/09/11
- 0.7 2016/10/21, experimenting with distros in github
- 0.9 2017/03/10, small changes for pack(requires) -> pack(lib) v1.1
- 1.0 2017/10/09, to coincide with ppdp paper presentation
- 2.1 2018/11/27, introduces cells and mouse data (and fixed dependency of 2.0)
- 2.4 2019/04/02, test: bio_db_stats, new mouse db predicates, iface: bio_db_data_predicate/4
- 2.5 2019/04/22, edge_strg_symb/4; bio_db_organism/1,2; go_id/2,3
- 2.6 2019/05/08, changed to organism alias interface; evidence in gont maps
- 2.7 2019/05/12, edge_strg_symb/4 -> org_edge_strg_symb/4
- 3.0 2019/05/15, paper submission
- 3.1 2020/03/09, fixed lib; no unigene
- 3.2 2020/09/18, include mouse ense + fixes/updates on building scripts
- 3:4 2021/05/10, removed edge_gont_includes/2 (reciprocal of is_a), and edge_gont_consists_of/2 (reciprocal of part_of/2)
- 3:6 2021/12/04, fixed pack_errors and map_ense_mouse_enst_chrl/5; bio_db_stats.pl version 0.2
- 4:1 2022/12/29, huge re-config of data predicate names + reac-tome (maps) + chicken
- 4:2 2023/06/06, support for pig
- 4:3 2023/10/05, mult for multi organisms; vgnc database; ncbi taxonomy db; build-reorganisation
- 4:4 2024/04/05, db(ncbi) preds were complete rehaul, better and more complete db(reactome) support, fixed pig cells
- 4:5 2024/04/05, fixed certificate issue when downloading individual files of bio_db_repo
- 4:6 2024/10/17, gg7w for loghorn chicken, hgnc change of base URL
- 4:7 2025/3/28, reactome predicate in sus scrofa; missing is_symbol/2 for chicken and pig
See also
- doc/Releases.txt for version details
- bio_db_data_predicate/4 for a way to enumerate all data predicates
- cell/ for the definitions of the data predicates

*/

 bio_db_paths
Initialisation call- setting up path aliases.

There are two main directory repositories the predicate deals with: (a) the bio_db installed databases root (alias bio_db_data), and (b) the root of downloaded databases (alias bio_db_downloads). Optionally a top directory of which both (a) and (b) are subdirs can be defined (alias bio_db). The default value for alias bio_db is a made-up pack directory pack(bio_db_repo). The default for bio_db_data is sub directory data of alias bio_db, while bio_db_downloads defaults to sub directory downloads of the alias bio_db. The canonical subdirectory name for (a) is data and for (b) is downloads.

pack(bio_db_repo) can also be installed as a complete package from SWI's manager.

?- pack_install( bio_db_repo ).

This will install all the Prolog database files. The single tar and gzipped file is 246 Mb in size and the fully expanded version of a Prolog installation can take up to 3.1Gb. The precise size depends on how many tables are accessed at least once (each producing an expanded .pl and a .qlf file).

Directory locations for (a) and (b) above can be given as either prolog flags with key bio_db_root and bio_dn_root respectively or via environment variables BioDbRoot and BioDnRoot.

Installed root alias(bio_db_data) contains sub-dirs

graphs
for graphs; string and reactome
maps
for all the supported maps

The above are mapped to aliases bio_graphs and bio_maps respectively. Within each of these sub-directories there is further structure based on the database the set was originated.

Downloaded root alias(bio_db_downloads) may contain sub-dirs

hgnc
data from HGNC database
ncbi
data from NCBI database
reactome
data from Reactome database
string
data from string database
uniprot
protein data from EBI
ense
ensembl database

Alias bio_db_downloads is only useful if you are downloading data files directly from the supported databases.

See

?- absolute_file_name( packs(bio_db(auxil)), Auxil ), ls( Auxil ).

for examples of how these can be used.

For most users these aliases are not needed as the library manages them automatically.

To be done
- transfer datasets and downloads to new pack location when running on newly installed SWI version upgrade.

*/

  660bio_db_paths :-
  661    bio_db_paths_root,
  662    bio_db_paths_installed,
  663    bio_db_paths_installed_sub,
  664    bio_db_paths_downloaded.
  665
  666bio_db_paths_root :-
  667    bio_db_setting( bio_db_root, Root ),
  668    !,
  669    bio_db_path_new( bio_db, Root ).
  670bio_db_paths_root.
  671
  672bio_db_paths_installed :-
  673    bio_db_setting( bio_db_data_root, DbRoot ),
  674    !,
  675    bio_db_path_new( bio_db_data, DbRoot ).
  676bio_db_paths_installed :-
  677    user:file_search_path( bio_db, BioDb ),
  678    os_path_1( BioDb, data, BioDbData ),
  679    % exists_directory( DbRoot ),
  680    !,
  681    bio_db_path_new( bio_db_data, BioDbData ).
  682bio_db_paths_installed :-
  683    throw( missing_setting(bio_db_data_root) ).
  684
  685bio_db_paths_downloaded :-
  686    bio_db_setting( bio_db_downloads_root, DnRoot ),
  687    !,
  688    bio_db_path_new( bio_db_downloads, DnRoot ).
  689bio_db_paths_downloaded :-
  690    user:file_search_path( bio_db_downloads_root, BioRoot ),
  691    os_path_1( BioRoot, downloads, DnRoot ),
  692    exists_directory( DnRoot ),
  693    !,
  694    bio_db_path_new( bio_db_downloads, DnRoot ).
  695
  696bio_db_paths_installed_sub :-
  697    user:file_search_path( bio_db_data, DbRoot ),
  698    findall( Sub, bio_db_sub(Sub), Subs ),
  699    maplist( bio_db_paths_installed_sub(DbRoot), Subs ),
  700    !.
  701
  702bio_db_paths_installed_sub( DbRoot, Sub ) :-
  703    os_path_1( DbRoot, Sub, AbsSub ),
  704    % exists_directory( Abs ),
  705    % directory_files( Abs, DbSubs ), % os_dirs
  706    ( atom_concat(SubSingular,'s',Sub) -> true; SubSingular = Sub ),
  707    atom_concat( bio_, SubSingular, BioDbSub ),
  708    bio_db_path_new( BioDbSub, AbsSub ),
  709    % os_path_1( AbsSub, Db, AbsDb )
  710    % bio_db_path_new( Db, AbsDb ),
  711    % bio_db_source( Sub, Db ),
  712    findall( DbSub, bio_db_source(Sub,DbSub), DbSubs ),
  713    maplist( bio_db_paths_installed_sub_dbs(AbsSub,Sub), DbSubs ),
  714    !.
  715bio_db_paths_installed_sub( _DbRoot, _Sub ).
  716
  717bio_db_paths_installed_sub_dbs( Abs, Sub, Db ) :-
  718    bio_db_source( Sub, Db ),
  719    os_path_1( Abs, Db, Full ),
  720    bio_db_path_new( Db, Full ).
  721% bio_db_paths_installed_sub_dbs( _Abs, _Sub ).
  722
  723bio_db_setting( PlSet, Value ) :-
  724    current_prolog_flag( PlSet, Value ),
  725    debug( bio_db, 'bio_db setting via flag: ~w, set to: ~w', [PlSet,Value] ),
  726    !.
  727bio_db_setting( PlSet, Value ) :-
  728    atomic_list_concat( Parts, '_', PlSet ),
  729    maplist( upcase_first, Parts, Arts ),
  730    atomic_list_concat( Arts, EnvVar ),
  731    getenv( EnvVar, Value ),
  732    debug( bio_db, 'bio_db setting via env: ~w, setting: ~w, set to: ~w', [EnvVar,PlSet,Value] ),
  733    !.
  734bio_db_setting( PlSet, Value ) :-
  735    bio_db_setting_default( PlSet, Value ).
  736
  737bio_db_path_new( Alias, New ) :-
  738    user:file_search_path( Alias, Old ),
  739    bio_db_path_new_exists( Alias, Old, New ),
  740    !.
  741bio_db_path_new( Alias, Path ) :-
  742    debug( bio_db, 'Asserting search alias: ~w, to ~p', [Alias,Path] ),
  743    assert( user:file_search_path(Alias,Path) ).
  744
  745bio_db_path_new_exists( _Alias, Old, Old ) :-
  746    !.
  747bio_db_path_new_exists( Alias, Old, New ) :-
  748    throw( fixme(alias_exists(Alias,Old,New)) ).
  749
  750bio_db_path_exists( Alias ) :- % fixme: is this called from anywhere ?
  751    throw( fixme(bio_db_paths_installed/0,search_path_exists(Alias)) ).
  752
  753upcase_first( Atom, Upped ) :-
  754    sub_atom( Atom, 0, 1, _, Flw ), 
  755    upcase_atom( Flw, Fup ),
  756    atom_length( Atom, Len ),
  757    Ken is Len - 1,
  758    sub_atom( atom, 1, Ken, 0, Tail ),
  759    atom_concat( Fup, Tail, Upped ).
 bio_db_version(-Vers, -Date)
Version Mj:Mn:Fx, and release date date(Y,M,D).
?- bio_db_version( V, D ).
V = 4:7:0,
D = date(2025, 3, 28).
author
- Nicos Angelopoulos
version
- 4:7 2025/3/28, fixes, and building scripts
See also
- bio_db_data_predicate/4 (which should be generated for each new version)
- doc/Releases.txt for more detail on change log
- module documentation for brief comments on versioning history of this pack

*/

  778bio_db_version(4:7:0, date(2025,3,28)).
 bio_db_citation(-Atom, -Bibterm)
This predicate succeeds once for each publication related to this library. Atom is the atom representation suitable for printing while Bibterm is a bibtex(Type,Key,Pairs) term of the same publication. Produces all related publications on backtracking.
  788bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  789  Atom = 'Accessing biological data as Prolog facts.\nNicos Angelopoulos and Jan Wielemaker. In Proceedings of 19th International Symposium on Principles and Practice of Declarative Programming, Namur, Belgium, October, 2017 (PPDP\'17), 10 pages.',
  790  Type = inproceedings,
  791  Key  = 'AngelopoulosN_GiamasG_2015',
  792  Pairs = [
  793               title  = 'Accessing biological data as Prolog facts',
  794               author = 'Nicos Angelopoulos and Jan Wielemaker',
  795               booktitle= '19th International Symposium on Principles and Practice of Declarative Programming',
  796               year = 2017,
  797               month = 'October',
  798               address= 'Namur, Belgium'
  799               % url     = 'http://ceur-ws.org/Vol-1433/tc_74.pdf'
  800     ].
  801
  802bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  803  Atom = 'A logical approach to working with biological databases.\nNicos Angelopoulos and Georgios Giamas.\nProceedings of the 31st International Conference on Logic Programming (ICLP 2015) Accepted as a technical communication.\nCork, Ireland. September 2015.',
  804  Type = inproceedings,
  805  Key  = 'AngelopoulosN_GiamasG_2015',
  806  Pairs = [
  807               author = 'Nicos Angelopoulos and Georgios Giamas',
  808               title  = 'A logical approach to working with biological databases',
  809               booktitle= 'Technical Communication in Proceedings of the 31st International Conference on Logic Programming (ICLP 2015)',
  810               year = 2015,
  811               month = 'September',
  812            address= 'Cork, Ireland',
  813               url     = 'http://ceur-ws.org/Vol-1433/tc_74.pdf'
  814     ].
  815
  816bio_db_citation( Atom, bibtex(Type,Key,Pairs) ) :-
  817    Atom = 'Working with biological databases.\nNicos Angelopoulos and Georgios Giamas.\n1th Workshop on Constraint Based Methods for Bioinformatics (2015)\nCork, Ireland. September 2015',
  818    Type    = inproceedings,
  819    Key  = 'AngelopoulosN_GiamasG_2015a',
  820    Pairs = [
  821               author = 'Nicos Angelopoulos and Georgios Giamas',
  822            title  = 'Working with biological databases',
  823            booktitle = '11th Workshop on Constraint Based Methods for Bioinformatics (2015)',
  824            year = 2015,
  825            month = 'September',
  826            address = 'Cork, Ireland',
  827            url = 'http://clp.dimi.uniud.it/wp/wp-content/uploads/2015/08/WCB_2015_paper_1.pdf'
  828    ].
 bio_db_source(?Type, ?Db)
True if Db is a source database for bio_db serving predicate of type Type. Type is either maps or graphs.

The databases are

To be done
- fixme: this gets out of synch for new dbs, maybe have it in other location or throw a message if it fails ? */
  844bio_db_source(maps, hgnc).
  845bio_db_source(maps, gont).
  846bio_db_source(maps, ncbi).
  847bio_db_source(maps, unip).
  848bio_db_source(maps, vgnc).
  849bio_db_source(graphs, string ).
  850% bio_db_source( graphs, gont ). % these are used for aliases, so gont already exists
  851bio_db_source( graphs, reactome ).
  852
  853bio_db_sub(graphs).
  854bio_db_sub(maps).
  855
  856bio_db_setting_default( 'bio_db_root', BioDbRoot ) :-
  857    absolute_file_name( pack(bio_db), BioDb ),
  858    directory_file_path( Dir, bio_db, BioDb ),
  859    directory_file_path( Dir, bio_db_repo, BioDbRoot ).
  860bio_db_setting_default( 'bio_db_data_root', BioDbData ) :-
  861    absolute_file_name( bio_db(data), BioDbData ).
  862bio_db_setting_default( 'bio_db_downloads_root', BioDbDnloads ) :-
  863    absolute_file_name( bio_db(downloads), BioDbDnloads ).
 bio_db_interface(?Iface, -Status)
Interrogate the installation status (true or false) of bio_db's known interfaces. true if the interface dependencies are installed and the interface can be used, and =|false=| otherwise.

Can be used to enumerate all known or installed interfaces.

 ?- findall( Iface, bio_db_interface(Iface,_), Ifaces ).
 Ifaces = [prolog, berkeley, prosqlite, rocks].
  878bio_db_interface( prolog, true ).
  879bio_db_interface( berkeley, Bool ) :-
  880    ( catch( use_module( library(bdb) ), _, fail ) -> Bool = true; Bool = false ).
  881bio_db_interface( prosqlite, Bool ) :-
  882    ( catch( use_module( library(prosqlite) ), _, fail ) -> Bool = true; Bool = false ).
  883bio_db_interface( rocks, Bool ) :-
  884    ( catch( use_module( library(rocksdb) ), _, fail ) -> Bool = true; Bool = false ).
 bio_db_interface(?Iface)
Interrogate or set the current interface for bio_db database predicates. By default Iface = prolog. Also supported: prosqlite (needs pack proSQLite), berkley (needs SWI's own library(bdb) and rocks (needs pack(rocskdb).
?- bio_db_interface( Iface ).
Iface = prolog.

?- debug( bio_db ).
true.

?- bio_db_interface( wrong ).
% Could not set bio_db_interface prolog_flag, to: wrong, which in not one of: [prolog,prosqlite,berkeley,rocks]
false.

?- bio_db_interface( Iface ).
Iface = prolog.

?- hgnc_homs_symb_hgnc( 'LMTK3', Hgnc ).
% Loading prolog db: /usr/local/users/nicos/local/git/lib/swipl-7.1.32/pack/bio_db_repo/data/maps/hgnc/hgnc_homs_symb_hgnc.pl
Hgnc = 19295.

?- bio_db_interface( prosqlite ).
% Setting bio_db_interface prolog_flag, to: prosqlite
true.

?- hgnc_homs_prev_symb( Prev, Symb ).
% prosqlite DB:table hgnc:hgnc_homs_prev_symb/2 is not installed, do you want to download (Y/n) ?
% Execution Aborted
?- hgnc_homs_prev_symb( Prev, Symb ).
% Loading prosqlite db: /usr/local/users/nicos/local/git/lib/swipl-7.1.32/pack/bio_db_repo/data/maps/hgnc/hgnc_homs_prev_symb.sqlite
Prev = 'A1BG-AS',
Symb = 'A1BG-AS1' ;

In which case Iface is prosqlite.

  926bio_db_interface( Iface ) :-
  927    var( Iface ),
  928    !,
  929    current_prolog_flag( bio_db_interface, IfacePrv ),
  930    bio_db_interface_known( IfacePrv, Iface ).
  931bio_db_interface( Iface ) :-
  932    ground( Iface ),
  933    bio_db_interface_set( Iface ).
  934
  935bio_db_info( Iface, Pid, Key, Value ) :-
  936    var( Iface ),
  937    !,
  938    bio_db_info_gen( Iface, Pid, Key, Value ).
  939bio_db_info( Iface, Pid, Key, Value ) :-
  940    atom( Iface ),
  941    bio_db_info_source( Iface, Pid, Key, Value ).
  942
  943bio_db_info_gen( Iface, Pid, Key, Value ) :-
  944    bio_db_interface_extensions( Iface, _ ),
  945    bio_db_info( Iface, Pid, Key, Value ).
  946
  947bio_db_install_defaults( [org(hs),interactive(true)] ).
 bio_db_install(+PidOrPname, +Iface)
 bio_db_install(+PidOrPname, +Iface, +Opts)
Install the interface (Iface) for bio_db database that corresponds to predicate identifier (Pid) or a predicate name (Pname). Note that this is not necessary to do in advance as the library will auto load missing Iface and Pid combinations when first interrogated.

Opts

interactive(Ictive=true)
set false to accept default interactions
org(Org=hs)
organism

*/

  963bio_db_install( PorP, Iface ) :-
  964    bio_db_install( PorP, Iface, [] ).
  965bio_db_install( PorP, Iface, OptS ) :-
  966    options_append( bio_db_install, OptS, Opts ),
  967    options( interactive(Ictive), Opts ),
  968    options( org(Org), Opts ),
  969    bio_db_porp_call( PorP, bio_db_install/2, Call ),
  970    bio_db_map_call_db_pname( Call, Db, Pname, Arity ),
  971    ( bio_db_info(Iface,PorP,_,_) -> 
  972        Mess = '~a DB:table ~w is already installed. It will be overwritten. Continue',
  973        Args = [Iface,PorP],
  974        ui_yes_no( Ictive, Mess, Args, y, Reply ), 
  975        ( Reply == true ->
  976            bio_db_interface_extensions( Iface, [Ext|_] ),
  977            ( bio_db_pname_source(Org,Db,Pname,read,Ext,File) ->
  978                delete_installed( Ext, File )
  979                ;
  980                true
  981            ),
  982            bio_db_serve_pname( false, false, Org, Db, Pname, Arity, Iface, Call )
  983            ;
  984            % ensure qlf is also installed, before failing
  985            ( Iface == prolog ->
  986                ( bio_db_pname_source(Org,Db,Pname,read,qlf,_ExistFile) ->
  987                    Mess1 = 'Qlf is also istalled.',
  988                    phrase('$messages':translate_message(debug(Mess1,[])), Lines1),
  989                    print_message_lines(current_output, kind(informational), Lines1)
  990                    ;
  991                    bio_db_pname_source( Org, Db, Pname, read, pl, File ),
  992                    bio_db_load_call( false, Pname, Arity, Iface, File, true )
  993                )
  994                ;
  995                true
  996            )
  997        )
  998        ;
  999        bio_db_serve_pname( false, false, Db, Pname, Arity, Iface, Call )
 1000    ).
 1001
 1002delete_installed( rocks, Dir ) :-
 1003    delete_installed_db_dir_and_info( Dir ).
 1004delete_installed( db, File ) :-
 1005    delete_installed_db_file_and_info( File ).
 1006delete_installed( sqlite, File ) :-
 1007    delete_installed_db_file_and_info( File ).
 1008delete_installed( pl, File ) :-
 1009    delete_installed_db_file_and_info( File ),
 1010    file_name_extension( Stem, _Ext, File ),
 1011    file_name_extension( Stem,  qlf, Qile ),
 1012    ( exists_file(Qile) ->
 1013        debug( bio_db, 'Deleting file: ~p', Qile ),
 1014        delete_file(Qile)
 1015        ;
 1016        true
 1017    ).
 1018
 1019delete_installed_db_file_and_info( File ) :-
 1020    ( exists_file(File) ->
 1021        debug( bio_db, 'Deleting file: ~p', File ),
 1022        delete_file( File )
 1023        ;
 1024        true
 1025    ),
 1026    file_name_extension( Stem, Ext, File ),
 1027    atom_concat( Stem, '_info', InfoStem ),
 1028    file_name_extension( InfoStem, Ext, InfoFile ),
 1029    ( exists_file(InfoFile) ->
 1030        debug( bio_db, 'Deleting file: ~p', InfoFile ),
 1031        delete_file( InfoFile )
 1032        ;
 1033        true
 1034    ).
 1035
 1036delete_installed_db_dir_and_info( Dir ) :-
 1037    ( exists_directory(Dir) ->
 1038        debug( bio_db, 'Deleting directory: ~p', Dir ),
 1039        delete_directory_contents( Dir )
 1040        ;
 1041        true
 1042    ),
 1043    file_name_extension( Stem, Ext, Dir ),
 1044    atom_concat( Stem, '_info', InfoStem ),
 1045    file_name_extension( InfoStem, Ext, InfoDir ),
 1046    ( exists_directory(InfoDir) ->
 1047        debug( bio_db, 'Deleting directory: ~p', InfoDir ),
 1048        delete_directory_contents( InfoDir )
 1049        ;
 1050        true
 1051    ).
 bio_db_porp_call(PorP, CallerId, Call)
Constract a generic call from predicate id or predicate name (Porp).

*/

 1058bio_db_porp_call( Porp, Cid, Call ) :-
 1059    ground( Porp ),
 1060    bio_db_porp_call_ground( Porp, Cid, Call ).
 1061
 1062bio_db_porp_call_ground( Pname/Arity, _Cid, Call ) :- !,
 1063    functor( Call, Pname, Arity ).
 1064bio_db_porp_call_ground( Pname, Cid, Call ) :-
 1065    % find the name from the module def of bio_db. A bit hackish.
 1066    atom( Pname ),
 1067    absolute_file_name( pack('bio_db/prolog/bio_db.pl'), BioDbF, [access(exist)] ),
 1068    open( BioDbF, read, In ),
 1069    read( In, ModuleDef ), 
 1070    close( In ),
 1071    ModuleDef = (:- module( bio_db, Pids ) ),
 1072    ( memberchk(Pname/Arity,Pids) ->
 1073        true
 1074        ;
 1075        throw( not_a_db_pred(Pname), [pack(bio_db),pred(Cid)] )
 1076    ),
 1077    functor( Call, Pname, Arity ).
 bio_db_predicate_name(+PidOrPname, -Pname)
Auxiliary predicate that strips the Pname of Pid or assumes atomic PidOrPname to be a Pname.
To be done
- check it looks like a db name. this is only useful for db_preds.

*/

 1087bio_db_predicate_name( Pid, Pname ) :-
 1088    ground( Pid ),
 1089    bio_db_predicate_name_ground( Pid, Pname ).
 1090
 1091bio_db_predicate_name_ground( Pname/_, Pname ) :- !.
 1092bio_db_predicate_name_ground( Pname, Pname ) :-
 1093    atom( Pname ).
 1094
 1095bio_db_predicate_db( Pid, Db ) :-
 1096    bio_db_predicate_name( Pid, Pname ),
 1097    atomic_list_concat( [_,Db|_], '_', Pname ).
 1098
 1099bio_db_info_source( Iface, Pid, Key, Value ) :-
 1100    bio_db_predicate_name( Pid, Pname ),
 1101    bio_db_predicate_db( Pname, Db ),
 1102    bio_db_pname_source( Db, Pname, read, Iface, DbF ),
 1103    ( bio_db_interface_initialisation(Iface) -> true; true ),
 1104    bio_db_info_db_file( Iface, Pid, DbF, Key, Value ).
 1105
 1106bio_db_info_db_file( prolog, _Pid, DbF, Key, Value ) :-
 1107    bio_db_pl_info( DbF, Infos ),
 1108    member( Info, Infos ),
 1109    arg( 1, Info, Key ),
 1110    arg( 2, Info, Value ).
 1111bio_db_info_db_file( prosqlite, Pid, DbF, Key, Value ) :-
 1112    bio_db_predicate_info( Pid, Info ),
 1113    % bio_db_source_info( DbF, InfoF ),
 1114    sqlite_connect( DbF, Info ),
 1115    atom_concat( 'Select * from ', Info, Query ),
 1116    findall( Row, sqlite_query(Info,Query,Row), Rows ),
 1117    sqlite_disconnect( Info ),
 1118    member( row(Key,ValueAtom), Rows ),
 1119    ( catch(atom_to_term(ValueAtom,Value,_),_,fail) ->
 1120        true
 1121        ;
 1122        Value = ValueAtom
 1123    ).
 1124bio_db_info_db_file( berkeley, Pid, DbF, Key, Value ) :-
 1125    bio_db_info_interface_infos( berkeley, Pid, DbF, _,  KVs ),
 1126    member( Key-Value, KVs ).
 1127bio_db_info_db_file( rocks, Pid, DbF, Key, Value ) :-
 1128    bio_db_info_interface_infos( rocks, Pid, DbF, _,  KVs ),
 1129    member( Key-Value, KVs ).
 bio_db_info(+Pid, ?Iface)
 bio_db_info(+Pid, ?Key, -Value)
 bio_db_info(+Iface, +Pid, ?Key, -Value)
Retrieve information about bio_db database predicates.

When Iface is not given, Key and Value are those of the interface under which Pid is currently open for access. The predicate errors if Pid is not open for serving yet.

The bio_db_info/2 version succeeds for all interfaces Pid is installed- it is simply a shortcut to: bio_db_info( Iface, Pid, _, _ ).

The Key-Value information returned are about the particular data predicate as saved in the specific backend.

Key

source_url
an atomic value of the URL
datetime
datetime/6 term
data_types
data_types/n given the primary type for each argyument in the data table
header
row/n term, where n is the number of columns in the data table
unique_lengths
unique_lengths/3 term, lengths for the ordered sets of: Ks, Vs and KVs
relation_type(From, TO)
where From and To take values in 1 and m
?- bio_db_info( Iface, hgnc_homs_hgnc_symb/2, Key, Value), write( Iface:Key:Value ), nl, fail.
prolog:source_url:ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
prolog:datetime:datetime(2016,9,10,0,2,14)
prolog:data_types:data_types(integer,atom)
prolog:unique_lengths:unique_lengths(44266,44266,44266)
prolog:relation_type:relation_type(1,1)
prolog:header:row(HGNC ID,Approved Symbol)
prosqlite:source_url:ftp://ftp.ebi.ac.uk/pub/databases/genenames/hgnc_complete_set.txt.gz
prosqlite:datetime:datetime(2016,9,10,0,2,14)
prosqlite:data_types:data_types(integer,atom)
prosqlite:unique_lengths:unique_lengths(44266,44266,44266)
prosqlite:relation_type:relation_type(1,1)
prosqlite:header:row(HGNC ID,Approved Symbol)

*/

 1183bio_db_info( PorP, Iface ) :-
 1184    bio_db_info( Iface, PorP, _, _ ),
 1185    !.
 1186
 1187bio_db_info( Pid, Key, Value ) :-
 1188    bio_db_db_predicate( Pid ),
 1189    !,
 1190    bio_db_info_pred( Pid, Key, Value ).
 1191bio_db_info( Pid, _Key, _Value ) :-
 1192    throw( not_a_db_pred(Pid), [pack(bio_db),pred(bio_db_info/3)] ).
 1193
 1194bio_db_info_pred( Pid, Key, Value ) :-
 1195    bio_db_handle( Pid, Iface, File, Handle, _Mod ),
 1196    !,
 1197    bio_db_info_interface( Iface, Pid, File, Handle, Key, Value ).
 1198
 1199bio_db_info_pred( Pid, _Key, _Value ) :-
 1200    throw( close_to_info(Pid), [pack(bio_db),pred(bio_db_info/3)] ).
 1201
 1202bio_db_info_interface_kvs( Iface, Pid, File, Handle, KVs ) :-
 1203    bio_db_info_interface_infos( Iface, Pid, File, Handle, Pairs ),
 1204    \+ var( KVs ),
 1205    bio_db_info_interface_kvs( KVs, Pairs ).
 1206    
 1207bio_db_info_interface_kvs( [], _ ).
 1208bio_db_info_interface_kvs( [K-V|T], Pairs ) :-
 1209    memberchk( K-V, Pairs ),
 1210    bio_db_info_interface_kvs( T, Pairs ).
 1211    
 1212bio_db_info_interface( Iface, Pid, File, Handle, Key, Value ) :-
 1213    bio_db_info_interface_infos( Iface, Pid, File, Handle, KVs ),
 1214    member( Key-Value, KVs ).
 1215    
 1216bio_db_info_interface_infos( Callable, Pid, _File, _Handle, Pairs ) :-
 1217    memberchk( Callable, [prolog,prosqlite] ),
 1218    !,
 1219    bio_db_predicate_info( Pid, InfoName ),
 1220    Goal =.. [InfoName,Key,Value],
 1221    findall( Key-Value, ( (Key = interface, Value = Callable) ;  bio_db:Goal ), Pairs ).
 1222bio_db_info_interface_infos( berkeley, _Pid, File, _Handle, KVs ) :-
 1223    % fixme add key = Berkley interface
 1224    % ( ((Key=interface, Value=berkeley); bdb_enum( Handle, info+Key , Value)) ).
 1225    bio_db_source_info( File, InfoF ),
 1226
 1227    bdb_open( InfoF, read, InfoHandle, [key(atom),value(term)] ),
 1228    findall( AKey-AValue, bdb_enum(InfoHandle,AKey,AValue), Pairs ),
 1229    bdb_close( InfoHandle ),
 1230    KVs = [interface-berkeley|Pairs].
 1231bio_db_info_interface_infos( rocks, _Pid, File, _Handle, KVs ) :-
 1232    % fixme add key = Berkley interface
 1233    file_name_extension( Stem, Ext, File ),
 1234    atom_concat( Stem, '_info', InfoStem ),
 1235    file_name_extension( InfoStem, Ext, InfoFile ),
 1236    rocks_open( InfoFile, InfoHandle, [key(atom),value(term)] ),
 1237    findall( AKey-AValue, rocks_enum(InfoHandle,AKey,AValue), Pairs ),
 1238    rocks_close( InfoHandle ),
 1239    KVs = [interface-rocks|Pairs].
 bio_db_close(+Pid)
Close the current serving of predicate Pid. Next time a Pid Goal is called the current interface (bio_db_interface/1) will be used to establish a new server and resolve the query.

Predicate throws an error if the Pid does not correspond to a db_predicate or if it is not currently servered by any of the backends.

?- bio_db_interface(prosqlite).
?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 506,
Symb = 'ANT3~withdrawn' .

?- bio_db_close( hgnc_homs_hgnc_symb/2 ).
?- bio_db_interface( prolog ).
?- hgnc_homs_hgnc_symb( Hgnc, Symb ).
Hgnc = 1,
Symb = 'A12M1~withdrawn' .
?- bio_db_close(hgnc_homs_hgnc_symb/2).

*/

 1265bio_db_close( Pid ) :-
 1266    bio_db_db_predicate( Pid ),
 1267    !,
 1268    bio_db_close_pred( Pid ).
 1269bio_db_close( Pid ) :-
 1270    throw( not_a_db_pred(Pid), [pack(bio_db),pred(bio_db_close/1)] ).
 1271
 1272bio_db_close_pred( Pid ) :-
 1273    bio_db_handle( Pid, Iface, File, Handle, Mod ),
 1274    !,
 1275    bio_db_close_connection( Iface, Handle ),
 1276    Pid = Pname/Arity,
 1277    functor( Head, Pname, Arity ),
 1278    retractall( Head ),
 1279    atom_concat( Pname, '_info', InfoPname ),
 1280    functor( InfoHead, InfoPname, 2 ),
 1281    retractall( InfoHead ),
 1282    retractall( bio_db_handle(Pid,Iface,File,Handle,Mod) ),
 1283    assert( (Head :- bio_db_serve(Head)) ).
 1284bio_db_close_pred( Pid ) :-
 1285    throw( not_served(Pid), [pack(bio_db),pred(db_close/1)] ),
 1286    fail.
 1287
 1288bio_db_close_connection( prosqlite, Handle ) :-
 1289    sqlite_disconnect( Handle ).
 1290bio_db_close_connection( prolog, _Handle ).
 1291bio_db_close_connection( berkeley, Handle ) :- 
 1292    bdb_close( Handle ).
 1293bio_db_close_connection( rocks, Handle ) :- 
 1294    rocks_close( Handle ).
 bio_db_close_connections
Close all currently open bio_db backend connections.

This is called by bio_db at halt.

*/

 1303bio_db_close_connections:-
 1304    findall( Pid, bio_db:bio_db_handle(Pid,_B,_C,_D,_Mod), Pids ),
 1305    member( Pid, Pids ),
 1306    bio_db_close( Pid ),
 1307    fail.
 1308bio_db_close_connections.
 bio_db_db_predicate(?Pid)
True if Pid is a predicate identifier which is defined in current bio_db session, and contains 4 _ sep tokens, each of length 4. When Pid is a free variable all such predicate identifiers are returned on backtracking.

For a statically produced list of all data predicates in bio_db see, bio_db_data_predicate/4.

  ?- bio_db_db_predicate( hgnc_homs_hgnc_symb/2 ).
  true.

  ?- bio_db_db_predicate( X ).
  X = hgnc_homs_symb_ncbi/2 ;
  X = ense_homs_enst_ensg/2 ;
  ...

*/

 1330bio_db_db_predicate( Pname/Arity) :-
 1331    ground(Pname/Arity), !,
 1332    functor(Head,Pname,Arity),
 1333    bio_db_data_predicate_name(Pname),
 1334    % predicate_property(bio_db:Head, exported), !.
 1335    predicate_property(bio_db:Head, defined), !.  
 1336    % fixme: when called from closing,  maybe do a bit of checking ? \+ (rule=:=1,clauses=:=1)
 1337bio_db_db_predicate( Pname/Arity) :-
 1338    % module_property(bio_db, exports(List)),
 1339    % member(Pname/Arity, List),
 1340    current_predicate( bio_db:Pname/Arity ),
 1341    bio_db_data_predicate_name(Pname).
 1342
 1343bio_db_data_predicate_name( Pname ) :-
 1344     atomic_list_concat( Parts, '_', Pname ),
 1345     maplist( atom_length, Parts, [4,4,4,4] ),
 1346     !.
 1347bio_db_data_predicate_name( _Db, _Parts, Pname, Arity ) :-
 1348    throw( not_a_db_pred(Pname/Arity), [pack(bio_db),pred(bio_db_close/1)] ).
 1349
 1350% map stubs, 
 1351% these are in memory iff the map is to be loaded as prolog 
 1352% and this is the first call to the pred, they get replaced
 1353% by the map data after that.
 1354% 
 1355bio_db_serve( Call ) :-
 1356    functor( Call, Pn, _ ),
 1357    ( atomic_list_concat([_,OrgPredTkn,_,_],'_',Pn) ->
 1358          ( bio_db_organism(OrgPredTkn,OrgTkn,_Org) ->
 1359               true
 1360               ; 
 1361               ( bio_db_organism(_,OrgPredTkn,_) ->
 1362                    OrgTkn = OrgPredTkn
 1363                    ;
 1364                    throw( cannot_get_org_token_for_bio_db_served(Call) )
 1365               )
 1366          )
 1367    ),
 1368    bio_db_serve( OrgTkn, Call, true ).
 1369
 1370bio_db_serve( Org, Call ) :-
 1371    bio_db_serve( Org, Call, true ).
 1372
 1373bio_db_serve( Org, Call, Load ) :-
 1374    bio_db_interface( Iface ),
 1375    bio_db_map_call_db_pname( Call, Db, Pname, Arity ),
 1376    bio_db_serve_pname( Load, true, Org, Db, Pname, Arity, Iface, Call ).
 1377
 1378bio_db_interface_set( Iface ) :-
 1379    bio_db_interface_atom( Iface ),
 1380    !,
 1381    M = 'Setting bio_db_interface prolog_flag, to: ~a',
 1382    debug( bio_db, M, Iface ),
 1383    ( bio_db_interface_initialisation(Iface) -> true; true ),
 1384    set_prolog_flag( bio_db_interface, Iface ).
 1385bio_db_interface_set( Iface ) :-
 1386    findall( Aface, bio_db_interface_atom(Aface), AllFaces ),
 1387    throw( arg_enumerate(1,AllFaces,Iface), [pack(bio_db),pred(bio_db_interface/2)] ).
 1388    
 1389bio_db_interface_extensions( prolog, [pl,''] ).
 1390bio_db_interface_extensions( prosqlite, [sqlite,''] ).
 1391bio_db_interface_extensions( berkeley, [db,''] ).
 1392bio_db_interface_extensions( rocks, [rocks,''] ).
 1393
 1394bio_db_interface_known( Prov, Iface ) :-
 1395    atomic( Prov ),
 1396    bio_db_interface_atom( Prov ),
 1397    !,
 1398    Iface = Prov.
 1399bio_db_interface_known( Prov, Def ) :-
 1400    bio_db_default_interface( Def ),
 1401    M = 'Resetting bogus bio_db_interface prolog_flag, from: ~w, to default: ~a',
 1402    debug( bio_db, M, [Prov,Def] ), % fixme: this is informational rather than debug
 1403    set_prolog_flag( bio_db_interface, Def ).
 1404
 1405% prosqlite here
 1406/*
 1407bio_db_serve_pname( load, Db, Pname, Arity, Call ) :-
 1408    current_prolog_flag( bio_db_interface, prosqlite ),
 1409    !,
 1410    Term =.. [Db,Pname],
 1411    absolute_file_name( Term, Src, [access(Mode),file_type(prolog),file_errors(fail)] ).
 1412    sqlite_connect( phones, phones_db, as_predicates(true) )
 1413    */
 bio_db_serve_pname(+LoadFlag, +Ictive, Org, Db, Pname, Arity, Iface, _Call)
LoadFlag can be one of check, true (for loading) and false for ensuring the db is installed but does not actually hot-swap it in. Ictive is a boolean with true for interactively questioning user whereas false accepts the defaults with no interupptions.

*/

 1423bio_db_serve_pname( check, _Ictive, Org, Db, Pname, _Arity, Iface, _Call ) :-
 1424    !,
 1425    % bio_db_interface_extensions( Iface, Exts ),
 1426    bio_db_interface_extensions( Iface, [Ext|_] ),
 1427    % new implementation, untested:
 1428    bio_db_pname_source( Org, Db, Pname, read, Ext, _Abs ).
 1429    % % bio_db_db_pname_source( Db, Pname, exist, Ext, Abs ),
 1430    % Rel =.. [Db|Pname],
 1431    % absolute_file_name( Rel, Abs, [extensions(Exts),access(exist)] ),
 1432    % exists_file( Abs ),
 1433
 1434bio_db_serve_pname( Load, _Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1435    bio_db_interface_extensions( Iface, [Ext|_] ),
 1436    bio_db_pname_source( Org, Db, Pname, read, Ext, File ),
 1437    % bio_db_db_pname_source( Db, Pname, exist, Ext, Load ),
 1438    % user:file_search_path( Db, _DbPath ),
 1439    !,
 1440    bio_db_load_call( Load, Pname, Arity, Iface, File, Call ).
 1441bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1442    Iface \== prolog,
 1443    bio_db_interface_extensions( prolog, [Ext|_] ),
 1444    bio_db_pname_source( Org, Db, Pname, read, Ext, File ),
 1445    Mess = '~a DB:table ~w:~w is not installed, but the Prolog db exists. Shall it be created from Prolog',
 1446    Args = [Iface,Db,Pname/Arity],
 1447    ui_yes_no( Ictive, Mess, Args, y, Reply ),
 1448    Reply == true,
 1449    % bio_db_serve_pname_from_local( Reply, Db, Pname, Arity, Iface, Load, Call ),
 1450    bio_db_pl_nonpl_interface( Iface, File, NonPlLoad ),
 1451    !,
 1452    % fixme: add logic for deleting prolog interface of downloaded db
 1453    bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call ).
 1454bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1455    % bio_db_pname_source( Db, Pname, read, prolog+zip, ZLoad ),
 1456    % bio_db_pname_source( Db, Pname, read, 'pl.zip', ZLoad ),
 1457    bio_db_pname_source( Org, Db, Pname, read, prolog+zip, ZLoad ),
 1458    !,
 1459    file_name_extension( PlLoad, zip, ZLoad ),
 1460    current_prolog_flag( bio_db_pl_from_zip, PlFromZipFlag ),
 1461    ( PlFromZipFlag == user ->
 1462        Mess = '~a DB:table ~w:~w is not installed, but the zipped prolog db exists. Shall it be created from this',
 1463        Args = [Iface,Db,Pname/Arity],
 1464        ui_yes_no( Ictive, Mess, Args, y, Reply )
 1465        ;
 1466        MessFg = '~a DB:table ~w:~w is not installed, but the zipped prolog db exists. Flag bio_db_pl_from_zip says: ~w',
 1467        message_report( MessFg, [Iface,Db,Pname/Arity,PlFromZipFlag], informational ),
 1468        Reply = PlFromZipFlag
 1469    ),
 1470    ( Reply == true ->
 1471        file_directory_name( ZLoad, Dir ),
 1472        archive_extract( ZLoad, Dir, [] ),
 1473        ( Iface \== prolog ->
 1474            bio_db_pl_nonpl_interface( Iface, PlLoad, NonPlLoad ),
 1475            bio_db_reply_delete_file( true, PlLoad )
 1476            ;
 1477            current_prolog_flag(bio_db_del_zip,DelZipFlag),
 1478            ( DelZipFlag == user ->
 1479                ZipDelMess = 'Delete the zip file: ~p',
 1480                ui_yes_no( Ictive, ZipDelMess, [ZLoad], n, ZipDelReply )
 1481                ;
 1482                MessDelFg = 'Zip file will be deleted depending on value of flag bio_db_del_zip, which is: ~w',
 1483                message_report( MessDelFg, [DelZipFlag], informational ),
 1484                ZipDelReply = DelZipFlag
 1485            ),
 1486            bio_db_reply_delete_file( ZipDelReply, ZLoad ),
 1487            NonPlLoad = PlLoad
 1488        ),
 1489        !,
 1490        bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call )
 1491        ;
 1492        % fixme: do fresh download
 1493        debug( bio_db, 'Downloading fresh zip file for: ~w', Pname/Arity ),
 1494        delete_file( ZLoad ),
 1495        file_directory_name( ZLoad, DataDir ),
 1496        directory_files( DataDir, DataFiles ),
 1497        findall( Delable-FullDel, ( member(Delable,DataFiles), 
 1498                                file_name_extension(Pname,_DelExt,Delable), 
 1499                                directory_file_path(DataDir,Delable,FullDel)
 1500                            ),
 1501                                Delables ),
 1502        maplist( bio_db_conflict_file, Delables ),
 1503        bio_db_serve_pname_reply( true, Ictive, Load, Org, Db, Pname, Arity, Iface, Call )
 1504    ).
 1505% here  fixem: 
 1506% add logic that warns if other interfaces will be 
 1507bio_db_serve_pname( Load, Ictive, Org, Db, Pname, Arity, Iface, Call ) :-
 1508    ( Iface == prolog -> 
 1509        Mess = '~a DB:table ~w:~w is not installed, do you want to download it'
 1510        ;
 1511        Mess = '~a DB:table ~w:~w is not installed, do you want to download the prolog db and then generate this interface'
 1512    ),
 1513    Args = [Iface,Db,Pname/Arity],
 1514    ui_yes_no( Ictive, Mess, Args, y, Reply ),
 1515    bio_db_serve_pname_reply( Reply, Ictive, Load, Org, Db, Pname, Arity, Iface, Call ).
 1516
 1517bio_db_serve_pname_reply( false, _Ictive, _Load, _Org, _Db, _Pname, _Arity, _Iface, _Call ) :-
 1518    abort.
 1519bio_db_serve_pname_reply( true, Ictive, Load, Org, Db, Pname, Arity, Iface, Call ) :-
 1520    stoics( Stoics ),
 1521    Mess = 'Downloading dataset from server: ~w',
 1522    phrase('$messages':translate_message(debug(Mess,[Stoics])), Lines),
 1523    print_message_lines(current_output, kind(informational), Lines),
 1524    atomic_list_concat( [_,_,Comp3|_], '_', Pname ), 
 1525    bio_db_predicate_type_sub_dir( Comp3, Sub ),
 1526    atomic_list_concat( [Stoics,Org,Sub,Db,Pname], '/', StoicsStem ),
 1527    atomic_list_concat( [StoicsStem,pl,zip], '.', StoicsFile ),
 1528    bio_db_pname_source( Org, Db, Pname, none, 'pl.zip', Local ),
 1529    debug( bio_db, 'Trying to get: ~w', url_file(StoicsFile,Local,insecure(true)) ),
 1530    % directory_file_path( LocDir, _, Local ),
 1531    file_directory_name( Local, LocalDir ),
 1532    % here
 1533    bio_db_repo_skeleton_pack,
 1534    make_directory_path( LocalDir ),
 1535    url_file( StoicsFile, Local, insecure(true) ),  % 2024.04.05 you needed latest stoics_lib; fixme: temp
 1536    % fixme: delete the .pl file here if it exists before unpacking ?  % although this is inconsistent with calling logic
 1537    archive_extract( Local, LocalDir, [] ),
 1538    % here( 'Unzip the pl, create the Iface and if not Iface==Prolog, suggest deleting the .pl db' ),
 1539    file_name_extension( LocalPlF, zip, Local ),
 1540    directory_files( LocalDir, LocalFiles ),
 1541    bio_db_interface_extensions( Iface, [Ext|_] ),
 1542    findall( Delable-FullDel, ( member(Delable,LocalFiles), 
 1543                            file_name_extension(Pname,DelExt,Delable), 
 1544                            \+ memberchk(DelExt,['pl.zip',pl,Ext]),
 1545                            directory_file_path(LocalDir,Delable,FullDel)
 1546                            ),
 1547                                Delables ),
 1548    debug( bio_db, 'Candidates for deletion: ~w', [Delables] ),
 1549
 1550    ( \+ exists_file(LocalPlF) -> 
 1551        throw( decompression_didnot_produce(LocalPlF) )
 1552        ; 
 1553        % here: ask to delete .zip file
 1554        ZipDelMess = 'Delete the zip file: ~p',
 1555        ui_yes_no( Ictive, ZipDelMess, [Local], n, ZipDelReply ),
 1556        bio_db_reply_delete_file( ZipDelReply, Local )
 1557    ),
 1558    ( Iface == prolog ->
 1559        NonPlLoad = LocalPlF
 1560        ;
 1561        bio_db_pl_nonpl_interface( Iface, LocalPlF, NonPlLoad ),
 1562        PlDelMess = 'Delete the Prolog file: ~p',
 1563        ui_yes_no( Ictive, PlDelMess, [LocalPlF], y, PlDelReply ),
 1564        bio_db_reply_delete_file( PlDelReply, LocalPlF )
 1565    ),
 1566    maplist( bio_db_conflict_file, Delables ),
 1567    % then( 'go back and make sure you deal with existing other interfaces (delete them)' ),
 1568    !,
 1569    bio_db_load_call( Load, Pname, Arity, Iface, NonPlLoad, Call ).
 1570    % we probably (now need something lighter than:
 1571    % bio_db_serve_pname( load, Db, Pname, Arity, Iface, Call ).
 1572
 1573bio_db_repo_skeleton_pack :-
 1574    absolute_file_name( pack(bio_db), BioDbD, [file_type(directory)] ),
 1575    directory_file_path( PackD, _, BioDbD ),
 1576    directory_file_path( PackD, bio_db_repo, RepoD ),
 1577    directory_file_path( RepoD, 'pack.pl', RepoPackPl ),
 1578    ( exists_file(RepoPackPl) ->
 1579        true
 1580        ;
 1581        make_directory_path( RepoD ),
 1582        ensure_loaded( pack('bio_db/auxil/lib/bio_db_repo_info') ),
 1583        findall( InfTerm, bio_db_repo_info(InfTerm), [InfNm,InfTi|Infs] ),
 1584        date_two_digit_dotted( Dotted ),
 1585        atomic_list_concat( [YrA,MnA,DyA], '.', Dotted ),
 1586        % atomic_list_concat( [Dotted,skeleton], '-', PlPackVers ),
 1587        Clauses = [InfNm,InfTi,version(Dotted)|Infs],
 1588        portray_clauses( Clauses, file(RepoPackPl) ),
 1589        atomic_list_concat( [20,YrA], FullYA ),
 1590        maplist( atom_number, [YrA,FullYA,MnA,DyA], [Yr,FullY,Mn,Dy] ), % the day gets a -skeleton suffix
 1591        atomic_list_concat( [DyA,skeleton], '-', DyPsfx ),
 1592        directory_file_path( RepoD, prolog, RepoPlD ),
 1593        make_directory_path( RepoPlD ),
 1594        directory_file_path( RepoPlD, 'bio_db_repo_version.pl', ModVersF ),
 1595        portray_clauses( [bio_db_repo_version(Yr:Mn:DyPsfx,date(FullY,Mn,Dy))], file(ModVersF) ),
 1596        directory_file_path( BioDbD, 'auxil/lib/bio_db_repo.pl', BioDbRepoPlF ),
 1597        directory_file_path( RepoPlD, 'bio_db_repo.pl', DstRepoF ),
 1598        copy_file( BioDbRepoPlF, DstRepoF )
 1599    ).
 1600
 1601bio_db_conflict_file( Delable-Full ) :-
 1602    Mess = 'Current db file might be inconsistent to new zip file. Delete db file: ~p',
 1603    Ictive = false,
 1604    % fixme: should we be passing Ictive from above ?
 1605    ui_yes_no( Ictive, Mess, [Delable], y, Reply ),
 1606    bio_db_reply_delete_file( Reply, Full ).
 1607
 1608/*
 1609bio_db_serve_pname_from_local( false, _Db, _Pname, Arity,Iface, Load, Call ) :-
 1610    ( bio_db_db_pname_source( Db, Pname, read, prolog+zip, ZLoad ) ->
 1611        fail  % .zip will be tried by caller on failure
 1612        ; 
 1613    ).
 1614    fail.
 1615    */
 1616% fixme: this is not called from anywhere? 
 1617bio_db_serve_pname_from_local( true, _Db, Pname, Arity, Iface, Load, Call ) :-
 1618    % fixme: add predicates for interogating and deleting db/interface pairs
 1619    bio_db_pl_nonpl_interface( Iface, Load, NonPlLoad ),
 1620    % fixme: add logic for deleting prolog interface of downloaded db
 1621    !,
 1622    bio_db_load_call( Pname, Arity, Iface, NonPlLoad, Call ).
 1623
 1624bio_db_pl_nonpl_interface( Iface, Load, NonPlLoad ) :-
 1625    debug( bio_db, 'Converting to interface: ~a, from file: ~p', [Iface,Load] ),
 1626    atom_concat( pl_, Iface, Stem ),
 1627    atom_concat( 'bio_db/auxil/backends/', Stem, Backend ),
 1628    ensure_loaded( pack(Backend) ),
 1629    Conv =.. [Stem,Load],
 1630    call( Conv ),
 1631    file_name_extension( LoadStem, _Pl, Load ),
 1632    bio_db_interface_extensions( Iface, [Ext|_] ),
 1633    file_name_extension( LoadStem, Ext, NonPlLoad ).
 1634
 1635bio_db_ensure_loaded( Iface, Pid, Load, Handle, From ) :-
 1636    atom( Iface ),
 1637    bio_db_ensure_loaded_1( Iface, Pid, Load, Handle, From ),
 1638    !.
 1639bio_db_ensure_loaded( Iface, Pid, Load, _Handle, _From ) :-
 1640    % fixme: Goal in error can be supplied ?
 1641    throw( failed_to_load(Iface,Pid,Load), [pack(bio_db),pred(bio_db_ensure_loaded/4)] ).
 1642
 1643bio_db_ensure_loaded_1( prolog, Pid, Load, [], From ) :-
 1644    Pid = Pname/_Arity,
 1645    atomic_list_concat( [Ppfx|_], '_', Pname ),
 1646    bio_db_pl_load( Ppfx, Pid, Load, From ).
 1647bio_db_ensure_loaded_1( prosqlite, Pname/_Arity, Load, Pname, _From ) :-
 1648    sqlite_connect( Load, Pname, [as_predicates(true),at_module(bio_db)] ).
 1649bio_db_ensure_loaded_1( berkeley, Pname/Arity, Load, Berkeley, _From ) :-
 1650    \+ '$bio_db_handle'(Pname,_),
 1651    % fixme: is the option needed ? we are just reading- check
 1652    % bio_db_info_interface( berkeley, _Pid, Load, _Handle, data_types, data_types(Ktype,Vtype) ),
 1653
 1654    Pairs = [data_types-DtTypes,relation_type-RelType],
 1655    bio_db_info_interface_kvs( berkeley, _Pid, Load, _Handle, Pairs ),
 1656    bio_db_info_interface_types( RelType, DtTypes, berkeley, Dup, _DbTypes, KeyType, ValType ),
 1657    % Open = bdb_open( Load, read, Berkeley, [duplicates(Dupl),key(KeyType),value(ValType)] ),
 1658    Open = bdb_open( Load, read, Berkeley, [dup(Dup),key(KeyType),value(ValType)] ),
 1659    debug( bio_db, 'Bdb opening for reading with: ~w' , Open ),
 1660    call( Open ),
 1661    % bdb_open( Load, read, Berkeley, [duplicates(true),key(KeyType),value(ValType)] ),  % 0.5
 1662    % retractall( '$bio_db_handle'(Pname,_) ),  % fixme: we can do some error reporting if something does exist
 1663    % assert( '$bio_db_handle'(Pname,Berkeley) ),
 1664    % atomic_list_concat( [Ppfx|_], '_', Pname ),
 1665    arg( 1, RelType, Krt ),
 1666    arg( 1, RelType, Vrt ),
 1667    ground( Arity ),
 1668    bio_db_berkeley_predicate_assert_arity( Arity, Krt, Vrt, Pname, bdb_get, bdb_enum, Berkeley ).
 1669bio_db_ensure_loaded_1( rocks, Pname/Arity, Load, Handle, _From ) :-
 1670    /*
 1671    bio_db_info_interface( rocks, _Pid, Load, _Handle, data_types, data_types(Ktype,Vtype) ),
 1672    */
 1673    Pairs = [data_types-DtTypes,relation_type-RelType],
 1674    bio_db_info_interface_kvs( rocks, _Pid, Load, _Handle, Pairs ),
 1675    bio_db_info_interface_types( RelType, DtTypes, rocks, Dup, _DbTypes, KeyType, ValType ),
 1676    % maplist( bio_db_info_rocks_singleton_type, [Ktype,Vtype], [Kbype,Vbype] ),
 1677    % ( Dup == false -> KeyType = NoDupKeyType; NoDupKeyType = term ),
 1678    % 2nd take, duplicates are now stored as lists of values
 1679    ( Dup == false -> ValType = DupValType; DupValType = term ),
 1680    Open = rocks_open( Load, Handle, [key(KeyType),value(DupValType)] ),
 1681    debug( bio_db, 'Rocks opening for reading with: ~w' , Open ),
 1682    call( Open ),
 1683
 1684    % atomic_list_concat( [Ppfx|_], '_', Pname ),
 1685    bio_db_rocks_predicate_assert_arity( Arity, Dup, Pname, rocks_get, rocks_enum, Handle ).
 1686    % bio_db_rocks_predicate_assert_arity( Kbype/Vbype, Arity, Pname, rocks_get, rocks_enum, Handle ).
 1687
 1688% bio_db_pl_load( map, Pid, Load, From ).
 1689bio_db_pl_load( _Type, Pid, Load, Mod ) :-
 1690    dynamic( Mod:Pid ),  % fixme: we should be able to remove this? 
 1691    % ensure_loaded( Load ).  % following is an elaboration of code by JW: 16.11.13:
 1692    (   (file_name_extension(Base,pl,Load), \+ current_prolog_flag(bio_db_qcompile,false))
 1693    ->  Mod:load_files( Base, [qcompile(auto),if(not_loaded)] )
 1694    ;   ensure_loaded( Mod:Load )  % fixme: use load_files/2 ?
 1695    ).
 1696
 1697% bio_db_pl_load( edge, Pname/_Arity, Load ) :-
 1698/*
 1699bio_db_pl_load( edge, Pid, Load ) :-
 1700    % os_postfix ... :(
 1701    % % file_name_extension( Base, Ext, Load ),
 1702    % % atomic_list_concat( [Base,ord], '_', OrdBase ),
 1703    % % file_name_extension( OrdBase, Ext, OrdLoad ),
 1704    % % ensure_loaded( OrdLoad ),
 1705    ensure_loaded( Load ),
 1706    % % atomic_list_concat( [Pname,ord], '_', Pord ),
 1707    % % Head =.. [Pname,X,Y,W],
 1708    % % GoalF =.. [Pord,X,Y,W],
 1709    % % GoalB =.. [Pord,Y,X,W],
 1710    % % consult_clause( (Head:-(GoalF;GoalB)) ).
 1711    true.
 1712    */
 1713
 1714    /*
 1715bio_db_kv_db_predicate_assert( _, Pname, Krt, Vrt, Arity, Get, Enum, Handle ) :-
 1716    ground( Arity ),
 1717    bio_db_kv_db_predicate_assert_arity( Arity, Krt, Vrt, Pname, Get, Enum, Handle ).
 1718bio_db_kv_db_predicate_assert( edge, Pname, Arity, Get, Enum, Handle ) :-
 1719    bio_db_kv_db_predicate_assert_edge( Arity, Pname, Get, Enum, Handle ).
 1720    */
 1721
 1722bio_db_berkeley_predicate_assert_arity( 2, 1, 1,  Pname, Get, Enum, Handle ) :-
 1723    !, % maybe this relevat to other modes too  (here mode is 2,1,1
 1724    Head =.. [Pname,Key,Value],
 1725    GetG  =.. [  Get, Handle, Key, Value ],
 1726    EnumG =.. [ Enum, Handle, Key, Value ],
 1727    Conditional =  ( ( ground(Key) -> 
 1728                    GetG
 1729                    ;
 1730                    EnumG
 1731                  )
 1732            ),
 1733    consult_clause( (Head:-(Conditional)) ).
 1734
 1735bio_db_berkeley_predicate_assert_arity( N, _, _,  Pname, Get, Enum, Handle ) :-
 1736    functor( Head, Pname, N ),
 1737    Head =.. [Pname,Key|Args],
 1738    GetG  =.. [  Get, Handle, Key, Value ],
 1739    EnumG =.. [ Enum, Handle, Key, Value ],
 1740    Conditional =  ( ( ground(Key) -> 
 1741                    GetG
 1742                    ;
 1743                    EnumG
 1744                  )
 1745            ),
 1746    Unravel = bio_db_kv_db_value( Args, Value ),
 1747    consult_clause( (Head:-(Conditional,Unravel)) ).
 1748
 1749bio_db_rocks_predicate_assert_arity( 2, false, Pname, Get, Enum, Handle ) :-
 1750    !, % maybe this relevat to other modes too  (here mode is 2, false (=no duplicates)
 1751    Head =.. [Pname,Key,Value],
 1752    GetG  =.. [  Get, Handle, Key, Value ],
 1753    EnumG =.. [ Enum, Handle, Key, Value ],
 1754    Conditional =  ( ( ground(Key) -> 
 1755                    GetG
 1756                    ;
 1757                    EnumG
 1758                  )
 1759            ),
 1760    consult_clause( (Head:-(Conditional)) ).
 1761bio_db_rocks_predicate_assert_arity( N, false, Pname, Get, Enum, Handle ) :-
 1762    N > 2,
 1763    functor( Head, Pname, N ),
 1764    Head =.. [Pname,Key|Args],
 1765    GetG  =.. [  Get, Handle, Key, Value ],
 1766    EnumG =.. [ Enum, Handle, Key, Value ],
 1767    Conditional =  ( ( ground(Key) -> 
 1768                    GetG
 1769                    ;
 1770                    EnumG
 1771                  )
 1772            ),
 1773    Unravel = bio_db_kv_db_value( Args, Value ),
 1774    consult_clause( (Head:-(Conditional,Unravel)) ).
 1775bio_db_rocks_predicate_assert_arity( 2, true, Pname, Get, Enum, Handle ) :-
 1776    !, % maybe this relevat to other modes too  (here mode is 2, false (=no duplicates)
 1777    Head =.. [Pname,Key,Value],
 1778    GetG  =.. [  Get, Handle, Key, Values ],
 1779    EnumG =.. [ Enum, Handle, Key, Values ],
 1780    Conditional =  ( ( ground(Key) -> 
 1781                    (GetG, bio_db_rocks_multi_key_value(Values,Value) )
 1782                    ;
 1783                    (EnumG, bio_db_rocks_multi_key_value(Values,Value) )
 1784                  )
 1785            ),
 1786    consult_clause( (Head:-(Conditional)) ).
 1787bio_db_rocks_predicate_assert_arity( Arity, true, Pname, Get, Enum, Handle ) :-
 1788    Arity > 2,
 1789    functor( Head, Pname, Arity ),
 1790    Head =.. [Pname,Key|Args],
 1791    GetG  =.. [  Get, Handle, Key, ValueTerm ],
 1792    EnumG =.. [ Enum, Handle, Key, ValueTerm ],
 1793    % EnuTG =.. [ Enum, Handle, Key:_X, Value ],
 1794    Conditional =  ( ( ground(Key) -> 
 1795                    ( GetG, bio_db_rocks_multi_key_value(ValueTerm,Value) )
 1796                    ;
 1797                    ( EnumG, bio_db_rocks_multi_key_value(ValueTerm,Value) )
 1798                    % ( EnumG , ( (atomic(ProvKey),ProvKey=Key);ProvKey=Key:_) )
 1799                  )
 1800                ),
 1801    Unravel = bio_db_kv_db_value( Args, Value ),
 1802    consult_clause( (Head:-(Conditional,Unravel)) ).
 1803    
 1804
 1805bio_db_rocks_multi_key_value( [H|T], Value ) :-
 1806    !,
 1807    ( Value = H; member( Value, T ) ).
 1808bio_db_rocks_multi_key_value( Value, Value ).
 1809
 1810bio_db_kv_db_value( [H], Value ) :- !, Value = H.
 1811bio_db_kv_db_value( [H|T], H+Value ) :-
 1812    bio_db_kv_db_value( T, Value ).
 1813consult_clause( Clause ) :-
 1814    assert( Clause ).
 1815
 1816/*
 1817consult_clause( Clause ) :-
 1818    tmp_file_stream(text, File, Stream),
 1819    portray_clause( Stream, Clause ),
 1820    close( Stream ),
 1821    debug( bio_db, 'Consulting from: ~p', File ),
 1822    consult( File ),
 1823    true.
 1824*/
 1825
 1826bio_db_interfaces_ext( A+B, Ext ) :-
 1827    !,
 1828    bio_db_interfaces_ext( A, AExt ),
 1829    bio_db_interfaces_ext( B, BExt ),
 1830    atomic_list_concat( [AExt,BExt], '.', Ext ).
 1831bio_db_interfaces_ext( Iface, Ext ) :-
 1832    bio_db_interface_extensions( Iface, [Ext|_] ),
 1833    !.
 1834bio_db_interfaces_ext( Ext, Ext ).
 1835
 1836bio_db_pname_source( _Org, Db, Pname, Mode, DbFaces, Src ) :-
 1837    % fixme: make it play with Org ?
 1838    bio_db_interfaces_ext( DbFaces, Ext ),
 1839    Term =.. [Db,Pname],
 1840    debug( bio_db, 'Trying DB location: ~p, mode: ~w', [Term,Mode] ),
 1841    ( absolute_file_name( Term, Src, [access(Mode),extensions([Ext]),file_errors(fail)] )
 1842      ;
 1843      (  DbFaces==rocks,
 1844         file_name_extension(Pname,rocks,Rname),
 1845        Rerm =.. [Db,Rname],
 1846        absolute_file_name(Rerm,Src,[access(Mode),file_errors(fail),file_type(directory)])
 1847      )
 1848    ),
 1849    !.
 1850% The above is a short-cut this is the long way.
 1851% Works when single db provides both maps and graphs
 1852%
 1853bio_db_pname_source( Org, Db, Pname, Mode, DbFaces, Src ) :-
 1854    bio_db_interfaces_ext( DbFaces, Ext ),
 1855    % Term =.. [Db,Pname],
 1856    bio_db_pred_name_type( Pname, Type ),
 1857    directory_file_path( Org, Type, Rel ),
 1858    % Term =.. [bio_db_data,Type], % pre Org times
 1859    Term =.. [bio_db_data,Rel],
 1860    absolute_file_name( Term, Dir ),
 1861    file_name_extension( Pname, Ext, Bname ),
 1862    directory_file_path( Dir, Db, DbDir ),
 1863    directory_file_path( DbDir, Bname, Src ),
 1864    debug( bio_db, 'Trying DB location: ~p, mode: ~w', [Src,Mode] ), % fixme: debug_call, with success/failure
 1865    ( absolute_file_name( Src, _, [access(Mode),file_errors(fail)])
 1866       ;
 1867      (  DbFaces==rocks,
 1868        absolute_file_name( Src, _, [access(Mode),file_errors(fail),file_type(directory)] )
 1869      )
 1870    ),
 1871    !.
 1872    % absolute_file_name( Pname, Src, [access(Mode),extensions([Ext]),file_errors(fail)] ).
 bio_db_source_info(+File, -InfoF)
Generate Info filename corresponding to the database filename at File.

*/

 1879bio_db_source_info( File, InfoF ) :-
 1880    file_name_extension( Stem, Ext, File ),
 1881    atom_concat( Stem, '_info', InfoStem ),
 1882    file_name_extension( InfoStem, Ext, InfoF ).
 bio_db_predicate_info(+PidOrPname, -InfoName)
Generate the information predicate name of a Pid or of Db predicate name.

*/

 1889bio_db_predicate_info( Pname/_Arity, InfoName ) :-
 1890    !,
 1891    atom_concat( Pname, '_info', InfoName ).
 1892bio_db_predicate_info( Pname, InfoName ) :-
 1893    atom( Pname ),
 1894    atom_concat( Pname, '_info', InfoName ).
 1895
 1896bio_db_pred_name_type( Pname, Type ) :-
 1897    atomic_list_concat( [_,_,Trd|_], '_', Pname ),
 1898    bio_db_pred_name_prefix_type( Trd, Type ).
 1899
 1900bio_db_pred_name_prefix_type( edge, graphs ) :- !.
 1901bio_db_pred_name_prefix_type( _, maps ).
 1902
 1903bio_db_load_call( false, Pname, Arity, Iface, File, _Call ) :-
 1904    ( Iface == prolog -> 
 1905        % ensure .qlf is created
 1906        file_name_extension( Stem, pl, File ),
 1907        Mess = 'Ensuring .qlf is also installed: ~w',
 1908        phrase('$messages':translate_message(debug(Mess,[Pname/Arity])), Lines),
 1909        print_message_lines(current_output, kind(informational), Lines),
 1910        load_files( scratch:Stem, [qcompile(auto),if(true)] ),
 1911        abolish( scratch:Pname/Arity )
 1912        ;
 1913        true
 1914    ).
 1915bio_db_load_call( true, Pname, Arity, Iface, File, Call ) :-
 1916    debug( bio_db, 'Loading pred: ~w, interface: ~a, file: ~w', [Pname/Arity,Iface,File] ),
 1917    ground( Iface ),
 1918    functor( Phead, Pname, Arity ),
 1919    ( predicate_property(Phead,imported_from(From) ) -> true; From = bio_db ),
 1920    abolish( From:Pname/Arity ),    % fixme: retractall/1 if we have problem with regenerations ?
 1921    % retractall(Phead),
 1922    atom_concat( Pname, '_info', InfoPname ),
 1923    dynamic( From:InfoPname/2 ),
 1924    % functor( Ihead, InfoPname, 2 ),
 1925    ( (From \== bio_db,\+ current_predicate(bio_db:InfoPname/2)) -> 
 1926            % fixme: test again:
 1927            From:export(InfoPname/2),
 1928            bio_db:import(From:InfoPname/2)
 1929            ;
 1930            true
 1931    ),
 1932    functor( InfoHead, InfoPname, 2),
 1933    retractall( From:InfoHead ),
 1934    bio_db_ensure_loaded( Iface, Pname/Arity, File, Handle, From ),
 1935    assert( bio_db_handle(Pname/Arity,Iface,File,Handle,From) ),
 1936    call( Call ).
 1937
 1938bio_db_predicate_type_sub_dir( edge, graphs ) :- !.
 1939bio_db_predicate_type_sub_dir( _, maps ).
 1940
 1941bio_db_map_call_db_pname( Call, Db, Pname, Arity ) :-
 1942    functor( Call, Pname, Arity ),
 1943    at_con( [Db|Parts], '_', Pname ),
 1944    bio_db_map_call_db_pname_check( Db, Parts, Pname, Arity ).
 1945    % bio_db_type_arity_check( Type, Arity ).
 1946
 1947bio_db_map_call_db_pname_check( Db, Parts, _Pname, _Arity ) :-
 1948     maplist( atom_length, [Db|Parts], [4,4,4,4] ),
 1949     !.
 1950bio_db_map_call_db_pname_check( _Db, _Parts, Pname, Arity ) :-
 1951    throw( not_a_db_pred(Pname/Arity), [pack(bio_db),pred(bio_db_serve/3)] ).
 1952
 1953% fixme: delete these 2 preds
 1954bio_db_type_arity_check( Type, Arity ) :-
 1955    bio_db_type_arity_known( Type, Arity ),
 1956    !.
 1957bio_db_type_arity_check( Type, Arity ) :-
 1958    throw( unknown_combination_of_type_arity(Type,Arity) ).
 1959
 1960% fixme: this now a bit outdated... maybe add name for special cases ?
 1961bio_db_type_arity_known( map, 2 ).
 1962bio_db_type_arity_known( map, 3 ).
 1963bio_db_type_arity_known( map, 4 ).
 1964bio_db_type_arity_known( map, 5 ).
 1965bio_db_type_arity_known( map, 7 ).
 1966bio_db_type_arity_known( edge, 3 ).
 1967bio_db_type_arity_known( edge, 2 ).
 1968
 1969bio_db_reply_delete_file( true, Local ) :-
 1970    debug( bio_db, 'Deleting file: ~p', Local ),
 1971    delete_file( Local ).
 1972bio_db_reply_delete_file( false, Local ) :-
 1973    debug( bio_db, 'NOT deleting file: ~p', Local ).
 1974
 1975/*
 1976bio_db_info_db_types( berkeley, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1977    bio_db_info_berkeley_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1978bio_db_info_db_types( rocks, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1979    % bio_db_info_rocks_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1980    bio_db_info_rocks_types( RelType, DataTypes, Dup, DbTypes, KeyType, ValType ).
 1981
 1982bio_db_info_rocks_types( relation_type(1,1), DataTypes, Dup, DbTypes, KeyType, ValType ) :- 
 1983    DataTypes =.. [data_types,PlKeyType,PlValsTypes],
 1984    bio_db_info_rocks_type( PlKeyType, KeyType ),
 1985    bio_db_info_rocks_type( PlValsTypes, ValType ),
 1986    DbTypes = [key(KeyType),value(ValType)].
 1987    */
 1988% fixme: change all the calls and remove this
 1989bio_db_info_db_types( Iface, RelType, DataTypes, Dup, DbTypes, KeyType, ValType ) :-
 1990    bio_db_info_interface_types( RelType, DataTypes, Iface, Dup, DbTypes, KeyType, ValType ).
 1991
 1992% bio_db_info_berkeley_types( relation_type(1,MR), data_types(Kt,Vt), Dup, DbTypes, KeyType, ValType ) :-   
 1993bio_db_info_interface_types( relation_type(1,MR), data_types(Kt,Vt), Iface, Dup, DbTypes, KeyType, ValType ) :- 
 1994    ( MR =:= 1 -> Dup = false; Dup = true ),
 1995    !,  % Arity = 2 (from the form of data_types...
 1996    bio_db_info_interface_type( Kt, Iface, KeyType ),
 1997    bio_db_info_interface_type( Vt, Iface, ValType ),
 1998    DbTypes = [key(KeyType),value(ValType)].
 1999bio_db_info_interface_types( relation_type(1,MR), DtTypes, Iface, Dup, DbTypes, KeyType, ValType ) :-   
 2000    ( MR =:= 1 -> Dup = false; Dup = true ),
 2001    !,  % Arity = 2 (from the form of data_types...
 2002    functor( DtTypes, _, Arity ),
 2003    Arity > 2,
 2004    !,
 2005    arg( 1, DtTypes, Kt ),
 2006    bio_db_info_interface_type( Kt, Iface, KeyType ),
 2007    ValType = term,
 2008    DbTypes = [key(KeyType),value(term)].
 2009bio_db_info_interface_types( RelType, DtTypes, Iface, Dup, DbTypes, KeyType, ValType ) :-
 2010    ( RelType = relation_type(1,1) -> Dup = false; Dup = true ),
 2011    arg( 1, DtTypes, Kt ),
 2012    functor( DtTypes, _, Arity ),
 2013    ( Arity > 2 -> ValType = term
 2014                ; 
 2015
 2016                arg( 2, DtTypes, Vt ),
 2017                    bio_db_info_interface_type( Vt, Iface, ValType )
 2018    ),
 2019    bio_db_info_interface_type( Kt, Iface, KeyType ),
 2020    DbTypes = [key(KeyType),value(term)].
 2021
 2022bio_db_info_interface_type( [Singleton], Iface, Type ) :-  !,
 2023    bio_db_info_interface_unit_type( Iface, Singleton, Type ).
 2024bio_db_info_interface_type( [_,_|_], _Iface, term ) :- !.  % a bit of a shortcut
 2025bio_db_info_interface_type( Singleton, Iface, Type ) :-
 2026    bio_db_info_interface_unit_type( Iface, Singleton, Type ).
 2027
 2028bio_db_info_interface_unit_type( berkeley, Unit, Type ) :-
 2029    bio_db_berkeley_type( Unit, Type ).
 2030bio_db_info_interface_unit_type( rocks, Unit, Type ) :-
 2031    bio_db_rocks_type( Unit, Type ).
 2032
 2033bio_db_rocks_type( term, term ).
 2034bio_db_rocks_type( atom, atom ).
 2035bio_db_rocks_type( integer, int64 ). % rocks also has int32
 2036bio_db_rocks_type( number, atom ).  % rocks has doubles and floats
 2037
 2038bio_db_berkeley_type( term, term ).
 2039bio_db_berkeley_type( atom, atom ).
 2040bio_db_berkeley_type( integer, c_long ).
 2041bio_db_berkeley_type( number, atom ).
 2042
 2043% this is a mock implementation see library(os) or library(os_) 
 2044% for the real one
 2045os_path_( Dir, File, Path ) :-
 2046    ground( Dir ),
 2047    ground( File ),
 2048    !,
 2049    directory_file_path( Dir, File, Path ).
 2050os_path_1( Dir, File, Path ) :-
 2051    ground( Path ),
 2052    directory_file_path( DirSl, File, Path ),
 2053    atom_concat( Dir, '/', DirSl ).
 2054    
 2055pack_errors:message( close_to_info(Pid) ) -->
 2056    ['Predicate: ~w, is not currently served, info depend on the opening interface.'-[Pid]].
 2057pack_errors:message( not_a_db_pred(Pid) ) -->
 2058    ['Predicate identifier: ~w, not of a db predicate.'-[Pid]].
 2059pack_errors:message( not_served(Pid) ) -->
 2060    ['Predicate: ~w, is not currently served.'-[Pid]].
 2061pack_errors:message( failed_to_load(Iface,Pid,File) ) -->
 2062    ['Failed to load predicate: ~w, for backend: ~w, from file: ~p.'-[Pid,Iface,File]].
 2063
 2064% add at_halt, close databases particularly berkeley ones
 2065:- at_halt( bio_db_close_connections ). 2066:- initialization( bio_db_paths, after_load ). 2067
 2068:- multifile sandbox:safe_primitive/1. 2069
 2070bio_sandbox_clause(sandbox:safe_primitive(bio_db:Head)) :-
 2071    module_property(bio_db, exports(PIList)),
 2072    member(Name/Arity, PIList),
 2073    (   sub_atom(Name, 0, _, _, edge_)
 2074    ;   sub_atom(Name, 0, _, _, map_)
 2075    ),
 2076    functor(Head, Name, Arity).
 2077
 2078term_expansion(bio_db_interface, Clauses) :-
 2079    findall(Clause, bio_sandbox_clause(Clause), Clauses).
 2080
 2081bio_db_interface.
 2082sandbox:safe_primitive(bio_db:bio_db_info(_,_,_)).
 2083sandbox:safe_primitive(bio_db:bio_db_info(_,_,_,_))