1:- module(lsp_reading_source, [ file_lines_start_end/2,
    2                                read_term_positions/2,
    3                                read_term_positions/4,
    4                                file_offset_line_position/4,
    5                                find_in_term_with_positions/5,
    6                                position_to_match/3,
    7                                subterm_leaf_position/4
    8                              ]).

/** <module> LSP Reading Source

Module for reading in Prolog source code with positions, mostly
wrapping prolog_read_source_term/4.

@author James Cash

@tbd Files using quasi-quotations currently aren't supported; need to
     teach prolog_read_source_term/4 to load them correctly.

*/

   21:- use_module(library(apply)).   22:- use_module(library(apply_macros)).   23:- use_module(library(clpfd)).   24:- use_module(library(prolog_source)).   25:- use_module(library(readutil), [ read_line_to_codes/2 ]).   26:- use_module(library(yall)).   27
   28%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   29% Specialized reading predicates
   30%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 file_lines_start_end(+Path:text, -LineCharRange:list) is det
Construct a mapping of file offsets to line numbers in the file at Path. LineCharRange will be a list containing terms like =line_start_end(LineNumber, LineOffsetStart, LineOffsetEnd)=
   37file_lines_start_end(Path, LineCharRange) :-
   38    Acc = line_data([], line(1, 0)),
   39    setup_call_cleanup(
   40        open(Path, read, Stream),
   41        ( repeat,
   42          read_line_to_codes(Stream, Line),
   43          stream_property(Stream, position(Position)),
   44          stream_position_data(char_count, Position, NewLineStart),
   45          arg(2, Acc, line(LastLine, LastLineStart)),
   46          arg(1, Acc, Data),
   47          LastLineEnd is NewLineStart - 1,
   48          nb_setarg(1, Acc, [(LastLineStart-LastLineEnd)-LastLine|Data]),
   49          NextLine is LastLine + 1,
   50          nb_setarg(2, Acc, line(NextLine, NewLineStart)),
   51          Line == end_of_file, !
   52        ),
   53        close(Stream)),
   54    arg(1, Acc, Ranges),
   55    list_to_rbtree(Ranges, RangeToLine),
   56    maplist([Range-Line, Line-Range]>>true, Ranges, InvRanges),
   57    list_to_rbtree(InvRanges, LineToRange),
   58    LineCharRange = RangeToLine-LineToRange.
 read_term_positions(+Path:text, -TermsWithPositions:list) is det
Read in all the terms in the file at Path, using prolog_read_source_term/4, to a list of dictionaries. Each dictionary has the following keys:
term
The term read in, with variables replace with the term '$var'(VariableName).
pos
The position of the term (see [[prolog_read_source_term/4]]).
subterm
The position of the subterms in term (see [[prolog_read_source_term/4]]).
variable_names
List of Name=Var terms for the variables in Term. Note that the variables in term have already been replace with var(Name)
comments
Comments in the term, with the same format as prolog_read_source_term/4
   76read_term_positions(Path, TermsWithPositions) :-
   77    Acc = data([]),
   78    prolog_canonical_source(Path, SourceId),
   79    setup_call_cleanup(
   80        prolog_open_source(SourceId, Stream),
   81        ( repeat,
   82          prolog_read_source_term(Stream, Term, _Ex, [term_position(TermPos),
   83                                                      subterm_positions(SubTermPos),
   84                                                      variable_names(VarNames),
   85                                                      comments(Comments),
   86                                                      % maybe use `error` for running standalone?
   87                                                      syntax_errors(dec10)]),
   88          maplist([Name=Var]>>( Var = '$var'(Name) ), VarNames),
   89          arg(1, Acc, Lst),
   90          nb_setarg(1, Acc, [_{term: Term, pos: TermPos, subterm: SubTermPos,
   91                               varible_names: VarNames, comments: Comments}|Lst]),
   92          Term = end_of_file, !
   93        ),
   94        prolog_close_source(Stream)),
   95    arg(1, Acc, TermsWithPositionsRev),
   96    reverse(TermsWithPositionsRev, TermsWithPositions).
 read_term_positions(+Path:text, +Start:integer, +End:integer, -TermsWithPositions:list) is det
Read in all the terms in the file at Path between Start and End, using prolog_read_source_term/4, to a list of dictionaries. Each dictionary has the following keys:
term
The term read in, with variables replace with the term '$var'(VariableName).
pos
The position of the term (see [[prolog_read_source_term/4]]).
subterm
The position of the subterms in term (see [[prolog_read_source_term/4]]).
variable_names
List of Name=Var terms for the variables in Term. Note that the variables in term have already been replace with var(Name)
comments
Comments in the term, with the same format as prolog_read_source_term/4
  114read_term_positions(Path, Start, End, TermsWithPositions) :-
  115    Acc = data([]),
  116    prolog_canonical_source(Path, SourceId),
  117    setup_call_cleanup(
  118        prolog_open_source(SourceId, Stream),
  119        ( repeat,
  120          prolog_read_source_term(Stream, Term, _Ex, [term_position(TermPos),
  121                                                      subterm_positions(SubTermPos),
  122                                                      variable_names(VarNames),
  123                                                      comments(Comments),
  124                                                      % maybe use `error` for running standalone?
  125                                                      syntax_errors(dec10)]),
  126          arg(1, SubTermPos, TermStart),
  127          TermStart >= Start,
  128          maplist([Name=Var]>>( Var = '$var'(Name) ), VarNames),
  129          arg(1, Acc, Lst),
  130          nb_setarg(1, Acc, [_{term: Term, pos: TermPos, subterm: SubTermPos,
  131                               varible_names: VarNames, comments: Comments}|Lst]),
  132          arg(2, SubTermPos, TermEnd),
  133          once(( Term = end_of_file ; TermEnd >= End )), !
  134        ),
  135        prolog_close_source(Stream)),
  136    arg(1, Acc, TermsWithPositionsRev),
  137    reverse(TermsWithPositionsRev, TermsWithPositions).
  138
  139
  140%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  141% Using LineCharMap
  142%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  143
  %! file_offset_line_position(+LineCharMap:term, ?Offset:integer, ?Line:integer, ?Column:integer) is semidet.
  %
  %  Relate a character Offset in a file to its Line number and the
  %  Column within that line. LineCharMap is the
  %  =|RangeToLine-LineToRange|= pair produced by file_lines_start_end/2.
  %
  %  When Offset is ground the line is found by searching RangeToLine
  %  for the range containing Offset. Otherwise Line is looked up in
  %  LineToRange and Offset is constrained to the line start plus
  %  Column.
  file_offset_line_position(RangeToLine-LineToRange, Offset, Line, Column) :-
      (   ground(Offset)
      ->  rb_lookup_range(Offset, LineStart-_, Line, RangeToLine),
          Column #= Offset - LineStart
      ;   rb_lookup(Line, LineStart-_, LineToRange),
          Offset #= LineStart + Column
      ).
  158
  159%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  160% Red-black trees helper
  161%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  %! rb_lookup_range(+Key, -KeyRange, -Value, +Tree) is semidet.
  %
  %  Tree is a red-black tree whose keys are =|Start-End|= pairs. True
  %  when KeyRange is the key whose range contains Key and Value is the
  %  value stored under it. Key is contained when Start =< Key =< End
  %  (standard order), or when Key equals Start; the latter also covers
  %  an empty range whose End is below its Start.
  rb_lookup_range(Key, KeyRange, Value, t(_Nil, Root)) =>
      rb_lookup_range_(Key, KeyRange, Value, Root).

  %  The nil node ends the search unsuccessfully.
  rb_lookup_range_(_Key, _KeyRange, _Value, black('', _, _, '')) :- !, fail.
  rb_lookup_range_(Key, KeyRange, Value, Node) :-
      arg(2, Node, Start-End),
      compare(CmpStart, Key, Start),
      compare(CmpEnd, Key, End),
      rb_lookup_range_(t(CmpStart, CmpEnd), Key, Start-End, KeyRange, Value, Node).

  %  Dispatch on how Key compares to the Start and End of this node.
  rb_lookup_range_(t(=, _), _, Range, KeyRange, Value, Node) =>
      arg(3, Node, Value),
      KeyRange = Range.
  rb_lookup_range_(t(>, <), _, Range, KeyRange, Value, Node) =>
      arg(3, Node, Value),
      KeyRange = Range.
  % Key == End only counts as a hit when Key is not below Start.
  % Otherwise an empty range Start-(Start-1) would swallow the last
  % offset of the preceding range whenever it is visited first.
  rb_lookup_range_(t(>, =), _, Range, KeyRange, Value, Node) =>
      arg(3, Node, Value),
      KeyRange = Range.
  rb_lookup_range_(t(<, _), Key, _, KeyRange, Value, Node) =>
      arg(1, Node, Left),
      rb_lookup_range_(Key, KeyRange, Value, Left).
  rb_lookup_range_(t(>, >), Key, _, KeyRange, Value, Node) =>
      arg(4, Node, Right),
      rb_lookup_range_(Key, KeyRange, Value, Right).
  187
  188%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  189% Searching through read results
  190%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  191
  %! position_to_match(+LineCharRange, +Found, -Match) is semidet.
  %
  %  Convert Found, a =|found_at(Term, Position)|= term, into the dict
  %  Match of the shape
  %  =|_{range: _{start: _{line, character}, end: _{line, character}}}|=.
  %  Position is either =|From-To|= or a term_position/5 term; for the
  %  latter the functor offsets (third and fourth argument) are used.
  %  Offsets are translated with file_offset_line_position/4 and the
  %  resulting line numbers are decremented by one.
  position_to_match(LineCharRange, found_at(_, From-To), Match) :- !,
      offsets_to_lsp_range_(LineCharRange, From, To, Match).
  position_to_match(LineCharRange, found_at(_, term_position(_, _, FFrom, FTo, _)), Match) :-
      offsets_to_lsp_range_(LineCharRange, FFrom, FTo, Match).

  %  Build the range dict for the offsets From and To.
  offsets_to_lsp_range_(LineCharRange, From, To, Match) :-
      offset_to_lsp_position_(LineCharRange, From, Start),
      offset_to_lsp_position_(LineCharRange, To, End),
      Match = _{range: _{start: Start, end: End}}.

  %  Translate one offset to a =|_{line, character}|= dict; succ/2
  %  turns the line number from the map into its predecessor.
  offset_to_lsp_position_(LineCharRange, Offset, _{line: Line0, character: Character}) :-
      file_offset_line_position(LineCharRange, Offset, Line1, Character),
      succ(Line0, Line1).
  206
  :- meta_predicate find_in_term_with_positions(2, +, +, -, -).

  %! find_in_term_with_positions(+Search:callable, +Term, +Positions, -Matches, -Tail) is det.
  %
  %  Collect the subterms of Term accepted by Search. Search is called
  %  with two extra arguments, a term and its position, and succeeds if
  %  that term should be reported. Positions is the subterm position
  %  layout of Term as given by read_term_positions/2. Matches is a
  %  list of =|found_at(SubTerm, Position)|= terms that ends in Tail.
  %
  %  A term accepted by Search is reported as a whole; its own subterms
  %  are not searched.
  find_in_term_with_positions(Needle, Term, Position, Matches, Tail) :-
      (   call(Needle, Term, Position)
      ->  % recurse?
          Matches = [found_at(Term, Position)|Tail]
      ;   find_in_position_children_(Position, Needle, Term, Matches, Tail)
      ).

  %  find_in_position_children_(+Position, +Needle, +Term, -Matches, -Tail)
  %
  %  Continue the search inside Term according to the kind of
  %  Position. Positions without children contribute no matches.
  find_in_position_children_(term_position(_, _, _, _, ArgPoses), Needle, Term, Matches, Tail) :- !,
      find_in_term_subterm(Needle, Term, 1, ArgPoses, Matches, Tail).
  find_in_position_children_(list_position(_, _, ElemPoses, TailPos), Needle, Term, Matches, Tail) :- !,
      find_in_term_list(Needle, Term, ElemPoses, TailPos, Matches, Tail).
  find_in_position_children_(brace_term_position(_, _, InnerPos), Needle, Term, Matches, Tail) :- !,
      Term = {Inner},
      find_in_term_with_positions(Needle, Inner, InnerPos, Matches, Tail).
  find_in_position_children_(parentheses_term_position(_, _, InnerPos), Needle, Term, Matches, Tail) :- !,
      find_in_term_with_positions(Needle, Term, InnerPos, Matches, Tail).
  find_in_position_children_(dict_position(_, _, _, _, KVPoses), Needle, Term, Matches, Tail) :- !,
      find_in_term_dict(Needle, Term, KVPoses, Matches, Tail).
  find_in_position_children_(_, _, _, Tail, Tail).
  230
  %  find_in_term_dict(+Needle, +Dict, +KVPositions, -Matches, -Tail)
  %
  %  Search the values of Dict, one key_value_position/7 entry at a
  %  time. Only the value of each pair is searched, not the key.
  find_in_term_dict(_, _, [], Tail, Tail) :- !.
  find_in_term_dict(Needle, Dict, [KVPos|KVPoses], Matches, Tail) :-
      KVPos = key_value_position(_KVFrom, _KVTo, _SepFrom, _SepTo, Key, _KeyPos, ValuePos),
      get_dict(Key, Dict, Value),
      find_in_term_with_positions(Needle, Value, ValuePos, Matches, Rest),
      find_in_term_dict(Needle, Dict, KVPoses, Rest, Tail).
  237
  %  find_in_term_list(+Needle, +List, +ElemPositions, +TailPos, -Matches, -Tail)
  %
  %  Search the elements of List pairwise with their positions. Once
  %  the element positions are exhausted, what is left of List is
  %  searched with TailPos unless TailPos is =none=.
  find_in_term_list(_, _, [], none, Tail, Tail) :- !.
  find_in_term_list(Needle, ListTail, [], TailPos, Matches, Tail) :- !,
      find_in_term_with_positions(Needle, ListTail, TailPos, Matches, Tail).
  find_in_term_list(Needle, [Elem|Elems], [ElemPos|ElemPoses], TailPos, Matches, Tail) :-
      find_in_term_with_positions(Needle, Elem, ElemPos, Matches, Rest),
      find_in_term_list(Needle, Elems, ElemPoses, TailPos, Rest, Tail).
  244
  %  find_in_term_subterm(+Needle, +Term, +ArgNo, +ArgPositions, -Matches, -Tail)
  %
  %  Search the arguments of Term from argument ArgNo onwards, one per
  %  entry of ArgPositions.
  find_in_term_subterm(_, _, _, [], Tail, Tail) :- !.
  find_in_term_subterm(Needle, Term, ArgNo, [ArgPos|ArgPoses], Matches, Tail) :-
      arg(ArgNo, Term, ArgTerm),
      find_in_term_with_positions(Needle, ArgTerm, ArgPos, Matches, Rest),
      succ(ArgNo, NextArgNo),
      find_in_term_subterm(Needle, Term, NextArgNo, ArgPoses, Rest, Tail).
 subterm_leaf_position(+Term, +Offset, +SubTermPoses, ?Leaf) is semidet
  253subterm_leaf_position(Term, Offset, From-To, Term) :- between(From, To, Offset), !.
  254subterm_leaf_position(Term, Offset, term_position(_, _, FFrom, FTo, _), Term) :-
  255    between(FFrom, FTo, Offset), !.
  256subterm_leaf_position(Term, Offset, term_position(From, To, _, _, Subterms), Leaf) :-
  257    between(From, To, Offset), !,
  258    functor(Term, _, Arity, _),
  259    between(1, Arity, Arg),
  260    arg(Arg, Term, Subterm),
  261    nth1(Arg, Subterms, SubtermPos),
  262    subterm_leaf_position(Subterm, Offset, SubtermPos, Leaf), !.
  263subterm_leaf_position(Term, Offset, list_position(From, To, Elms, _), Leaf) :-
  264    between(From, To, Offset),
  265    length(Elms, NElms),
  266    between(1, NElms, Idx),
  267    nth1(Idx, Term, Elm),
  268    nth1(Idx, Elms, ElmPos),
  269    subterm_leaf_position(Elm, Offset, ElmPos, Leaf), !.
  270subterm_leaf_position(Term, Offset, list_position(From, To, Elms, TailPos), Leaf) :-
  271    between(From, To, Offset), TailPos \= none, !,
  272    length(Elms, NElms),
  273    length(Head, NElms),
  274    append(Head, Tail, Term),
  275    subterm_leaf_position(Tail, Offset, TailPos, Leaf), !.
  276subterm_leaf_position(Term, Offset, brace_term_position(From, To, BracesPos), Leaf) :-
  277    between(From, To, Offset), !,
  278    Term = {Term0},
  279    subterm_leaf_position(Term0, Offset, BracesPos, Leaf).
  280subterm_leaf_position(Term, Offset, parentheses_term_position(From, To, ContentPos), Leaf) :-
  281    between(From, To, Offset), !,
  282    subterm_leaf_position(Term, Offset, ContentPos, Leaf).
  283subterm_leaf_position(Term, Offset, dict_position(_From, _To, TagFrom, TagTo, _KVPoses), Leaf) :-
  284    between(TagFrom, TagTo, Offset), !,
  285    is_dict(Term, Leaf).
  286subterm_leaf_position(Term, Offset, dict_position(From, To, _TagFrom, _TagTo, KVPoses), Leaf) :-
  287    between(From, To, Offset), !,
  288    member(key_value_position(KVFrom, KVTo, _SF, _ST, Key, _KeyPos, ValuePos), KVPoses),
  289    between(KVFrom, KVTo, Offset), !,
  290    % keys of a literal dict aren't of interest, I think?
  291    get_dict(Key, Term, Value),
  292    subterm_leaf_position(Value, Offset, ValuePos, Leaf)