%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                 T O K E N I S E R  /  P A R S E R
%
%                 Author: Mantis H.M. Cheng (May/30/1994)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% tokenise( +ListOfChar, -ListOfToken )
%	"ListOfToken" is parsed list of tokens of "ListOfChar"
%
tokenise( [], [] ) :- !.
tokenise( L, [T|Ts] ) :-
	next_token( L, L1, T ),
	rem_space(L1, L2),
	!,
	tokenise(L2, Ts).

%% next_expression( -Expression )
%   returns the next input expression from standard input.
%   it returns the expression 'end_of_file' when end of file
%   is reached, or 'error' when an error occurs in the input.
%
next_expression( S ) :-
	next_char( [], L, '[> ' ),
	get_next_expression( L, S ), !.

%% get_next_expression( +ListOfChars, -Expression)
%
get_next_expression( L, S ) :-
	 next_token( L, L1, T ),
	 tokenise_exp( T, L1, Ts ),
	 statement( S, Ts, [] ),
%         expression( S, Ts, [end] ),
	 !.
get_next_expression( _, error ).


%% tokenise_exp( +Token, +Chars, -Tokens)
%       Tokenises Chars into a list of 'Tokens'.
%       'Token' is the look-ahead token
tokenise_exp( end_of_file, _, [end_of_file] ) :- !.   % end of file
tokenise_exp( end,         _, [end] )         :- !.   % terminator
tokenise_exp( T, L, [T|Ts] ) :-
	next_char( L, L1, '<] ' ), !,
	next_token( L1, L2, T1 ),
	!,
	tokenise_exp( T1, L2, Ts ).


%% next_char( +Chars, -Chars', +Prompt)
%       returns the next non-white-space character
%       it is deterministic
next_char( [], S, P ) :-                    % end of line
	ask_input( P, S0 ), !,
	next_char( S0, S, P ).
next_char( [37|_], S, P ) :-                % (%) comment line
	ask_input( P, S0 ), !,
	next_char( S0, S, P ).
next_char( [C|S0], S1, P ) :-
	is_white_space( C ), !,
	next_char( S0, S1, P ).
next_char( S, S, _ ).


%% ask_input( +Prompt, -ListOfChars )
%       returns a list of ascii values corresponding
%       to the input
%
%ask_input( Prompt, Chars) :-   (BNR-Prolog version)
%       write(Prompt), 
%       readln( X ),
%       name(X, Chars).
%
ask_input( Prompt, Chars ) :-
	prompt_user( Prompt ),
	get0( Char ),
	read_line( Char, Chars ), !.


read_line( -1, [-1] )  :- !.      % end of file
read_line( 10, [32] )  :- !.      % line-feed
read_line( 13, [32] )  :- !.      % carriage-return
read_line( C,  [C|Cs] ) :-
	get0( C0 ),
	read_line( C0, Cs ).

:- dynamic prompt/1.

prompt_on :-
	retract(prompt(_)),
	assert(prompt(on)).

prompt_off :-
	retract(prompt(_)),
	assert(prompt(off)).

prompt_user( P ) :-
	prompt( on ), !,
	write( P ).
prompt_user( _ ).


prompt( on ).   % initially, it is on.


%% next_token( +Chars, -RemainingChars, -Token)
%
next_token( [60,60 | Rest]   , Rest    , '<<').
next_token( [62,62 | Rest]   , Rest    , '>>').
next_token( [91,91 | Rest]   , Rest    , '[[').
next_token( [93,93 | Rest]   , Rest    , ']]').
next_token( [123,125 | Rest] , Rest    , '{}').
next_token( [91,93 | Rest]   , Rest    , '[]').
next_token( [46  | Rest]     , []      , end) :- rem_space( Rest, []), !. % ~.
  %
  % First parse operators
  %
next_token( List, Rest, O) :- operator(List, Rest, O).
  %
  % next parse spme predefined symbols
  %
next_token( [-1  | Rest]     , Rest    , end_of_file ).
next_token( [40  | Rest]     , Rest    , '(').
next_token( [41  | Rest]     , Rest    , ')').
next_token( [123 | Rest]     , Rest    , '{').
next_token( [125 | Rest]     , Rest    , '}').
next_token( [91  | Rest]     , Rest    , '[').
next_token( [93  | Rest]     , Rest    , ']').
next_token( [124 | Rest]     , Rest    , '|').
next_token( [44  | Rest]     , Rest    , ',').
  %
  % variables
  %
next_token( [95 | List], Rest, var(T) ) :-     % 95 == ~_
	!, identifier(I, List, Rest), name(T, [95|I]).
next_token( [C|List], Rest, T ) :- 
	is_alpha(C), !, identifier(I, List, Rest), name(T1, [C|I]),
	symbol_val(C, T1, T).
  %
  % numbers
  %
next_token( [C | List], Rest, num(T) ) :-
	is_digit(C),  !, numb( N,  List, Rest),  aton([C|N], T).  
  %
  % strings and quoted strings   39 == ~'  34 == ~"
  %
next_token( [39|L],  R, const(T) ) :-
	quote_item(Q, L, [39|R]), !, name(T, Q).
next_token( [34|L], R, const(T) ) :-
	string(Q, L, [34|R]),  !,  name(T, Q).
next_token( [C|L], L, C) :- !.


  % remove all whitespace
rem_space( [C | List], Rest) :- 
	is_white_space(C),
	rem_space( List, Rest),!.
rem_space( [37 | _], []) :- !.     % (%) comments
rem_space( L, L).


%% numb( -NumChars, +Chars, -RemainingChars)
numb( [C|Cs], [C|L], R) :-
	is_digit(C), !, numb(Cs, L, R).
numb( [],  L, L).


%% digits( -NumChars, +Chars, -RemainingChars)
digits( [C|Cs], [C|L], R) :-
	is_digit(C), !,  digits(Cs, L, R).
digits( [], L, L).


%% identifier( -NumChars, +Chars, -RemainingChars)
identifier( [C|Cs], [C|List], Rest) :-
	is_alphanum(C), !, identifier(Cs, List, Rest).
identifier( [], Rest, Rest).


%% quote_item( -NumChars, +Chars, -RemainingChars)
quote_item( [C|Cs], [C|L], R) :- % 39 == ~'
	C =\= 39, quote_item( Cs, L, R).
quote_item( [], L, L).

%% string( -NumChars, +Chars, -RemainingChars)
string( [C|Cs], [C|L], R) :-  % 34 == ~"
	C =\= 34, string( Cs, L, R).
string( [], L, L).


symbol_val( _, S, S)            :- keyword(S), !.
symbol_val( _, S, S)            :- op_decl(S,_,_), !.
symbol_val( C, S, uid(S) )      :- is_upper(C), !.
symbol_val( C, S, lid(S) )      :- is_lower(C), !.


keyword('STOP').
keyword('SKIP').
keyword(done).
keyword(if).
keyword(then).
keyword(tt).
keyword(ff).
keyword(true).
keyword(false).


%=====================
% auxiliary predicates
%=====================

%% valid_id( +Chars )
%      holds if Chars denote a legal identifier
%
valid_id( [] )     :- !.
valid_id( [C|Cs] ) :- is_alphanum(C), valid_id(Cs).


%% is_alphanum( +Ascii )
%      holds if Ascii denotes a valid character in an identifier
%
is_alphanum( C ) :- is_alpha( C ), !.
is_alphanum( C ) :- is_digit( C ), !.
is_alphanum( 95 ).  % 95 == ~_
is_alphanum( 39 ).  % 39 == ~'


%% is_digit( +Ascii )
is_digit( C ) :-
	48 =< C,
	C =< 57.


%
% valid characters starting a symbol
%
%% is_lower( +Ascii )
is_lower( C ) :-
	97 =< C,
	C =< 122.
%% is_upper( +Ascii )
is_upper( C ) :-
	65 =< C,
	C =< 90.

%% is_alpha( +Ascii )
is_alpha( C ) :- is_lower( C ), !.
is_alpha( C ) :- is_upper( C ).


%% is_sign( +Ascii )
is_sign( 43 ).  % 43 == ~+
is_sign( 45 ).  % 45 == ~-


%% is_exponent( +Ascii )
is_exponent( 101 ).  % 101 == ~e
is_exponent(  69 ).  % 69  == ~E


%% is_white_space( +Ascii )
is_white_space( 32 ).
is_white_space( 9 ).  % HT


%
% change the internal sign of a number if necessar.
%
%% sign_extension( +Ascii )
sign_extension( 43, S, S ) :- !.  % 43 == ~+
sign_extension( 45, S1, S2 ) :- S2 is -S1.  % 45 == ~-

%
%% aton( +Asciis, -Number)
% aton( L, N ) :-
%       converts a list of digits L into a number (integer or float).
%
aton( L, N ) :-
	integral_part( L, _, 0, N ).


%% integral_part( +Asciis, -Asciis', +Integer, -Integer' )
%integral_part( [46|L], [46|L], N, N ) :- !.  % 46 == ~.
integral_part( [C|L], [C|L], N, N ) :-
	is_exponent( C ), !.
integral_part( [], [], N, N ) :- !.
integral_part( [C|L], L1, A, V ) :-
	V1 is (C-48)+A*10,
	integral_part( L, L1, V1, V ).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                         P A R S E R  (in DCG)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% operator declarations
statement(op_defn(O,T,A,P))    --> 
        op_type(T), op_assoc(A), [num(P)], 
        {P>=0,P=<1200,!}, op_name(O), [end].

statement(end_of_file)        --> [end_of_file].

% any other input expression defined by operators
statement(E)                   --> expression(E), [end].

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%               O P E R A T O R     P A R S E R  (in DCG)
%
% Treating terms as ops, an op is declared as
%     op(N,T,O)
% where N is the precedence between 0 and 1200,
%       T is the type of operator: fx, fy, xf, yf, xfx, xfy, yfx, and
%       O is name of the op.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

expression(T)                  --> term(1200,_,T).

term(N,M,T)                    --> term1(N,M1,L), term3(N,M1,L,M,T).

%
% non left-recursive terms
%
term1(_,0,ifthen(B,E))         --> [if], term(999,_,B), [then], term(999,_,E).
term1(_,0,func(F,N,[A|As]))    --> 
       function_name(F), ['('], term(999,_,A), arguments(As), [')'],
       {!, length([A|As],N)}.
term1(_,0, PT )                --> ['('], {!}, expression(PT), [')'].
term1(_,0, box(A) )            --> ['[['], term(999,_,A), [']]'], {!}.
term1(_,0, diamond(A) )        --> ['<<'], term(999,_,A), ['>>'], {!}.
term1(_,0, set([A|As]) )       --> 
       ['{'], term(999,_,A), arguments(As), ['}'], {!}.
term1(_,0, [])                 --> ['[]'].
term1(_,0, set([]))            --> ['{}'].
term1(_,0, var(void))          --> [var('_')].     % anonymous variable
term1(_,0, var(T))             --> [var(T)], { T \== '_' }.
term1(_,0, const(T))           --> [const(T)].
term1(_,0, num(T))             --> [num(T)].
term1(_,0, uid(T))             --> [uid(T)].
term1(_,0, lid(T))             --> [lid(T)].
term1(_,0, bool(true))         --> [true].
term1(_,0, bool(false))        --> [false].
term1(_,0, form(tt))           --> [tt].
term1(_,0, form(ff))           --> [ff].
term1(_,0, T)                  --> [T], {keyword(T)}.

term1(_,0,[E|Es])              --> 
       ['['], expression(E), expression_list(Es), [']'].
term1(B,N,T)                   --> [O], 
       {op_decl(O,fx,N), N=<B,N1 is N-1}, 
       term(N1,_,T1), {check_sign(O,T1,T)}.
term1(B,N,T)                   --> [O], 
       {op_decl(O,fy,N),N=<B}, 
       term(N,_,T1), {check_sign(O,T1,T)}.
term1(_,0,T)                   --> [T], {op_decl(T,_,_)}. 

%
% left recursive terms
%
term3(N,LLP,LLPT,M,PT)          -->
       term2(N,LLP,LLPT,LP,LPT), 
       term3(N,LP,LPT,M,PT).
term3(_,M,PT,M,PT)              --> [].

term2(N,M1,LPT,M,op(O,LPT))     --> [O], 
       {op_decl(O,xf,M),M1<M,M=<N}.
term2(N,M1,LPT,M,op(O,LPT))     --> [O], 
       {op_decl(O,yf,M), M1 =< M, M =< N}.
term2(N,M1,LPT,M,op(O,LPT,RPT)) --> [O],
       {op_decl(O,xfx,M),M1 < M, M =< N, N1 is M - 1}, 
       term(N1,_,RPT).
term2(N,M1,LPT,M,op(O,LPT,RPT)) --> [O],
       {op_decl(O,yfx,M), M1 =< M, M =< N, N1 is M - 1}, 
       term(N1,_,RPT).
term2(N,M1,LPT,M,op(O,LPT,RPT)) --> [O],
       {op_decl(O,xfy,M), M1 < M, M =< N}, 
       term(M,_,RPT).


arguments([A|As])       --> [','], term(999,_,A), arguments(As).
arguments([])           --> [].

function_name(uid(F))   --> [uid(F)].
function_name(lid(F))   --> [lid(F)].

expression_list(E)      --> ['|'], {!}, expression(E).
expression_list([E|Es]) --> [','], {!}, expression(E), expression_list(Es).
expression_list([])     --> [].

op_type(infix)          --> [lid(infix)].
op_type(prefix)         --> [lid(prefix)].
op_type(postfix)        --> [lid(postfix)].

op_assoc(non)           --> [lid(non)].
op_assoc(assoc)         --> [lid(assoc)].
op_assoc(left)          --> [lid(left)].
op_assoc(right)         --> [lid(right)].

op_name(O)              --> [uid(O)].
op_name(O)              --> [lid(O)].
op_name(O)              --> [const(O)].


check_sign( '+', num(T), num(T) ) :- !.
check_sign( '-', num(T), num(T1) ) :- !, T1 is -T.
check_sign( O  , T,      op(O,T) ).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%                  O P E R A T O R     D A T A B A S E 
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

  % clauses for operator declarations
:- multifile operatorN/3 ,operator3/3, operator2/3, operator1/3, op_decl/3.
% :- dynamic operatorN/3 ,operator3/3, operator2/3, operator1/3, op_decl/3.

binary_op( O ) :- op_decl(O,xfx,_), !.
binary_op( O ) :- op_decl(O,xfy,_), !.
binary_op( O ) :- op_decl(O,yfx,_).


invalid_op_decl(infix,assoc).
invalid_op_decl(prefix,left).
invalid_op_decl(prefix,right).
invalid_op_decl(postfix,left).
invalid_op_decl(postfix,right).


%% add_op( +O, +T, +A, +P )
%      holds if the operator O with type T, associativity A and
%      precedence P can be added into the database; false if
%      O has already been defined
% 
add_op( O, infix,  non,   P ) :-
        add_op( O, xfx, P ), !.
add_op( O, infix,  left,  P ) :-
        add_op( O, yfx, P ), !.
add_op( O, infix,  right, P ) :-
        add_op( O, xfy, P ), !.
add_op( O, prefix, non,   P ) :-
        add_op( O, fx, P ), !.
add_op( O, prefix, assoc, P ) :-
        add_op( O, fy, P ), !.
add_op( O, postfix, non,   P ) :-
        add_op( O, xf, P ), !.
add_op( O, postfix, assoc,  P ) :-
        add_op( O, yf, P ).

        
add_op( O, T, P ) :-
        \+ op_decl(O,T,_),
        assert( op_decl(O,T,P) ),
        name( O, Os ),
        add_operator( O, Os ).


add_operator( _, Os ) :-
        valid_id( Os ), !.
add_operator( O, Os ) :-
        % not valid_id( Os ), !.
        length( Os, N ),
        append( Os, S2, S1 ),
        add_new_op( N, O, S1, S2 ).


%% add_new_op( N, O, S1, S2 )
%      adding a new operator O with N characters in S1-S2
%
add_new_op( 3, O, S1, S2 ) :-
        !, assert( operator3(S1,S2,O) ).
add_new_op( 2, O, S1, S2 ) :-
        !, assert( operator2(S1,S2,O) ).
add_new_op( 1, O, S1, S2 ) :-
        !, assert( operator1(S1,S2,O) ).
add_new_op( _, O, S1, S2 ) :-
        assert( (operatorN(S1,S2,O):-!) ).

%% operator( L1, L2, O )
%      holds if L1-L2 composes of a legal operator symbol O
%      O must be composed of punctuation characters only.
%        
operator( S1, S2, O ) :-
        operatorN( S1, S2, O ), !.
operator( S1, S2, O ) :-
        operator3( S1, S2, O ), !.
operator( S1, S2, O ) :-
        operator2( S1, S2, O ), !.
operator( S1, S2, O ) :-
        operator1( S1, S2, O ).

        
find_op(O,infix,non,P)     :- op_decl(O,xfx,P).
find_op(O,infix,left,P)    :- op_decl(O,yfx,P).
find_op(O,infix,right,P)   :- op_decl(O,xfy,P).
find_op(O,postfix,non,P)   :- op_decl(O,xf,P).
find_op(O,postfix,assoc,P) :- op_decl(O,yf,P).
find_op(O,prefix,non,P)    :- op_decl(O,fx,P).
find_op(O,prefix,assoc,P)  :- op_decl(O,fy,P).

