(* Copyright (C) 1992, Digital Equipment Corporation *) (* All rights reserved. *) (* See the file COPYRIGHT for a full description. *) (* File: Scanner.m3 *) (* Last modified on Mon Jun 8 09:19:24 PDT 1992 by kalsow *) (* modified on Sat Mar 16 00:25:08 1991 by muller *) (* modified on Fri Oct 19 10:52:56 1990 by nr@princeton.edu *) UNSAFE MODULE Scanner; IMPORT Error, String, Token, Rd, UnsafeRd, Word; IMPORT Target, M3, Thread, Host, MBuf; CONST MaxFiles = 200; MaxStack = 40; MaxLines = 100000; MaxString = 4095; MaxBuffer = 4096; UndoPad = 4; EOFChar = '\000'; MinRsrvd = ORD (LAST (Token.T)) + 1; MaxRsrvd = 50; TYPE CharSet = SET OF CHAR; TK = Token.T; StringList = UNTRACED REF RECORD str: String.T; next: StringList END; CONST WhiteSpace = CharSet {' ', '\n', '\t', '\f'}; AlphaNumerics = CharSet {'a'..'z', 'A'..'Z', '0'..'9', '_'}; Digits = CharSet {'0'..'9'}; OctalDigits = CharSet {'0'..'7'}; HexAlpha = CharSet {'a'..'f'}; HexALPHA = CharSet {'A'..'F'}; TYPE InputBuffer = REF ARRAY [-UndoPad .. MaxBuffer-1] OF CHAR; TYPE FileState = RECORD ch : CHAR; offs : INTEGER; (* fileno * MaxLines + lineno *) rd : Rd.T; buf : InputBuffer; buf_len : INTEGER; buf_ptr : INTEGER; sym : Symbol; ignore : StringList; (* pragmas to ignore *) END; VAR input : Rd.T; input_buf : InputBuffer; input_len : INTEGER; input_ptr : INTEGER; ch : CHAR; missing : String.T; ignore : StringList; nFiles : INTEGER; files : ARRAY [0..MaxFiles] OF String.T; local_files : ARRAY [0..MaxFiles] OF String.T; tos : INTEGER; stack : ARRAY [0..MaxStack] OF FileState; buf : ARRAY [0..MaxString] OF CHAR; reserved : ARRAY [0..MaxRsrvd] OF M3.Value; nReserved : INTEGER; LINE : String.T; NOWARN : String.T; PRAGMA : String.T; PROCEDURE Initialize () = BEGIN missing := String.Add ("_missing_id_"); nFiles := 0; tos := 0; nReserved := 0; LINE := String.Add ("LINE"); NOWARN := String.Add ("NOWARN"); PRAGMA := String.Add ("PRAGMA"); END Initialize; PROCEDURE Reset () = <*FATAL ANY*> BEGIN WHILE (tos > 0) DO Pop (); Host.CloseRd (input); END; input_buf := NIL; input_len := 0; input_ptr := 0; ch := ' '; ignore := NIL; offset := 0; nLines := 0; nPushed := 0; cur.token := TK.tEOF; END Reset; PROCEDURE Push (name: String.T; file: Rd.T) = BEGIN INC (nPushed); WITH z = stack[tos] DO z.ch := ch; z.offs := offset; z.sym := cur; z.rd := input; z.buf := input_buf; z.buf_len := input_len; z.buf_ptr := input_ptr; z.ignore := ignore; END; INC (tos); offset := FileNumber (name) * MaxLines + 1; ch := ' '; ignore := NIL; input := file; input_ptr := 0; input_len := 0; input_buf := stack[tos].buf; IF (input_buf = NIL) THEN input_buf := NEW (InputBuffer); stack[tos].buf := input_buf; END; GetToken (); (* prime the input stream *) END Push; PROCEDURE Pop () = BEGIN DEC (tos); WITH z = stack[tos] DO ch := z.ch; offset := z.offs; cur := z.sym; input := z.rd; input_buf := z.buf; input_ptr := z.buf_ptr; input_len := z.buf_len; ignore := z.ignore; END; END Pop; PROCEDURE FileNumber(filename:String.T): INTEGER = (* returns index into files of filename, adding it if it doesn't exist *) BEGIN (* often we'll hit the current file *) WITH index = offset DIV MaxLines DO IF files[index] = filename THEN RETURN index END; END; (* linear search is painful, but N < 200 and comparison being an integer compare should mitigate the pain *) FOR i := 0 TO nFiles-1 DO IF files[i] = filename THEN RETURN i END; END; files[nFiles] := filename; local_files[nFiles] := NIL; INC(nFiles); RETURN nFiles-1; END FileNumber; PROCEDURE Here (VAR file: String.T; VAR line: INTEGER) = BEGIN file := files [offset DIV MaxLines]; line := offset MOD MaxLines; END Here; PROCEDURE LocalHere (VAR file: String.T; VAR line: INTEGER) = VAR fnum := offset DIV MaxLines; BEGIN IF (local_files[fnum] = NIL) THEN local_files[fnum] := String.FileTail (files[fnum]); END; file := local_files [fnum]; line := offset MOD MaxLines; END LocalHere; PROCEDURE Match1 (t: Token.T; READONLY a: Token.Set) = BEGIN IF (cur.token = t) THEN GetToken (); ELSE Match (t, a, Token.EmptySet); END; END Match1; PROCEDURE Match (t: Token.T; READONLY a, b: Token.Set) = VAR fail: Token.Set; BEGIN IF (cur.token = t) THEN GetToken (); ELSE fail := a + b + Token.Set {t}; Fail ("missing \'" & String.ToText (Token.name [t]) & "\'", fail); IF (cur.token = t) THEN GetToken () END; END; END Match; PROCEDURE MatchID1 (READONLY a: Token.Set): String.T = VAR id: String.T; BEGIN IF (cur.token = TK.tIDENT) THEN id := cur.string; GetToken (); ELSE id := MatchID (a, Token.EmptySet); END; RETURN id; END MatchID1; PROCEDURE MatchID (READONLY a, b: Token.Set): String.T = VAR id: String.T; BEGIN IF (cur.token = TK.tIDENT) THEN id := cur.string; GetToken (); ELSE Fail ("missing identifier", a + b + Token.Set {TK.tIDENT}); IF (cur.token = TK.tIDENT) THEN id := cur.string; GetToken (); ELSE id := missing; END; END; RETURN id; END MatchID; PROCEDURE Fail (msg: TEXT; READONLY stop: Token.Set) = VAR t: TEXT; BEGIN t := "syntax error: " & msg; CASE cur.token OF | TK.tIDENT, TK.tTEXTCONST, TK.tREALCONST, TK.tLONGREALCONST, TK.tEXTENDEDCONST => Error.Str (cur.string, t); | TK.tCARDCONST, TK.tCHARCONST => Error.Int (cur.value, t); ELSE (* no extra info *) Error.Msg (t); END; WHILE (cur.token # TK.tEOF) AND NOT (cur.token IN stop) DO GetToken (); END; END Fail; PROCEDURE NoteReserved (name: String.T; value: M3.Value) = BEGIN <* ASSERT String.GetClass (name) = 0 *> String.SetClass (name, MinRsrvd + nReserved); reserved [nReserved] := value; INC (nReserved); END NoteReserved; <*INLINE*> PROCEDURE GetCh () = <*FATAL Rd.Failure, Thread.Alerted*> BEGIN IF (input_ptr < input_len) THEN ch := input_buf[input_ptr]; INC (input_ptr); ELSIF UnsafeRd.FastEOF (input) THEN ch := EOFChar; ELSE input_len := Rd.GetSub (input, SUBARRAY(input_buf^, UndoPad, MaxBuffer)); input_ptr := 0; GetCh (); END; END GetCh; PROCEDURE GetToken () = VAR i: INTEGER; BEGIN LOOP (* skip white space *) WHILE (ch IN WhiteSpace) DO IF (ch = '\n') THEN INC (offset); INC (nLines) END; GetCh (); END; (* remember where this token starts *) cur.offset := offset; CASE ch OF | 'a'..'z', 'A'..'Z' => (* scan an identifier *) i := 0; WHILE (ch IN AlphaNumerics) DO buf [i] := ch; INC (i); GetCh (); END; cur.string := String.FromStr (buf, i); i := String.GetClass (cur.string); IF (i < ORD (Token.First_Keyword)) THEN cur.token := TK.tIDENT; cur.defn := NIL; ELSIF (i <= ORD (LAST (TK))) THEN cur.token := VAL (i, TK); cur.defn := NIL; ELSE cur.token := TK.tIDENT; cur.defn := reserved [i - MinRsrvd]; END; RETURN; | '0'..'9' => ScanNumber (); RETURN; | '\'' => ScanChar (); RETURN; | '\"' => ScanText (); RETURN; | '+' => cur.token := TK.tPLUS; GetCh (); RETURN; | '-' => cur.token := TK.tMINUS; GetCh (); RETURN; | '/' => cur.token := TK.tSLASH; GetCh (); RETURN; | '&' => cur.token := TK.tAMPERSAND; GetCh (); RETURN; | ',' => cur.token := TK.tCOMMA; GetCh (); RETURN; | ';' => cur.token := TK.tSEMI; GetCh (); RETURN; | '[' => cur.token := TK.tLBRACKET; GetCh (); RETURN; | '{' => cur.token := TK.tLBRACE; GetCh (); RETURN; | '^' => cur.token := TK.tARROW; GetCh (); RETURN; | '#' => cur.token := TK.tSHARP; GetCh (); RETURN; | ')' => cur.token := TK.tRPAREN; GetCh (); RETURN; | ']' => cur.token := TK.tRBRACKET; GetCh (); RETURN; | '}' => cur.token := TK.tRBRACE; GetCh (); RETURN; | '|' => cur.token := TK.tBAR; GetCh (); RETURN; | EOFChar => cur.token := TK.tEOF; RETURN; | '*' => (* '*>' '*' *) GetCh (); IF (ch = '>') THEN cur.token := TK.tENDPRAGMA; GetCh (); ELSE cur.token := TK.tASTERISK; END; RETURN; | '=' => (* '=' '=>' *) GetCh (); IF (ch = '>') THEN cur.token := TK.tIMPLIES; GetCh (); ELSE cur.token := TK.tEQUAL; END; RETURN; | ':' => (* ':' ':=' *) GetCh (); IF (ch = '=') THEN cur.token := TK.tASSIGN; GetCh (); ELSE cur.token := TK.tCOLON; END; RETURN; | '.' => (* '.' '..' *) GetCh (); IF (ch = '.') THEN cur.token := TK.tDOTDOT; GetCh (); ELSE cur.token := TK.tDOT; END; RETURN; | '(' => (* '('*' '(' *) GetCh (); IF (ch = '*') THEN ScanComment (); ELSE cur.token := TK.tLPAREN; RETURN; END; | '>' => (* '>' '>=' *) GetCh (); IF (ch = '=') THEN cur.token := TK.tGREQUAL; GetCh (); ELSE cur.token := TK.tGREATER; END; RETURN; | '<' => (* '<' '<=' '<:' '<*' *) GetCh (); IF (ch = '=') THEN cur.token := TK.tLSEQUAL; GetCh (); ELSIF (ch = ':') THEN cur.token := TK.tSUBTYPE; GetCh (); ELSIF (ch = '*') THEN ScanPragma (); ELSE cur.token := TK.tLESS; END; RETURN; ELSE Error.Int (ORD (ch), "Illegal character"); GetCh (); END; (*case*) END; (*loop*) END GetToken; PROCEDURE ScanNumber () = CONST MaxInt = Target.MAXINT; MaxDecimal = MaxInt DIV 10; MaxWord = Word.Not (0); VAR val, len, i, base, digit: INTEGER; intTooLarge := FALSE; max, wordVal: Word.T; BEGIN (* scan the decimal digits *) val := 0; i := 0; WHILE (ch IN Digits) DO buf[i] := ch; INC (i); IF (val > MaxDecimal) THEN intTooLarge := TRUE; val := val DIV 10; END; val := val * 10; digit := ORD (ch) - ORD ('0'); IF (digit > MaxInt - val) THEN intTooLarge := TRUE; digit := 0; END; INC (val, digit); GetCh (); END; IF (ch = '_') THEN IF intTooLarge THEN Error.Msg ("integer too large"); END; (* scan a based integer *) base := val; IF (base < 2) OR (16 < base) THEN Error.Int (base, "illegal base for based literal, 10 used"); base := 10; END; len := 0; max := Word.Divide (MaxWord, base); wordVal := 0; LOOP GetCh (); IF (ch IN Digits) THEN digit := ORD (ch) - ORD ('0'); ELSIF (ch IN HexALPHA) THEN digit := ORD (ch) - ORD ('A') + 10; ELSIF (ch IN HexAlpha) THEN digit := ORD (ch) - ORD ('a') + 10; ELSIF (len = 0) THEN Error.Msg("missing digits in based literal"); EXIT ELSE val := wordVal; EXIT; END; IF (digit >= base) THEN Error.Int (digit, "illegal digit in based literal"); ELSIF Word.GT (wordVal, max) THEN Error.Msg ("based value too large"); ELSE wordVal := Word.Times (wordVal, base); END; IF Word.GT (digit, Word.Minus (MaxWord, wordVal)) THEN Error.Msg ("based value too large"); ELSE wordVal := Word.Plus (wordVal, digit); END; INC (len); END; cur.token := TK.tCARDCONST; cur.value := val; ELSIF (ch = '.') THEN (* scan a floating point number *) buf[i] := '.'; INC (i); GetCh (); (* eat the '.' *) IF (ch = '.') THEN (* we saw "dddd.." *) (***** Rd.UnGetChar (input); *****) DEC (input_ptr); input_buf[input_ptr] := '.'; cur.token := TK.tCARDCONST; cur.value := val; IF intTooLarge THEN Error.Msg ("integer too large"); END; RETURN; END; (* scan the fractional digits *) IF NOT (ch IN Digits) THEN Error.Msg ("missing digits in real fraction"); buf[i] := '0'; INC (i); END; WHILE (ch IN Digits) DO buf[i] := ch; INC (i); GetCh () END; (* check for the exponent *) IF (ch = 'e') OR (ch = 'E') THEN buf[i] := 'e'; INC (i); cur.token := TK.tREALCONST; ELSIF (ch = 'd') OR (ch = 'D') THEN buf[i] := 'e'(* NOT 'd' for C *); INC (i); cur.token := TK.tLONGREALCONST; ELSIF (ch = 'x') OR (ch = 'X') THEN buf[i] := 'e'(* NOT 'x' for C *); INC (i); cur.token := TK.tEXTENDEDCONST; ELSE (* real constant with no exponent *) cur.token := TK.tREALCONST; cur.string := String.FromStr (buf, i); RETURN ; END; GetCh (); (* eat the exponent entry char *) (* get the exponent sign *) IF (ch = '+') THEN buf[i] := '+'; INC (i); GetCh (); ELSIF (ch = '-') THEN buf[i] := '-'; INC (i); GetCh (); ELSE buf[i] := '+'; END; (* finally, get the exponent digits *) IF NOT (ch IN Digits) THEN Error.Msg ("missing digits in real exponent"); buf[i] := '0'; INC (i); END; WHILE (ch IN Digits) DO buf[i] := ch; INC (i); GetCh (); END; cur.string := String.FromStr (buf, i); ELSE (* already scanned a decimal integer *) cur.token := TK.tCARDCONST; cur.value := val; IF intTooLarge THEN Error.Msg ("integer too large"); END; END; END ScanNumber; PROCEDURE ScanChar () = BEGIN cur.token := TK.tCHARCONST; cur.value := 0; GetCh (); IF (ch = '\'') THEN Error.Msg ("missing character in character literal"); GetCh (); RETURN; ELSIF (ch = '\n') OR (ch = '\r') OR (ch = '\f') THEN Error.Msg ("end-of-line encountered in character literal"); RETURN; ELSIF (ch = '\\') THEN GetCh (); IF (ch = 'n') THEN cur.value := ORD ('\n'); GetCh (); ELSIF (ch = 't') THEN cur.value := ORD ('\t'); GetCh (); ELSIF (ch = 'r') THEN cur.value := ORD ('\r'); GetCh (); ELSIF (ch = 'f') THEN cur.value := ORD ('\f'); GetCh (); ELSIF (ch = '\\') THEN cur.value := ORD ('\\'); GetCh (); ELSIF (ch = '\'') THEN cur.value := ORD ('\''); GetCh (); ELSIF (ch = '\"') THEN cur.value := ORD ('\"'); GetCh (); ELSIF (ch IN OctalDigits) THEN cur.value := GetOctalChar (); ELSE Error.Msg ("unknown escape sequence in character literal"); END; ELSIF (ch = EOFChar) THEN Error.Msg ("EOF encountered in character literal"); RETURN ; ELSE (* a simple character literal *) cur.value := ORD (ch); GetCh (); END; IF (ch # '\'') THEN Error.Msg ("missing closing quote on character literal"); ELSE GetCh (); END; END ScanChar; PROCEDURE ScanText () = VAR i: INTEGER; mbuf: MBuf.T := NIL; PROCEDURE Stuff (c: CHAR) = BEGIN IF (i < NUMBER (buf)) THEN buf [i] := c; INC (i); ELSIF (i = NUMBER (buf)) THEN mbuf := MBuf.New (); MBuf.PutSub (mbuf, buf); MBuf.PutChar (mbuf, c); INC (i); ELSE MBuf.PutChar (mbuf, c); INC (i); END; END Stuff; BEGIN i := 0; cur.token := TK.tTEXTCONST; GetCh (); LOOP IF (ch = '\"') THEN GetCh (); EXIT; ELSIF (ch = '\n') OR (ch = '\r') OR (ch = '\f') THEN Error.Msg ("end-of-line encountered in text literal"); EXIT; ELSIF (ch = '\\') THEN GetCh (); IF (ch = 'n') THEN Stuff ('\n'); GetCh (); ELSIF (ch = 't') THEN Stuff ('\t'); GetCh (); ELSIF (ch = 'r') THEN Stuff ('\r'); GetCh (); ELSIF (ch = 'f') THEN Stuff ('\f'); GetCh (); ELSIF (ch = '\\') THEN Stuff ('\\'); GetCh (); ELSIF (ch = '\'') THEN Stuff ('\''); GetCh (); ELSIF (ch = '\"') THEN Stuff ('\"'); GetCh (); ELSIF (ch IN OctalDigits) THEN Stuff (VAL (GetOctalChar (), CHAR)); ELSE Error.Msg ("unknown escape sequence in text literal"); END; ELSIF (ch = EOFChar) THEN Error.Msg ("EOF encountered in text literal"); EXIT; ELSE (* a simple character *) Stuff (ch); GetCh (); END; END; IF (mbuf = NIL) THEN cur.string := String.FromStr (buf, i); ELSE cur.string := String.Add (MBuf.ToText (mbuf)); END; END ScanText; PROCEDURE GetOctalChar (): INTEGER = VAR value: INTEGER; BEGIN <* ASSERT ch IN OctalDigits *> value := ORD (ch) - ORD ('0'); GetCh (); IF NOT (ch IN OctalDigits) THEN BadOctal (); RETURN value END; value := value * 8 + ORD (ch) - ORD ('0'); GetCh (); IF NOT (ch IN OctalDigits) THEN BadOctal (); RETURN value END; value := value * 8 + ORD (ch) - ORD ('0'); GetCh (); RETURN value; END GetOctalChar; PROCEDURE BadOctal () = BEGIN Error.Msg ("octal character constant must have 3 digits"); END BadOctal; PROCEDURE ScanComment () = VAR nest, save: INTEGER; start: INTEGER; BEGIN start := cur.offset; GetCh (); nest := 1; WHILE (nest > 0) DO IF (ch = '*') THEN GetCh (); IF (ch = ')') THEN DEC (nest); GetCh (); END; ELSIF (ch = '(') THEN GetCh (); IF (ch = '*') THEN INC (nest); GetCh (); END; ELSIF (ch = EOFChar) THEN save := offset; offset := start; Error.Msg ("EOF encountered in comment"); offset := save; nest := 0; ELSIF (ch = '\n') THEN INC (offset); INC (nLines); GetCh (); ELSE GetCh (); END; END; END ScanComment; PROCEDURE ScanPragma () = VAR nest, save, start, i, lineno, fileno: INTEGER; ss: StringList; BEGIN start := cur.offset; GetCh(); (* '*' *) (* skip white space *) WHILE (ch IN WhiteSpace) DO IF (ch = '\n') THEN INC (offset); INC (nLines); END; GetCh(); END; (* scan an identifier *) i := 0; WHILE (ch IN AlphaNumerics) DO buf [i] := ch; INC (i); GetCh (); END; cur.string := String.FromStr (buf, i); cur.token := VAL (String.GetClass (cur.string), TK); IF (Token.First_Pragma<=cur.token) AND (cur.token<=Token.Last_Pragma) THEN RETURN; END; IF (cur.string = LINE) THEN GetToken (); (* LINE *) IF (cur.token # TK.tCARDCONST) THEN Error.Msg ("missing line number on LINE pragma; skipping to \'*>\'"); WHILE (cur.token # TK.tENDPRAGMA) AND (cur.token # TK.tEOF) DO GetToken (); END; IF (cur.token = TK.tENDPRAGMA) THEN GetToken () END; RETURN; END; lineno := cur.value; fileno := offset DIV MaxLines; GetToken (); (* CARD "line number" *) IF (cur.token = TK.tTEXTCONST) THEN fileno := FileNumber (cur.string); GetToken(); (* TEXT "filename" *) END; offset := fileno * MaxLines + lineno - 1; IF (cur.token # TK.tENDPRAGMA) THEN Error.Msg ("missing \'*>\' on LINE pragma"); ELSE GetToken (); (* fetch the next one *) END; RETURN; ELSIF (cur.string = NOWARN) THEN Error.IgnoreWarning (cur.offset); GetToken (); (* NOWARN *) IF (cur.token # TK.tENDPRAGMA) THEN Error.Msg ("missing \'*>\' on NOWARN pragma"); ELSE GetToken (); (* fetch the next one *) END; RETURN; ELSIF (cur.string = PRAGMA) THEN GetToken (); (* PRAGMA *) WHILE (cur.token = TK.tIDENT) OR ((Token.First_Pragma<=cur.token) AND (cur.token<=Token.Last_Pragma)) OR ((Token.First_Keyword<=cur.token) AND (cur.token<=Token.Last_Keyword)) DO ignore := NEW (StringList, str := cur.string, next := ignore); GetToken (); (* IDENT *) IF (cur.token # TK.tCOMMA) THEN EXIT END; GetToken (); (* COMMA *) END; IF (cur.token # TK.tENDPRAGMA) THEN Error.Msg ("missing \'*>\' on PRAGMA pragma"); ELSE GetToken (); (* fetch the next real token *) END; RETURN; ELSE (* scan and ignore the list *) ss := ignore; WHILE (ss # NIL) AND (ss.str # cur.string) DO ss := ss.next END; IF (ss = NIL) THEN Error.WarnStr (2, cur.string, "unrecognized pragma (ignored)"); END; END; (* scan over and ignore the offending pragma *) nest := 1; WHILE (nest > 0) DO IF (ch = '*') THEN GetCh(); IF (ch = '>') THEN DEC(nest); GetCh(); END; ELSIF (ch = '<') THEN GetCh(); IF (ch = '*') THEN INC(nest); GetCh(); END; ELSIF (ch = EOFChar) THEN save := offset; offset := start; Error.Msg ("EOF encountered in pragma"); offset := save; nest := 0; ELSIF (ch = '\n') THEN INC (offset); INC (nLines); GetCh(); ELSE GetCh(); END; END; GetToken (); (* get the next token *) END ScanPragma; BEGIN END Scanner.