INTERFACE M3CLex;

(***************************************************************************)
(* Copyright (C) Olivetti 1989 *)
(* All Rights reserved *)
(* *)
(* Use and copy of this software and preparation of derivative works based *)
(* upon this software are permitted to any person, provided this same *)
(* copyright notice and the following Olivetti warranty disclaimer are *)
(* included in any copy of the software or any modification thereof or *)
(* derivative work therefrom made by any person. *)
(* *)
(* This software is made available AS IS and Olivetti disclaims all *)
(* warranties with respect to this software, whether expressed or implied *)
(* under any law, including all implied warranties of merchantibility and *)
(* fitness for any purpose. In no event shall Olivetti be liable for any *)
(* damages whatsoever resulting from loss of use, data or profits or *)
(* otherwise arising out of or in connection with the use or performance *)
(* of this software. *)
(***************************************************************************)

IMPORT Text;
IMPORT IO;
IMPORT M3AST_LX;
IMPORT M3CHash, M3CToken, M3CReservedWord, M3CSrcPos;

(* Lexer for Modula 3. A lexer reads characters from an 'IO.Stream' and
   presents them one token ('M3CToken.T') at a time; identifiers and literals
   are entered into hash tables supplied by the caller. *)

(* WARNING: This lexer makes extensive use of <*INLINE*> procedures. The
   parser which uses it must inline the lexer functions, and the lexer in turn
   depends on being able to inline 'M3CHash' functions. Without inlining the
   lexer will be uncomfortably slow. *)

TYPE
  T <: ROOT;
  (* Opaque type representing a lexer. *)

  (* A 'CallBack' object is used by the lexer to notify the user of items
     that the lexer cannot handle itself and therefore does not return as
     ordinary tokens: bad characters, comments and pragmas. *)
  CallBack = OBJECT
  METHODS
    badChar(ch: CHAR) RAISES {};
    (* Called when a bad character is found; 'ch' is that character. *)
    comment(comment: Text.T) RAISES {};
    (* Called when a comment is found; 'comment' is its text. *)
    pragma(pragma: Text.T) RAISES {};
    (* Called when a pragma is found; 'pragma' is its text. *)
  END;

  Symbol_rep = M3AST_LX.Symbol_rep;
  (* Represents an identifier. *)
  Literal_rep = M3AST_LX.Literal_rep;
  (* Represents a numeric, character or text literal. *)

REVEAL
  Symbol_rep <: M3CHash.Id;
  Literal_rep <: M3CHash.Id;
  (* Identifiers and literals are both stored in hash tables, so both
     representations are revealed to be subtypes of 'M3CHash.Id'. *)

PROCEDURE New(
    s: IO.Stream;
    identifiers: M3CReservedWord.Table;
    literals: M3CHash.Table;
    callBack: CallBack;
    init: T := NIL)
    : T
    RAISES {};
(* Creates a new lexer.

   's'            the stream the lexer will read from.
   'identifiers'  the hash table into which any identifiers found are put.
                  This table already contains all the reserved words, hence
                  only one hash lookup/entry operation is needed for a
                  reserved word or identifier.
   'literals'     the hash table into which any literals found are put.
   'callBack'     the object whose appropriate method is called when a bad
                  character, comment or pragma is found.
   'init'         if NIL (the usual case) 'New' creates and initializes a new
                  lexer. If non-NIL, 'New' just initializes the lexer 'init'.
                  'init' should only be non-NIL when initializing a newly
                  created object which is a subtype of 'M3CLex.T'. *)

(* It is a checked runtime error to use a NIL lexer as an argument to any of
   the following procedures. *)

<*INLINE*> PROCEDURE Current(t: T): M3CToken.T RAISES {};
(* Returns the current token. *)

<*INLINE*> PROCEDURE Next(t: T): M3CToken.T RAISES {IO.Error};
(* Advances to the next token and then returns the (new) current token.
   'IO.Error' is the only exception this procedure is declared to raise. *)

<*INLINE*> PROCEDURE Position(t: T): M3CSrcPos.T RAISES {};
(* Returns the current position. *)

<*INLINE*> PROCEDURE Literal(t: T): Literal_rep RAISES {};
(* If 'Current(t) IN M3CToken.T.Literals' returns the representation of the
   current literal. If 'NOT Current(t) IN M3CToken.T.Literals' the result of
   'Literal' is undefined.

   When the lexer encounters a literal it creates a text which it puts into
   the literal hash table. The texts can be distinguished as follows:

     Valid numeric literals: text starts with a digit and ends with a hex
                             digit
     Valid text literals:    text starts and ends with a double quote
                             character
     Valid char literals:    text starts and ends with a single quote
                             character

   Sometimes the lexer finds an invalid literal. A numeric literal is invalid
   if it is a based number and one of its digits is out of range, e.g. 2_13.
   A character or text literal could be missing its closing quote. In these
   cases the lexer returns an invalid literal.

   Invalid literals always have at least one character, and their first
   character can still be used to distinguish the literal type (numerics
   start with a digit, texts with a double quote, chars with a single quote).
   They can be spotted because their last character is inappropriate, e.g.
   not a hex digit if the literal is numeric. *)

<*INLINE*> PROCEDURE Identifier(t: T): Symbol_rep RAISES {};
(* If 'Current(t) = M3CToken.T.Identifier' the result is a handle for the
   hashed text of the current identifier. Otherwise the result is
   undefined. *)

PROCEDURE Disable(t: T) RAISES {};
(* Disables the lexer: while 't' is disabled any call of 'Next' will return
   'M3CToken.T.Void' and the position will not advance. 'Reset' enables the
   lexer again. *)

PROCEDURE Disabled(t: T): BOOLEAN RAISES {};
(* Returns TRUE iff 't' is disabled. *)

PROCEDURE Reset(t: T; pos := M3CSrcPos.Null; s: IO.Stream := NIL) RAISES {};
(* Resets 't': sets the current symbol to 'M3CToken.T.Void' and enables 't'
   if it is disabled.

   If 'pos' is not 'M3CSrcPos.Null', sets the lexer position to be 'pos'.

   If 's' is not NIL, sets the lexer stream to be 's' and sets the lexer
   position to be 'pos' if 'pos # M3CSrcPos.Null', or line 1 offset 0
   otherwise. *)

PROCEDURE TokenToText(token: M3CToken.T): Text.T RAISES {};
(* Returns a text describing the given token, suitable for use in error
   messages. *)

PROCEDURE CurrentTokenToText(t: T): Text.T RAISES {};
(* Returns a text describing the current token for 't'. This may give more
   information than 'TokenToText(Current(t))' because it incorporates
   identifier names and literal values if appropriate. *)

END M3CLex.