(* Copyright (C) 1992, Digital Equipment Corporation *)
(* All rights reserved. *)
(* See the file COPYRIGHT for a full description. *)

(* Created by J.Stolfi on Nov 1990. *)
(* Last modified on Sun Mar 1 14:21:51 PST 1992 by meehan *)
(* modified on Wed Feb 12 12:38:09 PST 1992 by muller *)
(* modified on Wed Nov 20 17:53:46 PST 1991 by stolfi *)

INTERFACE SxSyntax;

(* A syntax table for reading and printing symbolic expressions.

   An SxSyntax.T is an object that describes a particular syntax for
   symbolic expressions.  Although very general in principle, the
   internal representation and the tools in this interface are biased
   towards the "official" symbolic expression syntax, plus a limited
   range of "canonical" extensions and variants of it.

   Specifically, the client can easily modify the standard syntax in
   the following ways:

     * specify filters that are automatically called by Read and Print
       to transform specific REF types or lists beginning with specific
       symbols;

     * provide custom routines for converting symbol names and numeric
       literals into arbitrary values;

     * disable standard token types (text, character, etc.);

     * declare that certain characters should be treated as whitespace.

   With a little more effort, the client also can:

     * add new cases to the '#' notation, e.g. #'0000101000', and
       define other split-syntax characters besides '#';

     * add new token types distinguished by their leading character,
       e.g. `foo or $error.

   Index: symbolic expressions; expressions, symbolic *)

IMPORT List, Rd, Sx, SxSymbol, FWr, Wr, Thread;

TYPE CHARS = ARRAY OF CHAR;

TYPE
  T = Sx.Syntax;
  Private <: ROOT;

REVEAL
  Sx.Syntax = Private BRANDED OBJECT
    METHODS

      Copy(): T;
      (* Returns a copy of the syntax table. *)

      Read(rd: Rd.T; root: SxSymbol.T): REFANY
        RAISES {Sx.ReadError, Rd.EndOfFile, Rd.Failure, Thread.Alerted};
      (* Reads one symbolic expression from the reader, leaving the
         reader positioned at the following character.  Symbols are
         interned relative to "root".

         Here is how Read works:

         An SxSyntax.T object contains a /character dispatch table/
         that maps characters to Parser routines.  Read takes the first
         character of the input stream, gets the corresponding parser
         from this table, and invokes it with the same arguments that
         were passed to Read.

         In any case, the output of the parser (a REFANY value) is a
         candidate for /input filtering/.  The SxSyntax.T object
         contains a pair of tables of /input filter/ routines, one
         keyed by SxSymbol.T and the other by typecode.  If the output
         of the Parser is a List.T beginning with one of the symbols in
         the first table, or is a REF value whose typecode is in the
         second table, Read passes it through the corresponding
         filtering routine.

         In any case, if the resulting REF is the special value
         /NoValue/, Read discards it and repeats the whole process
         again where the Parser left off.  Otherwise, Read returns that
         value as result.

         In the case of tokens characterized by a two-character prefix,
         such as '#|foo|#' in the standard syntax, the leading
         character ('#') is assigned a Parser that reads the next
         character, and picks the appropriate SubParser from a
         second-level character dispatch table.

         Read raises Rd.EndOfFile if it reaches the end of the reader
         while skipping any leading whitespace.  It raises Sx.ReadError
         if the input expression is malformed or incomplete. *)

      ReadUntil(rd: Rd.T; delim: CHAR; root: SxSymbol.T): List.T
        RAISES {Sx.ReadError, Rd.Failure, Thread.Alerted};
      (* Repeatedly reads symbolic expressions from the reader,
         ignoring whitespace, until /delim/ is encountered, returning
         the expressions in a list.  Raises Sx.ReadError if any
         expression is malformed or incomplete, or end-of-file occurs
         before /delim/ is found. *)

      Print(
        fwr: FWr.T;
        value: REFANY;
        elision: Sx.Elision;
        root: SxSymbol.T;
      ) RAISES {Sx.PrintError, Wr.Failure, Thread.Alerted};
      (* Prints a symbolic expression to /fwr/, eliding the object at
         depths and lengths greater than that specified by "elision".

         Symbols in /value/ are printed relative to the given /root/
         (Sx.PrintError is raised if /value/ contains any symbol that
         is not a proper descendant of /root/).

         Here is how Print works:

         First, the given value is a candidate for output filtering.
         The SxSyntax.T object contains a pair of tables of /output
         filter/ routines, one keyed by SxSymbol.T and the other by
         typecode.  If the given value is a List.T beginning with one
         of the symbols in the first table, or is a REF value whose
         typecode is in the second table, Print passes it through the
         corresponding filtering routine.

         The SxSyntax.T object also contains two internal tables of
         specialized printing routines, one keyed by symbols, and the
         other by typecodes.  Print selects the appropriate routine by
         looking up the (possibly filtered) value in these two tables,
         in the same way it selected the output filter.  If this lookup
         fails, Print uses a default printing routine also stored in
         the SxSyntax.T object.

         In any case, Print calls the selected routine to pretty-print
         the given value to /fwr/, possibly calling Print
         recursively. *)

  END;

(**********************************************************)
(* THE STANDARD SYNTAX *)
(**********************************************************)

PROCEDURE Standard(): T;
(* Returns a new copy of the standard syntax table. *)

(**********************************************************)
(* FILTERS *)
(**********************************************************)

VAR (*CONST*) NoValue: REFANY;
(* Any of the input filters, parsers, and converters below may return
   NoValue to signify that the token it just parsed should be ignored
   by Read. *)

PROCEDURE SetInputListFilter(t: T; symbol: SxSymbol.T; filter: Filter);
(* Register /filter/ as a data transformation procedure to be called by
   Read whenever it parses a list starting with the given symbol. *)

PROCEDURE SetInputRefFilter(t: T; typeCode: INTEGER; filter: Filter);
(* Register /filter/ as a data transformation procedure to be called by
   Read whenever it parses a value with given TYPECODE. *)

PROCEDURE SetOutputListFilter(t: T; symbol: SxSymbol.T; filter: Filter);
(* Register /filter/ as a data transformation procedure to be called by
   Print before printing a list starting with the given symbol. *)

PROCEDURE SetOutputRefFilter(t: T; typeCode: INTEGER; filter: Filter);
(* Register /filter/ as a data transformation procedure to be called by
   Print before printing an object with given type code. *)

EXCEPTION FilterError(TEXT);

TYPE
  Filter = OBJECT
    METHODS
      apply(
        value: REFANY; (* The value to be converted *)
        root: SxSymbol.T; (* The root symbol *)
        syntax: T; (* The syntax table *)
      ): REFANY RAISES {FilterError};
      (* A Filter is a procedure that converts REFANY's to REFANY's.
         It may raise FilterError when the /value/ is malformed. *)
  END;

(**********************************************************)
(* CHANGING THE SYMBOL/NUMBER SYNTAX *)
(**********************************************************)

PROCEDURE SetSymbolConverter(t: T; converter: SymbolConverter);
(* Register a procedure to be used to parse symbol-like tokens. *)

TYPE
  SymbolConverter = OBJECT
    METHODS
      apply(VAR name: CHARS; root, parent: SxSymbol.T; syntax: T): REFANY
        RAISES {Sx.ReadError};
      (* A SymbolConverter is called to process symbol names.

         This procedure is called once for each simple component
         /name/ of the symbol, from left to right, after all escapes
         and quotes have been resolved.  The parameter /parent/ is the
         result of parsing all simple components of the symbol that
         come before /name/.  SymbolConverter should return the
         SxSymbol.T that corresponds to the notation /parent/./name/.

         The parameter /root/ is the original symbol root passed to
         Read.

         The SymbolConverter may modify the contents of /name/ if it
         needs temporary storage. *)
  END;

PROCEDURE SetIntConverter(t: T; converter: IntConverter);
(* Register a procedure for converting integer literals into REF
   values. *)

TYPE
  IntConverter = OBJECT
    METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (* An IntConverter is called after the numeric literal has been
         extracted from the input stream, and has been determined to be
         an integer literal satisfying the syntax

             integer  => ["-"|"+"] digit+ | digit+ "_" ["-"|"+"] hexdigit+
             hexdigit => digit | "a".."z" | "A".."Z"
             digit    => "0".."9"

         In the case of literals with explicit base in the Modula-3
         style ("8_007777"), the base is NOT guaranteed to be in
         [2..36], and the digits are NOT guaranteed to be in the range
         [0..base-1].  The /apply/ routine should check these
         conditions, and raise Sx.ReadError if not true.

         The IntConverter may modify the contents of /chars/ if it
         needs temporary storage. *)
  END;

PROCEDURE SetFloatConverter(t: T; converter: FloatConverter);
(* Register a procedure for converting "float" literals into REF
   values. *)

TYPE
  FloatConverter = OBJECT
    METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (* A FloatConverter is called after the numeric literal has been
         extracted from the input stream, and has been determined to be
         a floating-point literal satisfying the syntax

             float    => ["-"|"+"] digit* "." digit* [exponent]
                       | ["-"|"+"] digit+ exponent
             exponent => ("E"|"e"|"d"|"D") [["-"|"+"] digit+]

         The FloatConverter may modify the contents of /chars/ if it
         needs temporary storage. *)
  END;

PROCEDURE SetOtherNumConverter(t: T; converter: OtherNumConverter);
(* Register a procedure for converting number-like tokens that are
   neither integers nor floats into REF values.  (The default is to
   treat those tokens as syntax errors). *)

TYPE
  OtherNumConverter = OBJECT
    METHODS
      apply(VAR chars: CHARS; syntax: T): REFANY RAISES {Sx.ReadError};
      (* An OtherNumConverter is called to process tokens that look
         like numbers (in the sense that they begin with a digit, or a
         sign followed by a digit), but do not follow the syntax of
         integer and float literals, as described above.

         The OtherNumConverter may modify the contents of /chars/ if it
         needs temporary storage. *)
  END;

(**********************************************************)
(* SPECIAL CHARACTERS *)
(**********************************************************)

(* The procedures
|
|    MakeCharSpace
|    SetCharParser
|    MakeCharIllegal
|    SetTwoCharParser
|    MakeTwoCharIllegal
|    MakeCharSymNum
|
   specify what to do with tokens that begin with a given character or
   two-character combination.

   Calling any of these procedures on a given character /ch/ undoes the
   effect of all previous calls of the other procedures for that
   character.

   In particular, SetCharParser, SetTwoCharParser, MakeCharSpace,
   MakeCharIllegal, and MakeTwoCharIllegal prevent further use of /ch/
   in symbol and numeric literals (unless it is escaped).

   On the other hand, these procedures do not affect the meaning of the
   second character of "sharp" tokens, or the contents of text and
   character literals.  For example, MakeCharSpace(t, '_') changes the
   meaning of _foo but not those of #_foo or "_foo" or '_' *)

PROCEDURE SetCharParser(t: T; ch: CHAR; parser: Parser);
(* Specifies the parsing procedure to use for all tokens beginning with
   the given character /ch/.  Can be used to define new token types
   (e.g., `foo or $error) or to change the syntax and semantics of
   existing types (text and character literals).

   If /parser=NIL/, /ch/ will not be allowed as the first character of
   any token, or as a constituent of symbols and numeric literals. *)

PROCEDURE MakeCharIllegal(t: T; ch: CHAR);
(* Equivalent to SetCharParser(t, ch, NIL). *)

PROCEDURE MakeCharSpace(t: T; ch: CHAR);
(* Specifies that /ch/ should be treated as a space. *)

TYPE
  Parser = OBJECT
    METHODS
      apply(
        rd: Rd.T;
        ch: CHAR;
        root: SxSymbol.T;
        syntax: T
      ): REFANY RAISES {Sx.ReadError, Rd.Failure, Thread.Alerted};
      (* A Parser is a routine called by Read to parse a symbolic
         (sub-)expression that begins with the specified character.

         On entry to /apply/, "rd" is positioned right after the
         character that triggered it.  The /apply/ routine may consume
         zero or more additional characters from "rd" to compute its
         result, and it may recursively invoke Sx.Read.

         If unexpected end-of-file or some other syntax error occurs
         during the parsing, /apply/ should raise Sx.ReadError.

         NOTE(review): Rd.EndOfFile is not in this RAISES set, although
         it is listed for SubParser.apply; confirm the difference is
         intentional. *)
  END;

PROCEDURE SetTwoCharParser(t: T; ch, next: CHAR; parser: SubParser);
(* Registers the procedure to be used for parsing tokens beginning with
   /ch/ and /next/.

   If /parser=NIL/, the characters /ch/ and /next/ will not be allowed
   as the first two characters of any token.

   In any case, /ch/ will not be allowed as a constituent of symbols
   and numeric literals.  Also cancels the effect of all previous calls
   to SetCharParser(t, ch). *)

PROCEDURE MakeTwoCharIllegal(t: T; ch, next: CHAR);
(* Equivalent to SetTwoCharParser(t, ch, next, NIL). *)

TYPE
  SubParser = OBJECT
    METHODS
      apply(
        rd: Rd.T;
        ch, next: CHAR;
        root: SxSymbol.T;
        syntax: T
      ): REFANY
        RAISES {Sx.ReadError, Rd.EndOfFile, Rd.Failure, Thread.Alerted};
      (* A SubParser is a routine called by Read to parse the body of
         tokens that begin with a specific two-character combination.

         On entry to the SubParser, "rd" is positioned right after the
         two-character combination that triggered it (/ch/ followed by
         /next/).  The routine may consume zero or more additional
         characters from "rd" to compute its result, and it may
         recursively invoke Sx.Read.

         If an unexpected end-of-file or some other syntax error is
         encountered during this parsing, /apply/ should raise
         Sx.ReadError.

         NOTE(review): this RAISES set includes Rd.EndOfFile, whereas
         Parser.apply's does not; confirm the difference is
         intentional. *)
  END;

PROCEDURE MakeCharSymNum(t: T; ch: CHAR);
(* Specifies that /ch/ is allowed as part of symbol names or numeric
   literals (even as the leading character). *)

PROCEDURE SetSymNumParser(t: T; parser: Parser);
(* Register a procedure to be used to read and parse the tokens that
   look like symbols or numbers.  This call permanently overrides the
   effect of SetSymbolConverter, SetIntConverter, and
   SetFloatConverter.

   NOTE(review): SetOtherNumConverter is not mentioned here; presumably
   it is overridden as well -- confirm against the implementation. *)

(**********************************************************)
(* PRINTING SPECIAL VALUES *)
(**********************************************************)

PROCEDURE SetRefPrinter(t: T; typeCode: INTEGER; printer: Printer);
(* Registers the given printer routine for REF values with given type
   code. *)

PROCEDURE SetListPrinter(t: T; symbol: SxSymbol.T; printer: Printer);
(* Registers the given printer routine for List.T values that begin
   with the given symbol. *)

PROCEDURE SetDefaultPrinter(t: T; printer: Printer);
(* Registers the printer routine to be used for REF values that have no
   printer routine of their own. *)

TYPE
  Printer = OBJECT
    METHODS
      apply(
        fwr: FWr.T;
        value: REFANY;
        elision: Sx.Elision;
        root: SxSymbol.T;
        syntax: T;
      ) RAISES {Sx.PrintError, Wr.Failure, Thread.Alerted};
      (* A Printer is a procedure that is called by Print to output
         objects of a specific REF type, or lists that begin with a
         specific symbol.

         The /apply/ routine need not flush the writer, and should not
         add any whitespace before or after the printed value. *)
  END;

VAR (*CONST*) IllegalValuePrinter: Printer;
(* A Printer that always raises Sx.PrintError. *)

(**********************************************************)
(* INDENTATION *)
(**********************************************************)

PROCEDURE SetIndentation(t: T; indentation: CARDINAL);
(* Specifies the extra indentation per level to use when printing
   multi-line expressions. *)

CONST StandardIndentation = 2;
(* The indentation for the standard syntax. *)

END SxSyntax.