UNSAFE MODULE FieldList; (* UNSAFE so we can use ADR. *) (* Copyright Samuel P. Harbison 1991. All rights reserved. Permission is hereby granted to use, copy, modify, prepare integrated and derivative works of and distribute this software for non-commercial purposes, provided that you retain the copyright notice and disclaimer in the software. This software is provided "as-is" and Samuel P. Harbison disclaims all warranties with regard to this softtware, including all implied warranties of merchantability and fitness of purpose. Written by Samuel P. Harbison, Pine Creek Software; Suite 300; 305 South Craig Street; Pittsburgh, PA 15213; USA. Phone&FAX: +1 (412) 681 9811. E-mail: harbison@bert.pinecreek.com. Part of the Pine Creek Modula-3 Library. Version 1.0. Printed documentation can be obtained by contacting the author. *) IMPORT Rd, Wr, Text, Stdio, Fmt, Cstdlib; <*UNUSED*> CONST Copyright = "Copyright 1991 Samuel P. Harbison\000"; <*FATAL ANY*> CONST DefaultLineLength = 80; LargeLineLength = 256; (* Expected line lengths; can be exceeded with impunity. *) DefaultSize = 20; (* How many fields we expect at first. *) EndMarker = '\000'; (* Always added to end of line as an extra character. However, there's nothing special about it, and the line could contain this character within it without confusion. *) TYPE Kind = {Empty, (* means field is not present *) None, (* means not yet figured out *) Whitespace, (* when T.keepWS = TRUE; value never cached *) SimpleText, (* not a number or quoted; value may be cached *) Integer, (* an integer; value is cached *) Real, (* a floating-point number; value is cached *) QuotedText (* quoted text or char; value is cached *) }; Descriptors = REF ARRAY OF FieldDescriptor; FieldDescriptor = RECORD (* describes one field *) start, len: CARDINAL := 0; (* start and length of field *) (* These are positions of the original field in the original string. In the case of quoted strings, they point to the interior of the quoted string. *) kind := Kind.Empty; cache: REF Cache := NIL; (* Depending on kind, this field may contain a cached value. *) END; (* Once we discover something about a field, we remember it. *) Cache = RECORD integer: INTEGER := 0; (* set only if kind = Integer *) real: LONGREAL (*EXTENDED*) := 0.0D0; (* set only if kind = Real *) text: TEXT := NIL; (* set any time we retrieve the text form of the field *) next: REF Cache := NIL; (* Cache free list link *) END; REVEAL T = Public BRANDED "FieldList.T" OBJECT originalLine: TEXT; (* original line, just kept because it's cheap to *) chars: REF ARRAY OF CHAR := NIL; (* copy of original line, with extra char at end; this copy is the subject of the action *) totalLength: CARDINAL := 0; (* =Text.Length(originalLine), i.e., does NOT include the extra character at end of T.chars *) nFields: CARDINAL := 0; (* number of fields *) fds: Descriptors := NIL; (* location and type of each field in line *) cacheList: REF Cache := NIL; (* Values in the following fields determine how the next get operation behaves. *) ws := DefaultWS; (* which characters are considered whitespace *) keepWS := DefaultKeepWS; (* if True, whitespace is treated as a separate field *) quotes := DefaultQuotes; (* if True, accept M3-style quoted strings and chars *) OVERRIDES init := init; getText := getText; getLine := getLine; number := number; line := line; lineLength := lineLength; isWhitespace := isWhitespace; isNumber := isNumber; isInteger := isInteger; isReal := isReal; integer := integer; real := real; length := length; pos := pos; text := text; put := put; char := char; equal := equal; setAttr := setAttr; getAttr := getAttr; dump := dump; END; VAR stats: RECORD NewCache := 0; UsedCache := 0; NewChars := 0; NewFDS := 0; NewRefAddr := 0; NewRefInteger := 0; NewRefBoolean := 0; NewRefSetOfChar := 0; END; PROCEDURE GetCache(t: T; integer := 0; real := 0.0D0; text: TEXT := NIL): REF Cache = (* Return a cache containing the indicated information. We expect only one of the parameters to be useful. Get the cache from t's free list or allocate it fresh. *) VAR cache: REF Cache; BEGIN IF t.cacheList = NIL THEN cache := NEW(REF Cache); INC(stats.NewCache); ELSE cache := t.cacheList; t.cacheList := cache.next; INC(stats.UsedCache); END; cache.integer := integer; cache.real := real; cache.text := text; cache.next := NIL; RETURN cache; END GetCache; PROCEDURE AddDescriptor(t: T; READONLY fd: FieldDescriptor) = (* Increment the number of fields, and store fd as the descriptor for the new field. *) BEGIN IF t.nFields >= NUMBER(t.fds^) THEN (* Must extend the array of descriptors. *) WITH n = NUMBER(t.fds^), new = NEW(Descriptors, 2 * n) DO SUBARRAY(new^, 0, n) := t.fds^; t.fds := new; INC(stats.NewFDS); END; END; t.fds[t.nFields] := fd; (* Field-by-field copy of descriptor. *) INC(t.nFields); END AddDescriptor; PROCEDURE ScanField(t: T; start: CARDINAL): CARDINAL = (* Scans the line beginning at fd.start for for a non-whitespace field. Expects t.chars[fd.start] is not whitespace. Sets fd.len and fd.kind. Returns the number of characters scanned; this will be the same as fd.len except for quoted strings, in which case fd.len may be shorter since it refers only to the interior of the string. *) VAR next := start; ch := t.chars[next]; len := 0; matchQuote: CHAR; BEGIN (* There are three kinds of non-whitespace fields: quoted strings, quoted characters, and other. *) IF t.quotes AND (ch = '\"' OR ch = '\'') THEN (* Try to parse as a quoted string; if we fail, we back up and try as regular text. *) matchQuote := ch; INC(next); INC(len); ch := t.chars[next]; WHILE next < t.totalLength AND ch # matchQuote DO (* Skip over escaped character, if there's room *) IF ch = '\\' THEN IF next < t.totalLength - 1 THEN INC(next); INC(len); (* Skip '\\' *) ELSE EXIT; (* premature termination; ch is now '\\' *) END END; INC(next); INC(len); ch := t.chars[next]; END; IF ch = matchQuote THEN (* We found a legal quoted string *) AddDescriptor(t, FieldDescriptor { start := start + 1, len := len - 1, kind := Kind.QuotedText, cache := NIL }); RETURN len + 1; (* include the trailing quote *) END; (* End of line without matching quote. Back up to beginning and scan as regular text *) next := start; ch := t.chars[next]; len := 0; END; (* Gather nonwhitespace characters *) WHILE next < t.totalLength AND NOT (ch IN t.ws) DO INC(len); INC(next); ch := t.chars[next]; END; IF len > 0 THEN AddDescriptor(t, FieldDescriptor{ start := start, len := len, kind := Kind.None, (* could be text or number *) cache := NIL }); END; RETURN len; END ScanField; PROCEDURE ScanWhitespace(t: T; start: CARDINAL): CARDINAL = VAR next := start; (* next position in array to inspect *) ch := t.chars[next]; (* next character in array to inspect *) len := 0; (* length of whitespace *) (* Locate whitespace in line beginning at start. Adds to field list if necessary *) BEGIN WHILE next < t.totalLength AND (ch IN t.ws) DO INC(len); INC(next); ch := t.chars[next]; END; IF len > 0 AND t.keepWS THEN AddDescriptor(t, FieldDescriptor{ start := start, len := len, kind := Kind.Whitespace, cache := NIL}); END; RETURN len; END ScanWhitespace; PROCEDURE NewChars(t: T; minLength: CARDINAL) = (* Increase length of t.chars, at least to minLength. Try to minimize reallocations. We assume minLength > NUMBER(t.chars^). *) VAR size: CARDINAL; BEGIN IF minLength > LargeLineLength THEN size := minLength + minLength DIV 8; ELSIF minLength > DefaultLineLength THEN size := LargeLineLength; ELSE size := DefaultLineLength END; t.chars := NEW(REF ARRAY OF CHAR, size); INC(stats.NewChars); END NewChars; PROCEDURE SetupLine(t: T; text: TEXT) = BEGIN (* Clean up any previous stuff in field list. *) (* Don't free t.chars; it will be reused. *) IF t.fds = NIL THEN t.fds := NEW(Descriptors, DefaultSize); INC(stats.NewFDS); ELSE FOR i := 0 TO LAST(t.fds^) DO WITH fd = t.fds[i] DO IF fd.kind = Kind.Empty THEN EXIT; (* Rest of array is already clean *) END; IF fd.cache # NIL THEN (* Place used cache on free list *) fd.cache.next := t.cacheList; t.cacheList := fd.cache; END; fd := FieldDescriptor{0, 0, Kind.Empty, NIL}; END; END; END; (* Store the line in the T object; convert it to an array of characters, and create an initial descritpr array (if necessary). *) t.originalLine := text; t.totalLength := Text.Length(text); t.nFields := 0; (* Convert text to an array of chars. The array will be at least one character longer than the line for faster processing. Reuse any existing array if it is long enough. *) IF t.chars = NIL OR NUMBER(t.chars^) < t.totalLength + 1 THEN NewChars(t, t.totalLength + 1); END; Text.SetChars(t.chars^, text); t.chars[t.totalLength] := EndMarker; END SetupLine; PROCEDURE Scan(t: T; text: TEXT) = VAR next: CARDINAL; (* index of next char in string *) len: CARDINAL; (* number of characters processed *) BEGIN SetupLine(t, text); (* place text in t; set up other parts *) (* Break line into fields. Each time we go around this loop we skip leading whitespace characters and then collect a non-whitespace field. The descriptor fd keeps track of progress. *) next := 0; WHILE next < t.totalLength DO (* Skip whitespace; treat it as a field if requested. *) len := ScanWhitespace(t, next); INC(next, len); (* Collect a non-whitespace field. *) len := ScanField(t, next); INC(next, len); END (*WHILE*); END Scan; PROCEDURE Parse(t: T; n: Field) = (* See if field n contains a number (real or integer); if it does, compute it and cache the result. NOTE: Some of this code depends on the fact that the first character following the field cannot be part of a number. This is certainly true of every field but the last, and the last one is followed by an EndMarker char, which cannot be part of a number. *) VAR num := 0; len := 0; next := 0; minLen := 1; (* min # of chars to be a legal number *) ch: CHAR; negative:= FALSE; expIndex := 0; PROCEDURE CollectInteger(): INTEGER = VAR num := 0; BEGIN (* We actually collect -ABS(n), to handle FIRST(INTEGER). *) WHILE ORD(ch) <= ORD('9') AND ORD(ch) >= ORD('0') DO num := num * 10 - (ORD(ch) - ORD('0')); INC(len); INC(next); ch := t.chars[next]; END; IF num # FIRST(INTEGER) THEN num := -num; END; RETURN num; END CollectInteger; BEGIN WITH fd = t.fds[n] DO len := 0; next := fd.start; ch := t.chars[next]; IF ch = '-' OR ch = '+' THEN negative := ch = '-'; INC(minLen); (* sign plus a digit *) INC(len); INC(next); ch := t.chars[next]; END; num := CollectInteger(); IF negative AND num # FIRST(INTEGER) THEN num := -num; END; IF len = fd.len AND len >= minLen THEN (* Field is a simple integer *) fd.kind := Kind.Integer; IF fd.cache = NIL THEN fd.cache := GetCache(t); END; fd.cache.integer := num; RETURN; END; (* There is more left in the field; it must be either a floating-point number or simply text. We'll scan it to see if it's a legal floating-point number. *) IF ch = '.' THEN INC(minLen); INC(len); INC(next); ch := t.chars[next]; EVAL CollectInteger(); (* fraction *) END; IF len < fd.len AND len >= minLen AND (ch = 'E' OR ch = 'e' OR ch = 'D' OR ch = 'd' OR ch = 'X' OR ch = 'x') THEN expIndex := next; INC(len); INC(next); ch := t.chars[next]; IF ch = '-' OR ch = '+' THEN INC(len); INC(next); ch := t.chars[next]; END; minLen := len + 1; (* must find one more digit *) EVAL CollectInteger(); (* exponent *) END; (* If we're at the end of the field, its a FP number. We let C's strtod to the conversion, which means we have to clean up the field a bit. *) IF len = fd.len AND len >= minLen THEN VAR r: LONGREAL; endPatch, expPatch: CHAR; sptr: ADDRESS; eptr := NEW(UNTRACED REF ADDRESS); BEGIN INC(stats.NewRefAddr); endPatch := t.chars[next]; t.chars[next] := '\000'; IF expIndex > 0 THEN expPatch := t.chars[expIndex]; t.chars[expIndex] := 'e'; END; sptr := ADR(t.chars[fd.start]); eptr^ := NIL; r := Cstdlib.strtod(sptr, eptr); IF eptr^ # ADR(t.chars[next]) THEN Wr.PutText(Stdio.stderr, Fmt.F("Parse of real failed, sptr=%s, eptr=%s\n", Fmt.Addr(sptr), Fmt.Addr(eptr^))); END; (* Reinstall patches *) t.chars[next] := endPatch; IF expIndex > 0 THEN t.chars[expIndex] := expPatch; END; (* Cache result *) fd.kind := Kind.Real; IF fd.cache = NIL THEN fd.cache := GetCache(t); END; fd.cache.real := r; RETURN; END(*block*); END(*IF*); (* If we get here, the field is not a number. *) fd.kind := Kind.SimpleText; END(*WITH*); END Parse; PROCEDURE getText(self: T; text: TEXT) = BEGIN Scan(self, text); END getText; PROCEDURE getLine(self: T; rd: Rd.T := NIL) RAISES {Rd.EndOfFile, Rd.Failure} = BEGIN IF rd = NIL THEN rd := Stdio.stdin; END; Scan(self, Rd.GetLine(rd)); END getLine; PROCEDURE number(self: T): CARDINAL = BEGIN RETURN self.nFields; END number; PROCEDURE isWhitespace(self: T; n: Field): BOOLEAN = BEGIN IF n > self.nFields THEN RETURN FALSE; END; WITH fd = self.fds[n] DO RETURN fd.kind = Kind.Whitespace; END; END isWhitespace; PROCEDURE isNumber(self: T; n: Field): BOOLEAN = VAR result := FALSE; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF fd.kind = Kind.None THEN Parse(self, n); END; result := fd.kind = Kind.Integer OR fd.kind = Kind.Real; END; END; RETURN result; END isNumber; PROCEDURE isInteger(self: T; n: Field): BOOLEAN = VAR result := FALSE; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF fd.kind = Kind.None THEN Parse(self, n); END; result := fd.kind = Kind.Integer; END END; RETURN result; END isInteger; PROCEDURE isReal(self: T; n: Field): BOOLEAN = VAR result := FALSE; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF fd.kind = Kind.None THEN Parse(self, n); END; result := fd.kind = Kind.Real; END END; RETURN result; END isReal; PROCEDURE integer(self: T; n: Field): INTEGER = VAR result: INTEGER := 0; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF fd.kind = Kind.None THEN Parse(self, n); END; IF fd.kind = Kind.Integer THEN result := fd.cache.integer; ELSIF fd.kind = Kind.Real THEN result := ROUND(fd.cache.real); END END; END; RETURN result; END integer; PROCEDURE real(self: T; n: Field): LONGREAL (*EXTENDED*) = VAR result: LONGREAL := 0.0D0; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF fd.kind = Kind.None THEN Parse(self, n); END; IF fd.kind = Kind.Real THEN result := fd.cache.real; ELSIF fd.kind = Kind.Integer THEN result := FLOAT (fd.cache.integer, LONGREAL); END END; END; RETURN result; END real; PROCEDURE char(self: T; n: Field; i: CARDINAL): CHAR = VAR result := '\000'; BEGIN IF n < self.nFields THEN WITH fd = self.fds[n] DO IF i < fd.len THEN result := self.chars[fd.start + i]; END; END; END; RETURN result; END char; PROCEDURE equal( self: T; n: Field; text: TEXT; caseSensitive := TRUE): BOOLEAN = VAR fCh, tCh: CHAR; BEGIN WITH f = self.text(n), fn = self.length(n), tn = Text.Length(text) DO IF fn # tn THEN RETURN FALSE; END; IF caseSensitive THEN RETURN Text.Equal(f, text); END; FOR i := 0 TO fn - 1 DO fCh := Text.GetChar(f, i); IF fCh >= 'A' AND fCh <= 'Z' THEN fCh := VAL(ORD(fCh) - ORD('A') + ORD('a'), CHAR); END; tCh := Text.GetChar(text, i); IF tCh >= 'A' AND tCh <= 'Z' THEN tCh := VAL(ORD(tCh) - ORD('A') + ORD('a'), CHAR); END; IF fCh # tCh THEN RETURN FALSE; END; END; END; RETURN TRUE; END equal; PROCEDURE length(self: T; n: Field): CARDINAL = VAR result: CARDINAL := 0; BEGIN IF n < self.nFields THEN (* Not right for quoted strings! *) result := self.fds[n].len; END; RETURN result; END length; PROCEDURE pos(self: T; n: Field): CARDINAL = VAR result: CARDINAL := self.totalLength; BEGIN IF n < self.nFields THEN result := self.fds[n].start; END; RETURN result; END pos; PROCEDURE line(self: T): TEXT = BEGIN RETURN self.originalLine; END line; PROCEDURE lineLength(self: T): CARDINAL = BEGIN RETURN self.totalLength; END lineLength; PROCEDURE text(self: T; n: Field): TEXT = VAR result: TEXT; BEGIN IF n >= self.nFields THEN result := ""; ELSE (* Want the text for field n *) WITH fd = self.fds[n] DO IF fd.cache # NIL AND fd.cache.text # NIL THEN result := fd.cache.text; ELSE (* Could be quoted or unquoted text, whitespace, or a number. Get the (nonquoted) text and cache it. *) result := Text.FromChars( SUBARRAY(self.chars^, fd.start, fd.len)); IF fd.cache # NIL THEN fd.cache.text := result; ELSE fd.cache := GetCache(self, text := result, integer := 0, real := 0.0D0); END; END; END; END; RETURN result; END text; PROCEDURE put(self: T; n: Field; wr: Wr.T := NIL) = BEGIN IF wr = NIL THEN wr := Stdio.stdout; END; IF n < self.nFields THEN WITH fd = self.fds[n] DO FOR i := fd.start TO fd.start + fd.len - 1 DO Wr.PutChar(wr, self.chars[i]); END; END; END; END put; PROCEDURE init(self: T): T = (* Not currently used, but here as a placeholder *) BEGIN RETURN self; END init; PROCEDURE setAttr(self: T; attribute: TEXT; val: REFANY) RAISES {Error} = BEGIN IF val = NIL THEN RAISE Error; END; IF Text.Equal(attribute, "whitespace") THEN TYPECASE val OF REF SET OF CHAR(v) => self.ws := v^; ELSE RAISE Error; END; ELSIF Text.Equal(attribute, "keep_whitespace") THEN TYPECASE val OF REF BOOLEAN(v) => self.keepWS := v^; ELSE RAISE Error; END; ELSIF Text.Equal(attribute, "quoted_fields") THEN TYPECASE val OF REF BOOLEAN(v) => self.quotes := v^; ELSE RAISE Error; END; ELSE RAISE Error; (* bad attribute name *) END; END setAttr; PROCEDURE getAttr(self: T; attribute: TEXT): REFANY RAISES {Error} = (* Return the value of the named attribute *) BEGIN IF Text.Equal(attribute, "whitespace") THEN RETURN RefSetOfChar(self.ws); ELSIF Text.Equal(attribute, "keep_whitespace") THEN RETURN RefBoolean(self.keepWS); ELSIF Text.Equal(attribute, "quoted_fields") THEN RETURN RefBoolean(self.quotes); ELSE RAISE Error; (* bad attribute name *) END; END getAttr; PROCEDURE RefInteger(val: INTEGER): REF INTEGER = VAR result := NEW(REF INTEGER); BEGIN INC(stats.NewRefInteger); result^ := val; RETURN result; END RefInteger; PROCEDURE RefBoolean(val: BOOLEAN): REF BOOLEAN = VAR result := NEW(REF BOOLEAN); BEGIN INC(stats.NewRefBoolean); result^ := val; RETURN result; END RefBoolean; PROCEDURE RefSetOfChar(READONLY val: SET OF CHAR): REF SET OF CHAR = VAR result := NEW(REF SET OF CHAR); BEGIN INC(stats.NewRefSetOfChar); result^ := val; RETURN result; END RefSetOfChar; PROCEDURE dump(<*UNUSED*> self: T; <*UNUSED*> wr: Wr.T := NIL) = BEGIN END dump; (* UNCOMMENT THIS WHEN DEBUGGING PROCEDURE dump(self: T; wr: Wr.T := NIL) = VAR textCache: TEXT; BEGIN IF wr = NIL THEN wr := Stdio.stdout; END; Wr.PutText(wr, Fmt.F("Line=\"%s\" %s chars, %s fields, %s descriptors\n", self.originalLine, Fmt.Int(self.totalLength), Fmt.Int(self.nFields), Fmt.Int(NUMBER(self.fds^)))); FOR f := 0 TO LAST(self.fds^) DO WITH fd = self.fds[f] DO IF fd.kind # Kind.Empty THEN Wr.PutText(wr, Fmt.F("%2s: fd=(start=%s, len=%s, kind=%s, ", Fmt.Int(f), Fmt.Int(fd.start),Fmt.Int(fd.len), Fmt.Int(ORD(fd.kind)))); IF fd.cache = NIL THEN Wr.PutText(wr, "cache = NIL)\n"); ELSE IF fd.cache.text = NIL THEN textCache := "NIL" ELSE textCache := fd.cache.text; END; Wr.PutText(wr, Fmt.F("cache = (int=%s, real=%s, text=\"%s\"))\n", Fmt.Int(fd.cache.integer), Fmt.LongReal(fd.cache.real), textCache)); END; END; END(*WITH*); END (*FOR*); Wr.PutText(wr, "Statistics\n"); Wr.PutText(wr, "NewCache = " & Fmt.Int(stats.NewCache) & "\n"); Wr.PutText(wr, "UsedCache = " & Fmt.Int(stats.UsedCache) & "\n"); Wr.PutText(wr, "NewChars = " & Fmt.Int(stats.NewChars) & "\n"); Wr.PutText(wr, "NewFDS = " & Fmt.Int(stats.NewFDS) & "\n"); Wr.PutText(wr, "NewRefAddr = " & Fmt.Int(stats.NewRefAddr) & "\n"); Wr.PutText(wr, "NewRefInteger = " & Fmt.Int(stats.NewRefInteger) & "\n"); Wr.PutText(wr, "NewRefBoolean = " & Fmt.Int(stats.NewRefBoolean) & "\n"); Wr.PutText(wr, "NewRefSetOfChar = " & Fmt.Int(stats.NewRefSetOfChar) & "\n"); VAR n := 0; c: REF Cache := self.cacheList; BEGIN WHILE c # NIL DO INC(n); c := c.next; END; Wr.PutText(wr, Fmt.F("Cache list has %s entries.\n", Fmt.Int(n))); END; END dump; DEBUG*) BEGIN END FieldList. (* Local Variables: *) (* tab-width: 4 *) (* End: *)