(* Copyright (C) 1992, Digital Equipment Corporation *) (* All rights reserved. *) (* See the file COPYRIGHT for a full description. *) (* File: String.m3 *) (* Last modified on Mon Jun 8 14:16:05 PDT 1992 by kalsow *) (* modified on Wed Nov 28 02:23:29 1990 by muller *) UNSAFE MODULE String; IMPORT MBuf, Text, TextF, Word, Module, Convert; CONST NO_TEXT = -1; (* textID *) C_RESERVED = -2; (* internal string class *) MIN_BUFFER = 8192 - 2 * BYTESIZE (INTEGER); (* leave room for headers *) TYPE StrPtr = UNTRACED REF CHAR; Buffer = UNTRACED REF ARRAY OF CHAR; REVEAL T = UNTRACED BRANDED REF RECORD prefix : T := NIL; suffix : T := NIL; start : StrPtr := NIL; length : INTEGER := 0; class : INTEGER := 0; hash : INTEGER := 0; textID : INTEGER := NO_TEXT; uid : INTEGER := NO_TEXT; next : T := NIL; END; (* There are three variants of a String.T: (textID # NO_TEXT) => the whole string is in texts[textID] (start # NIL) => the characters in [start..start+length) ELSE => prefix & suffix *) CONST Digits = ARRAY [0..9] OF CHAR {'0','1','2','3','4','5','6','7','8','9'}; VAR nextChar : CARDINAL := 0; nTexts : INTEGER := 0; hashTable := NEW (REF ARRAY OF T, 2048); texts := NEW (REF ARRAY OF TEXT, 256); chars := NEW (Buffer, MIN_BUFFER); int_cache := ARRAY [0..31] OF T {NIL, ..}; next_t : T := NIL; i_suffix : T := NIL; m_suffix : T := NIL; nStrings : INTEGER := 0; (*-------------------------------------------------------------- exported ---*) PROCEDURE Add (x: TEXT): T = VAR t: T; my_id: INTEGER; BEGIN IF (next_t = NIL) THEN next_t := NEW (T) END; my_id := InternText (x); (* speculative *) next_t.textID := my_id; next_t.start := NIL; next_t.length := Text.Length (x); next_t.prefix := NIL; next_t.suffix := NIL; t := Intern (); IF (t.textID # my_id) THEN (* we already had this text *) DEC (nTexts) END; RETURN t; END Add; PROCEDURE AddInt (i: INTEGER): T = BEGIN IF (i < 0) OR (LAST (int_cache) < i) THEN RETURN NewInt (i) END; IF int_cache [i] = NIL THEN int_cache [i] := NewInt (i) END; RETURN int_cache [i]; END AddInt; PROCEDURE FromStr (READONLY buf: ARRAY OF CHAR; length: INTEGER): T = VAR t: T; BEGIN IF (next_t = NIL) THEN next_t := NEW (T) END; length := MIN (length, NUMBER (buf)); next_t.textID := NO_TEXT; next_t.start := ADR (buf [0]); next_t.length := length; next_t.prefix := NIL; next_t.suffix := NIL; t := Intern (); IF (next_t = NIL) THEN (* a new string! *) IF (t.textID = NO_TEXT) THEN (* it's using the new characters! *) IF (nextChar + length > LAST (chars^)) THEN chars := NEW (Buffer, MAX (MIN_BUFFER, length+1)); nextChar := 0; END; SUBARRAY (chars^, nextChar, length) := SUBARRAY (buf, 0, length); chars [nextChar + length] := '\000'; t.start := ADR (chars [nextChar]); INC (nextChar, length+1) ELSE (* there's a text version of the string *) t.start := NIL; END; END; RETURN t; END FromStr; PROCEDURE Concat (a, b: T): T = BEGIN IF (a = NIL) OR (a.length = 0) THEN RETURN b END; IF (b = NIL) OR (b.length = 0) THEN RETURN a END; IF (next_t = NIL) THEN next_t := NEW (T) END; next_t.textID := NO_TEXT; next_t.start := NIL; next_t.length := a.length + b.length; next_t.prefix := a; next_t.suffix := b; RETURN Intern (); END Concat; PROCEDURE Unique (root: T): T = VAR suffix: T; j: INTEGER; counter := Module.CurrentCounter (); BEGIN (* bump the counter *) j := LAST (counter); WHILE (counter[j] = '9') DO counter[j] := '0'; DEC (j); END; counter[j] := VAL (ORD (counter[j]) + 1, CHAR); Module.SetCurrentCounter (counter); (* get the right suffix: 'I' or 'M' *) IF Module.IsInterface () THEN IF i_suffix = NIL THEN i_suffix := Add ("I") END; suffix := i_suffix; ELSE IF m_suffix = NIL THEN m_suffix := Add ("M") END; suffix := m_suffix; END; RETURN Concat (Concat (root, Module.CurrentName ()), Concat (FromStr (counter, NUMBER (counter)), suffix)); END Unique; PROCEDURE ToText (t: T): TEXT = VAR x: TEXT; BEGIN IF (t = NIL) THEN RETURN NIL END; IF (t.textID = NO_TEXT) THEN x := TextF.New (t.length); Flatten (t, x^, 0); t.textID := InternText (x); END; RETURN texts [t.textID]; END ToText; PROCEDURE Put (wr: MBuf.T; t: T) = VAR txt: TEXT; p: StrPtr; BEGIN IF (t = NIL) THEN (* done *) ELSIF (t.textID # NO_TEXT) THEN txt := texts[t.textID]; FOR i := 0 TO t.length-1 DO EmitChar (wr, txt[i]) END; ELSIF (t.start # NIL) THEN p := t.start; FOR i := 0 TO t.length-1 DO EmitChar (wr, p^); INC (p, ADRSIZE (CHAR)); END; ELSE Put (wr, t.prefix); Put (wr, t.suffix); END; END Put; PROCEDURE PutChars (wr: MBuf.T; t: T) = VAR txt: TEXT; p: StrPtr; n := 0; BEGIN IF (t = NIL) THEN (* done *) ELSIF (t.textID # NO_TEXT) THEN txt := texts[t.textID]; FOR i := 0 TO t.length-1 DO EmitCharLiteral (wr, txt[i], i, n) END; ELSIF (t.start # NIL) THEN p := t.start; FOR i := 0 TO t.length-1 DO EmitCharLiteral (wr, p^, i, n); INC (p, ADRSIZE (CHAR)); END; ELSE PutChars (wr, t.prefix); IF (Length (t.prefix) > 0) THEN MBuf.PutText (wr, ",\n") END; PutChars (wr, t.suffix); END; END PutChars; PROCEDURE PutStack (wr: MBuf.T; s: Stack) = BEGIN FOR i := 0 TO s.top - 1 DO Put (wr, s.stk[i]); END; END PutStack; PROCEDURE SetClass (t: T; class: CARDINAL) = BEGIN t.class := class; END SetClass; PROCEDURE GetClass (t: T): INTEGER = BEGIN IF (t.class < 0) THEN RETURN 0 ELSE RETURN t.class; END; END GetClass; PROCEDURE Length (t: T): INTEGER = BEGIN IF (t = NIL) THEN RETURN 0; ELSE RETURN t.length; END; END Length; PROCEDURE GetUID (t: T): INTEGER = BEGIN RETURN t.uid; END GetUID; PROCEDURE SetUID (t: T; uid: INTEGER) = BEGIN t.uid := uid; END SetUID; PROCEDURE Hash (t: T): INTEGER = BEGIN IF (t = NIL) THEN RETURN 953; ELSE RETURN t.hash; END; END Hash; PROCEDURE IsReservedC (t: T): BOOLEAN = BEGIN RETURN (t # NIL) AND (t.class = C_RESERVED); END IsReservedC; PROCEDURE FileTail (t: T): T = VAR x, len: INTEGER; txt: TEXT; BEGIN IF (t = NIL) THEN RETURN NIL END; txt := ToText (t); (* search for the last slash in the string *) len := t.length; x := len - 1; WHILE (x >= 0) AND (txt[x] # '/') DO DEC (x) END; IF (x < 0) THEN (* no slashes *) RETURN t END; (* else, build and return the new tail *) RETURN FromStr (SUBARRAY (txt^, x+1, len-x-1), len); END FileTail; (*-------------------------------------------------------------- internal ---*) PROCEDURE NewInt (i: INTEGER): T = <*FATAL Convert.Failed*> VAR len: INTEGER; buf: ARRAY [0..BITSIZE(INTEGER)-1] OF CHAR; BEGIN len := Convert.FromInt (buf, i); RETURN FromStr (buf, len); END NewInt; PROCEDURE InternText (x: TEXT): INTEGER = VAR n := nTexts; BEGIN IF (n > LAST (texts^)) THEN ExpandTexts () END; texts[n] := x; INC (nTexts); RETURN n; END InternText; PROCEDURE ExpandTexts () = VAR new := NEW (REF ARRAY OF TEXT, 2 * NUMBER (texts^)); BEGIN FOR i := 0 TO LAST (texts^) DO new[i] := texts[i] END; texts := new; END ExpandTexts; PROCEDURE Intern (): T = VAR hash, bucket: INTEGER; t: T; BEGIN (* search the hash table *) next_t.hash := 0; hash := InternHash (next_t, 0); bucket := hash MOD NUMBER (hashTable^); t := hashTable[bucket]; WHILE (t # NIL) DO IF (t.hash = hash) AND Equal (t, next_t) THEN (* we found a hit! *) IF (t.textID = NO_TEXT) THEN (* remember the new TEXT if it exists *) t.textID := next_t.textID; END; RETURN t; END; t := t.next; END; (* we didn't find the string => add it to the hash table *) t := next_t; t.hash := hash; t.next := hashTable [bucket]; hashTable [bucket] := t; next_t := NIL; (* since we've used it! *) INC (nStrings); IF (nStrings > 2 * NUMBER (hashTable^)) THEN ExpandHashTable () END; RETURN t; END Intern; PROCEDURE ExpandHashTable () = VAR n_old := NUMBER (hashTable^); n_new := n_old * 2 + 7; new := NEW (REF ARRAY OF T, n_new); t, u : T; x : INTEGER; BEGIN FOR i := 0 TO n_new - 1 DO new[i] := NIL END; FOR i := 0 TO n_old - 1 DO t := hashTable [i]; WHILE (t # NIL) DO u := t.next; x := t.hash MOD n_new; t.next := new [x]; new [x] := t; t := u; END; END; hashTable := new; END ExpandHashTable; PROCEDURE InternHash (t: T; hash: INTEGER): INTEGER = VAR p: StrPtr; txt: TEXT; BEGIN IF (t = NIL) THEN RETURN 0 END; IF (hash = 0) AND (t.hash # 0) THEN RETURN t.hash END; IF (t.textID # NO_TEXT) THEN txt := texts [t.textID]; FOR i := 0 TO t.length - 1 DO hash := 2 * hash + ORD (txt[i]); END; ELSIF (t.start # NIL) THEN p := t.start; FOR i := 0 TO t.length - 1 DO hash := 2 * hash + ORD (p^); INC (p, ADRSIZE (CHAR)); END; ELSE (* a concatentation *) hash := InternHash (t.prefix, hash); hash := InternHash (t.suffix, hash); END; RETURN hash; END InternHash; PROCEDURE Equal (a, b: T): BOOLEAN = VAR a_len := a.length; b_len := b.length; a_ptr : StrPtr; b_ptr : StrPtr; a_buf : ARRAY [0..49] OF CHAR; b_buf : ARRAY [0..49] OF CHAR; a_txt : TEXT := NIL; b_txt : TEXT := NIL; BEGIN IF (a_len # b_len) THEN RETURN FALSE END; IF (a.textID # NO_TEXT) THEN a_ptr := ADR (texts[a.textID][0]); ELSIF (a.start # NIL) THEN a_ptr := a.start; ELSIF (a_len < NUMBER (a_buf)) THEN Flatten (a, a_buf, 0); a_ptr := ADR (a_buf[0]); ELSE a_txt := TextF.New (a_len); Flatten (a, a_txt^, 0); a_ptr := ADR (a_txt[0]); END; IF (b.textID # NO_TEXT) THEN b_ptr := ADR (texts[b.textID][0]); ELSIF (b.start # NIL) THEN b_ptr := b.start; ELSIF (b_len < NUMBER (b_buf)) THEN Flatten (b, b_buf, 0); b_ptr := ADR (b_buf[0]); ELSE b_txt := TextF.New (b_len); Flatten (b, b_txt^, 0); b_ptr := ADR (b_txt[0]); END; FOR i := 0 TO a_len-1 DO IF (a_ptr^ # b_ptr^) THEN (* not equal! *) IF (a_txt # NIL) THEN a.textID := InternText (a_txt) END; IF (b_txt # NIL) THEN b.textID := InternText (b_txt) END; RETURN FALSE; END; INC (a_ptr, ADRSIZE (CHAR)); INC (b_ptr, ADRSIZE (CHAR)); END; (* intern any new texts *) IF (a_txt = NIL) AND (b_txt = NIL) THEN (* ok *) ELSIF (a_txt = NIL) THEN IF (a.textID # NO_TEXT) THEN b.textID := a.textID; ELSE b.textID := InternText (b_txt); a.textID := b.textID; END; ELSIF (b_txt = NIL) THEN IF (b.textID # NO_TEXT) THEN a.textID := b.textID; ELSE a.textID := InternText (a_txt); b.textID := a.textID; END; ELSE (* both are texts are new *) a.textID := InternText (a_txt); b.textID := a.textID; END; RETURN TRUE; END Equal; PROCEDURE Flatten (t: T; VAR buf: ARRAY OF CHAR; start: INTEGER) = CONST N = 1024; VAR txt: TEXT; p: UNTRACED REF ARRAY [0..N-1] OF CHAR; len: INTEGER; BEGIN WHILE (t # NIL) DO IF (t.textID # NO_TEXT) THEN txt := texts [t.textID]; len := t.length; SUBARRAY (buf, start, len) := SUBARRAY (txt^, 0, len); t := NIL; ELSIF (t.start # NIL) THEN p := ADR (t.start^); len := t.length; WHILE (len >= N) DO SUBARRAY (buf, start, N) := p^; INC (p, ADRSIZE (p^)); INC (start, N); DEC (len, N); END; IF (len > 0) THEN SUBARRAY (buf, start, len) := SUBARRAY (p^, 0, len); END; t := NIL; ELSE Flatten (t.suffix, buf, start + Length (t.prefix)); t := t.prefix; END; END; END Flatten; PROCEDURE EmitCharLiteral (wr: MBuf.T; c: CHAR; i: INTEGER; VAR n: INTEGER) = BEGIN IF (i > 0) THEN MBuf.PutChar (wr, ',') END; IF (n >= 20) THEN MBuf.PutChar (wr, '\n'); n := 0 END; MBuf.PutChar (wr, '\''); EmitChar (wr, c); MBuf.PutChar (wr, '\''); INC (n); END EmitCharLiteral; PROCEDURE EmitChar (wr: MBuf.T; c: CHAR) = VAR i: INTEGER; BEGIN IF (c < ' ') OR (c = '\"') OR (c = '\'') OR ('~' < c) OR (c = '\\') THEN i := Word.And (ORD (c), 255); MBuf.PutChar (wr, '\\'); MBuf.PutChar (wr, Digits[i DIV 64]); i := Word.And (i, 63); MBuf.PutChar (wr, Digits[i DIV 8]); i := Word.And (i, 7); MBuf.PutChar (wr, Digits[i]); ELSE (* simple graphic character *) MBuf.PutChar (wr, c); END; END EmitChar; (*-------------------------------------------------------- initialization ---*) CONST RW = ARRAY OF TEXT { "asm", "auto", "break", "case", "char", "const", "continue", "default", "do", "double", "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register", "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef", "union", "unsigned", "void", "volatile", "while", (*** gcc 2.0 hacks ***) "inline", "typeof" }; PROCEDURE Initialize () = VAR t: T; BEGIN FOR i := 0 TO LAST (hashTable^) DO hashTable[i] := NIL; END; FOR i := 0 TO LAST (RW) DO t := Add (RW [i]); <* ASSERT t.class = 0 *> t.class := C_RESERVED; END; END Initialize; PROCEDURE Reset () = VAR t: T; BEGIN FOR i := FIRST (hashTable^) TO LAST (hashTable^) DO t := hashTable[i]; WHILE (t # NIL) DO t.uid := NO_TEXT; t := t.next END; END; END Reset; BEGIN END String.