(* Copyright (C) 1992, Digital Equipment Corporation *) (* All rights reserved. *) (* See the file COPYRIGHT for a full description. *) (* Created by stolfi on Thu Sep 6 0:58:48 PDT 1990 *) (* Last modified on Sat Jun 27 15:46:07 PDT 1992 by muller *) (* modified on Thu Sep 19 20:40:05 1991 by kalsow *) (* modified on Tue May 14 13:01:28 PDT 1991 by stolfi *) MODULE RefIntTbl; IMPORT List, Thread, Word; VAR (*CONST*) NullKey: Key := NEW(BRANDED REF INTEGER); (* Marks pristine entries *) DeletedKey: Key := NEW(BRANDED REF INTEGER); (* Marks deleted entries *) CONST NullValue = 0; VAR (*CONST*) NullEntry := Entry{NullKey, NullValue}; TYPE Entry = RECORD key: Key; value: Value END; Buckets = REF ARRAY OF Entry; TableRec = RECORD mutex: MUTEX; buckets: Buckets; (* The bucket area *) nullKeyPresent: BOOLEAN; (* TRUE iff NullKey is in table *) nullKeyValue: Value; (* value of NullKey, if present *) deletedKeyPresent: BOOLEAN; (* TRUE iff DeletedKey is in table *) deletedKeyValue: Value; (* value of DeletedKey, if present *) hashProc: HashProc; (* key hashing procedure, or NIL *) equalProc: EqualProc; (* key equality predicate, or NIL *) (* Current statistics: *) avgProbes: REAL; (* Running average of probes per operation *) numFilled: CARDINAL; (* Current number of really occupied slots *) numDeleted: CARDINAL; (* Current number of DeletedKey entries *) END; TYPE TT = T BRANDED OBJECT rec: TableRec OVERRIDES in := In; get := Get; put := Put; delete := Delete; clear := Clear; copy := Copy; toKeyList := ToKeyList; toValueList := ToValueList; toAssocList := ToAssocList; enumerate := Enumerate; END; CONST StepMultiplier = 1052823; (* Multiplier for step hashing *) CONST (* Thresholds for rehashing the table: *) MaxAvgProbes = 4.0; (* Max average probes/query *) MaxDeletedEntries = 0.75; (* Max deleted entries in table *) MinExpansion = 1.25; (* Minimum expansion factor when rehasing *) CONST Decay = 0.0625; (* decay for running average of number of probes *) CONST standardSize = ARRAY OF CARDINAL{ 23, 89, 181, 359, 719, 1447, 2887, 4093, 5791, 8191, 11579, 16381, 23167, 32749, 46337, 65521, 92681, 131071, 185363, 262139, 370723, 524287, 741431, 1048573, 1482907, 2097143, 2965819, 4194301, 5931641, 8388593, 11863279, 16777213, 23726561, 33554393, 47453111, 67108859, 94906249, 134217689 (* Should be enough...8-) *) }; (* Standard table sizes. They must all be prime, and increase in roughly geometric progression. The smaller sizes increase faster to offset the allocation overhead. (I hope). *) CONST MaxBuckets = standardSize[LAST(standardSize)]; MinBuckets = standardSize[FIRST(standardSize)]; PROCEDURE New( hashProc: HashProc; equalProc: EqualProc; initialSize: CARDINAL := 1; ): T RAISES {} = VAR table: TT := NEW(TT); BEGIN WITH t = table.rec DO t.mutex := NEW (Thread.Mutex); t.buckets := NewBuckets(initialSize); t.hashProc := hashProc; t.equalProc := equalProc; t.nullKeyPresent := FALSE; t.nullKeyValue := NullValue; t.deletedKeyPresent := FALSE; t.deletedKeyValue := NullValue; t.avgProbes := 0.0; t.numFilled := 0; t.numDeleted := 0; END; RETURN table END New; PROCEDURE NewBuckets(size: CARDINAL): Buckets RAISES {} = VAR i: CARDINAL; BEGIN (* Round /size/ to next largest standard size: *) i := 0; WHILE (i < LAST(standardSize)) AND (size > standardSize[i]) DO INC(i) END; size := standardSize[i]; WITH rb = NEW(Buckets, size), buck = rb^ DO FOR i := 0 TO LAST(buck) DO buck[i] := NullEntry END; RETURN rb END END NewBuckets; PROCEDURE Put( table: TT; key: Key; READONLY value: Value; ): BOOLEAN RAISES {} = VAR probes: CARDINAL; found, crowded: BOOLEAN; BEGIN WITH t = table.rec DO LOCK t.mutex DO (* General case: *) WITH hash = t.hashProc(key) DO LOOP found := DoPut( t, t.buckets^, hash, key, value, (*OUT*) probes, crowded ); IF NOT crowded THEN t.avgProbes := (1.0 - Decay) * t.avgProbes + Decay * FLOAT(probes); IF (t.avgProbes > (MaxAvgProbes)) AND (NUMBER(t.buckets^) < MaxBuckets) THEN AvgProbesTooHigh(t, probes); END; RETURN found END; (* T is too crowded to insert this item: *) Rehash(t, expand := TRUE) END; END END END; END Put; PROCEDURE DoPut( VAR t: TableRec; VAR buck: ARRAY OF Entry; hash: Word.T; key: Key; READONLY value: Value; VAR (*OUT*) probes: CARDINAL; VAR (*OUT*) crowded: BOOLEAN; ): BOOLEAN RAISES {} = (* Stores data in bucket array, returns TRUE iff key was already present. Sets /crowded/ to TRUE (and returns FALSE) if put failed because table was too crowded/unbalanced *) VAR i, free: CARDINAL; BEGIN WITH M = NUMBER(buck), chainBase = Word.Mod(hash, M), chainStep = 1 + Word.Mod(Word.Times(hash, StepMultiplier), M - 1) DO i := chainBase; probes := 0; free := M; LOOP INC(probes); WITH entry = buck[i] DO (* Examine entry *) IF (entry.key = NullKey) THEN (* Reached an empty slot: Key is not there *) IF free # M THEN buck[free].key := key; buck[free].value := value; DEC(t.numDeleted) ELSE entry.key := key; entry.value := value END; INC(t.numFilled); crowded := FALSE; RETURN FALSE ELSIF (entry.key = DeletedKey) THEN (* Deleted slot --- treat as a wrong key, but remember it *) IF free = M THEN free := i END; ELSIF t.equalProc(entry.key, key) THEN (* Key is already there: *) entry.value := value; crowded := FALSE; RETURN TRUE END; (* Should we go on? *) IF probes >= M THEN (* Reached end of list, key is not there *) IF free = M THEN (* No space to insert *) crowded := TRUE ELSE (* Insert in lieu of deleted slot *) buck[free].key := key; buck[free].value := value; DEC(t.numDeleted); INC(t.numFilled); crowded := FALSE END; RETURN FALSE END END; i := (i + chainStep) MOD M END END; END DoPut; PROCEDURE Get(table: TT; key: Key): Value RAISES {NotFound} = VAR value: Value; BEGIN IF In(table, key, value) THEN RETURN value ELSE RAISE NotFound END END Get; PROCEDURE In( table: TT; key: Key; VAR (*OUT*) value: Value; ): BOOLEAN RAISES {} = VAR probes: CARDINAL; found: BOOLEAN; BEGIN WITH t = table.rec DO LOCK t.mutex DO (* General case: *) WITH hash = t.hashProc(key) DO found := DoIn( t, t.buckets^, hash, key, (*OUT*) value, (*OUT*) probes ); t.avgProbes := (1.0 - Decay) * t.avgProbes + Decay * FLOAT(probes); IF (t.avgProbes > (MaxAvgProbes)) AND (NUMBER(t.buckets^) < MaxBuckets) THEN AvgProbesTooHigh(t, probes); END; RETURN found END END END END In; PROCEDURE DoIn( VAR t: TableRec; VAR buck: ARRAY OF Entry; hash: Word.T; key: Key; VAR (*OUT*) value: Value; VAR (*OUT*) probes: CARDINAL; ): BOOLEAN RAISES {} = (* Locates key in bucket array, sets /value/, returns TRUE; If not found, returns FALSE. *) VAR i, free: CARDINAL; BEGIN WITH M = NUMBER(buck), chainBase = Word.Mod(hash, M), chainStep = 1 + Word.Mod(Word.Times(hash, StepMultiplier), M - 1) DO i := chainBase; probes := 0; free := M; LOOP INC(probes); WITH entry = buck[i] DO IF (entry.key = NullKey) THEN (* Reached end of chain, didn't find key *) value := NullValue; RETURN FALSE ELSIF (entry.key = DeletedKey) THEN (* Treat as wrong key, but remember it *) IF free = M THEN free := i END; ELSIF t.equalProc(key, entry.key) THEN value := entry.value; IF free # M THEN (* Relocate entry to deleted slot: *) buck[free] := entry; entry.key := DeletedKey; entry.value := NullValue; END; RETURN TRUE ELSIF probes >= M THEN (* Reached end of chain, didn't find key *) value := NullValue; RETURN FALSE END END; i := (i + chainStep) MOD M END END; END DoIn; PROCEDURE Delete( table: TT; key: Key; VAR (*OUT*) value: Value ): BOOLEAN RAISES {} = VAR probes: CARDINAL; found: BOOLEAN; BEGIN WITH t = table.rec DO LOCK t.mutex DO (* General case: *) WITH hash = t.hashProc(key) DO found := DoDelete( t, t.buckets^, hash, key, (*OUT*) value, (*OUT*) probes ); t.avgProbes := (1.0 - Decay) * t.avgProbes + Decay * FLOAT(probes); IF (t.avgProbes > (MaxAvgProbes)) AND (NUMBER(t.buckets^) < MaxBuckets) THEN AvgProbesTooHigh(t, probes); END; IF found THEN WITH M = NUMBER(t.buckets^) DO IF (FLOAT(t.numDeleted) > (MaxDeletedEntries) * FLOAT(M)) AND M > MinBuckets THEN Rehash(t, expand := FALSE) END END END; RETURN found END END END END Delete; PROCEDURE DoDelete( VAR t: TableRec; VAR buck: ARRAY OF Entry; hash: Word.T; key: Key; VAR (*OUT*) value: Value; VAR (*OUT*) probes: CARDINAL; ): BOOLEAN RAISES {} = (* Locates key in bucket array; if found, replaces entry's key by DeletedKey, replaces entry's value by NullValue, sets /value/ to old entry's value, and return TRUE; if not found, sets /value/ to NullValue and returns FALSE. *) VAR i: CARDINAL; BEGIN WITH M = NUMBER(buck), chainBase = Word.Mod(hash, M), chainStep = 1 + Word.Mod(Word.Times(hash, StepMultiplier), M - 1) DO i := chainBase; probes := 0; LOOP INC(probes); WITH entry = buck[i] DO IF (entry.key = NullKey) THEN value := NullValue; RETURN FALSE ELSIF (NOT (entry.key = DeletedKey)) AND t.equalProc(entry.key, key) THEN value := entry.value; entry.key := DeletedKey; entry.value := NullValue; INC(t.numDeleted); DEC(t.numFilled); RETURN TRUE ELSIF probes >= M THEN value := NullValue; RETURN FALSE END END; i := (i + chainStep) MOD M END END; END DoDelete; PROCEDURE AvgProbesTooHigh(VAR t: TableRec; <*UNUSED*> probes: CARDINAL) RAISES {} = (* Expands t.buckets because of too many probes/operation *) BEGIN IF FLOAT(t.numFilled) > FLOAT(NUMBER(t.buckets^))*(1.0- (MaxDeletedEntries)) THEN Rehash(t, expand := TRUE) ELSE (* Table already too big to expand; reset average probes, and hope for the best *) t.avgProbes := 0.0 END END AvgProbesTooHigh; PROCEDURE Rehash( VAR t: TableRec; expand: BOOLEAN; (* TRUE if called because table is too crowded *) ) RAISES {} = VAR oldBuckets, newBuckets: Buckets; size: CARDINAL; success: BOOLEAN; BEGIN REPEAT (* Create a new bucket array of sufficient size, and swap bucket arrays: *) oldBuckets := t.buckets; WITH idealOccupancy = (1.0 - 1.0/FLOAT( (MaxAvgProbes)))/1.5, idealNewSize = ROUND(FLOAT(t.numFilled) / idealOccupancy) DO size := idealNewSize END; IF expand THEN WITH effectiveSize = NUMBER(oldBuckets^) - t.numDeleted, minNewSize = ROUND(FLOAT(effectiveSize) * (MinExpansion)) DO size := MAX(size, minNewSize); <* ASSERT size > NUMBER(oldBuckets^) - t.numDeleted *> END END; newBuckets := NewBuckets(size); <* ASSERT NUMBER(newBuckets^) >= t.numFilled *> t.buckets := newBuckets; t.numDeleted := 0; t.numFilled := 0; expand := TRUE; (* Second and later attempts should expand *) (* Move old entries to new bucket array, and clean old one: *) success := CopyEntries(t, oldBuckets^, newBuckets^); (* Discard old bucket array: *) oldBuckets := NIL; (* Make sure that second and later attempts really expand table: *) expand := TRUE; UNTIL success; (* Reset running probe average to expected average: *) WITH M = NUMBER(t.buckets^) DO t.avgProbes := FLOAT(M + 1)/FLOAT(M + 1 - t.numFilled) END; END Rehash; PROCEDURE CopyEntries( VAR t: TableRec; VAR (*IO*) old, new: ARRAY OF Entry; ): BOOLEAN RAISES {} = (* Moves entries from /old/ buckets to the /new/ ones, in hashing order if possible. Assumes /new/ is empty to begin with, and ensures that /old/ is clean upon return. Returns TRUE if all elements were hashed properly, FALSE otherwise. *) VAR probes, next: CARDINAL; found, crowded: BOOLEAN; BEGIN crowded := FALSE; next := 0; (* Ignore number of probes, except if it exceeds the max probes per item: *) FOR i := 0 TO LAST(old) DO WITH e = old[i] DO IF (e.key = NullKey) THEN (* Ignore *) ELSIF (e.key = DeletedKey) THEN e.key := NullKey ELSE IF crowded THEN (* Insert into first free slot: *) WHILE NOT (new[next].key = NullKey) DO INC(next) END; new[next] := e; INC(next) ELSE (* Try hashing into new table. *) (* Do not worry about the average number of probes yet. *) WITH hash = t.hashProc(e.key) DO found := DoPut( t, new, hash, e.key, e.value, (*OUT*) probes, crowded ); END; END; e := NullEntry END; END; END; RETURN NOT crowded END CopyEntries; PROCEDURE Clear(table: TT) RAISES {} = BEGIN WITH t = table.rec DO LOCK t.mutex DO t.nullKeyPresent := FALSE; t.nullKeyValue := NullValue; t.deletedKeyPresent := FALSE; t.deletedKeyValue := NullValue; WITH buck = t.buckets^ DO FOR i := 0 TO LAST(buck) DO WITH e = buck[i] DO IF NOT (e.key = NullKey) THEN e := NullEntry END END END; END; t.avgProbes := 0.0; t.numDeleted := 0; t.numFilled := 0; END END END Clear; PROCEDURE Copy(table: TT): T RAISES {} = BEGIN WITH copy = NEW(TT), c = copy.rec, t = table.rec DO LOCK t.mutex DO c := t; c.mutex := NEW (Thread.Mutex); c.buckets := NEW(Buckets, NUMBER(t.buckets^)); c.buckets^ := t.buckets^; RETURN copy END END END Copy; PROCEDURE ToKeyList(table: TT): List.T RAISES {} = VAR list: List.T; refKey: REFANY; BEGIN WITH t = table.rec DO LOCK t.mutex DO list := NIL; IF t.nullKeyPresent THEN refKey := NullKey; list := List.New(refKey, list) END; IF t.deletedKeyPresent THEN refKey := DeletedKey; list := List.New(refKey, list) END; WITH buck = t.buckets^ DO FOR i := 0 TO LAST(buck) DO WITH e = buck[i] DO IF NOT (e.key = NullKey) AND NOT (e.key = DeletedKey) THEN refKey := e.key; list := List.New(refKey, list) END END END END; RETURN list; END END END ToKeyList; PROCEDURE ToValueList(table: TT): List.T RAISES {} = VAR list: List.T; refValue: REF INTEGER; BEGIN WITH t = table.rec DO LOCK t.mutex DO list := NIL; IF t.nullKeyPresent THEN refValue := NEW(REF INTEGER); refValue^ := t.nullKeyValue; list := List.New(refValue, list) END; IF t.deletedKeyPresent THEN refValue := NEW(REF INTEGER); refValue^ := t.deletedKeyValue; list := List.New(refValue, list) END; WITH buck = t.buckets^ DO FOR i := 0 TO LAST(buck) DO WITH e = buck[i] DO IF NOT (e.key = NullKey) AND NOT (e.key = DeletedKey) THEN refValue := NEW(REF INTEGER); refValue^ := e.value; list := List.New(refValue, list) END END END END; RETURN list; END END END ToValueList; PROCEDURE ToAssocList(table: TT): List.T RAISES {} = VAR list: List.T; refKey: REFANY; refValue: REF INTEGER; BEGIN WITH t = table.rec DO LOCK t.mutex DO list := NIL; IF t.nullKeyPresent THEN refKey := NullKey; refValue := NEW(REF INTEGER); refValue^ := t.nullKeyValue; list := List.New(List.List2(refKey, refValue), list) END; IF t.deletedKeyPresent THEN refKey := DeletedKey; refValue := NEW(REF INTEGER); refValue^ := t.deletedKeyValue; list := List.New(List.List2(refKey, refValue), list) END; WITH buck = t.buckets^ DO FOR i := 0 TO LAST(buck) DO WITH e = buck[i] DO IF NOT (e.key = NullKey) AND NOT (e.key = DeletedKey) THEN refKey := e.key; refValue := NEW(REF INTEGER); refValue^ := e.value; list := List.New(List.List2(refKey, refValue), list) END END END END; RETURN list; END END END ToAssocList; PROCEDURE Enumerate( table: TT; proc: EnumerateProc; data: REFANY; VAR (*OUT*) key: Key; VAR (*OUT*) value: Value ): BOOLEAN = VAR BEGIN WITH t = table.rec DO LOCK t.mutex DO IF t.nullKeyPresent THEN key := NullKey; IF proc(data, key, t.nullKeyValue) THEN value := t.nullKeyValue; RETURN TRUE END; END; IF t.deletedKeyPresent THEN key := DeletedKey; IF proc(data, key, t.deletedKeyValue) THEN value := t.deletedKeyValue; RETURN TRUE END; END; WITH buck = t.buckets^ DO FOR i := 0 TO LAST(buck) DO WITH e = buck[i] DO IF NOT (e.key = NullKey) AND NOT (e.key = DeletedKey) THEN key := e.key; IF proc(data, key, e.value) THEN value := e.value; RETURN TRUE END; END END END END; key := NullKey; value := NullValue; RETURN FALSE; END END END Enumerate; BEGIN END RefIntTbl.