Views
DB_database.stl
by
Paul McJones
—
last modified
2021-02-25 11:33
- History
-
Action Performed by Date and Time Comment Publish Paul McJones 2021-02-25 11:33 No comments.
The main big strings and database implementation, with test.
--files: string_utility_pak.stl,sort_pak.stl,get_lines_pak.stl,random_pak.stl
-- disk_records_refct.stl, B_tree_for_bigs_refct.stl, B_tree_for_bigs_wdix.stl, B_tree_for_bigs_wdoc.stl,
-- B_tree_for_bigs_dbix.stl, Big_stg_for_wdoc.stl
--file database_top.stl test_bs
package big_string_pak;
const bs_code_pts := {"copy", "simple", "optimized", "chop", "tree", "concat_join", "concat_balance", "concat_chop",
"nocon_opt", "nocon", "nocon_left", "nocon_right", "nocon_iter", "nocon_del", "nocon_ins", "nocon_to_simp"};
-- code points to be traversed
procedure bs_from_stg(stg); -- create a big string from a string
procedure stg_from_bigstg(rec); -- make a string from a big_string
procedure bs_slice(rec,i,j); -- the slice extraction operation
procedure bs_length(rec); -- length of a bigstring
procedure bs_set_slice(rw rec,i,j,stg); -- the slice assignment operation
-- (PUBLIC FOR DEBUGGING ONLY)
procedure stg_past(j,rec); -- make a string from a the part of a big_string from j onward
procedure stg_till(j,rec); -- make a string from a the part of a big_string up to j
procedure chop_up(first,mid,last); -- chop the concatenation of three strings into a tuple of string records
procedure print_raw(); -- raw print for debugging
procedure bnr_voc(rec,j); -- j'th member of vector of children for B_tree_for_bigstring records
procedure bs_check_leaves(rec); -- leaf consistency check
end big_string_pak;
package body big_string_pak;
use setldb,byteutil,disk_records_pak,db_records,B_tree_for_bigstring,string_utility_pak;
procedure bs_from_stg(stg); -- make a big_string from a string
-- we first chop up the string into a tuple of string records
if #(pieces_tup := chop_up("",stg,"")) = 1 then return make_record(pieces_tup(1)); end if;
-- see comment on the 'chop_up' routine, below. If the whole string fits in
-- just one string record, we simply return that record.
--print("#pieces_tup: ",#pieces_tup);
--print("Total len ",+/[#y:y in pieces_tup]);
return bnr_make_from_tuple(pieces_tup); -- otherwise convert the list of sections returned into a tree
end bs_from_stg;
procedure stg_from_bigstg(rec); -- make a string from a big_string
if dr_load(rec)(type_byte) = string_record then return sr_slice(rec,1,sr_length(rec)); end if;
-- get string from record
if not dr_is_compound(rec) then -- concatenate strings from records
return "" +/ [sr_slice(ch,1,sr_length(ch)): j in [1..num_childr(rec)] | (ch := bnr_voc(rec,j)) /= OM];
end if;
return "" +/ [stg_from_bigstg(bnr_voc(rec,j)): j in [1..num_childr(rec)]]; -- else proceed recursively
end stg_from_bigstg;
procedure stg_past(j,rec); -- make a string from a the part of a big_string from j onward
if dr_load(rec)(type_byte) = string_record then -- get string from record
if j > (sl := sr_length(rec)) then
abort("Illegal character index in simple stg_past operation: " + str(j) + ", " + sl);
end if;
return sr_slice(rec,j,sl); -- return slice from j onward
end if;
-- get the child containing the first character past j
if not (exists chix in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chix) >= j) then
abort("Illegal character index in stg_past operation: " + str(j) + ", " + nc);
end if;
prev_cum := if chix = 1 then 0 else bnr_get_ch_cum(rec,chix - 1) end if; -- the previous cumulant
return stg_past(j - prev_cum,bnr_voc(rec,chix))
+/ [stg_from_bigstg(bnr_voc(rec,j)): j in [chix + 1..nc]];
end stg_past;
procedure stg_till(j,rec); -- make a string from a the part of a big_string up to j
if dr_load(rec)(type_byte) = string_record then -- simple string record case
if j > (sl := sr_length(rec)) then
abort("Illegal character index in simple stg_past operation: " + str(j) + ", " + sl);
end if;
return sr_slice(rec,1,j); -- return slice up to j
end if;
-- get the child containing the first character past j
if not (exists chix in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chix) >= j) then
abort("Illegal character index in stg_past operation: " + str(j) + ", " + nc);
end if;
prev_cum := if chix = 1 then 0 else bnr_get_ch_cum(rec,chix - 1) end if; -- the previous cumulant
return ("" +/ [stg_from_bigstg(bnr_voc(rec,j)): j in [1..chix - 1]])
+ stg_till(j - prev_cum,bnr_voc(rec,chix));
end stg_till;
procedure bs_slice(rec,i,j); -- the slice extraction operation concatenates all the characters
-- between the two indicated positions. the 'node' can be either
-- the top of a tree, or can be a simple string_record
-- for a tree, we locate the sections F, L containing the first and last characters, and
-- return the concatenation of the part of F past i, the part of L up to j,
-- and the concatenation of the strings associated with all intermediate child trees
--print("bs_slice: ",i," ",j);
if i < 1 or j < i - 1 then
abort("Illegal second and/or first parameters in string extraction operation" + str(j) + ", " + str(i));
end if;
if dr_load(rec)(type_byte) = string_record then -- simple string record case
sl := sr_length(rec);
if j > sl then -- get string from record
print("bs_slice: Illegal character index in simple string extraction operation: " + str(j) + ", " + str(sl)); stop;
end if;
--print("bs_slice bottom: ",i," ",j," ",sr_slice(rec,i,j));
return sr_slice(rec,i,j);
end if;
-- get the child containing the first character past i
if not (exists chixi in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chixi) >= i) then
abort("Illegal first character index in string extraction operation: " + str(i) + ", " + str(nc));
end if;
-- get the child containing the first character past j
if not (exists chixj in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chixj) >= j) then
abort("Illegal second character index in string extraction operation: " + str(j) + ", " + str(nc));
end if;
prev_cumi := if chixi = 1 then 0 else bnr_get_ch_cum(rec,chixi - 1) end if; -- the cumulant previous to i
prev_cumj := bnr_get_ch_cum(rec,chixj - 1); -- the cumulant previous to j
if chixi = chixj then -- proceed recursively
return bs_slice(bnr_voc(rec,chixi),i - prev_cumi,j - prev_cumi);
end if;
xxx:=(stg_past(i - prev_cumi,bnr_voc(rec,chixi)) +/[stg_from_bigstg(bnr_voc(rec,k)): k in [chixi + 1..chixj - 1]])
+ stg_till(j - prev_cumj,bnr_voc(rec,chixj)); -- proceed recursively: from, middle, end
return xxx;
return (stg_past(i - prev_cumi,bnr_voc(rec,chixi)) +/[stg_from_bigstg(bnr_voc(rec,k)): k in [chixi + 1..chixj - 1]])
+ stg_till(j - prev_cumj,bnr_voc(rec,chixj)); -- proceed recursively: from, middle, end
end bs_slice;
procedure bnr_voc(rec,j); -- j'th member of vector of children for B_tree_for_bigstring records
stg := dr_load(rec); -- load this string
cjstrt := (j - 1) * 4 + bnr_ch_start;
return stg(cjstrt..cjstrt + 3); -- return child rec
end bnr_voc;
procedure bnr_get_ch_cum(rec,j); -- get the cum value for the j-th child of this node
return int_of_5(dr_load(rec)(bnr_cum_start + (j - 1) * 5..bnr_cum_start + j * 5 - 1));
end bnr_get_ch_cum;
procedure chop_up(first,mid,last); -- chop the concatenation of three strings into a tuple of string records
-- if all three pieces fit into one, two or three sections, return these. Otherwise join as much as possible of the
-- middle into two pieces, and then the reminder of the middle into approximately equal-sized pieces.
-- first and lat are assumed to be no more than one record size each
if (total := (nf := #first) + (nm := #mid) + (nl := #last)) <= sr_hi_lim then -- put all into one piece
return [first + mid + last];
end if;
if total < 2 * sr_hi_lim then -- two pieces are enough
center := total/2; -- half the total
if center <= nf then
return [first(1..center), first(center + 1..) + mid + last];
end if;
if center <= nf + nm then
return [first + mid(1..used := center - nf), mid(used + 1..) + last];
end if;
return [first + mid + last(1..used := center - nf - nm), last(used + 1..)];
end if; -- otherwise first + mid cannot fit into one piece
if total <= 3 * sr_hi_lim then -- if they fit into 3 pieces, put as much as possible
-- of first and mid into one piece, and chop up the remainder
return [first + mid(1..used := sr_hi_lim - nf)] + chop_up("",mid(used + 1..),last);
end if; -- otherwise put as much as possible of mid together with first and last, and
-- cut the reminder of the middle into approximately equal-sized pieces.
first := first + mid(1..used := sr_hi_lim - nf);
last := mid((new_end := nm - (sr_hi_lim - nl)) + 1..) + last;
npieces := (rem := new_end - used)/sr_hi_lim; -- number of full-sized pieces
psm1 := sr_hi_lim - 1;
if (extra_part := rem mod sr_hi_lim) = 0 then -- fits into a list of full pieces
return [first] + [mid(j..j + psm1): j in [used + 1,used + sr_hi_lim + 1..new_end - psm1]] + [last];
else -- there is a bit left over
start_last_2 := new_end - psm1 - extra_part; -- starting character of last 2 sections
mid_last_2 := start_last_2 + (psm1 + extra_part)/2; -- middle character of last 2 sections
-- use list of all but 1 full piece, plus two smaller pieces.
return [first] + [mid(j..j + psm1): j in [used + 1,used + sr_hi_lim + 1..new_end - 2 * psm1 - extra_part]]
+ [mid(start_last_2..mid_last_2),mid(mid_last_2 + 1..new_end)] + [last];
end if;
end chop_up;
procedure make_record(stg); -- creates one record from string
rec := set_type(dr_new_rec(),string_record); sr_set_slice(rec,1,0,stg);
return rec;
end make_record;
procedure pass_pass(stg); print(stg); pass(stg); end pass_pass; -- print and pass for debugging
procedure bs_set_slice(rw rec,i,j,stg); -- the slice assignment operation
--print("bs_set_slice: ",i," ",j," ",#stg," ",abs(dr_load(rec)(type_byte)));
if i < 1 or j < i - 1 then
abort("Illegal second and first parameters in string extraction operation" + str(j) + ", " + str(i));
end if;
-- since rec will change, ensure that it has just one copy.
-- This will up the refcounts of its children if necessary
if refcount(int_of_4(rec)) > 1 then -- must copy
pass("copy");
stgg := dr_load(rec); new_r := dr_new_rec();
dr_setrecbuf(new_r,stgg);
dr_dirtify(new_r);
increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original
end if;
if (contents := dr_load(rec))(type_byte) = string_record then -- simple string record case
-- in the simple string record case we extract the un-written-over portions of the string,
-- and chop up F + stg + L to get a new list of leaves, which are made into a node or a tree.
pass("simple");
--print("simple: ",i," ",j," ",#stg);
if j > (sl := sr_length(rec)) then -- get string from record
abort("bs_set_slice: Illegal character index in simple string extraction operation: " + str(j) + ", " + str(sl));
end if;
-- if F + stg + L fits in just one section, we perform the operation as a string slice assignment,
-- to avoid string copying where unnecessary
if (new_len := (i - 1) + (lstg := #stg) + (sl - j)) <= sr_hi_lim then -- handle as an optimized case
pass("optimized");
sr_set_slice(rec,i,j,stg); -- replace slice
--print("optimized case: ",i," ",j," ",sr_slice(rec,1,sr_length(rec)));
return; -- done with this optimized case
else -- we must actually chop up F + stg + L
pass("chop");
first := contents(sr_char_start..sr_char_start + i - 2); -- till the i-1-th character
last := contents(sr_char_start + j..sr_char_start + sl - 1); -- from the j + 1-st character
pieces_tup := chop_up(first,stg,last);
end if;
incref(rec,-1); -- the old form of the record will lose one reference
rec := bnr_make_from_tuple(pieces_tup); -- convert the list of sections returned into a tree
return;
end if; -- otherwise we deal with the B-tree case
pass("tree");
if i > (len_this := bnr_get_cum(rec)) + 1 or j > len_this then
abort("Out of range first parameter or second in string assignment operation " + str(j) + ", " + str(i) + " Length is " + str(len_this));
end if;
if i = len_this + 1 then -- we have a simple concatenation operation
if (lstg := #stg) < sr_low_lim then -- balance or join with last node
[last_node,lcum] := bnr_comp_cum(rec,OM); -- get the last node, and its string contents
ln_stg := sr_slice(last_node,1,lln := sr_length(last_node));
if lln + lstg <= sr_hi_lim then -- join with last node
pass("concat_join");
old_last_node := last_node;
incref(old_last_node,1); -- must increment to prevent length of referenced node from changing
-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********
sr_set_slice(last_node,lln + 1,lln,stg); -- last_node is copied and old_last_node loses a ref
bnr_set_comp(rec,lcum,last_node); -- insert the (new) last node; old_last_node loses another ref
incref(last_node,-1); -- the variable 'last_node' is now dead
return; -- done with this optimized case
end if; -- otherwise we will balance
pass("concat_balance");
old_last_node := last_node;
incref(old_last_node,1); -- must increment to prevent length of referenced node from changing
-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********
[new_ln_stg,stg] := chop_up(ln_stg,stg,""); -- cut into two string sections
sr_set_slice(last_node,1,lln,new_ln_stg);
bnr_set_comp(rec,lcum,last_node); -- insert the (possibly new) last node
incref(last_node,-1); -- the variable 'last_node' is now dead
bnr_insert(rec,OM,leaf := make_record(stg)); -- insert extra part at the end
incref(leaf,-1); -- the variable 'leaf' is now dead
return; -- done with this optimized case
end if;
pass("concat_chop");
pieces_tup := chop_up("",stg,"");
--print("concatenation: ",pieces_tup);
for piece in pieces_tup loop -- append all these pieces to the existing record
bnr_insert(rec,OM,leaf := make_record(piece)); -- at the end
incref(leaf,-1); -- the variable 'leaf' is now dead
end loop; -- note that the first insert may copy rec and decrement its refcount if it is shared;
-- in this case, ref will correctly be repaced by the new copy
return; -- done with this case
end if; -- end of the case in which insertion is at the very end
-- otherwise we need to make an insertion into the middle of an existing big_string
-- we extract the un-written-over portions F and L of the first and last sections affected,
-- and chop up F + stg + L to get a new list of leaves. These replace the leaves
-- of the original tree, running from the first to the last section affected.
-- get the child containing the first character past i
[leafi,cumi] := bnr_comp_cum(rec,i); -- get the leaf containing the first character past i, and its cumulant
[leafj,cumj] := bnr_comp_cum(rec,j); -- get the leaf containing the first character past j, and its cumulant
--print("insertion in middle: ",cumi," ",cumj);
-- if cumi = cumj, and F + stg + L fits in just one section and does not fall beneath the required minimum
-- size, we perform the operation as a string slice assignment, to avoid string copying where unnecessary
if cumi = cumj -- we may be able to work in an existing section
and (new_len := (lstg := #stg) + (sli := sr_length(leafi)) - (j - i + 1)) <= sr_hi_lim
and new_len >= sr_low_lim then
pass("nocon_opt");
prev_cum := cumi - sli; -- cumulated length before the i-th (= j-th) leaves
old_leafi := leafi;
incref(old_leafi,1); -- must increment to prevent length of referenced node from changing
-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********
sr_set_slice(leafi,i - prev_cum,j - prev_cum,stg);
-- replace slice; leafi is copied, so its old version will lose 1 ref
bnr_set_comp(rec,i,leafi); -- reset set the leaf of rec, since it will have changed
-- the old version now loses another ref
incref(leafi,-1); -- the variable leafi is now dead (compensate for extra ref added by set_comp)
return; -- done with this optimized case
end if;
contentsi := dr_load(leafi); sli := sr_length(leafi); irel := i - (cumi - sli);
first := contentsi(sr_char_start..sr_char_start + irel - 2); -- till the i-1-th character
contentsj := dr_load(leafj); slj := sr_length(leafj); jrel := j - (cumj - slj);
last := contentsj(sr_char_start + jrel..sr_char_start + slj - 1); -- from the j + 1-st character
pieces_tup := chop_up(first,stg,last);
-- now the strings in pieces_tup must be turned into leaves which replace the leaves of the tree,
-- starting from leafj and ending with leafi. This iteration uses the cumulants and works backwards,
-- testing for the appearance of leafi. If this is encountered before pieces_tup is exhausted, the
-- reminming elements of pieces_tup are inserted at the appropriate position. If pieces_tup
-- is exhausted before leafi is encountered, the leaves forward from the one replaced up
-- to and including leafi is deleted.
-- however, the case in which pieces_tup contins just one element may be special, since this may fall below the
-- minimum number of characters wanted for a leaf
pass("nocon");
if #pieces_tup = 1 and #(all := pieces_tup(1)) < sr_low_lim then
-- we balance or join the short string we have with any left or right neighbor leaf
if cumi > sli then -- there is a left neighbor
pass("nocon_left");
[left_neighbor,-] := left_pair := bnr_comp_cum(rec,cumi - sli); -- get the left neighbor and its cumulant
left_neighbor_stg := sr_slice(left_neighbor,1,sr_length(left_neighbor));
pieces_tup := chop_up(left_neighbor_stg,all,""); -- use this as the new pieces_tup
[leafi,cumi] := left_pair; -- use left_neighbor as the leafi
elseif cumj < len_this then -- there is a right neighbor
pass("nocon_right");
[right_neighbor,-] := right_pair := bnr_comp_cum(rec,cumj + 1); -- get the right neighbor and its cumulant
right_neighbor_stg := sr_slice(right_neighbor,1,sr_length(right_neighbor));
pieces_tup := chop_up("",all,right_neighbor_stg); -- use this as the new pieces_tup
[leafj,cumj] := right_pair; -- use left_neighbor as the leafi
end if;
end if; -- after this the treatment is like that of all other cases
cum_now := cumj; -- the following iteration is governed by the cumulants
pieces_tup_ix_now := #pieces_tup; -- start at the end of pieces_tup
while cum_now >= cumi and pieces_tup_ix_now >= 1 loop
pass("nocon_iter");
oll := sr_length(bnr_comp(rec,cum_now)); -- the leaf which will be replaced
bnr_set_comp(rec,cum_now,leaf := make_record(pieces_tup(pieces_tup_ix_now)));
incref(leaf,-1); -- the variable 'leaf' is now dead
--print("chopping up: ",#pieces_tup," ",hexify(leaf));
pieces_tup_ix_now -:= 1; -- backwards in pieces_tup
cum_now -:= oll; -- backwards in the sequence of leaves
end loop;
while cum_now >= cumi loop -- may need to make extra deletions
pass("nocon_del");
oll := sr_length(bnr_comp(rec,cum_now)); -- the leaf which will be replaced
bnr_set_comp(rec,cum_now,OM);
cum_now -:= oll; -- backwards in the sequence of leaves
end loop;
just_past_prev := cum_now + 1; -- just past the leaf previous to leafi
while pieces_tup_ix_now >= 1 loop -- may need to make extra insertions
pass("nocon_ins");
bnr_insert(rec,just_past_prev,leaf := make_record(pieces_tup(pieces_tup_ix_now)));
incref(leaf,-1); -- the variable 'leaf' is now dead
pieces_tup_ix_now -:= 1; -- backwards in pieces_tup
end loop;
if num_childr(old_rec := rec) = 1 then -- move from tree to simple string record representation
pass("nocon_to_simp");
rec := bnr_voc(rec,1); incref(rec,1); incref(old_rec,-1);
-- the child inherits one reference from its parent, so its refcount does not change
end if;
--print("tree_dump after: ",str(bnr_dump(rec)));
end bs_set_slice;
procedure bs_length(rec); -- length of a bigstring = string record length or tree length
return if dr_load(rec)(type_byte) = string_record then sr_length(rec) else bnr_get_cum(rec) end if;
end bs_length;
procedure print_raw(); print(big_ix); end print_raw; -- raw print for debugging
procedure bs_check_leaves(rec); -- checks that leaves have required maximum and minimum lengths
if (refcount(int_of_4(rec)) <= 0) then
print("**** MEMORY ERROR **** node with zero refcount in tree: ",hexify(rec)); return false;
end if;
if (contents := dr_load(rec))(type_byte) = string_record then -- node is a leaf
if (srl := sr_length(rec)) > sr_hi_lim then
print("EXCESSIVE STRING LENGTH DETECTED IN NODE ",hexify(rec)," ",hexify(contents));
return false;
end if;
return true; -- node is ok
end if; -- otherwise we proceed recursively
if (bsl := bs_length(rec)) /= (ls := #stg_from_bigstg(rec)) then
print("***** Length Discrepancy ***** ",bsl," - ",ls," -> ",hexify(rec)," ",hexify(dr_load(rec))); stop;
end if;
return forall j in [1..num_childr(rec)] | bs_check_leaves_in(bnr_voc(rec,j));
procedure bs_check_leaves_in(rec); -- inner workhorse
if (refcount(int_of_4(rec)) <= 0) then
print("**** MEMORY ERROR **** node with zero refcount in subtree: ",hexify(rec)); return false;
end if;
if (contents := dr_load(rec))(type_byte) = string_record then -- node is a leaf
if (srl := sr_length(rec)) > sr_hi_lim then
print("EXCESSIVE STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
return false;
end if;
if srl < sr_low_lim then
print("INSUFFICIENT STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
return false;
end if;
return true; -- leaf is ok
end if; -- otherwise we proceed recursively
if (bsl := bs_length(rec)) /= (ls := #stg_from_bigstg(rec)) then
print("***** Length Discrepancy In Leaf***** ",bsl," ",ls); stop;
end if;
return forall j in [1..num_childr(rec)] | bs_check_leaves_in(bnr_voc(rec,j));
end bs_check_leaves_in;
end bs_check_leaves;
end big_string_pak;
class big_string; -- simple wrapper object for big_string_pak
var rec; -- disk record for big_string; this is either as simple string record or a B-tree root record
-- (MADE PUBLIC FOR MEMORY USAGE DEBUGGING ONLY)
procedure create(stg); -- create a big string from a string
procedure print_raw(); -- raw print for debugging
end big_string;
class body big_string; -- simple wrapper object for big_string_pak
use setldb,big_string_pak;
procedure create(stg); rec := bs_from_stg(stg); end create; -- create a big string from a string
procedure selfstr; return stg_from_bigstg(rec); end; -- string form of a big string
procedure self(i..j); -- the slice extraction operation concatenates
-- all the characters between the two indicated positions
return bs_slice(rec,i,j);
end;
procedure self(i..j) := stg; -- slice assignment operation
bs_set_slice(rec,i,j,stg); -- this can change rec; when the refcount of this object goes to 0,
-- rec should be erased
end;
procedure #self; return bs_length(rec); end; -- length of this string
procedure print_raw(); print(big_ix); end print_raw;
-- raw print for debugging
end big_string;
program test_bs; -- test program for big_string package and its associated class
use setldb,big_string_pak,big_string;
use byteutil,disk_records_pak,db_records,B_tree_for_bigstring,string_utility_pak;
var orig := "123456789a123456789b123456789c" + "123456789d123456789e123456789f" + "123456789g123456789h123456789i"
+ "123456789j123456789k123456789lABCDE"; -- string for tests
var orig2 := "ABCDEFGHIaABCDEFGHIbABCDEFGHIc" + "ABCDEFGHIdABCDEFGHIeABCDEFGHIf" + "ABCDEFGHIg1ABCDEFGHIhABCDEFGHIi"
+ "ABCDEFGHIjABCDEFGHIkABCDEFGHIzZZZZZ"; -- string for tests
code_pts := bs_code_pts + bnr_code_pts; -- code points to be traversed
database_file_name := "bs_test_file"; -- set the name of the file to be used
package_tests; -- test the underlying package
big_string_class_test; -- test the class version
report_points_passed(); -- get traversal report
procedure check_mem(caption,rec); -- memory check utility
incref(rec,-1); print(caption," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if));
end check_mem;
procedure package_tests; -- tests of the underlying package
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := orig)); print("short reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 1",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := orig + orig(1..10))); print("long reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 2",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("longer reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 3",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 5 * orig)); print("longer reconstruction 2: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 4",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("longer reconstruction 3: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 5",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := orig)); print("end of short reconstruction: ",s1(5..) = s2);
check_mem("memcheck 6",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := orig + orig(1..10))); print("end of long reconstruction: ",s1(5..) = s2);
check_mem("memcheck 7",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("end of longer reconstruction: ",s1(5..) = s2);
check_mem("memcheck 8",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 5 * orig)); print("end of longer reconstruction 2: ",s1(5..) = s2);
check_mem("memcheck 9",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("end of longer reconstruction 3: ",s1(5..) = s2);
check_mem("memcheck 10",rc);
s2 := stg_till(95,rc := bs_from_stg(s1 := orig)); print("start of short reconstruction: ",s1(1..95) = s2);
check_mem("memcheck 11",rc);
s2 := stg_till(130,rc := bs_from_stg(s1 := orig + orig(1..10))); print("start of long reconstruction: ",s1(1..130) = s2);
check_mem("memcheck 12",rc);
s2 := stg_till(630,rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("start of longer reconstruction: ",s1(1..630) = s2);
check_mem("memcheck 13",rc);
s2 := stg_till(620,rc := bs_from_stg(s1 := 5 * orig)); print("start of longer reconstruction 2: ",s1(1..620) = s2);
check_mem("memcheck 14",rc);
s2 := stg_till(255,rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("start of longer reconstruction 3: ",s1(1..255) = s2);
check_mem("memcheck 15",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := orig),5,95); print("short slice: ",s1(5..95) = s2);
check_mem("memcheck 16",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := orig + orig(1..10)),5,130); print("long slice: ",s1(5..130) = s2);
check_mem("memcheck 17",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 5 * orig + orig(1..10)),5,630); print("longer slice: ",s1(5..630) = s2);
check_mem("memcheck 18",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 5 * orig),5,620); print("longer slice 2: ",s1(5..620) = s2);
check_mem("memcheck 19",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 2 * orig + orig(1..10)),5,255); print("longer slice 3: ",s1(5..255) = s2);
check_mem("memcheck 20",rc);
rc := bs_from_stg(s1 := 4 * orig + orig(1..10));
bs_set_slice(rc,3,500,""); s1(3..500) := ""; -- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice shortening insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 21",rc);
rc := bs_from_stg(s1 := orig + orig(1..10));
bs_set_slice(rc,3,2,"XX"); s1(3..2) := "XX"; -- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 22",rc);
rc := bs_from_stg(s1 := orig + orig(1..10));
bs_set_slice(rc,3,2,3 * orig2);s1(3..2) := 3 * orig2; -- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice long insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 23",rc);
rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,"XX"); s1(1..3) := "XX"; -- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice shrink-a-bit: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 24",rc);
rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,""); s1(1..3) := ""; -- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice shrinkage: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 25",rc);
rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,"YYYYY"); s1(1..3) := "YYYYY"; -- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice expand: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 26",rc);
rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,0,"ZZZZZ"); s1(1..0) := "ZZZZZ"; -- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 27",rc);
rc := bs_from_stg(s1 := orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"XX"); s1(rl + 1..rl) := "XX"; -- test slice append operation, short form
s2 := stg_from_bigstg(rc); print("long slice append: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 28",rc);
rc := bs_from_stg(s1 := 2 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"WWWWWWW"); s1(rl + 1..rl) := "WWWWWWW"; -- test slice append operation, long form
s2 := stg_from_bigstg(rc); print("long slice append 2: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 29",rc);
rc := bs_from_stg(s1 := 5 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"YYYYY"); s1(rl + 1..rl) := "YYYYY"; -- test slice append operation, longer form
s2 := stg_from_bigstg(rc); print("long slice append 3: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 30",rc);
rc := bs_from_stg(s1 := 5 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,3 * orig); s1(rl + 1..rl) := 3 * orig; -- test slice append operation, longer form
s2 := stg_from_bigstg(rc); print("long slice long append: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 30A",rc);
-- additional tests of the slice assignment operations, to proble all the special cases
-- in the code, and verify the absence of memory leaks or faulty deallocations
end package_tests;
procedure big_string_class_test; -- test program for big_string class
the_stg := big_string(s1 := orig);
print("string length and selfstr: ",#the_stg = #s1," ",str(the_stg) = s1);
print("short slice class: ",the_stg(5..95) = s1(5..95) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 31",the_stg.rec);
the_stg := big_string(s1 := orig + orig(1..10));
print("long slice class: ",the_stg(5..130) = s1(5..130) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 32",the_stg.rec);
the_stg := big_string(s1 := 5 * orig + orig(1..10));
print("longer slice class: ",the_stg(5..630) = s1(5..630) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 33",the_stg.rec);
the_stg := big_string(s1 := 5 * orig);
print("longer slice class 2: ",the_stg(5..620) = s1(5..620) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 34",the_stg.rec);
the_stg := big_string(s1 := 2 * orig + orig(1..10));
print("longer slice class 3: ",the_stg(5..255) = s1(5..255) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 35",the_stg.rec);
the_stg := big_string("");
the_stg(1..0) := 5 * "abcde"; print("insert from null: ",str(the_stg) = 5 * "abcde");
the_stg(26..25) := 5 * orig; print("tail insert from null: ",
str(the_stg) = (5 * "abcde" + 5 * orig) and bs_check_leaves(the_stg.rec)); --
check_mem("memcheck 36",the_stg.rec);
the_stg := big_string(ori := ori_copy := 5 * "abcde" + 5 * orig);
--print("the_stg after creation: ",str(the_stg)," --- the_stg: ",the_stg); -- ?????????? SETL PRINT BUG ??????????
--print("the_stg after creation: ",str(the_stg)); -- THIS WORKS OK; PREVIOUS LINE DOES NOT
print("string length, selfstr, length check,selfstr check: ",#the_stg = #ori," ",str(the_stg) = ori);
stg_copy := the_stg; incref(the_stg.rec,1); -- note that an extra copy has been created
print("slice extraction and original: ",the_stg(1..10) = ori(1..10) and bs_check_leaves(the_stg.rec));
print("second slice extraction and original: ",the_stg(10..20) = ori(10..20));
print("slice extraction check: ",the_stg(1..10) = ori(1..10));
print("second slice extraction check: ",the_stg(10..20) = ori(10..20));
print("tail slice check: ",the_stg(10..#ori) = ori(10..#ori));
the_stg(1..0) := (extra := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."); ori2 := ori;
ori(1..0) := extra;
print("concatenation of 50 characters at start: ",#the_stg," ",str(the_stg) = ori and bs_check_leaves(the_stg.rec));
print("the_stg: ",str(the_stg)); print(); print(str(stg_copy));
incref(the_stg.rec,-1); -- delete the string
-- we must try long and short pure insertions, long and short impure insertions
-- into a single section, and long and short insertions which erase longer
-- runs
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(1..0) := "XXX";
the_stg(1..0) := "XXX"; print("insertion of 3 characters at start: ",#the_stg," ",str(the_stg) = ori);
-- pure short insertion case
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(21..20) := "XXX";
the_stg(21..20) := "XXX"; print("insertion of 3 characters after position 20: ",#the_stg," ",str(the_stg) = ori); -- pure short insertion case
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
the_stg(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
print("50 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy;
the_stg(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; ori(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
print("50 Xs replace chars 10 thru 400: ",#the_stg," ",str(the_stg) = ori); --the_stg.print_raw();
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "XXX";
the_stg(10..12) := "XXX"; print("3 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(150..625) := "XXX";
the_stg(150..625) := "XXX"; print("3 Xs replace chars 150 thru 625: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(10..400) := "XXX";
the_stg(10..400) := "XXX"; print("3 Xs replace chars 10 thru 40: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "";
the_stg(10..12) := ""; print("chars 10 thru 12 deleted: ",#the_stg," ",str(the_stg) = ori); -- pure deletion case
incref(the_stg.rec,-1); -- delete the string
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(10..400) := "";
the_stg(10..400) := ""; print("chars 10 thru 40 deleted: ",#the_stg," ",str(the_stg) = ori); -- pure long deletion case
incref(the_stg.rec,-1); -- delete the string
print("first character deletion test"); ori := ori_copy;
the_stg1 := stg_copy; incref(stg_copy.rec,1); the_stg1(1..1) := ""; ori(1..1) := ""; print(str(the_stg1) = ori);
the_stg2 := the_stg1; incref(the_stg1.rec,1); the_stg2(1..1) := ""; ori(1..1) := ""; print(str(the_stg2) = ori);
the_stg3 := the_stg2; incref(the_stg2.rec,1); the_stg3(1..1) := ""; ori(1..1) := ""; print(str(the_stg3) = ori);
incref(the_stg3.rec,-1); incref(the_stg2.rec,-1); incref(the_stg1.rec,-1);
-- delete the string copy
check_mem("memcheck 37",stg_copy.rec); -- final erasure
end big_string_class_test;
end test_bs;
-- ************** THE MAIN DATABASE CLASS AND ITS AUXILIARY CLASSES **************
-- Internally, the database 'records' are long string sections, defined
-- by their start, end characters [start, the_end]. These are accessed through an index,
-- which is a cumulating vector of pairs [record_id, rec_len] with both record_id and cum_len
-- in increasing order. record_id cumulates using the maximum function, and rec_len by integer addition.
-- Each database has a to_string method which can be applied to each of its
-- records, which in turn generates word_list(record); This word list is
-- used to insert all the word occurences in the record into a 'word index'
-- associated with the database.
-- To find a record in the database given its key, we write
-- [[key,rec_len],key_ix,cum] := big_ix{key..key_cum};
-- start := cum - rec_len + 1;
-- Insertions of new records into the database are simply
-- start := #db_record_string; db_record_string(start + 1..start) := binstr(record);
-- where 'record' is the new record, followed by
-- id_ctr +:= 1; db_index{OM} := [hex_rep(id_ctr),#db_record_string];
-- and then by insertion of the word occurences in the record
-- into the word index.
-- To over_write a record within the database string db, we write
-- db_record_string(start..the_end) := x := binstr(record), followed by
-- db_index(key_ix) := [key,#x]; and by appropriate revision of the word index.
-- Database deletions are then db_record_string(start..the_end) := "", followed by
-- big_ix(key_ix) := OM; and by deletion of the word occurences in the record
-- from the word index.
package db_iterator_pak; -- contains_iterator package for the SETL database class
const
simple := 1, union := 2, difference := 3, intersection := 4;
sel iterator_kind(1); -- can be simple, union, difference, unstarted intersection,
-- started intersection
-- components of simple iterators
sel occs_tree(2); -- big tuple containing the record keys referencing the words in the_word_ix
sel current_occ_ptr(3); -- pointer to index of current occurence
sel occs_start(4); -- first item of occurence list
sel occs_end(5); -- last item of occurence list
-- components of compound iterators
sel L1_child(2),L2_child(3); -- sub-iterators if compound iteration
sel last_L1(4),last_L2(5); -- items previously obtained for this cycle of
-- compound iteration
var written_refcount,written_free_list,written_in_use; -- DEBUGGING QUANTITIES, PALCED HER FOR CONVENIENCE
procedure itr_create(occs_tup,beg,nd); -- create a simple contains_iterator for a SETL database
procedure itr_union(iter1,iter2); -- iterator union
procedure itr_intersection(iter1,iter2); -- iterator intersection
procedure itr_diff(iter1,iter2); -- iterator difference
procedure itr_start(tup); -- begin iterating over records containing word
procedure itr_next(tup); -- continue iterating over records containing word
procedure itr_number(tup); -- count operator
procedure itr_arb(tup); -- arb operator; returns first element
procedure itr_destroy(tup); -- destructor routine; eliminates reference to occs_tup
end db_iterator_pak;
package body db_iterator_pak; -- contains_iterator package for the SETL database class
use setldb,B_tree_for_wdocstring,big_stg_for_wdoc_pak,byteutil,disk_records_pak,db_records;
-- the underlying record and paging libraries
procedure itr_create(occs_tup,beg,nd); -- create a simple contains_iterator for a SETL database
--print("itr_create: ",beg," ",nd);
tup := [1..3]; -- create the representing tuple. The last 2 components, occs_start and occs_end, are OM
tup.iterator_kind := simple;
tup.current_occ_ptr := newat(); -- initial reference is to OM
tup.occs_tree := occs_tup; -- retain occurence list identity, and create an extra reference to it
incref(occs_tup,1); -- add one reference to occs_tup
if beg = OM then return tup; end if; -- return a null iterator
-- search for the desired word. If it is not found, set current_oc = OM;
-- this will terminate iteration as soon as it begins. Otherwise set ^current_occ_ptr
-- to point to the first identifier in the identifiers list following the
-- word (or to OM if this list is empty.) Iteration will terminate as soon as
-- the next word item in the word index is encountered.
-- The word index is maintained as a vector of items word, num_occs, ...
-- etc. with the number of words as a cumulant
tup.occs_end := nd;
^tup.current_occ_ptr := (tup.occs_start := beg) - 1; -- first item of occurence list
--print("created iterator: ",tup);
return tup;
end itr_create;
procedure itr_destroy(tup); -- destructor routine; eliminates a reference to occs_tup
incref(tup.occs_tree,-1); -- eliminate one reference to occs_tup
tup.current_occ_ptr := OM; -- prevent subsequent iteration
end itr_destroy;
procedure itr_union(iter1,iter2); -- iterator union
-- last_L1(4),last_L2(5) are initialized to pointer atoms
new_iter := [OM,OM,OM,newat(),newat()]; -- create an uninitialized new iterator
new_iter.iterator_kind := union;
new_iter.L1_child := iter1; -- set the two children
new_iter.L2_child := iter2;
return new_iter;
end itr_union;
procedure itr_intersection(iter1,iter2); -- iterator intersection
-- last_L1(4),last_L2(5) are initialized to pointer atoms
new_iter := [OM,OM,OM,newat(),newat()]; -- create an uninitialized new iterator
new_iter.iterator_kind := intersection;
new_iter.L1_child := iter1; -- set the two children
new_iter.L2_child := iter2;
return new_iter;
end itr_intersection;
procedure itr_diff(iter1,iter2); -- iterator difference
-- last_L1(4),last_L2(5) are initialized to pointer atoms
new_iter := [OM,OM,OM,newat(),newat()]; -- create an uninitialized new iterator
new_iter.iterator_kind := difference;
new_iter.L1_child := iter1; -- set the two children
new_iter.L2_child := iter2;
return new_iter;
end itr_diff;
procedure itr_start(tup); -- begin iterating over records containing word
case tup.iterator_kind
when simple => -- restart the iteration if possible
if tup.occs_start /= OM then ^(tup.current_occ_ptr) := tup.occs_start - 1; end if;
when union,difference,intersection => -- for the compound cases, just set up both values
itr_start(tup.L1_child); itr_start(tup.L2_child); -- start both children
^(tup.last_L1) := itr_next(tup.L1_child); -- set up both initial values
^(tup.last_L2) := itr_next(tup.L2_child);
end case;
end itr_start;
procedure itr_next(tup); -- continue iterating over records containing word
case tup.iterator_kind
when simple =>
-- if uninitialized or terminated, return OM
if (tcop := tup.current_occ_ptr) = OM or ^tcop = OM then return OM; end if;
if (current_ix := (^tcop +:= 1)) > tup.occs_end then
^tcop := OM; return OM; -- iteration ends at occs_end
end if;
return int_of_4(bswo_comp(tup.occs_tree,current_ix));
-- otherwise return the current record identifier, as an integer
when union => -- for the union, we simply return the smaller of the two
-- items, and advance it
if (las_1 := ^(tup.last_L1)) = OM then
if ^(tup.last_L2) = OM then return OM; end if; -- iteration is finished
to_return := ^(tup.last_L2);
^(tup.last_L2) := itr_next(tup.L2_child); -- advance the L2 iteration
return to_return; -- return the L2 iterator
elseif (las_2 := ^(tup.last_L2)) = OM then
to_return := ^(tup.last_L1);
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
return to_return; -- return the L1 iterator
elseif las_1 <= las_2 then
to_return := las_1; -- return the smaller
if las_1 = las_2 then ^(tup.last_L2) := itr_next(tup.L2_child); end if;
-- advance L2 if it equals the L1 being returned
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the smaller iteration
return to_return; -- return the L1 iterator
else
to_return := las_2; -- return the smaller
^(tup.last_L2) := itr_next(tup.L2_child); -- advance the smaller iteration
return to_return; -- return the L2 iterator
end if;
when difference => -- for the difference L1 - L2, we return and advance the L1 item if
-- it is the smaller of the two; otherwise we advance the L2 item
-- until it becomes equal to or greater than the L1 item. At each
-- equality, the L1 item is also increased.
--print("difference iter; last_L1,last_L2: ",^(tup.last_L1)," ",^(tup.last_L2));
if (las_1 := ^(tup.last_L1)) = OM then return OM; end if; -- iteration is finished
if (las_2 := ^(tup.last_L2)) = OM or las_1 < las_2 then
to_return := las_1; -- return the L1 iterator
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
return to_return; -- return the L1 iterator
elseif las_1 = las_2 then -- bypass common element
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
end if; -- now last_L1 is larger
while (las_1 := ^(tup.last_L1)) /= OM loop -- we move forward (in both lists if necessary)
-- till last_L2 exceeds last_L1
while (las_2 := ^(tup.last_L2)) /= OM and las_1 > las_2 loop
^(tup.last_L2) := itr_next(tup.L2_child);
end loop;
if las_1 = las_2 then -- must advance last_L1 again
^(tup.last_L1) := itr_next(tup.L1_child);
continue;
end if;
to_return := las_1; -- return the L1 iterator
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
return to_return; -- return the L1 iterator
end loop;
return OM; -- iteration is finished
when intersection => -- for the intersection, we advance the smaller of the two
-- items until a common value is found, when we advance both
while (las_1 := ^(tup.last_L1)) /= OM and (las_2 := ^(tup.last_L2)) /= OM loop
if las_1 = las_2 then -- we have an intersection element
to_return := las_1; -- return the L1 iterator
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
return to_return; -- return the L1 iterator
end if;
if las_1 < las_2 then -- advance the smaller element
^(tup.last_L1) := itr_next(tup.L1_child); -- advance the L1 iteration
else
^(tup.last_L2) := itr_next(tup.L2_child); -- advance the L2 iteration
end if;
end loop;
return OM; -- iteration is finished
end case;
end itr_next;
procedure itr_number(tup); -- count operator
case tup.iterator_kind
when simple =>
if tup.occs_start = OM then return 0; end if; -- word not found
return tup.occs_end - tup.occs_start + 1;
when union,difference,intersection =>
-- count occurences matching criterion
num := 0; itr_start(tup);
while itr_next(tup) /= OM loop num +:= 1; end loop;
return num;
end case;
end itr_number;
procedure itr_arb(tup); -- arb operator; returns first element
--print("itr_arb: ",tup.iterator_kind);
case tup.iterator_kind
when simple => -- restart the iteration if necessary
if (tos := tup.occs_start) = OM then return OM; end if;
iof := int_of_4(bswo_comp(tup.occs_tree,tos));
return iof;
-- otherwise return the first record identifier from the sequence for this word
when union,difference,intersection => -- for the compound cases, just set up both values
itr_start(tup); -- begin iterating over record