You are here: Home Projects SETL SETL2 Source code DB_database.stl
Views
Document Actions

DB_database.stl

by Paul McJones last modified 2021-02-25 11:33
History
Action Performed by Date and Time Comment
Publish Paul McJones 2021-02-25 11:33 No comments.

The main big strings and database implementation, with test.

--files: string_utility_pak.stl,sort_pak.stl,get_lines_pak.stl,random_pak.stl
--			disk_records_refct.stl, B_tree_for_bigs_refct.stl, B_tree_for_bigs_wdix.stl, B_tree_for_bigs_wdoc.stl,
--			B_tree_for_bigs_dbix.stl, Big_stg_for_wdoc.stl
--file database_top.stl  test_bs
package big_string_pak;

const bs_code_pts := {"copy", "simple", "optimized", "chop", "tree", "concat_join", "concat_balance", "concat_chop", 	
	"nocon_opt", "nocon", "nocon_left", "nocon_right", "nocon_iter", "nocon_del", "nocon_ins", 	"nocon_to_simp"};
			-- code points to be traversed

	procedure bs_from_stg(stg);				-- create a big string from a string
	procedure stg_from_bigstg(rec);			-- make a string from a big_string
	procedure bs_slice(rec,i,j);			-- the slice extraction operation
	procedure bs_length(rec);				-- length of a bigstring
	procedure bs_set_slice(rw rec,i,j,stg);	-- the slice assignment operation
												
												-- (PUBLIC FOR DEBUGGING ONLY)
	procedure stg_past(j,rec);				-- make a string from a the part of a big_string from j onward
	procedure stg_till(j,rec);				-- make a string from a the part of a big_string up to j
	procedure chop_up(first,mid,last);		-- chop the concatenation of three strings into a tuple of string records
	procedure print_raw(); 					-- raw print for debugging
	procedure bnr_voc(rec,j);				-- j'th member of vector of children for B_tree_for_bigstring records
	procedure bs_check_leaves(rec);			-- leaf consistency check

end big_string_pak;

package body big_string_pak;

use setldb,byteutil,disk_records_pak,db_records,B_tree_for_bigstring,string_utility_pak;

procedure bs_from_stg(stg);					-- make a big_string from a string

												-- we first chop up the string into a tuple of string records
	if #(pieces_tup := chop_up("",stg,"")) = 1 then return make_record(pieces_tup(1)); end if;
						-- see comment on the 'chop_up' routine, below. If the whole string fits in 
						-- just one string record, we simply return that record.
--print("#pieces_tup: ",#pieces_tup);
--print("Total len ",+/[#y:y in pieces_tup]);

	return bnr_make_from_tuple(pieces_tup);		-- otherwise convert the list of sections returned into a tree

end bs_from_stg;

procedure stg_from_bigstg(rec);					-- make a string from a big_string

	if dr_load(rec)(type_byte) = string_record then return sr_slice(rec,1,sr_length(rec)); end if;
		-- get string from record

	if not dr_is_compound(rec) then 			-- concatenate strings from records
		return  "" +/ [sr_slice(ch,1,sr_length(ch)): j in [1..num_childr(rec)] | (ch := bnr_voc(rec,j)) /= OM];
	end if;

	return "" +/ [stg_from_bigstg(bnr_voc(rec,j)): j in [1..num_childr(rec)]];	-- else proceed recursively
	
end stg_from_bigstg;

procedure stg_past(j,rec);					-- make a string from a the part of a big_string from j onward

	if dr_load(rec)(type_byte) = string_record then			-- get string from record

		if j > (sl := sr_length(rec)) then 
			abort("Illegal character index in simple stg_past operation: " + str(j) + ", " + sl);
		end if;

		return sr_slice(rec,j,sl); 		-- return slice from j onward 

	end if;
	
							-- get the child containing the first character past j 
	if not (exists chix in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chix) >= j) then 
		abort("Illegal character index in stg_past operation: " + str(j) + ", " + nc);
	end if;
	
	prev_cum := if chix = 1 then 0 else bnr_get_ch_cum(rec,chix - 1) end if;	-- the previous cumulant

	return stg_past(j - prev_cum,bnr_voc(rec,chix)) 
							+/ [stg_from_bigstg(bnr_voc(rec,j)): j in [chix + 1..nc]];
	
end stg_past;

procedure stg_till(j,rec);					-- make a string from a the part of a big_string up to j
		
	if dr_load(rec)(type_byte) = string_record then		-- simple string record case

		if j > (sl := sr_length(rec)) then 
			abort("Illegal character index in simple stg_past operation: " + str(j) + ", " + sl);
		end if;
		
		return sr_slice(rec,1,j); 			-- return slice up to j

	end if;
	
							-- get the child containing the first character past j 
	if not (exists chix in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chix) >= j) then 
		abort("Illegal character index in stg_past operation: " + str(j) + ", " + nc);
	end if;
	
	prev_cum := if chix = 1 then 0 else bnr_get_ch_cum(rec,chix - 1) end if;	-- the previous cumulant

	return  ("" +/ [stg_from_bigstg(bnr_voc(rec,j)): j in [1..chix - 1]]) 
												+ stg_till(j - prev_cum,bnr_voc(rec,chix));
	
end stg_till;

procedure bs_slice(rec,i,j);		-- the slice extraction operation concatenates all the characters
									-- between the two indicated positions. the 'node' can be either 
									-- the top of a tree, or can be a simple string_record
				-- for a tree, we locate the sections F, L containing the first and last characters, and
				-- return the concatenation of the part of F past i, the part of L up to j,
				-- and the concatenation of the strings associated with all intermediate child trees
--print("bs_slice: ",i," ",j);	
	if i < 1 or j < i - 1 then 
		abort("Illegal second and/or first parameters in string extraction operation" + str(j) + ", " + str(i));
	end if;
			
	if dr_load(rec)(type_byte) = string_record then		-- simple string record case

		sl := sr_length(rec);
		if j > sl then 				-- get string from record
			print("bs_slice: Illegal character index in simple string extraction operation: " + str(j) + ", " + str(sl)); stop;
		end if;
--print("bs_slice bottom: ",i," ",j," ",sr_slice(rec,i,j));		
		return sr_slice(rec,i,j); 

	end if;

							-- get the child containing the first character past i 
	if not (exists chixi in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chixi) >= i) then 
		abort("Illegal first character index in string extraction operation: " + str(i) + ", " + str(nc));
	end if;
							-- get the child containing the first character past j 
	if not (exists chixj in [1..nc := num_childr(rec)] | bnr_get_ch_cum(rec,chixj) >= j) then 
		abort("Illegal second character index in string extraction operation: " + str(j) + ", " + str(nc));
	end if;
	
	prev_cumi := if chixi = 1 then 0 else bnr_get_ch_cum(rec,chixi - 1) end if;	-- the cumulant previous to i
	prev_cumj := bnr_get_ch_cum(rec,chixj - 1);									-- the cumulant previous to j
	
	if chixi = chixj then 				-- proceed recursively
		return bs_slice(bnr_voc(rec,chixi),i - prev_cumi,j - prev_cumi); 
	end if;

	xxx:=(stg_past(i - prev_cumi,bnr_voc(rec,chixi)) +/[stg_from_bigstg(bnr_voc(rec,k)): k in [chixi + 1..chixj - 1]]) 
					+ stg_till(j - prev_cumj,bnr_voc(rec,chixj));	-- proceed recursively: from, middle, end


	return xxx;
	return (stg_past(i - prev_cumi,bnr_voc(rec,chixi)) +/[stg_from_bigstg(bnr_voc(rec,k)): k in [chixi + 1..chixj - 1]]) 
					+ stg_till(j - prev_cumj,bnr_voc(rec,chixj));	-- proceed recursively: from, middle, end

end bs_slice;

procedure bnr_voc(rec,j);					-- j'th member of vector of children for B_tree_for_bigstring records

	stg := dr_load(rec);					-- load this string
	cjstrt := (j - 1) * 4 + bnr_ch_start; 
	return stg(cjstrt..cjstrt + 3);		-- return child rec
	
end bnr_voc;

procedure bnr_get_ch_cum(rec,j);	-- get the cum value for the j-th child of this node
	
	return int_of_5(dr_load(rec)(bnr_cum_start + (j - 1) * 5..bnr_cum_start + j * 5 - 1));
	
end bnr_get_ch_cum;

procedure chop_up(first,mid,last);		-- chop the concatenation of three strings into a tuple of string records

	-- if all three pieces fit into one, two or three sections, return these. Otherwise join as much as possible of the 
	-- middle into two pieces, and then the reminder of the middle into approximately equal-sized pieces.
	-- first and lat are assumed to be no more than one record size each

	if (total := (nf := #first) + (nm := #mid) + (nl := #last)) <= sr_hi_lim then 		-- put all into one piece
		return [first + mid + last]; 
	end if;

	if total < 2 * sr_hi_lim then		-- two pieces are enough

		center := total/2;		-- half the total
		
		if center <= nf then 
			return [first(1..center), first(center + 1..) + mid + last]; 
		end if;

		if center <= nf + nm then 
			return [first + mid(1..used := center - nf), mid(used + 1..) + last]; 
		end if;

		return [first  + mid + last(1..used := center - nf - nm), last(used + 1..)]; 

	end if;			-- otherwise first + mid cannot fit into one piece
			
	if total <= 3 * sr_hi_lim then 			-- if they fit into 3 pieces, put as much as possible 
												-- of first and mid into one piece, and chop up the remainder
	 	return [first + mid(1..used := sr_hi_lim - nf)] + chop_up("",mid(used + 1..),last);

	end if;			-- otherwise put as much as possible of mid together with first and last, and
					-- cut the reminder of the middle into approximately equal-sized pieces.
	first := first + mid(1..used := sr_hi_lim - nf);
	last := mid((new_end := nm - (sr_hi_lim - nl)) + 1..) + last;
	
	npieces := (rem := new_end - used)/sr_hi_lim;		-- number of full-sized pieces
	psm1 := sr_hi_lim - 1;
	
	if (extra_part := rem mod sr_hi_lim) = 0 then		-- fits into a list of full pieces
	
		return [first] + [mid(j..j + psm1): j in [used + 1,used + sr_hi_lim + 1..new_end - psm1]] + [last];
	
	else												-- there is a bit left over

		start_last_2 := new_end - psm1 - extra_part;			-- starting character of last 2 sections
		mid_last_2 := start_last_2 + (psm1 + extra_part)/2;			-- middle character of last 2 sections
						
											-- use list of all but 1 full piece, plus two smaller pieces.
		return [first] + [mid(j..j + psm1): j in [used + 1,used + sr_hi_lim + 1..new_end - 2 * psm1 - extra_part]]
			 + [mid(start_last_2..mid_last_2),mid(mid_last_2 + 1..new_end)] + [last];
	
	end if;
	
end chop_up;

procedure make_record(stg);		-- creates one record from string

	rec := set_type(dr_new_rec(),string_record); sr_set_slice(rec,1,0,stg);
	return rec;
	
end make_record;

procedure pass_pass(stg); print(stg); pass(stg); end pass_pass;		-- print and pass for debugging

procedure bs_set_slice(rw rec,i,j,stg);		-- the slice assignment operation
--print("bs_set_slice: ",i," ",j," ",#stg," ",abs(dr_load(rec)(type_byte)));
	if i < 1 or j < i - 1 then 
		abort("Illegal second and first parameters in string extraction operation" + str(j) + ", " + str(i));
	end if;
											-- since rec will change, ensure that it has just one copy. 
											-- This will up the refcounts of its children if necessary
	if refcount(int_of_4(rec)) > 1 then						-- must copy
pass("copy");
		stgg := dr_load(rec); new_r := dr_new_rec(); 
		dr_setrecbuf(new_r,stgg);
		dr_dirtify(new_r);
		increfs(new_r,1); incref(rec,-1); rec := new_r;		-- substitute copy for original
	end if;

	if (contents := dr_load(rec))(type_byte) = string_record then		-- simple string record case
			-- in the simple string record case we extract the un-written-over portions of the string, 
			-- and chop up F + stg + L to get a new list of leaves, which are made into a node or a tree.
pass("simple");
--print("simple: ",i," ",j," ",#stg);
		if j > (sl := sr_length(rec)) then 				-- get string from record
			abort("bs_set_slice: Illegal character index in simple string extraction operation: " + str(j) + ", " + str(sl));
		end if;
					-- if F + stg + L fits in just one section, we perform the operation as a string slice assignment,
					-- to avoid string copying where unnecessary 

		if (new_len := (i - 1) + (lstg := #stg) + (sl - j)) <= sr_hi_lim then				-- handle as an optimized case
pass("optimized");
			sr_set_slice(rec,i,j,stg);		-- replace slice
--print("optimized case: ",i," ",j," ",sr_slice(rec,1,sr_length(rec)));			
			return;				-- done with this optimized case
			
		else 						-- we must actually chop up F + stg + L
pass("chop");					
			first := contents(sr_char_start..sr_char_start + i - 2); 	-- till the i-1-th character
			last := contents(sr_char_start + j..sr_char_start + sl - 1);		-- from the j + 1-st character
			pieces_tup := chop_up(first,stg,last);

		end if;
		
		incref(rec,-1);		-- the old form of the record will lose one reference
		
		rec := bnr_make_from_tuple(pieces_tup);		-- convert the list of sections returned into a tree
	
		return;
		
	end if;		-- otherwise we deal with the B-tree case
pass("tree");
	if i > (len_this := bnr_get_cum(rec)) + 1 or j > len_this then 
		abort("Out of range first parameter or second in string assignment operation " + str(j) + ", " + str(i) + " Length is " + str(len_this));
	end if;

	if i = len_this + 1 then		-- we have a simple concatenation operation
		
		if (lstg := #stg) < sr_low_lim then			-- balance or join with last node
		
			[last_node,lcum] := bnr_comp_cum(rec,OM);			-- get the last node, and its string contents
			ln_stg := sr_slice(last_node,1,lln := sr_length(last_node));

			if lln + lstg <= sr_hi_lim then	-- join with last node
pass("concat_join");	
				
				old_last_node := last_node;
				incref(old_last_node,1);			-- must increment to prevent length of referenced node from changing
							-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********			
				sr_set_slice(last_node,lln + 1,lln,stg);		-- last_node is copied and old_last_node loses a ref
				bnr_set_comp(rec,lcum,last_node);		-- insert the (new) last node; old_last_node loses another ref
				incref(last_node,-1);				-- the variable 'last_node' is now dead

				return;									-- done with this optimized case
			end if;										-- otherwise we will balance
pass("concat_balance");
			old_last_node := last_node;
			incref(old_last_node,1);			-- must increment to prevent length of referenced node from changing
							-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********			
			[new_ln_stg,stg] := chop_up(ln_stg,stg,"");	-- cut into two string sections
			sr_set_slice(last_node,1,lln,new_ln_stg);
			bnr_set_comp(rec,lcum,last_node);			-- insert the (possibly new) last node
			incref(last_node,-1);						-- the variable 'last_node' is now dead
			bnr_insert(rec,OM,leaf := make_record(stg));	-- insert extra part at the end
			incref(leaf,-1);							-- the variable 'leaf' is now dead
			return;										-- done with this optimized case
			
		end if;
pass("concat_chop");		
		pieces_tup := chop_up("",stg,"");
--print("concatenation: ",pieces_tup);		
		for piece in pieces_tup loop		-- append all these pieces to the existing record
			bnr_insert(rec,OM,leaf := make_record(piece));			-- at the end
			incref(leaf,-1);	-- the variable 'leaf' is now dead
		end loop;		-- note that the first insert may copy rec and decrement its refcount if it is shared;
						-- in this case, ref will correctly be repaced by the new copy 
				
		return;		-- done with this case

	end if;		-- end of the case in which insertion is at the very end
	
		-- otherwise we need to make an insertion into the middle of an existing big_string
	-- we extract the un-written-over portions F and L of the first and last sections affected,
	-- and chop up F + stg + L to get a new list of leaves. These replace the leaves 
	-- of the original tree, running from the first to the last section affected.
							-- get the child containing the first character past i 
	[leafi,cumi] := bnr_comp_cum(rec,i);		-- get the leaf containing the first character past i, and its cumulant 
	[leafj,cumj] := bnr_comp_cum(rec,j);		-- get the leaf containing the first character past j, and its cumulant 
--print("insertion in middle: ",cumi," ",cumj);	
			-- if cumi = cumj, and F + stg + L fits in just one section and does not fall beneath the required minimum 
			-- size, we perform the operation as a string slice assignment, to avoid string copying where unnecessary 

	if cumi = cumj			-- we may be able to work in an existing section
		and (new_len := (lstg := #stg) + (sli := sr_length(leafi)) - (j - i + 1)) <= sr_hi_lim 
			and new_len >= sr_low_lim then
pass("nocon_opt");
		prev_cum := cumi - sli;			-- cumulated length before the i-th (= j-th) leaves
		old_leafi := leafi;
		incref(old_leafi,1);			-- must increment to prevent length of referenced node from changing
							-- ********** TOTO: FIX THIS IN YOUR BIG_STRING CODE **********			

		sr_set_slice(leafi,i - prev_cum,j - prev_cum,stg);
											-- replace slice; leafi is copied, so its old version will lose 1 ref
		bnr_set_comp(rec,i,leafi);			-- reset set the leaf of rec, since it will have changed
											-- the old version now loses another ref
		incref(leafi,-1);					-- the variable leafi is now dead (compensate for extra ref added by set_comp)
		return;								-- done with this optimized case

	end if;
	
	contentsi := dr_load(leafi); sli := sr_length(leafi); irel := i - (cumi - sli);
	first := contentsi(sr_char_start..sr_char_start + irel - 2); 	-- till the i-1-th character
	
	contentsj := dr_load(leafj); slj := sr_length(leafj); jrel := j - (cumj - slj);
	last := contentsj(sr_char_start + jrel..sr_char_start + slj - 1);		-- from the j + 1-st character

	pieces_tup := chop_up(first,stg,last);

	-- now the strings in pieces_tup must be turned into leaves which replace the leaves of the tree,
	-- starting from leafj and ending with leafi. This iteration uses the cumulants and works backwards,
	-- testing for the appearance of leafi. If this is encountered before pieces_tup is exhausted, the 
	-- reminming elements of pieces_tup are inserted at the appropriate position. If pieces_tup
	-- is exhausted before leafi is encountered, the leaves forward from the one replaced up
	-- to and including leafi is deleted.
	
	-- however, the case in which pieces_tup contins just one element may be special, since this may fall below the 
	-- minimum number of characters wanted for a leaf
pass("nocon");
	if #pieces_tup = 1 and #(all := pieces_tup(1)) < sr_low_lim then
			-- we balance or join the short string we have with any left or right neighbor leaf 

		if cumi > sli then		-- there is a left neighbor
pass("nocon_left");
			[left_neighbor,-] := left_pair := bnr_comp_cum(rec,cumi - sli);	-- get the left neighbor and its cumulant
			left_neighbor_stg := sr_slice(left_neighbor,1,sr_length(left_neighbor));
			pieces_tup := chop_up(left_neighbor_stg,all,"");		-- use this as the new pieces_tup
			[leafi,cumi] := left_pair;								-- use left_neighbor as the leafi 
			
		elseif cumj < len_this then 						-- there is a right neighbor
pass("nocon_right");
			[right_neighbor,-] := right_pair := bnr_comp_cum(rec,cumj + 1);	-- get the right neighbor and its cumulant
			right_neighbor_stg := sr_slice(right_neighbor,1,sr_length(right_neighbor));
			pieces_tup := chop_up("",all,right_neighbor_stg);		-- use this as the new pieces_tup
			[leafj,cumj] := right_pair;								-- use left_neighbor as the leafi 
		end if;
		
	end if;		-- after this the treatment is like that of all other cases
	
	cum_now := cumj;						-- the following iteration is governed by the cumulants
	pieces_tup_ix_now := #pieces_tup;		-- start at the end of pieces_tup

	while cum_now >= cumi and pieces_tup_ix_now >= 1 loop
pass("nocon_iter");		
		oll := sr_length(bnr_comp(rec,cum_now));		-- the leaf which will be replaced
		
		bnr_set_comp(rec,cum_now,leaf := make_record(pieces_tup(pieces_tup_ix_now)));
		incref(leaf,-1);	-- the variable 'leaf' is now dead
--print("chopping up: ",#pieces_tup," ",hexify(leaf));			
		pieces_tup_ix_now -:= 1;		-- backwards in pieces_tup
		cum_now -:= oll;			-- backwards in the sequence of leaves
		
	end loop;

	while cum_now >= cumi loop			-- may need to make extra deletions
pass("nocon_del");		
		oll := sr_length(bnr_comp(rec,cum_now));		-- the leaf which will be replaced
		bnr_set_comp(rec,cum_now,OM);
		cum_now -:= oll;			-- backwards in the sequence of leaves
	end loop;
	
	just_past_prev := cum_now + 1;		-- just past the leaf previous to leafi
	
	while pieces_tup_ix_now >= 1 loop		-- may need to make extra insertions
pass("nocon_ins");		
		bnr_insert(rec,just_past_prev,leaf := make_record(pieces_tup(pieces_tup_ix_now)));
		incref(leaf,-1);	-- the variable 'leaf' is now dead
		
		pieces_tup_ix_now -:= 1;		-- backwards in pieces_tup
		
	end loop;
	
	if num_childr(old_rec := rec) = 1 then			-- move from tree to simple string record representation
pass("nocon_to_simp");
		rec := bnr_voc(rec,1); incref(rec,1); incref(old_rec,-1);
				-- the child inherits one reference from its parent, so its refcount does not change
	end if;
--print("tree_dump after: ",str(bnr_dump(rec)));

end bs_set_slice;

procedure bs_length(rec);		-- length of a bigstring = string record length or tree length

	return if dr_load(rec)(type_byte) = string_record then sr_length(rec) else bnr_get_cum(rec) end if;

end bs_length;
	
procedure print_raw(); print(big_ix); end print_raw;			-- raw print for debugging

procedure bs_check_leaves(rec);		-- checks that leaves have required maximum and minimum lengths
	
	if (refcount(int_of_4(rec)) <= 0) then
		print("**** MEMORY ERROR **** node with zero refcount in tree: ",hexify(rec)); return false;
	end if;

	if (contents := dr_load(rec))(type_byte) = string_record then	-- node is a leaf

		if (srl := sr_length(rec)) > sr_hi_lim then 
			print("EXCESSIVE STRING LENGTH DETECTED IN NODE ",hexify(rec)," ",hexify(contents));
			return false;
		end if;
		
		return true;		-- node is ok
	end if;		-- otherwise we proceed recursively
	
	if (bsl := bs_length(rec)) /= (ls := #stg_from_bigstg(rec)) then 
		print("***** Length Discrepancy ***** ",bsl," - ",ls," -> ",hexify(rec)," ",hexify(dr_load(rec))); stop;
	end if; 

	return forall j in [1..num_childr(rec)] | bs_check_leaves_in(bnr_voc(rec,j));

procedure bs_check_leaves_in(rec);		-- inner workhorse
	
	if (refcount(int_of_4(rec)) <= 0) then
		print("**** MEMORY ERROR **** node with zero refcount in subtree: ",hexify(rec)); return false;
	end if;

	if (contents := dr_load(rec))(type_byte) = string_record then	-- node is a leaf

		if (srl := sr_length(rec)) > sr_hi_lim then 
			print("EXCESSIVE STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
			return false;
		end if;

		if srl < sr_low_lim then 
			print("INSUFFICIENT STRING LENGTH DETECTED IN LEAF ",hexify(rec)," ",srl," ",hexify(contents));
			return false;
		end if;

		return true;		-- leaf is ok

	end if;		-- otherwise we proceed recursively
	
	if (bsl := bs_length(rec)) /= (ls := #stg_from_bigstg(rec)) then 
		print("***** Length Discrepancy In Leaf***** ",bsl," ",ls); stop;
	end if; 

	return forall j in [1..num_childr(rec)] | bs_check_leaves_in(bnr_voc(rec,j));

end bs_check_leaves_in;

end bs_check_leaves;

end big_string_pak;

class big_string;				-- simple wrapper object for big_string_pak

	var rec;	-- disk record for big_string; this is either as simple string record or a B-tree root record 
				-- (MADE PUBLIC FOR MEMORY USAGE DEBUGGING ONLY)

	procedure create(stg);		-- create a big string from a string
	procedure print_raw(); 			-- raw print for debugging
end big_string;

class body big_string;			-- simple wrapper object for big_string_pak

use setldb,big_string_pak;
	
	procedure create(stg); rec := bs_from_stg(stg); end create;		-- create a big string from a string

	procedure selfstr; return stg_from_bigstg(rec);	end;	-- string form of  a big string

	procedure self(i..j);		-- the slice extraction operation concatenates 
								-- all the characters between the two indicated positions
		return bs_slice(rec,i,j);
	end;
	
	procedure self(i..j) := stg;		-- slice assignment operation

		bs_set_slice(rec,i,j,stg);		-- this can change rec; when the refcount of this object goes to 0, 
										-- rec should be erased
	end;
	
	procedure #self; return bs_length(rec);	end;	-- length of this string
	
	
	procedure print_raw(); print(big_ix); end print_raw;
			-- raw print for debugging
end big_string;

program test_bs;		-- test program for big_string package and its associated class
use setldb,big_string_pak,big_string;
use byteutil,disk_records_pak,db_records,B_tree_for_bigstring,string_utility_pak;
				
var orig := "123456789a123456789b123456789c" + "123456789d123456789e123456789f" + "123456789g123456789h123456789i"
			 + "123456789j123456789k123456789lABCDE";		-- string for tests
var orig2 := "ABCDEFGHIaABCDEFGHIbABCDEFGHIc" + "ABCDEFGHIdABCDEFGHIeABCDEFGHIf" + "ABCDEFGHIg1ABCDEFGHIhABCDEFGHIi"
			 + "ABCDEFGHIjABCDEFGHIkABCDEFGHIzZZZZZ";		-- string for tests

code_pts := bs_code_pts + bnr_code_pts;		-- code points to be traversed

database_file_name := "bs_test_file";		-- set the name of the file to be used

package_tests;					-- test the underlying package
big_string_class_test;			-- test the class version

report_points_passed();			-- get traversal report

procedure check_mem(caption,rec);		-- memory check utility
	incref(rec,-1); print(caption," ",str(if is_tuple(cm := check_memory()) then cm(1) else cm end if));
end check_mem;

procedure package_tests;		-- tests of the underlying package

s2 := stg_from_bigstg(rc := bs_from_stg(s1 := orig)); print("short reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 1",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := orig + orig(1..10))); print("long reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 2",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("longer reconstruction: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 3",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 5 * orig)); print("longer reconstruction 2: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 4",rc);
s2 := stg_from_bigstg(rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("longer reconstruction 3: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 5",rc);

s2 := stg_past(5,rc := bs_from_stg(s1 := orig)); print("end of short reconstruction: ",s1(5..) = s2);
check_mem("memcheck 6",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := orig + orig(1..10))); print("end of long reconstruction: ",s1(5..) = s2);
check_mem("memcheck 7",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("end of longer reconstruction: ",s1(5..) = s2);
check_mem("memcheck 8",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 5 * orig)); print("end of longer reconstruction 2: ",s1(5..) = s2);
check_mem("memcheck 9",rc);
s2 := stg_past(5,rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("end of longer reconstruction 3: ",s1(5..) = s2);
check_mem("memcheck 10",rc);

s2 := stg_till(95,rc := bs_from_stg(s1 := orig)); print("start of short reconstruction: ",s1(1..95) = s2);
check_mem("memcheck 11",rc);
s2 := stg_till(130,rc := bs_from_stg(s1 := orig + orig(1..10))); print("start of long reconstruction: ",s1(1..130) = s2);
check_mem("memcheck 12",rc);
s2 := stg_till(630,rc := bs_from_stg(s1 := 5 * orig + orig(1..10))); print("start of longer reconstruction: ",s1(1..630) = s2);
check_mem("memcheck 13",rc);
s2 := stg_till(620,rc := bs_from_stg(s1 := 5 * orig)); print("start of longer reconstruction 2: ",s1(1..620) = s2);
check_mem("memcheck 14",rc);
s2 := stg_till(255,rc := bs_from_stg(s1 := 2 * orig + orig(1..10))); print("start of longer reconstruction 3: ",s1(1..255) = s2);
check_mem("memcheck 15",rc);

s2 := bs_slice(rc := bs_from_stg(s1 := orig),5,95); print("short slice: ",s1(5..95) = s2);
check_mem("memcheck 16",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := orig + orig(1..10)),5,130); print("long slice: ",s1(5..130) = s2);
check_mem("memcheck 17",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 5 * orig + orig(1..10)),5,630); print("longer slice: ",s1(5..630) = s2);
check_mem("memcheck 18",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 5 * orig),5,620); print("longer slice 2: ",s1(5..620) = s2);
check_mem("memcheck 19",rc);
s2 := bs_slice(rc := bs_from_stg(s1 := 2 * orig + orig(1..10)),5,255); print("longer slice 3: ",s1(5..255) = s2);
check_mem("memcheck 20",rc);

rc := bs_from_stg(s1 := 4 * orig + orig(1..10)); 
bs_set_slice(rc,3,500,"");	s1(3..500) := "";	-- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice shortening insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 21",rc);

rc := bs_from_stg(s1 := orig + orig(1..10)); 
bs_set_slice(rc,3,2,"XX");	s1(3..2) := "XX";	-- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 22",rc);

rc := bs_from_stg(s1 := orig + orig(1..10)); 
bs_set_slice(rc,3,2,3 * orig2);s1(3..2) := 3 * orig2;		-- test slice insertion operation, long form
s2 := stg_from_bigstg(rc); print("long slice long insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 23",rc);

rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,"XX");	s1(1..3) := "XX";	-- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice shrink-a-bit: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 24",rc);

rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,""); s1(1..3) := "";		-- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice shrinkage: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 25",rc);

rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,3,"YYYYY");	s1(1..3) := "YYYYY";	-- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice expand: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 26",rc);

rc := bs_from_stg(s1 := orig);
bs_set_slice(rc,1,0,"ZZZZZ");	s1(1..0) := "ZZZZZ";	-- test slice assignment operation, short form
s2 := stg_from_bigstg(rc); print("short slice insert: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 27",rc);

rc := bs_from_stg(s1 := orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"XX"); s1(rl + 1..rl) := "XX";		-- test slice append operation, short form
s2 := stg_from_bigstg(rc); print("long slice append: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 28",rc);

rc := bs_from_stg(s1 := 2 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"WWWWWWW"); s1(rl + 1..rl) := "WWWWWWW";		-- test slice append operation, long form
s2 := stg_from_bigstg(rc); print("long slice append 2: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 29",rc);

rc := bs_from_stg(s1 := 5 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,"YYYYY"); s1(rl + 1..rl) := "YYYYY";		-- test slice append operation, longer form
s2 := stg_from_bigstg(rc); print("long slice append 3: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 30",rc);

rc := bs_from_stg(s1 := 5 * orig + orig(1..10)); rl := bs_length(rc);
bs_set_slice(rc,rl + 1,rl,3 * orig); s1(rl + 1..rl) := 3 * orig;		-- test slice append operation, longer form
s2 := stg_from_bigstg(rc); print("long slice long append: ",s1 = s2 and bs_check_leaves(rc));
check_mem("memcheck 30A",rc);

-- additional tests of the slice assignment operations, to proble all the special cases
-- in the code, and verify the absence of memory leaks or faulty deallocations

end package_tests;

procedure big_string_class_test;		-- test program for big_string class

the_stg := big_string(s1 := orig);
print("string length and selfstr: ",#the_stg = #s1," ",str(the_stg) = s1);
print("short slice class: ",the_stg(5..95) = s1(5..95) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 31",the_stg.rec);

the_stg := big_string(s1 := orig + orig(1..10));
print("long slice class: ",the_stg(5..130) = s1(5..130) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 32",the_stg.rec);

the_stg := big_string(s1 := 5 * orig + orig(1..10));
print("longer slice class: ",the_stg(5..630) = s1(5..630) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 33",the_stg.rec);

the_stg := big_string(s1 := 5 * orig);
print("longer slice class 2: ",the_stg(5..620) = s1(5..620) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 34",the_stg.rec);

the_stg := big_string(s1 := 2 * orig + orig(1..10));
print("longer slice class 3: ",the_stg(5..255) = s1(5..255) and bs_check_leaves(the_stg.rec));
check_mem("memcheck 35",the_stg.rec);

the_stg := big_string("");
the_stg(1..0) := 5 * "abcde"; print("insert from null: ",str(the_stg) = 5 * "abcde"); 
the_stg(26..25) := 5 * orig; print("tail insert from null: ",
				str(the_stg) = (5 * "abcde" + 5 * orig) and bs_check_leaves(the_stg.rec)); --
check_mem("memcheck 36",the_stg.rec);

the_stg := big_string(ori := ori_copy := 5 * "abcde" + 5 * orig);
--print("the_stg after creation: ",str(the_stg)," --- the_stg: ",the_stg); -- ?????????? SETL PRINT BUG ??????????
--print("the_stg after creation: ",str(the_stg)); -- THIS WORKS OK; PREVIOUS LINE DOES NOT
print("string length, selfstr, length check,selfstr check: ",#the_stg = #ori," ",str(the_stg) = ori);
stg_copy := the_stg; incref(the_stg.rec,1);		-- note that an extra copy has been created

print("slice extraction and original: ",the_stg(1..10) = ori(1..10) and bs_check_leaves(the_stg.rec)); 

print("second slice extraction and original: ",the_stg(10..20) = ori(10..20));
print("slice extraction check: ",the_stg(1..10) = ori(1..10)); 
print("second slice extraction check: ",the_stg(10..20) = ori(10..20));
print("tail slice check: ",the_stg(10..#ori) = ori(10..#ori));

the_stg(1..0) := (extra := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."); ori2 := ori;
ori(1..0) :=  extra;
print("concatenation of 50 characters at start: ",#the_stg," ",str(the_stg) = ori and bs_check_leaves(the_stg.rec)); 

print("the_stg: ",str(the_stg)); print(); print(str(stg_copy));
incref(the_stg.rec,-1);		-- delete the string

	-- we must try long and short pure insertions, long and short impure insertions 
	-- into a single section, and long and short insertions which erase longer 
	-- runs
the_stg := stg_copy; incref(stg_copy.rec,1); ori := ori_copy; ori(1..0) := "XXX";
the_stg(1..0) := "XXX"; print("insertion of 3 characters at start: ",#the_stg," ",str(the_stg) = ori);
		-- pure short insertion case
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(21..20) := "XXX";
the_stg(21..20) := "XXX"; print("insertion of 3 characters after position 20: ",#the_stg," ",str(the_stg) = ori);		-- pure short insertion case
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
the_stg(10..12) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX."; 
print("50 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; 
the_stg(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";  ori(10..400) := "XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.XXXX.";
print("50 Xs replace chars 10 thru 400: ",#the_stg," ",str(the_stg) = ori);		--the_stg.print_raw();
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "XXX";
the_stg(10..12) := "XXX"; print("3 Xs replace chars 10 thru 12: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(150..625) := "XXX";
the_stg(150..625) := "XXX"; print("3 Xs replace chars 150 thru 625: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(10..400) := "XXX";
the_stg(10..400) := "XXX"; print("3 Xs replace chars 10 thru 40: ",#the_stg," ",str(the_stg) = ori);
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(10..12) := "";
the_stg(10..12) := ""; print("chars 10 thru 12 deleted: ",#the_stg," ",str(the_stg) = ori);		-- pure deletion case
incref(the_stg.rec,-1);		-- delete the string

the_stg := stg_copy;  incref(stg_copy.rec,1); ori := ori_copy; ori(10..400) := "";
the_stg(10..400) := ""; print("chars 10 thru 40 deleted: ",#the_stg," ",str(the_stg) = ori);		-- pure long deletion case
incref(the_stg.rec,-1);			-- delete the string

print("first character deletion test"); ori := ori_copy; 
the_stg1 := stg_copy; incref(stg_copy.rec,1); the_stg1(1..1) := ""; ori(1..1) := ""; print(str(the_stg1) = ori);
the_stg2 := the_stg1; incref(the_stg1.rec,1); the_stg2(1..1) := ""; ori(1..1) := ""; print(str(the_stg2) = ori);
the_stg3 := the_stg2; incref(the_stg2.rec,1); the_stg3(1..1) := ""; ori(1..1) := ""; print(str(the_stg3) = ori);
incref(the_stg3.rec,-1); incref(the_stg2.rec,-1); incref(the_stg1.rec,-1); 
 		-- delete the string copy

check_mem("memcheck 37",stg_copy.rec);		-- final erasure

end big_string_class_test;

end test_bs;
	
--	************** THE MAIN DATABASE CLASS AND ITS AUXILIARY CLASSES **************

	-- Internally, the database 'records' are long string sections, defined 
	-- by their start, end characters [start, the_end]. These are accessed through an index, 
	-- which is a cumulating vector of pairs [record_id, rec_len] with both record_id and cum_len
	-- in increasing order. record_id cumulates using the maximum function, and rec_len by integer addition.
	
	-- Each database has a to_string method which can be applied to each of its 
	-- records, which in turn generates word_list(record); This word list is 
	-- used to insert all the word occurences in the record into a 'word index' 
	-- associated with the database.
	
	-- To find a record in the database given its key, we write  
	-- 		[[key,rec_len],key_ix,cum] := big_ix{key..key_cum}; 
	--		start := cum - rec_len + 1;
		
	-- Insertions of new records into the database are simply 
	--		start := #db_record_string; db_record_string(start + 1..start) := binstr(record); 
	-- where 'record' is the new record, followed by 
	-- 		id_ctr +:= 1; db_index{OM} := [hex_rep(id_ctr),#db_record_string];
	-- and then by insertion of the word occurences in the record
	-- into the word index.
	
	-- To over_write a record within the database string db, we write
	-- db_record_string(start..the_end) := x := binstr(record), followed by
	-- db_index(key_ix) := [key,#x]; and by appropriate revision of the word index.
	
	-- Database deletions are then db_record_string(start..the_end) := "", followed by 
	-- big_ix(key_ix) := OM; and by deletion of the word occurences in the record
	-- from the word index.

package db_iterator_pak;		-- contains_iterator package for the SETL database class

	const 
		simple := 1, union := 2, difference := 3, intersection := 4;
	
	sel iterator_kind(1);			-- can be simple, union, difference, unstarted intersection,
										-- started intersection 
					-- components of simple iterators
	sel occs_tree(2);				-- big tuple containing the record keys referencing the words in the_word_ix
	sel current_occ_ptr(3);			-- pointer to index of current occurence
	sel occs_start(4);				-- first item of occurence list
	sel occs_end(5);				-- last item of occurence list
	
					-- components of compound iterators
	sel L1_child(2),L2_child(3); 	-- sub-iterators if compound iteration
	sel last_L1(4),last_L2(5); 	-- items previously obtained for this cycle of 
										-- compound iteration

	var written_refcount,written_free_list,written_in_use;		-- DEBUGGING QUANTITIES, PALCED HER FOR CONVENIENCE
	
	procedure itr_create(occs_tup,beg,nd);			-- create a simple contains_iterator for a SETL database
	procedure itr_union(iter1,iter2);				-- iterator union
	procedure itr_intersection(iter1,iter2);		-- iterator intersection
	procedure itr_diff(iter1,iter2);				-- iterator difference
	procedure itr_start(tup);						-- begin iterating over records containing word
	procedure itr_next(tup);						-- continue iterating over records containing word
	procedure itr_number(tup);						-- count operator
	procedure itr_arb(tup);							-- arb operator; returns first element
	procedure itr_destroy(tup);						-- destructor routine; eliminates reference to occs_tup

end db_iterator_pak;

package body db_iterator_pak;	-- contains_iterator package for the SETL database class

use setldb,B_tree_for_wdocstring,big_stg_for_wdoc_pak,byteutil,disk_records_pak,db_records;
						-- the underlying record and paging libraries 

procedure itr_create(occs_tup,beg,nd);		-- create a simple contains_iterator for a SETL database
--print("itr_create: ",beg," ",nd);
	tup := [1..3];				-- create the representing tuple. The last 2 components, occs_start and occs_end, are OM
	tup.iterator_kind := simple;
	tup.current_occ_ptr := newat();		-- initial reference is to OM
	tup.occs_tree := occs_tup;			-- retain occurence list identity, and create an extra reference to it
	incref(occs_tup,1);		--  add one reference to occs_tup
	if beg = OM then return tup; end if;		-- return a null iterator
	
			-- search for the desired word. If it is not found, set current_oc = OM;
			-- this will terminate iteration as soon as it begins. Otherwise set ^current_occ_ptr 
			-- to point to the first identifier in the identifiers list following the 
			-- word (or to OM if this list is empty.) Iteration will terminate as soon as
			-- the next word item in the word index is encountered.
			
			-- The word index is maintained as a vector of items word, num_occs, ...
			-- etc. with the number of words as a cumulant
	tup.occs_end := nd;
	^tup.current_occ_ptr := (tup.occs_start := beg) - 1;	-- first item of occurence list
	
--print("created iterator: ",tup);
	return tup;

end itr_create;

procedure itr_destroy(tup);						-- destructor routine; eliminates a reference to occs_tup
	incref(tup.occs_tree,-1);		-- eliminate one reference to occs_tup
	tup.current_occ_ptr := OM;		-- prevent subsequent iteration
end itr_destroy;

procedure itr_union(iter1,iter2);		-- iterator union
										-- last_L1(4),last_L2(5) are initialized to pointer atoms
	new_iter := [OM,OM,OM,newat(),newat()]; 	-- create an uninitialized new iterator
	new_iter.iterator_kind := union;
	new_iter.L1_child := iter1;		-- set the two children
	new_iter.L2_child := iter2;
	
	return new_iter;

end itr_union;

procedure itr_intersection(iter1,iter2);		-- iterator intersection
										-- last_L1(4),last_L2(5) are initialized to pointer atoms
	
	new_iter := [OM,OM,OM,newat(),newat()];			-- create an uninitialized new iterator
	new_iter.iterator_kind := intersection;
	new_iter.L1_child := iter1;		-- set the two children
	new_iter.L2_child := iter2;
	
	return new_iter;

end itr_intersection;

procedure itr_diff(iter1,iter2);		-- iterator difference
										-- last_L1(4),last_L2(5) are initialized to pointer atoms
	
	new_iter := [OM,OM,OM,newat(),newat()];	 	-- create an uninitialized new iterator
	new_iter.iterator_kind := difference;

	new_iter.L1_child := iter1;		-- set the two children
	new_iter.L2_child := iter2;
	
	return new_iter;

end itr_diff;

procedure itr_start(tup);			-- begin iterating over records containing word

	case tup.iterator_kind

	when simple =>		-- restart the iteration if possible

		if tup.occs_start /= OM then ^(tup.current_occ_ptr) := tup.occs_start - 1; end if;

	when union,difference,intersection =>	-- for the compound cases, just set up both values

		itr_start(tup.L1_child); itr_start(tup.L2_child);			-- start both children

		^(tup.last_L1) := itr_next(tup.L1_child);					-- set up both initial values
		^(tup.last_L2) := itr_next(tup.L2_child);
	end case;

end itr_start;

procedure itr_next(tup);			-- continue iterating over records containing word
	
	case tup.iterator_kind

	when simple =>
						-- if uninitialized or terminated, return OM
		if (tcop := tup.current_occ_ptr)  = OM or ^tcop = OM then return OM; end if;

		if (current_ix := (^tcop +:= 1)) > tup.occs_end  then 
			^tcop := OM; return OM;		 -- iteration ends at occs_end
		end if;
			
		return int_of_4(bswo_comp(tup.occs_tree,current_ix));
						-- otherwise return the current record identifier, as an integer

	when union =>	-- for the union, we simply return the smaller of the two 
					-- items, and advance it

		if (las_1 := ^(tup.last_L1)) = OM then
		
			if ^(tup.last_L2) = OM then return OM; end if;		-- iteration is finished
			to_return := ^(tup.last_L2);
			^(tup.last_L2) := itr_next(tup.L2_child);	-- advance the L2 iteration
			return to_return;		-- return the L2 iterator
			
		elseif (las_2 := ^(tup.last_L2)) = OM then

			to_return := ^(tup.last_L1);
			^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration
			return to_return;		-- return the L1 iterator

		elseif las_1 <= las_2 then

			to_return := las_1;		-- return the smaller

			if las_1 = las_2 then ^(tup.last_L2) := itr_next(tup.L2_child); end if;
				-- advance L2 if it equals the L1 being returned
				
			^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the smaller iteration 
			return to_return;		-- return the L1 iterator

		else

			to_return := las_2;		-- return the smaller
			^(tup.last_L2) := itr_next(tup.L2_child);	-- advance the smaller iteration
			return to_return;		-- return the L2 iterator

		end if;
		
	when difference =>	-- for the difference L1 - L2, we return and advance the L1 item if 
						-- it is the smaller of the two; otherwise we advance the L2 item
						-- until it becomes equal to or greater than the L1 item. At each
						-- equality, the L1 item is also increased.
--print("difference iter; last_L1,last_L2: ",^(tup.last_L1)," ",^(tup.last_L2));

		if (las_1 := ^(tup.last_L1)) = OM then return OM; end if;		-- iteration is finished
		
		if (las_2 := ^(tup.last_L2)) = OM or las_1 < las_2 then

			to_return := las_1;		-- return the L1 iterator
			^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration 
			return to_return;		-- return the L1 iterator

		elseif las_1 = las_2 then	-- bypass common element

			^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration 
			
		end if;		-- now last_L1 is larger
		
		while (las_1 := ^(tup.last_L1)) /= OM loop	-- we move forward (in both lists if necessary)
									-- till last_L2 exceeds last_L1

			while (las_2 := ^(tup.last_L2)) /= OM and las_1 > las_2 loop
				^(tup.last_L2) := itr_next(tup.L2_child);
			end loop;

			if las_1 = las_2 then 	-- must advance last_L1 again
				^(tup.last_L1) := itr_next(tup.L1_child);
				continue; 
			end if;	
			
			to_return := las_1;		-- return the L1 iterator
			^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration 
			return to_return;		-- return the L1 iterator

		end loop;

		return OM;		-- iteration is finished

	when intersection =>	-- for the intersection, we advance the smaller of the two 
							-- items until a common value is found, when we advance both

		while (las_1 := ^(tup.last_L1)) /= OM and (las_2 := ^(tup.last_L2)) /= OM loop
			
			if las_1 = las_2 then -- we have an intersection element
			
				to_return := las_1;		-- return the L1 iterator
				^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration 
				return to_return;		-- return the L1 iterator
			
			end if;
			
			if las_1 < las_2 then -- advance the smaller element
				^(tup.last_L1) := itr_next(tup.L1_child);	-- advance the L1 iteration 
			else
				^(tup.last_L2) := itr_next(tup.L2_child);	-- advance the L2 iteration 
			end if;
			
		end loop;

		return OM;		-- iteration is finished

	end case;

end itr_next;

procedure itr_number(tup);		-- count operator

	case tup.iterator_kind

	when simple =>

		if tup.occs_start = OM then return 0; end if;		-- word not found
		
		return tup.occs_end - tup.occs_start + 1;
	
	when union,difference,intersection =>
		-- count occurences matching criterion
		
		num := 0; itr_start(tup);
		while itr_next(tup) /= OM loop num +:= 1; end loop;
		return num;

	end case;

end itr_number;

procedure itr_arb(tup);		-- arb operator; returns first element

--print("itr_arb: ",tup.iterator_kind);	
	case tup.iterator_kind

	when simple =>		-- restart the iteration if necessary

		if (tos := tup.occs_start) = OM then return OM; end if;	
		iof := int_of_4(bswo_comp(tup.occs_tree,tos));
		
		return iof;
				-- otherwise return the first record identifier from the sequence for this word

	when union,difference,intersection =>	-- for the compound cases, just set up both values

		itr_start(tup);			-- begin iterating over record