You are here: Home Projects SETL SETL2 Source code DB_btree_wdoc.stl
Views
Document Actions

DB_btree_wdoc.stl

by Paul McJones last modified 2021-02-25 11:30
History
Action Performed by Date and Time Comment
Publish Paul McJones 2021-02-25 11:30 No comments.

"B-tree variant for word occurence_strings, with 2 cumulants in parent, refcounts."

-- file B_tree_for_bigs_wdoc.stl
package B_tree_for_wdocstring;		-- B-trees, realized as objects
-- ***********************************************************************************************
-- ****** B-tree variant for word occurence_strings, with 2 cumulants in parent, refcounts  ******
-- ***********************************************************************************************
	-- Refcounts can be ignored except in the wo_set_comp, wo_insert, split_node, share_right, join_left,
	-- and join_left routines.
-- this B-tree structures the wdoccs_big_string which holds word occurences
const wo_code_pts := {"wo_last", "wo_nlast2", "wo_nlast1", "wo_nc1", "wo_nc2", "wo_fpbot", "wo_fpsame", 
	"wo_fpinfirst", "wo_fpinmid", "wo_fpinlast", "wo_set_copy", "wo_set_last1", "wox_set_last1", 
	"wo_set_last2", "wox_set_last2", "wo_set_nlast1", "wo_set_nlast2", "wo_set_ndel_nc", "wo_set_ndel_comp", 
	"wo_set_ndel_end", "wo_set_del_nc", "wo_set_del_have", "wo_set_del_comp", 
	"wo_set_del_comp_end", "wo_set_del_enough", "wo_set_del_canpull", "wo_set_del_canjoin", 
	"wo_set_enough", "wo_set_cutback", "wo_in_copy", "wo_in_nend2", "wo_in_nend1", "wo_in_nend3", 
	"wo_in_end3", "wo_in_endnc", "wo_in_ncnosplit", "wo_in_ncsplit", "wo_in_endcomp", 
	"wo_in_endcomp_nos", "wo_in_endcomp_split", "wo_in_endcomp_nosthis", "wo_in_endcomp_splitthis", 
	"wo_in_nendnc_nos", "wo_in_nendnc_split", "wo_in_nendcomp_nos", "wo_in_nendcomp_split", 
	"wo_in_nendcomp_nosthis", "wo_in_nendcomp_splitthis", "wo_pull_left", "wo_pull_right", 
	"wo_share_copy", "wo_share_copy2", "wo_share_move_left",
	"wo_share_move_right", "wo_jleft_copy", "wo_jleft", "wo_jright_copy", "wo_jright"};	
					-- code points to be traversed

var debug_flag := false;
var prior_debug_c := 0,debug_c := 0;		-- global variables for debugging

var tree_level := 0;			-- for tracking tree level during insertion

--procedure wo_create();   
--procedure wo_get_cum(rec);
--procedure wo_get_cum2(rec);
--procedure wo_comp(rec,j);
--procedure wo_comp2(rec,j); 
--procedure wo_comp_cum(rec,x);   
--procedure wo_comp_cum2(rec,x);

procedure wo_first_past_to(rec,key,beg,ennd);		-- search for a key in the indicated range
--procedure wo_set_comp(rw rec,w,x);	-- assignment of the first component whose cumulant is at least w; w must be in range
--procedure wo_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w
--procedure wo_insert(rw rec,j,x);	-- insertion before j-th component; or at the end if j = OM
--procedure wo_insert2(rw rec,j,x);	-- insertion before component with second cumulant at least j; or at end (j = OM)

procedure wo_dump(rec);			-- get tuple from B-tree representation (DEBUGGING ONLY)
procedure show_hex(occstg);		-- show an occurences string as comma-separated hex (DEBUGGING ONLY)
procedure shexify(stg);		-- put into abbreviated hex (DEBUGGING ONLY)
procedure wo_check_tree_structure(tree);		-- recursive check of tree structure (DEBUGGING ONLY)

end B_tree_for_wdocstring;

package body B_tree_for_wdocstring;		-- B-trees, realized as objects
use setldb,byteutil,disk_records_pak,db_records,string_utility_pak;

				-- there are two cumulants, the total number of occurences and the corresponding record id 

procedure get_ch_cum(rec,j);	-- get the integer cum value for the j-th child of this node
	
	return int_of_5(dr_load(rec)(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1));
	
end get_ch_cum;

procedure hwo_get_cum(rec);              -- get the integer cum value of the fina l child of this node

        return get_ch_cum(rec,num_childr(rec));

end hwo_get_cum;

procedure hwo_get_cum2(rec);             -- get the record id string cum value of the final child of this node

        return get_ch_cum2(rec,num_childr(rec));

end hwo_get_cum2;



procedure set_ch_cum(rec,j,cum_int);	-- set the integer cum value for the j-th child of this node

	stg := dr_load(rec);		-- make sure that this record is loaded
	stg(wo_cum_start + (j - 1) * 5..wo_cum_start + j * 5 - 1) := stg_of_5(cum_int);
	dr_setrecbuf(rec,stg); dr_dirtify(rec);
	
end set_ch_cum;

procedure get_ch_cum2(rec,j);	-- get the record id string cum value for the j-th child of this node
	
	return dr_load(rec)(wo_cum2_start + (j - 1) * 4..wo_cum2_start + j * 4 - 1);
	
end get_ch_cum2;

procedure hwo_create();                                          -- creation rout ine

        rec := dr_new_rec();                       -- create a new record (at this point, the record is loaded
                                                                -- dirty, and ho
        set_type(rec,wdoccs_str_node_ncr);      -- set to non-compound tree
        return rec;

end hwo_create;


procedure set_ch_cum2(rec,j,new_rec_id);	-- set the record id string cum value for the j-th child of this node

	stg := dr_load(rec);		-- make sure that this record is loaded
	stg(wo_cum2_start + (j - 1) * 4..wo_cum2_start + j * 4 - 1) := new_rec_id;
	dr_setrecbuf(rec,stg); dr_dirtify(rec);
	
end set_ch_cum2;

procedure vect_of_children(rec);					-- gets vector of children, as a string of 4-byte record numbers
	
	nch := num_childr(rec);		-- number of children
	return dr_load(rec)(wo_ch_start..wo_ch_start - 1 + nch * 4);
	
end vect_of_children;

procedure set_vect_of_children(rec,stg);	-- sets vector of children, from a string of 4-byte record numbers
--print("set_vect_of_children: ",hexify(rec)," ",hexify(stg));
	set_num_childr(rec,(nstg := #stg)/4);	-- set the number of children (also loads)
	missing := wo_cum_start - wo_ch_start - #stg;
--if missing < 0 then print("missing,wo_cum_start,wo_ch_start,#stg: ",missing," ",#stg," ",hexify(stg)); stop; end if;
	stg2:=dr_load(rec);
	stg2(wo_ch_start..wo_cum_start - 1) := (stg + missing * "\x00");
	
		-- set the children, remembering not to change the length of the string section containing them

	dr_setrecbuf(rec,stg2); dr_dirtify(rec);
	
end set_vect_of_children;

procedure vect_of_cums(rec);					-- gets vector of cums, as a string of 5-byte fields
	
	nch := num_childr(rec);		-- number of children
	return dr_load(rec)(wo_cum_start..wo_cum_start - 1 + nch * 5);
	
end vect_of_cums;

procedure set_vect_of_cums(rec,stg);	-- sets vector of cumulants, from a string of 5-byte record numbers

	missing := wo_cum2_start - wo_cum_start - #stg;
	stg2:=dr_load(rec);		-- force load
	stg2(wo_cum_start..wo_cum2_start - 1) := (stg + missing * "\x00");
	
		-- set the cumulants, remembering not to change the length of the string section containing them
	dr_setrecbuf(rec,stg2); dr_dirtify(rec);

end set_vect_of_cums;

procedure vect_of_cums2(rec);					-- gets vector of cums, as a string of 4-byte fields
	
	nch := num_childr(rec);		-- number of children
	return dr_load(rec)(wo_cum2_start..wo_cum2_start - 1 + nch * 4);
	
end vect_of_cums2;

procedure set_vect_of_cums2(rec,stg);	-- sets vector of children, from a string of 4-byte record numbers

	missing := rec_size + 1 - wo_cum2_start - #stg;
	stg2:=dr_load(rec);		-- force load
	stg2(wo_cum2_start..rec_size) := (stg + missing * "\x00");
		-- set the children, remembering not to change the length of the string section containing them

	dr_setrecbuf(rec,stg2); dr_dirtify(rec);
	
end set_vect_of_cums2;

procedure voc(rec,j);					-- n'th member of vector of children

	stg := dr_load(rec);					-- load this string
	cjstrt := (j - 1) * 4 + wo_ch_start; 
	return stg(cjstrt..cjstrt + 3);		-- return child rec
	
end voc;

procedure set_voc(rec,j,chrec);				-- set n'th member of vector of children

	cjstrt := (j - 1) * 4 + wo_ch_start; 
	stg2:=dr_load(rec);		-- force load; then set the first character of the record
	stg2(cjstrt..cjstrt + 3) := chrec;
	dr_setrecbuf(rec,stg2); dr_dirtify(rec);

end set_voc;


procedure wo_dump(rec);			-- get tuple from nested representation
				-- is compound representation or direct, depending on whether 'is_compound' flag is set
				-- note that this shows the vector of components, without their cumulants
				-- we ignore the cumulants in the tree nodes
var indent := 0;
return wo_dump_in(rec);			-- call inner workhorse

procedure wo_dump_in(rec);			-- inner workhorse

	if dr_is_compound(rec) then 		-- compound case
		indent +:= 1; nc := num_childr(rec);
		stg :=  (["\n" + (indent * "   ") + "("] +/ [wo_dump_in(voc(rec,j)): j in [1..nc]]) 
					+ ["\n" + (indent * "   ")+ "[" + str(nc) + "]"  
						+ str(wo_get_cum(rec)) + "," + shexify(wo_get_cum2(rec)) + ")"]; 
		indent -:= 1;
		return stg;
	end if;						-- done with compound case

	t := [];
	for j in [1..num_childr(rec)] loop
		chj := voc(rec,j);
		svj := if (lvj := wo_length(chj)) > 0 then show_hex(wo_slice(chj,1,lvj)) else "" end if; t with:= svj;
	end loop;
	
	return ["\n" + (indent * "   ") + "("] + t + [str(wo_get_cum(rec)) + "," + shexify(wo_get_cum2(rec)) + ")"];

end wo_dump_in;

end wo_dump;

procedure show_hex(occstg);		-- show an occurences string as comma-separated hex
	return "" +/ [shexify(occstg(j..j + 3)) + if j > #occstg - 4 then "" else "," end if: j in [1,5..#occstg]];
end show_hex;

procedure shexify(stg);		-- put into abbreviated hex
	ns := #(stg := hexify(stg)); zers := span(stg,"0"); 
	return if ns = 8 then ":" + stg  else ":" + stg + ":" + str(#zers) end if; 
end shexify;		

procedure hwo_comp(rec,j);               -- fetch of component containing cumulant j
        return comp_cum_ix(rec,1,j)(1);
end hwo_comp;

procedure hwo_comp2(rec,j);              -- fetch of component containing second cumulant j
        return comp_cum_ix(rec,2,j)(1);
end hwo_comp2;

procedure hwo_comp_cum(rec,x);           -- fetch of x-th component, with cumulan ts
        return comp_cum_ix(rec,1,x);
end hwo_comp_cum;

procedure hwo_comp_cum2(rec,x);          -- fetch of component containing second cumulant x, with cumulants
        return comp_cum_ix(rec,2,x);
end hwo_comp_cum2;

procedure comp_cum_ix(rec,srch_on,x);		-- fetch of x-th component by search on specified cumulant
					-- component should be found by binary search
					-- search for the first index component with cumulant past
					-- the specified x; Return OM if there is none such.
					-- this returns the triple [leaf,int_cum_of_leaf,second_cum_of_leaf]
								-- NOTE: this should be by binary search

							-- find the first node for which a cumulant >= j
	if x = OM then 				-- want last node
pass("wo_last");
		j := num_childr(rec); 
		the_cum := if srch_on = 1 then get_ch_cum(rec,j) else get_ch_cum2(rec,j) end if;
	else
		if srch_on = 2 then 
			iofx := int_of_4(x);
pass("wo_nlast2");
		else
pass("wo_nlast1");
		end if;
		if not (exists j in [1..num_childr(rec)] | 
			if srch_on = 1 then (the_cum := get_ch_cum(rec,j)) >= x else 
				int_of_4(the_cum := get_ch_cum2(rec,j)) >= iofx end if) then 
--print("******** return OM: ",x," ",nc := num_childr(rec)," ",wo_dump(rec)," ",if nc > 0 then get_ch_cum(rec,nc) else "NONE" end if); stop;
			return OM; 
		end if;
	end if;
			-- desired node not found

	if not dr_is_compound(rec) then			 	-- if node is not compound we have what we want
		if srch_on = 1 then 
pass("wo_nc1");
			return [voc(rec,j),the_cum,get_ch_cum2(rec,j)];
		else 
pass("wo_nc2");
			return [voc(rec,j),get_ch_cum(rec,j),the_cum];
		end if;
	end if;
			
	if srch_on = 1 then prev_cum := if j = 1 then 0 else get_ch_cum(rec,j - 1) end if; end if;
			-- the preceding cumulant (needed in the integer case only)
	
	res := comp_cum_ix(voc(rec,j),srch_on,if x = OM or srch_on = 2 then x else x - prev_cum end if);
			-- continue the search recursively

	[rec_no,retcum,retcum2] := res;
	
	return [rec_no] + [prev_cum + retcum,retcum2];

end comp_cum_ix;

procedure wo_first_past_to(rec,key,beg,ennd);		-- search for a key in the indicated range
				-- this routine returns the pair [first_past_key,its_ix]

	stg := dr_load(rec);		-- read contents of the record
	
				-- if the node is a wdoccs_string_record we search for the key directly
	if stg(type_byte) = wdoccs_string_record then
pass("wo_fpbot");
		ikey := int_of_4(key);		-- convert to integer for purposes of comparison

		if exists j in [beg..ennd min wo_length(rec)] | int_of_4(wos := wo_slice(rec,j,j)) >= ikey then
			return [wos,j];		--  (this should be a binary search)
		end if;
	
		return OM;		-- otherwise the desired key cannot be found
	
	end if;
			-- otherwise find the children containing the start and end leaves of the specified range
	must := (exists ixbeg in [1..ncr := num_childr(rec)] | get_ch_cum(rec,ixbeg) >= beg);
if not must then print("** INDICATED STARTING POINT NOT PRESENT IN OCCURENCE VECTOR ** : ncr,beg:", ncr,beg); stop; end if;
	must := (exists ixend in [ixbeg..ncr] |  get_ch_cum(rec,ixend) >= ennd);
if not must then print("** INDICATED ENDING POINT NOT PRESENT IN OCCURENCE VECTOR ** ncr,ennd:", ncr,ennd); stop; end if;
	
	prev_cum := if ixbeg = 1 then 0 else get_ch_cum(rec,ixbeg - 1) end if;

	if ixbeg = ixend then			-- both children are the same; search down recursively
pass("wo_fpsame");
		if (res := wo_first_past_to(voc(rec,ixbeg),key,beg - prev_cum,ennd - prev_cum)) = OM then 
			return OM;		-- the  desired key cannot be found
		end if;
		
		[first_past_key,its_ix] := res; return [first_past_key,its_ix + prev_cum];
		
	end if;	-- otherwise search the first node to its end, then the middle nodes, then the last
	
	if (res := wo_first_past_to(
			first_ch := voc(rec,ixbeg),key,beg - prev_cum,get_ch_cum(rec,ixbeg) - prev_cum)) /= OM then 
					-- desired key is found in first child
		[first_past_key,its_ix] := res; 
pass("wo_fpinfirst");
		return [first_past_key,its_ix + prev_cum];
	end if;	

	ikey := int_of_4(key);		-- convert to integer for purposes of comparison

	if exists j in [ixbeg + 1,ixend - 1] | int_of_4(get_ch_cum2(rec,j)) >= ikey then
										-- have found appropriate child
										-- (should be binary search)

		chj := voc(rec,j); chj_len := (get_ch_cum(rec,j) - (prev_cum := get_ch_cum(rec,j - 1)));

		[first_past_key,its_ix] := wo_first_past_to(chj,key,1,chj_len);
pass("wo_fpinmid");
		return [first_past_key,its_ix + prev_cum];
			
	end if;

	prev_cum := get_ch_cum(rec,ixend - 1);		-- get the previous cum
	
	if (res := wo_first_past_to(voc(rec,ixend),key,1,ennd - prev_cum)) /= OM then 
					-- desired key is found in last child
		[first_past_key,its_ix] := res; 
pass("wo_fpinlast");
		return [first_past_key,its_ix + prev_cum];
	end if;	
	
	return OM;		-- otherwise the desired key cannot be found
	
end wo_first_past_to;

procedure hwo_set_comp(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w
	set_comp_ix(rec,1,w,x);
end hwo_set_comp;

procedure hwo_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w
	set_comp_ix(rec,2,w,x);
end hwo_set_comp2;

procedure set_comp_ix(rw rec,srch_on,w,x);
	-- assignment of the first component whose specified cumulant is at least w; w must be in range
	-- we must first copy rec if its refcount is greater than 1, and transfer one reference
	-- from its old to its copied version
--print("set_comp_ix: ",wo_dump(rec));
	if refcount(int_of_4(rec)) > 1 then						-- must copy
pass("wo_set_copy");
		stg := dr_load(rec); new_r := dr_new_rec();dr_setrecbuf(new_r,stg); dr_dirtify(new_r); 
		increfs(new_r,1); incref(rec,-1); rec := new_r;		-- substitute copy for original
	end if;
	
	ic := dr_is_compound(rec); if srch_on = 2 then wint := int_of_4(w); end if;
	ncr := num_childr(rec);
	
	if w = OM then			-- make change in last node
		if srch_on = 1 then 
pass("wo_set_last1");
			w := get_ch_cum(rec,ix := ncr); 
		else 
pass("wo_set_last2");
			w := get_ch_cum2(rec,ix := ncr); 
		end if; 

		if ix = 0 then print("Deletion or change at end of empty tree is not allowed."); stop; end if;

	elseif not (exists ix in [1..ncr] |  
		if srch_on = 1 then get_ch_cum(rec,ix) >= w else int_of_4(get_ch_cum2(rec,ix)) >= wint end if) then 

		if srch_on = 1 then 
pass("wox_set_last1");
			w := get_ch_cum(rec,ix := ncr); 
		else 
pass("wox_set_last2");
			w := get_ch_cum2(rec,ix := ncr); 
		end if; 

		if ix = 0 then print("Deletion or change at very end of empty tree is not allowed."); stop; end if;

	end if;
								-- get the local cumulant of the child
	if srch_on = 1 then
		old_cum := if ic then wo_get_cum(voc(rec,ix)) else wo_length(voc(rec,ix)) end if;
					-- find the last cumulant value of voc(rec,ix), or the length of active part of voc(rec,ix) 
pass("wo_set_nlast1");
	else
pass("wo_set_nlast2");
		old_cum := if ic then wo_get_cum2(voc(rec,ix)) else wo_slice(nd,wol := wo_length(nd),wol) end if;
					-- find the last cumulant value of voc(rec,ix), or the last occurence in voc(rec,ix) 
	end if;
--print("old_cum: ",old_cum," ",hexify(dr_load(voc(rec,ix))));			 
	if srch_on = 1 then prev_cum := if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if; end if;
					
	if x /= OM then 			-- we are not dealing with a deletion

		if not ic then 			-- non-compound case; we must add the change in the leaf cumulant
								-- to the cumulant of all following nodes. 
			old_x := voc(rec,ix);
			set_voc(rec,ix,x);  	-- change the component
			xfref(old_x,x);		-- transfer reference from old to new component
			cum_change := wo_length(x) - old_cum;
			update_cums(rec,ix,cum_change);		-- update the cums,starting with the given child

			set_ch_cum2(rec,ix,wo_slice(x,wol := wo_length(x),wol));
				-- modify the cum of the changed component. Note that the following 
				-- second cumulants need not be changed. However, if ix references the 
				-- final child of rec, a cum2 entry in the parent of rec may need to be changed below.
pass("wo_set_ndel_nc");
			return;				-- done with this case
		end if;						-- otherwise we have the compound case
pass("wo_set_ndel_comp");
		the_child := voc(rec,ix);				-- the next operation may copy the child
		wo_set_comp(the_child,if srch_on = 1 then w - prev_cum else w end if,x);		
							-- make the change in the child
		cum_change := wo_get_cum(the_child) - old_cum;
		
		set_voc(rec,ix,the_child);		-- put the revised, possibly copied child back into position
			-- the 'update_cums' procedure which we call now must then start with the (properly set)
			-- cumulant of the preceding node, and then left_add the change in the the cumulant
			-- to the cumulant of this child to all the subsequent children. This assumes that 
			-- 		(new_d - old_d) + c + old_d + e = c + new_d + e
			-- for all cumulant values. This is obviously true for values using associative-commutative
			-- cumulator functions with an inverse, and also in the (string) case where a + b = b.
	
		update_cums(rec,ix,cum_change);			-- update the cums,starting with the given child

				-- the final cumulant of the_child might have been changed by 
				-- the preceding wo_set_comp(the_child,..) operation; see preceding comment.
		if ix = ncr then 
			set_ch_cum2(rec,ix,wo_get_cum2(the_child)); 
pass("wo_set_ndel_end");
		end if;
		
--print("compound self,ix: ",wo_get_cum()," ",ix);
		return;				-- done with the non-deletion case
	
	end if;			-- otherwise we are dealing with a deletion
	
	if not dr_is_compound(rec) then 		-- non-compound case

		the_ch := vect_of_children(rec);
		the_child := the_ch(4 * ix - 3..4 * ix);	-- capture the child before removal
		incref(the_child,-1);					-- drop one reference

		the_ch(4 * ix - 3..4 * ix) := "";
pass("wo_set_del_nc");

		if #the_ch > 0 then		-- need not delete cumulant if no remaining children
			stg := vect_of_cums(rec);				-- get the vector of cums
			stg((ix - 1) * 5 + 1..ix * 5) := "";		-- drop one element
			set_vect_of_cums(rec,stg);				-- put back into place
			stg := vect_of_cums2(rec);				-- get the second vector of cums
			stg((ix - 1) * 4 + 1..ix * 4) := "";		-- drop one element
			set_vect_of_cums2(rec,stg);				-- put back into place
pass("wo_set_del_have");
		end if;

				-- delete the child from the string of descendants
		set_vect_of_children(rec,the_ch);		-- delete the ix-th node

				-- delete the cumulant of the ix-th node. note that this has already been loaded
		cum_change := -old_cum;
		update_cums(rec,ix,cum_change);			-- update the cums,starting with the appropriate child
		return;				-- done with this case

	end if;				-- otherwise we are dealing with a deletion in a compound case
		
	the_child := voc(rec,ix);		-- get the child

	old_ch_leafsum := wo_get_cum(old_child := the_child);
	wo_set_comp(the_child,w - prev_cum,OM);		-- make the deletion in the child
	new_ch_leafsum := wo_get_cum(the_child);
	set_voc(rec,ix,the_child := the_child);		-- re-insert the possibly modified child
	cum_change := new_ch_leafsum - old_ch_leafsum;
	update_cums(rec,ix,cum_change);			-- update the cums,starting with the given child
			-- since the last child may have been deleted, we also need to update the second cum
pass("wo_set_del_comp");
	if ix = ncr then 
		set_ch_cum2(rec,ix,wo_get_cum2(the_child)); 
pass("wo_set_del_comp_end");
	end if;
	
		-- now it is possible that the child has lost enough children to have fallen below the required wo_low_lim
		-- if this has happened, we attempt to share or join children with one of the adjacent siblings
	
	if num_childr(the_child) >= wo_low_lim then 
pass("wo_set_del_enough");
		return; 
	end if;	-- otherwise try to join or share
				
	if pull_from_left(rec,ix) or pull_from_right(rec,ix) then 
pass("wo_set_del_canpull");
		return; 
	end if;

	if join_with_left(rec,ix) or join_with_right(rec,ix) then 
pass("wo_set_del_canjoin");
		null; 
	end if;

			-- In the 'join' case, either the left or the right join must work,
			-- since in this compound case we must have at least one sibling.
			-- but we must check to see if the node being processed has fallen to
			-- just one child, and if it has, replace it by its single child.
	
	if num_childr(rec) > 1 then 
pass("wo_set_enough");
		return; 
	end if;

pass("wo_set_cutback");
	ch_rec := dr_load(the_ch := voc(rec,1)); dr_load(rec); 
	dr_setrecbuf(rec,ch_rec); dr_dirtify(rec);
	set_vect_of_children(the_ch,"");		-- the children have all moved
	incref(the_ch,-1);		-- child data is inherited from single child, which loses a reference

end set_comp_ix;

procedure update_cums(rec,ix,cum_change);	-- update the cums of this tree,starting with the given child

				-- now add this difference to all subsequent children
	for j in [ix..nvc := num_childr(rec)] loop
		set_ch_cum(rec,j,cum_change + get_ch_cum(rec,j));
	end loop;
		
end update_cums;

procedure cumulate(rec);
	-- initalize the cumulants of a node whose children are either leaves or already initialized
	
	--print("CUMULATE!");
	ic := dr_is_compound(rec);		-- determine if compound 

	the_cum := 0;

	for  j in [1..num_childr(rec)] loop
		nd := voc(rec,j);		-- get the j-th child
		--print("Child ",j," = ",int_of_4(nd));
		nd_cum := if ic then wo_get_cum(nd) else wo_length(nd) end if;
		--print("nd_cum=",nd_cum);
				-- cumulant of the final child of the subnode, or occurence string length
		the_cum := the_cum + nd_cum;
		set_ch_cum(rec,j,the_cum);  		-- update the child's cumulant value
		nd_cum2 := if ic then wo_get_cum2(nd) else wo_slice(nd,wol := wo_length(nd),wol) end if;
		--print(int_of_4(nd_cum2));
				-- cumulant2 of the final child of the subnode, or last element of occurence string
		set_ch_cum2(rec,j,nd_cum2);  		-- update the child's cumulant value
	end loop;

end cumulate;

procedure hwo_insert(rw rec,j,x);		-- insertion before j-th component; or at the end if j = OM
	insert(rec,1,j,x);
end hwo_insert;

procedure hwo_insert2(rw rec,j,x);		-- insertion before component with second cumulant at least j;
	insert(rec,2,j,x);				-- or at the end if j = OM
end hwo_insert2;

procedure insert(rw rec,srch_on,j,x);	-- insertion before j-th component with at least specified cumulant;

	tree_level := 0;				-- note that we are at the top of the tree
	insert_in(rec,srch_on,j,x);		-- call the inner routine
	
end insert;

procedure insert_in(rw rec,srch_on,j,x);	-- insertion before j-th component with at least specified cumulant;
								-- or at the end if j = OM
								-- component should be found by binary search

	if refcount(int_of_4(rec)) > 1 then						-- must copy
pass("wo_in_copy");
		stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r); 
		increfs(new_r,1); incref(rec,-1); rec := new_r;		-- substitute copy for original
	end if;
	
	result := OM; 	-- in case desired element not found
	ic := dr_is_compound(rec); ncr := num_childr(rec);
	the_cum := wo_get_cum(rec);	-- get final cumulant of this tree
	wol := x_cum := wo_length(x);	-- cumulant value of the leaf x
	x_cum2 := wo_slice(x,wol,wol);	-- second cumulant value of the leaf x

	if not ic then incref(x,1); end if;	-- at the bottom level, the inserted leaf takes on an extra reference		
	
	if j /= OM then 		-- look for target node of insertion, if any

		if srch_on = 2 then 
pass("wo_in_nend2");
			iofj := int_of_4(j); 
		else
pass("wo_in_nend1");
		end if;

		if exists ix in [1..ncr] | 
			if srch_on = 1 then (cum := get_ch_cum(rec,ix)) >= j else 
				int_of_4(cum := get_ch_cum2(rec,ix)) >= iofj end if then 
			result := [ix,if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if,cum]; 
			nd := voc(rec,ix);
pass("wo_in_nend3");
		else 
pass("wo_in_end3");
			result := OM;
		end if;
	end if;
	
	if result = OM then 		-- we have insertion at the very end
		
		if not ic then -- simply append to vector
pass("wo_in_endnc");
			set_vect_of_children(rec,vect_of_children(rec) + x);
					-- add the cumulant of x to the present cumulant of this tree
			set_ch_cum(rec,nvc := ncr + 1,the_cum := the_cum + x_cum);
			set_ch_cum2(rec,nvc,x_cum2);
--print("vect_of_children(rec): ",nvc," ",num_childr(rec)," ",ncr," ",wo_hi_lim + 1," ",hexify(vect_of_children(rec))); 
			if tree_level > 0 or nvc <= wo_hi_lim then 			-- no need to split
pass("wo_in_ncnosplit");
				return; 
			end if;		-- otherwise we must split, and becomes compound
pass("wo_in_ncsplit");
								-- note that in this case we are at the very top of the tree
			set_vect_of_children(rec,two_halves(rec));	-- split into 2 non-compound subtrees
			set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum);				
			set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2)));				
			set_is_compound(rec,true);					-- note that it is indeed compound
			return;				-- done with this case

		end if;		-- otherwise we have insertion at the very end of a compound vector 
		
		last_child:= voc(rec,nvc := num_childr(rec));		-- get the last child
		tree_level +:= 1;						-- go down a level
		insert_in(last_child,srch_on,OM,x);				-- insert at the end of this last child
		tree_level -:= 1;						-- come up a level
		set_voc(rec,ncr,last_child);			-- insert the possibly modified child back into vect_of_children
							-- add the cumulant of x to the present cumulant of this tree
		set_ch_cum(rec,ncr,the_cum := the_cum + x_cum);
		set_ch_cum2(rec,ncr,x_cum2);
pass("wo_in_endcomp");
		if num_childr(last_child) <= wo_hi_lim then 		-- no need to split
pass("wo_in_endcomp_nos");
			return; 
		end if;			-- otherwise we must split the last child
pass("wo_in_endcomp_split");

		split_node(rec,nvc);	-- split the nvc-th node into two.  we insert an empty node to the right
							-- of node nvc, and then move half the children of the nvc-th node into the new node	

		nvc := num_childr(rec);
		if tree_level > 0 or nvc <= wo_hi_lim then 		-- no need to split this node
pass("wo_in_endcomp_nosthis");
			return;				-- done with this case
		end if;
		-- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
			
		set_vect_of_children(rec,two_halves(rec));
		set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum);				
		set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2)));				
pass("wo_in_endcomp_splitthis");
		return;				-- done with this case

	end if;		-- end of case of insertion at the very end
				-- in the remaining cases we have an insertion before one of our nodes
	
	 [ix,prev_cum,cum] := result;	-- decode the result returned, geting the insertion result and the preceding cumulant

	if not ic then -- insertion at appropriate position in non-compound vector
		
		the_ch := vect_of_children(rec); the_ch(4 * ix - 3..4 * ix - 4) := x;
		the_cums := vect_of_cums(rec); the_cums2 := vect_of_cums2(rec); 
		the_cums(5 * ix - 4..5 * ix - 5) := 
			if ix = 1 then "\x00\x00\x00\x00\x00" else the_cums(5 * ix - 9..5 * ix - 5) end if;
		the_cums2(4 * ix - 3..4 * ix - 4) := x_cum2;
		set_vect_of_children(rec,the_ch);				-- make insertion into list of children
		set_vect_of_cums(rec,the_cums);				-- make insertion into list of cums
		set_vect_of_cums2(rec,the_cums2);				-- make insertion into list of cums
		update_cums(rec,ix,x_cum);					-- adjust the given and following cumulants

		if tree_level > 0 or (nvc := num_childr(rec)) <= wo_hi_lim then		-- no need to split
pass("wo_in_nendnc_nos");
			return;				-- done with this case
		end if;				-- otherwise we must split, and becomes compound
pass("wo_in_nendnc_split");

		-- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
			
		set_vect_of_children(rec,two_halves(rec));
		set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum);				
		set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2)));				
		set_is_compound(rec,true);					-- note that it is indeed compound
--print("just became compound: ",dr_is_compound(rec)," ",wo_dump(rec)); stop;
		return;									-- done with this case

	end if;		-- othewise we deal with the compound case
	
	c := voc(rec,ix);		-- get the child into which the insertion will now be made
								-- and get the position in this child at which the insertion will be made
	tree_level +:= 1;						-- go down a level
	insert_in(c,srch_on,if srch_on = 1 then j - prev_cum else j end if,x);		-- insert x into this child
	tree_level -:= 1;						-- come up a level

	set_voc(rec,ix,c);				-- put the possibly revised child back into position
	update_cums(rec,ix,x_cum);		-- adjust the given and following cumulants
	set_ch_cum2(rec,ix,wo_get_cum2(c)); 	-- adjust the second cumulant on record for the child
									-- which may have changed if the insertion was at the end of the child
	
	if #(vect_of_children(c))/4 <= wo_hi_lim then 	-- no need to split the child
pass("wo_in_nendcomp_nos");
		return; 
	end if;						-- otherwise we must split the  child
pass("wo_in_nendcomp_split");

	split_node(rec,ix);				-- split the child into two.  we insert an empty node to the right
								-- of node ix, and then move half the nodes into it	

	nvc := num_childr(rec);
	if tree_level > 0 or nvc <= wo_hi_lim then 		-- no need to split this node
pass("wo_in_nendcomp_nosthis");
		return;				-- done with this case
	end if;		-- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
pass("wo_in_nendcomp_splitthis");
		
	set_vect_of_children(rec,two_halves(rec));
	set_ch_cum(rec,1,wo_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum);				
	set_ch_cum2(rec,1,wo_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wo_get_cum2(voc(rec,2)));				

end insert_in;

procedure two_halves(rec);		-- split this tree into two halves
	-- the two nodes created share the children of the original tree, so  that the refcounts of 
	-- these children need no adjustment. The nodes created each have a refcount of 1.
	-- Note that this routine is only called if rec has no parent.
	
	the_type := if (ic := dr_is_compound(rec)) then wdoccs_str_node_record else wdoccs_str_node_ncr end if;
	u1 := dr_new_rec(); u2 := dr_new_rec();			-- make and initialize two subtrees
	set_type(u1,the_type); set_type(u2,the_type);	-- the halves are compound iff this tree is compound

	-- we must subtract the cumulant of the last retained child from all the children that move
	-- to get the cumulant of the left node
	hnvc := (nvc := num_childr(rec))/2; cum_last_retained := get_ch_cum(rec,hnvc);
	
	for j in [hnvc + 1..nvc] loop			-- subtract this from the cumulant of each child that will move
		set_ch_cum(rec,j,get_ch_cum(rec,j) - cum_last_retained);
	end loop;
	
	vocums := vect_of_cums(rec);		-- get the vector of cums
	vocums2 := vect_of_cums2(rec);		-- get the vector of cums
	set_vect_of_children(u1,(voch := vect_of_children(rec))(1..4 * hnvc));
	set_vect_of_cums(u1,vocums(1..5 * hnvc));			-- u1 gets half the children and cums
	set_vect_of_cums2(u1,vocums2(1..4 * hnvc));			-- u1 gets half the children and cums

	set_vect_of_cums(u2,vocums(5 * hnvc + 1..));	-- the second half inherits adjusted cumulants from the original tree
	set_vect_of_cums2(u2,vocums2(4 * hnvc + 1..));			-- likewise for the second cums

	set_vect_of_children(u2,voch(4 * hnvc + 1..));		-- u2 gets the other half of  the children
--print("two_halves: ",str(wo_dump(u1)),"\n",str(wo_dump(u2)),"\n",hexify(voch(4 * hnvc + 1..)),"\n",hexify(dr_load(u2)));
	return u1 + u2;
				-- assemble the two nodes of the new compound tree; return as a string

end two_halves;

procedure pull_from_left(rec,k);		-- split children with left sibling

	if k = 1 or num_childr(voc(rec,k - 1)) <= wo_low_lim then 
		return false; 
	end if;
pass("wo_pull_left");

	share_right(rec,k - 1);
	return true;

end pull_from_left;

procedure pull_from_right(rec,k);	-- split children with right sibling

	if k >= (nrv := num_childr(rec)) or num_childr(voc(rec,k + 1)) <= wo_low_lim then
		return false;
	end if;
pass("wo_pull_right");

	share_right(rec,k); 
	return true;

end pull_from_right;

procedure share_right(rec,k);	-- share children with right-hand sibling
	
		-- we divide the children of the k-th node, together with those of the k+1'st,
		-- into two roughly equal groups, and make these the k-th and k+1'st nodes. The
		-- cumulative totals must be adjusted in the k-th node, in the
		-- children moved between nodes, and in the children of the k+1'st node    
	-- this routine must first copy the nodes among which children will move, if they have more than 1 reference.
	-- but it does not change the number of references to the children, so that their refcounts need no adjustment
	
	nchkp1 := #(rkp1 := vect_of_children(ndkp1 := voc(rec,k + 1)))/4;
	nchk := #(rk := vect_of_children(ndk := voc(rec,k)))/4;	-- get the two groups of children, and their lengths
	numleft := (nchk + nchkp1)/2;	-- half the children; the left-hand will get this number of children

	if refcount(int_of_4(ndkp1)) > 1 then						-- must copy
pass("wo_share_copy");
		stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg); dr_dirtify(new_r);
		incref(ndkp1,-1); ndkp1 := new_r;		-- substitute copy for original
		increfs(new_r,1); set_voc(rec,k + 1,new_r);
	end if;

	if refcount(int_of_4(ndk)) > 1 then						-- must copy
pass("wo_share_copy2");
		stg := dr_load(ndk); new_r := dr_new_rec();  dr_setrecbuf(new_r,stg); dr_dirtify(new_r);
		incref(ndk,-1); ndk := new_r;		-- substitute copy for original
		increfs(new_r,1); set_voc(rec,k,new_r);
	end if;
	
	if numleft > nchk then		-- children will move left
pass("wo_share_move_left");		
		num_mov := numleft - nchk;		-- the number that will move left
--print("move left: ",num_mov," ",hexify(dr_load(ndk)),"\n",hexify(dr_load(ndkp1)));		
			-- we must subtract the cumulant of the last child moving left
			-- from the cumulants of all the right-hand children which do not move, 
			-- and must add this to the cumulant of the k-th node. We must also
			-- add the cumulant of the last left-hand child of ndk to the cumulants of all 
			-- the right-hand children which do move. 

		ndk_cum := get_ch_cum(rec,k);			-- get the cumulant of ndk
		right_cum := wo_get_cum(ndk);			-- cumulant of the last child of ndk

		moved_cum := get_ch_cum(ndkp1,num_mov);		-- cumulant of the last child of ndkp1 that moves 
		set_ch_cum(rec,k,moved_cum + ndk_cum);		-- add moved_cum to the cumulant of the k-th node
		set_ch_cum2(rec,k,get_ch_cum2(ndkp1,num_mov));	-- correct the second cumulant of the k-th node
		
		for j in [1..num_mov] loop
			set_ch_cum(ndkp1,j,right_cum + get_ch_cum(ndkp1,j));
		end loop;
		
		for j in [num_mov + 1..nchkp1] loop
			set_ch_cum(ndkp1,j,get_ch_cum(ndkp1,j) - moved_cum);
		end loop;
			
--print("move the children: ",rk," ",rkp1," ",num_mov); 

											-- and now we must move the corresponding cums
		cumvp1 := vect_of_cums(ndkp1);			-- second vector of cums
--print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop;
		set_vect_of_cums(ndk,vect_of_cums(ndk) + cumvp1(1..num_mov * 5));		-- move the cums in
		set_vect_of_cums(ndkp1,cumvp1(num_mov * 5 + 1..));					-- move the cums out
			-- since the number of cums of ndkp1 is defined by its number of children, we need not edit that list 
		cumvp1 := vect_of_cums2(ndkp1);			-- second vector of second cums
--print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop;
		set_vect_of_cums2(ndk,vect_of_cums2(ndk) + cumvp1(1..num_mov * 4));		-- move the cums in
		set_vect_of_cums2(ndkp1,cumvp1(num_mov * 4 + 1..));					-- move the cums out

		set_vect_of_children(ndk,rk + rkp1(1..4 * num_mov));		-- now actually move the children
		set_vect_of_children(ndkp1,rkp1(4 * num_mov + 1..));

	else						-- children will move right
pass("wo_share_move_right");		
				-- we must subtract the cumulant of the last remaining child 
				-- from that of each of the children moving right. The remaining cumulant of
				-- the last child moving right must then be added to the cumulants of
				-- all the original children of the (k + 1)-st node
		
		rem_left_cum := get_ch_cum(ndk,numleft);		-- cumulant of the last remaining child of ndk

		for j in [numleft + 1..nchk] loop	-- subtract this from the cum of all the nodes moving right
			set_ch_cum(ndk,j,get_ch_cum(ndk,j) - rem_left_cum);
		end loop;
		
		total_moved_cum := get_ch_cum(ndk,nchk);		-- get the cumulant of the last node moving right
		
		for j in [1..nchkp1] loop	-- add this to the cum of all the children of the (k + 1)-st node
			set_ch_cum(ndkp1,j,total_moved_cum + get_ch_cum(ndkp1,j));
		end loop;

		set_ch_cum(rec,k,get_ch_cum(rec,k) - total_moved_cum);
				-- the cumulant of the last node moving right must be subtracted from the cumulant of the k-th node
		set_ch_cum2(rec,k,get_ch_cum2(ndk,numleft));	-- update the second cum of the moved node
											-- and now we must move the corresponding cums
		cumv := vect_of_cums(ndk);				-- first vector of cums
		cumvp1 := vect_of_cums(ndkp1);			-- second vector of cums
		set_vect_of_cums(ndkp1,cumv(5 * numleft + 1..5 * nchk) + vect_of_cums(ndkp1));		-- move the cums
			-- since the number of cums of ndk is defined by its number of children, we need not edit that list