Views
DB_btree_wdix.stl
by
Paul McJones
—
last modified
2021-02-25 11:28
- History
-
Action Performed by Date and Time Comment Publish Paul McJones 2021-02-25 11:28 No comments.
"B-tree variant for word index, with 2 cumulants (no. of occs, last wd. in parent), refcounts."
-- file B_tree_for_bigs_wdix.stl
package B_tree_for_wdix; -- B-trees, realized as objects
-- ***************************************************************************************************************
-- ****** B-tree variant for word index, with 2 cumulants (no. of occs, last wd. in parent), refcounts ******
-- ***************************************************************************************************************
-- Refcounts can be ignored except in the wix_set_comp, wix_insert, split_node, share_right, join_left,
-- and join_left routines.
-- This B-tree represents the words-to-number-of-occurences index
-- Given an indexable word associated with a database, this tree indexes to the start, in the word-occurences
-- bigstring, of the corresponding occurences list. It stores (as cumuants) the cumulated length
-- of all such occurences lists, and the first 10 or 11 characters of each indexed word.
-- To deal with indexed words larger than this, we use a special 'self-defining' encoding, keep the first part
-- of each excessivley long word in the tree, and store the remainder of the word (which can be of any
-- length up to 32K characters) ast the start of the corresponding occurences section.
-- (For compatibility with existing code, a zone whose length is a multiple of 4 bytes is used for this.)
-- The self-defining encoding used is as follows. Words of up to 128 bytes are padded with zero bytes
-- to at least 11 bytes (but if longer than 11 bytes, always to one less than a multiple of 4 bytes),
-- and are preceded by an extra byte giving the string length. Words of more than 128 bytes
-- (but of no more than 32K bytes) are preceded by two bytes giving their string length (the first byte being
-- incremented by 128 to distinguish it from the preceding case), and padded with zero bytes to 2 less than
-- a multiple of 4 bytes.
-- This index stores words in the alphabetical order of these encoded forms. So to search for a word W
-- of up to 11 bytes, we use its encoded form to search as usual; when the first larger word FLW
-- has been found we know immediately from the first 12 bytes of FLW whether it equals W. We search
-- for words of more than 11 words in the same way. However, when the first potentially larger word is found
-- (examining the lead 12 bytes only) we may have to examine its additional bytes (stored at the start
-- of the corresponding occurences section) to determine whether this is indeed larger, and if
-- not my need to conduct a binary search among words with the same 12 lead bytes to find the first largest.
-- However, such coincidences of word length and first 10 actual bytes should be infrequent.
-- Obviously the treatment of such longer words is substantially less efficient than that of words of up to
-- 11 bytes in length. To avoid searches on longer words, the database fructions that transform of records
-- into lists of words should be designed to compress words where possible, e.g. by compressing integers,
-- and by using auxiliary encoding dictionaries to replace words by word numbers.
const wix_code_pts := {"wix_last1", "wix_last2", "wix_nlast2", "wix_nlast1", "wix_nc1", "wix_nc2",
"wix_set_copy", "wix_last1", "wix_last2", "wix_nc1",
"wix_comp1", "wix_ndel_nc", "wix_ndel_comp",
"wix_ndel_comp_last", "wix_ndel_comp_nlast", "wix_del_nc", "wix_del_nc_have",
"wix_del_comp", "wix_del_comp_last", "wix_del_comp_nlast", "wix_del_comp_enough",
"wix_del_comp_canpull", "wix_del_comp_canjoin", "wix_del_comp_npop", "wix_del_pop",
"wix_in_copy", "wix_in_nend2", "wix_in_nend1", "wix_in_nend3", "wix_in_end",
"wix_in_end_nc", "wix_in_end_nc_nos", "wix_in_end_nc_split", "wix_in_end_comp",
"wix_in_end_comp_nos", "wix_in_end_comp_split", "wix_in_end_comp_nosthis",
"wix_in_end_comp_splitthis", "wix_in_nend_nc", "wix_in_nend_nc_nos",
"wix_in_nend_nc_split", "wix_in_nend_comp", "wix_in_nend_comp_nos",
"wix_in_nend_comp_split", "wix_in_nend_comp_nosthis",
"wix_in_nend_comp_splitthis", "wix_halves_nc", "wix_halves_comp",
"wix_pull_left", "wix_pull_right", "wix_share_nc", "wix_share_comp",
"wix_share_copy", "wix_share_copy2", "wix_share_move_left", "wix_share_move_right",
"wix_join_left_comp", "wix_join_left_nc", "wix_join_left_copy", "wix_join_right_comp",
"wix_join_right_nc", "wix_join_right_copy"}; -- code points to be traversed
var debug_val := 0;
var prior_debug_c := 0,debug_c := 0; -- global variables for debugging
var tree_level; -- tree level for recursive insertion process
procedure wix_comp(rec,j); -- fetch of component containing cumulant j
procedure wix_comp2(rec,j); -- fetch of component containing second cumulant j
procedure wix_comp_cum(rec,x); -- fetch of component containing cumulant x, with cumulants
procedure wix_comp_cum2(rec,x); -- fetch of component containing second cumulant x, with cumulants
procedure wix_set_comp(rw rec,w,x); -- assignment of the first component whose cumulant is at least w; w must be in range
procedure wix_set_comp2(rw rec,w,x); -- assignment of the first component whose second cumulant is at least w
procedure wix_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM
procedure wix_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j; or at end (j = OM)
procedure wix_get_cum(rec); -- get the cum value (string length) for this node
procedure wix_get_cum2(rec); -- get the record id string cum value of the final child of this node
procedure encode_wd(wd); -- puts word into length-prefixed, self-defining form described above
procedure decode_wd(stg); -- recovers word from length-prefixed, self-defining form described above
procedure wix_make_from_tuple(t); -- make B-tree representation from tuple (DEBUGGING ONLY)
procedure wix_dump(rec); -- get tuple from B-tree representation (DEBUGGING ONLY)
procedure num_leaves(a_tree); -- (DEBUGGING ONLY)
procedure wix_check_tree_structure(tree); -- recursive check of tree structure (DEBUGGING ONLY)
end B_tree_for_wdix;
package body B_tree_for_wdix; -- B-trees, realized as objects
use setldb,byteutil,disk_records_pak,db_records,string_utility_pak;
const zeroes11 := "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
const zeroes4 := "\x00\x00\x00\x00";
-- there are two cumulants, the total length of the records and the recid
procedure xwix_create(); -- creation routine
rec := dr_new_rec(); -- create a new record (at this point, the record is loaded
-- dirty, and holds all zeroes; so it is not compound)
set_type(rec,wd_index_node_ncr); -- set to non-compound tree
return rec;
end xwix_create;
procedure wix_get_cum(rec); -- get the integer cum value of the final child of this node
if int_of_4(rec) = 0 then print("bad rec in wix_get_cum: ",debug_val); stop; end if;
if (nc := num_childr(rec)) = 0 then return 0; end if;
return get_ch_cum(rec,nc);
end wix_get_cum;
procedure get_ch_cum(rec,j); -- get the integer cum value for the j-th child of this node
the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if;
return int_of_5(dr_load(rec)(the_start + (j - 1) * 5..the_start + j * 5 - 1));
end get_ch_cum;
procedure set_ch_cum(rec,j,cum_int); -- set the integer cum value for the j-th child of this node
the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if;
stg := dr_load(rec); -- make sure that this record is loaded
stg(the_start + (j - 1) * 5..the_start + j * 5 - 1) := stg_of_5(cum_int);
dr_setrecbuf(rec,stg);dr_dirtify(rec);
end set_ch_cum;
procedure wix_get_cum2(rec); -- get the record id string cum value of the final child of this node
if (nc := num_childr(rec)) = 0 then return OM; end if;
return get_ch_cum2(rec,nc);
end wix_get_cum2;
procedure get_ch_cum2(rec,j); -- get the record id string cum value for the j-th child of this node
the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if;
return dr_load(rec)(the_start + (j - 1) * 12..the_start + j * 12 - 1);
end get_ch_cum2;
procedure set_ch_cum2(rec,j,enc_wd); -- set the record id string cum value for the j-th child of this node
the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if;
stg := dr_load(rec); -- make sure that this record is loaded
stg(the_start + (j - 1) * 12..the_start + j * 12 - 1) := enc_wd + (12 - #enc_wd) * "\x00";
dr_setrecbuf(rec,stg);dr_dirtify(rec);
end set_ch_cum2;
procedure vect_of_children(rec); -- gets vector of children, as a string of 4-byte record numbers
if not dr_is_compound(rec) then return ""; end if;
nch := num_childr(rec); -- number of children
stg:=dr_load(rec);
return stg(wix_ch_start..wix_ch_start - 1 + nch * 4);
end vect_of_children;
procedure set_vect_of_children(rec,stg); -- sets vector of children, from a string of 4-byte record numbers
--print("set_vect_of_children: ",hexify(rec)," ",hexify(stg));
if not dr_is_compound(rec) then print("Illegal effort to set vector of children for non_compound node: debug_val = ",debug_val); stop; end if;
set_num_childr(rec,(nstg := #stg)/4); -- set the number of children (also loads)
missing := wix_cum_start - wix_ch_start - #stg;
--if missing < 0 then print("missing,wix_cum_start,wix_ch_start,#stg: ",missing," ",#stg," ",hexify(stg)); stop; end if;
stg2:=dr_load(rec);
stg2(wix_ch_start..wix_cum_start - 1) := (stg + missing * "\x00");
dr_setrecbuf(rec,stg2);dr_dirtify(rec);
end set_vect_of_children;
procedure vect_of_cums(rec); -- gets vector of cums, as a string of 5-byte fields
the_start := if dr_is_compound(rec) then wix_cum_start else wixnc_cum_start end if;
nch := num_childr(rec); -- number of children
return dr_load(rec)(the_start..the_start - 1 + nch * 5);
end vect_of_cums;
procedure set_vect_of_cums(rec,stg); -- sets vector of cumulants, from a string of 5-byte record numbers
the_start := if (ic := dr_is_compound(rec)) then wix_cum_start else wixnc_cum_start end if;
the_end := if ic then wix_cum2_start else wixnc_cum2_start end if;
missing := the_end - the_start - #stg;
stg2:=dr_load(rec); -- force load
stg2(the_start..the_end - 1) := (stg + missing * "\x00");
dr_setrecbuf(rec,stg2);dr_dirtify(rec);
end set_vect_of_cums;
procedure vect_of_cums2(rec); -- gets vector of cums, as a string of 4-byte fields
the_start := if dr_is_compound(rec) then wix_cum2_start else wixnc_cum2_start end if;
nch := num_childr(rec); -- number of children
return dr_load(rec)(the_start..the_start - 1 + nch * 12);
end vect_of_cums2;
procedure set_vect_of_cums2(rec,stg); -- sets vector of second cums, from a string of 12-byte words
the_start := if (ic := dr_is_compound(rec)) then wix_cum2_start else wixnc_cum2_start end if;
the_end := if ic then wix_cum2_end else wixnc_cum2_end end if;
missing := the_end - the_start + 1 - #stg;
if missing < 0 then print("Excessively long input stg to set_vect_of_cums2: ",the_start," ",the_end," ",#stg," ",stg); stop; end if;
stg2:=dr_load(rec); -- force load
stg2(the_start..the_end) := (stg + missing * "\x00");
dr_setrecbuf(rec,stg2);dr_dirtify(rec);
end set_vect_of_cums2;
procedure num_leaves(a_tree); -- debugging only
return if not dr_is_compound(a_tree) then num_childr(a_tree)
else 0 +/[num_leaves(voc(a_tree,j)): j in [1..num_childr(a_tree)]] end if;
end num_leaves;
procedure voc(rec,j); -- n'th member of vector of children
stg := dr_load(rec); -- load this string
if not dr_is_compound(rec) then print("Illegal effort to access vector of children for non_compound node"); stop; end if;
cjstrt := (j - 1) * 4 + wix_ch_start;
return stg(cjstrt..cjstrt + 3); -- return child rec
end voc;
procedure set_voc(rec,j,chrec); -- set n'th member of vector of children
cjstrt := (j - 1) * 4 + wix_ch_start;
stg:=dr_load(rec); -- force load; then set the first character of the record
if not dr_is_compound(rec) then print("Illegal effort to access element of vector of children for non_compound node"); stop; end if;
stg(cjstrt..cjstrt + 3) := chrec;
dr_setrecbuf(rec,stg);dr_dirtify(rec);
end set_voc;
procedure wix_make_from_tuple(t); -- make nested representation from tuple
--print("make_from_tuple: ",t);
if #t <= wixnc_hi_lim then -- put into just one section
stg := dr_load(rec := wix_create());
[icums,rids] := convert_tup_sect(t); -- convert to cumulant string form
set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids);
set_num_childr(rec,#t); -- set the nominal number of children
return rec;
end if;
last_used := 0; -- the last component of t already incorporated into a tuple
tx := [ ];
for j in [1,wixnc_hi_lim..#t - 2 * (wixnc_hi_lim - 1)] loop
-- put the data into a list of non_compound B_tree nodes, saving enough for at least
-- one full final section
rec := wix_create();
[icums,rids] := convert_tup_sect(t(j..last_used := j + wixnc_hi_lim - 2));
-- convert one section of input to cumulant string form
--print("convert_tup_sect B: ",wixnc_hi_lim," ",#rids);
set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids);
set_num_childr(rec,wixnc_hi_lim - 1); -- set the nominal number of children
tx with:= rec;
end loop;
unusedo2 := (#t - last_used)/2;
t1 := t(last_used + 1..last_used + unusedo2); t2 := t(last_used + unusedo2 + 1..);
rec := wix_create(); [icums,rids] := convert_tup_sect(t1);
set_num_childr(rec,unusedo2); -- set the nominal number of children
set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); tx with:= rec;
rec := wix_create(); [icums,rids] := convert_tup_sect(t2);
set_vect_of_cums(rec,icums); set_vect_of_cums2(rec,rids); tx with:= rec;
set_num_childr(rec,#rids/12); -- set the nominal number of children
t := tx; -- use the converted t
compound_now := false; -- otherwise we must chop into sections; bottom level sections are not compound
while (nt := #t) > wix_hi_lim loop
-- build tree progressively from bottom, continuing as long as list of nodes obtained is too long
sections := [t(j..j + wix_hi_lim - 2): j in [1,wix_hi_lim..nt - wix_hi_lim + 2]]; -- these are lists of nodes
if (nt mod (wix_hi_lim - 1)) /= 0 then -- may have to rearrange last 2 pieces
if #(piece := t((nrv := #sections) * (wix_hi_lim - 1) + 1..)) >= wix_low_lim then
sections with:= piece;
else -- otherwise rearrange last 2 pieces
ntp := #(twix_pieces := sections(nrv) + piece);
sections(nrv) := twix_pieces(1..ntpo2 := ntp/2);
sections with:= twix_pieces(ntpo2 + 1..);
end if;
end if;
t := [ ]; -- recalculate the vector of items to work with
for sec = sections(j) loop -- convert each section into a tree node
sec_node := dr_new_rec(); -- make a tree top for this tuple (i.e. this section)
set_type(sec_node,wd_index_node_record);
-- set the type of the record
sec_stg := "" +/ sec; -- convert the list of children to a string
set_vect_of_children(sec_node,sec_stg); -- set the list of children
cumulate(sec_node); -- initialize the cumulants
t with:= sec_node; -- assemble new vector of nodes
--print("sec_node: ",sec_node," ",hexify(dr_load(sec_node)));
end loop;
compound_now := true; -- after first iteration, sections are not compound
end loop;
rec := dr_new_rec(); -- create a new, top level record
t_stg := "" +/t; -- convert the list of children to a string
set_type(rec,wd_index_node_record); -- at this point must be compound
set_vect_of_children(rec,t_stg); -- these become the top-level children
cumulate(rec); -- initialize the final cumulants
return rec;
end wix_make_from_tuple;
procedure convert_tup_sect(t); -- converts a tuple of pairs [wd,occslen] into a pair of strings
cum := 0; -- integer cumulant, developed below
wds_stg := "" +/ [encode_wd(wd): [wd,occslen] in t];
len_stg := "" +/ [stg_of_5(cum): [wd,occslen] in t | (cum := cum + occslen) >= 0];
--print("convert_tup_sect: ",#wds_stg," ",wds_stg," ",t," ",[#wd: [wd,-] in t]);
return [len_stg,wds_stg];
end convert_tup_sect;
procedure wix_dump(rec); -- get tuple from nested representation
-- is compound representation or direct, depending on whether 'is_compound' flag is set
-- note that this shows the vector of components, without their cumulants
-- we ignore the cumulants in the tree nodes
var indent := 0;
return wix_dump_in(rec); -- call inner workhorse
procedure wix_dump_in(rec); -- inner workhorse
--print("wix_dump: ",hexify(dr_load(rec)));
if int_of_4(rec) = 0 then return ["ZERO_NODE"]; end if;
if dr_is_compound(rec) then -- compound case
indent +:= 1; nc := num_childr(rec);
stg := (["\n" + (indent * " ") + "("] +/ [wix_dump_in(voc(rec,j)): j in [1..nc]])
+ ["\n" + (indent * " ")+ "[" + str(nc) + "]" + decode_wd(wix_get_cum2(rec)) + ":" + str(wix_get_cum(rec)) + ")"];
indent -:= 1;
return stg;
end if; -- done with compound case
if (ncr := num_childr(rec)) = 0 then return ["(,:0)"]; end if;
t := []; -- otherwise we must analyze the non-compound case
prev := 0;
for j in [1..ncr] loop
int := (new := get_ch_cum(rec,j)) - prev; prev := new;
decwd := decode_wd(get_ch_cum2(rec,j));
t with := decwd + ":" + str(int);
end loop;
return ["\n" + (indent * " ") + "("] + t + [decode_wd(wix_get_cum2(rec)) + ":" + str(wix_get_cum(rec)) + ")"];
end wix_dump_in;
end wix_dump;
procedure encode_wd(wd); -- puts word into length-prefixed, self-defining form described above
--print("encode_wd: ",#(char(nc := #wd) + wd + zeroes11(nc + 1..))," ",#zeroes11," ",#wd);
if (nc := #wd) < 128 then return char(nc) + wd + -- encoding of string up to 128 bytes in length
if nc <= 11 then zeroes11(nc + 1..)
elseif (ncmod := (nc + 1) mod 4) = 0 then "" else zeroes4(ncmod + 1..) end if;
end if;
len_chars := char(nc/256 + 128) + char(nc mod 256);
return len_chars + wd + if (ncmod := (nc + 2) mod 4) = 0 then "" else zeroes4(ncmod + 1..) end if;
end encode_wd;
procedure decode_wd(stg); -- recovers word from length-prefixed, self-defining form described above
-- the case of strings of more than 11 but no more than 128 bytes is represented twice in this
-- preliminary version to allow the 'rest of the string' to be fetched if necessary
if (wdlen := abs(stg(1))) <= 11 then
return stg(2..wdlen + 1);
elseif wdlen < 128 then
return stg(2..wdlen + 1); -- but first get the rest of the string
else
wdlen := 256 * (wdlen - 128) + abs(stg(2));
return stg(3..wdlen + 2); -- but first get the rest of the string
end if;
end decode_wd;
procedure int_of_12(stg); -- convert a 12-byte string to an integer (for comparisons only)
sum := 0; ns := #stg;
for j in [1..12] loop sum := 256 * sum + if j > ns then 0 else abs(stg(j)) end if; end loop;
return sum;
end int_of_12;
procedure wix_comp(rec,j); -- fetch of component containing cumulant j
return if (ccix := comp_cum_ix(rec,1,j)) = OM then OM else ccix(1..2) end if;
end wix_comp;
procedure wix_comp2(rec,j); -- fetch of component containing second cumulant j
return if (ccix := comp_cum_ix(rec,2,j)) = OM then OM else ccix(1..2) end if;
end wix_comp2;
procedure wix_comp_cum(rec,x); -- fetch of x-th component, with cumulant
return comp_cum_ix(rec,1,x);
end wix_comp_cum;
procedure wix_comp_cum2(rec,x); -- fetch of component containing second cumulant x, with cumulant
return comp_cum_ix(rec,2,if x = OM then OM else encode_wd(x) end if);
end wix_comp_cum2;
procedure comp_cum_ix(rec,srch_on,x); -- fetch of x-th component by search on specified cumulant
-- returns value in the form [rid,len,cum_len]
-- component should be found by binary search
-- search for the first index component with cumulant past
-- the specified x; Return OM if there is none such.
-- NOTE: this should be by binary search
-- find the first node for which a cumulant >= j
if x = OM then -- want last node
j := num_childr(rec);
if srch_on = 1 then -- Note: only needed in non-compound case
the_cum := get_ch_cum(rec,j);
pass("wix_last1");
else
pass("wix_last2");
the_cum := get_ch_cum2(rec,j);
end if;
else
if srch_on = 2 then
iofx := int_of_12(x);
pass("wix_nlast2");
else
pass("wix_nlast1");
end if;
if not (exists j in [1..num_childr(rec)] |
if srch_on = 1 then (the_cum := get_ch_cum(rec,j)) >= x else
int_of_12(the_cum := get_ch_cum2(rec,j)) >= iofx end if) then
--print("******** return OM: ",x," ",nc := num_childr(rec)," ",wix_dump(rec)," ",if nc > 0 then get_ch_cum(rec,nc) else "NONE" end if);
return OM; -- desired node not found
end if;
end if;
prev_cum := if j = 1 then 0 else get_ch_cum(rec,j - 1) end if;
if not dr_is_compound(rec) then -- if node is not compound we have what we want
if srch_on = 1 then -- the integer cum was calculated above
pass("wix_nc1");
return [decode_wd(get_ch_cum2(rec,j)),the_cum - prev_cum,the_cum];
else -- the rec id was calculated above
int_cum := get_ch_cum(rec,j);
pass("wix_nc2");
return [decode_wd(the_cum),int_cum - prev_cum,int_cum];
end if;
end if; -- otherwise we deal with the compound case
res := comp_cum_ix(voc(rec,j),srch_on,if x = OM or srch_on = 2 then x else x - prev_cum end if);
-- continue the search recursively
[wd,noccs,lencum] := res; -- decode the result returned recursively
return [wd] + [noccs,prev_cum + lencum];
end comp_cum_ix;
procedure wix_set_comp(rw rec,w,x);-- assignment of the first component whose cumulant is at least w
if x /= OM then x := [encode_wd(x(1)),x(2)]; end if;
set_comp_ix(rec,1,w,x);
end wix_set_comp;
procedure wix_set_comp2(rw rec,w,x);-- assignment of the first component whose second cumulant is at least w
if x /= OM then x := [encode_wd(x(1)),x(2)]; end if;
if w = OM then set_comp_ix(rec,1,w,x); else set_comp_ix(rec,2,encode_wd(w),x); end if;
end wix_set_comp2;
procedure set_comp_ix(rw rec,srch_on,w,x);
-- assignment of the first component whose specified cumulant is at least w; w must be in range
-- we must first copy rec if its refcount is greater than 1, and transfer one reference
-- from its old to its copied version. The leaves x are supplied as pairs [encoded_stg,number_of_occs]
if refcount(int_of_4(rec)) > 1 then -- must copy
pass("wix_set_copy");
stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r);
increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original
end if;
ic := dr_is_compound(rec);
if srch_on = 2 and w /= OM then wint := int_of_12(w); end if;
ncr := num_childr(rec);
if w = OM then -- make change in last node
srch_on := 1; w := get_ch_cum(rec,ix := ncr); -- use search on first componet to find last child
if ix = 0 then print("Change at end of empty tree is not allowed."); stop; end if;
elseif not (exists ix in [1..ncr] |
if srch_on = 1 then get_ch_cum(rec,ix) >= w else int_of_12(get_ch_cum2(rec,ix)) >= wint end if) then
print("WIX - Search index " + str(w) + " out of range in assignment to cumulating vector " + hexify(rec)
+ " " + str(ncr) + " " + str(ic) + " " + hexify(dr_load(rec))); stop;
end if;
-- get the local cumulant of the child
if ic then
old_cum := wix_get_cum(voc(rec,ix));
pass("wix_nc1");
else
pass("wix_comp1");
old_cum := get_ch_cum(rec,ix) - if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if;
end if;
prev_cum := if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if;
if x /= OM then -- we are not dealing with a deletion
if not ic then -- non-compound case; we must add the change in the leaf cumulant
-- to the cumulant of all following nodes.
cum_change := x(2) - old_cum;
update_cums(rec,ix,cum_change); -- update the cums,starting with the given child
set_ch_cum2(rec,ix,x(1));
-- modify the cum of the changed component. Note that the following
-- second cumulants need not be changed. However, if ix references the
-- final child of rec, a cum2 entry in the parent of rec may need to be changed below.
pass("wix_ndel_nc");
return; -- done with this case
end if; -- otherwise we have the compound case
pass("wix_ndel_comp");
the_child := voc(rec,ix); -- the next operation may copy the child
set_comp_ix(the_child,srch_on,if srch_on = 1 then w - prev_cum else w end if,x);
-- make the change in the child
cum_change := wix_get_cum(the_child) - old_cum;
set_voc(rec,ix,the_child); -- put the revised, possibly copied child back into position
-- the 'update_cums' procedure which we call now must then start with the (properly set)
-- cumulant of the preceding node, and then left_add the change in the the cumulant
-- to the cumulant of this child to all the subsequent children. This assumes that
-- (new_d - old_d) + c + old_d + e = c + new_d + e
-- for all cumulant values. This is obviously true for values using associative-commutative
-- cumulator functions with an inverse, and also in the (string) case where a + b = b.
update_cums(rec,ix,cum_change); -- update the cums,starting with the given child
if ix = ncr then
set_ch_cum2(rec,ix,wix_get_cum2(the_child));
pass("wix_ndel_comp_last");
else
pass("wix_ndel_comp_nlast");
end if;
-- the final cumulant of the_child might have been changed by
-- the preceding set_comp_ix(the_child,..) operation; see preceding comment.
--print("compound self,ix: ",wix_get_cum()," ",ix);
return; -- done with the non-deletion case
end if; -- otherwise we are dealing with a deletion
if not dr_is_compound(rec) then -- non-compound case
pass("wix_del_nc");
if num_childr(rec) > 0 then -- need not delete cumulant if no remaining children
pass("wix_del_nc_have");
stg := vect_of_cums(rec); -- get the vector of cums
stg((ix - 1) * 5 + 1..ix * 5) := ""; -- drop one element
set_vect_of_cums(rec,stg); -- put back into place
stg := vect_of_cums2(rec); -- get the second vector of cums
stg((ix - 1) * 12 + 1..ix * 12) := ""; -- drop one element
set_vect_of_cums2(rec,stg); -- put back into place
end if;
-- delete the child from the string of descendants
set_num_childr(rec,num_childr(rec) - 1); -- count down the number of children
-- delete the cumulant of the ix-th node. note that this has already been loaded
cum_change := -old_cum;
update_cums(rec,ix,cum_change); -- update the cums,starting with the appropriate child
return; -- done with this case
end if; -- otherwise we are dealing with a deletion in a compound case
the_child := voc(rec,ix); -- get the child
old_ch_leafsum := wix_get_cum(old_child := the_child);
set_comp_ix(the_child,srch_on,if srch_on = 1 then w - prev_cum else w end if,OM); -- make the deletion in the child
new_ch_leafsum := wix_get_cum(the_child);
set_voc(rec,ix,the_child := the_child); -- re-insert the possibly modified child
cum_change := new_ch_leafsum - old_ch_leafsum;
update_cums(rec,ix,cum_change); -- update the cums,starting with the given child
-- since the last child may have been deleted, we also need to update the second cum
pass("wix_del_comp");
if ix = ncr then
pass("wix_del_comp_last");
set_ch_cum2(rec,ix,wix_get_cum2(the_child));
else
pass("wix_del_comp_nlast");
end if;
-- now it is possible that the child has lost enough children to have fallen below the required wix_low_lim
-- if this has happened, we attempt to share or join children with one of the adjacent siblings
if num_childr(the_child) >= wix_low_lim then -- otherwise try to join or share
pass("wix_del_comp_enough");
return;
end if;
if pull_from_left(rec,ix) or pull_from_right(rec,ix) then
pass("wix_del_comp_canpull");
return;
end if;
if join_with_left(rec,ix) or join_with_right(rec,ix) then
pass("wix_del_comp_canjoin");
null;
end if;
-- In the 'join' case, either the left or the right join must work,
-- since in this compound case we must have at least one sibling.
-- but we must check to see if the node being processed has fallen to
-- just one child, and if it has, replace it by its single child.
if num_childr(rec) > 1 then
pass("wix_del_comp_npop");
return;
end if;
pass("wix_del_pop");
ch_rec := dr_load(the_ch := voc(rec,1)); dr_load(rec); dr_setrecbuf(rec,ch_rec); dr_dirtify(rec);
if dr_is_compound(the_ch) then set_vect_of_children(the_ch,""); end if; -- the children have all moved
incref(the_ch,-1); -- child data is inherited from single child, which loses a reference
end set_comp_ix;
procedure update_cums(rec,ix,cum_change); -- update the cums of this tree,starting with the given child
-- add cum_change to all subsequent children
for j in [ix..nvc := num_childr(rec)] loop
set_ch_cum(rec,j,cum_change + get_ch_cum(rec,j));
end loop;
end update_cums;
procedure cumulate(rec);
-- initalize the cumulants of a node whose children are either leaves or already initialized
if not dr_is_compound(rec) then return; end if; -- non-compound nodes are already cumulated
the_cum := 0;
for j in [1..num_childr(rec)] loop
nd := voc(rec,j); -- get the j-th child
nd_cum := wix_get_cum(nd);
-- cumulant of the final child of the subnode, or occurence string length
the_cum := the_cum + nd_cum;
set_ch_cum(rec,j,the_cum); -- update the child's cumulant value
nd_cum2 := wix_get_cum2(nd);
-- cumulant2 of the final child of the subnode, or last element of occurence string
set_ch_cum2(rec,j,nd_cum2); -- update the child's cumulant value
end loop;
end cumulate;
procedure wix_insert(rw rec,j,x); -- insertion before j-th component; or at the end if j = OM
insert(rec,1,j,x);
end wix_insert;
procedure wix_insert2(rw rec,j,x); -- insertion before component with second cumulant at least j;
insert(rec,2,j,x); -- or at the end if j = OM
end wix_insert2;
procedure insert(rw rec,srch_on,j,x); -- insertion before j-th component with at least specified cumulant;
tree_level := 0; -- note that we are at the top of the tree
x := [encode_wd(x(1)),x(2)]; if srch_on = 2 and j /= OM then j := encode_wd(j); end if;
insert_in(rec,srch_on,j,x); -- call the inner routine
end insert;
procedure insert_in(rw rec,srch_on,j,x); -- inner recursion of insertion before j-th component
-- with at least specified cumulant; or at the end if j = OM
-- component should be found by binary search
--print("insert_in: ",str(wix_dump(rec)));
if refcount(int_of_4(rec)) > 1 then -- must copy
pass("wix_in_copy");
stg := dr_load(rec); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r);
increfs(new_r,1); incref(rec,-1); rec := new_r; -- substitute copy for original
end if;
result := OM; -- in case desired element not found
ic := dr_is_compound(rec); ncr := num_childr(rec);
the_cum := wix_get_cum(rec); -- get final cumulant of this tree
[x_cum2,x_cum] := x; -- cumulant values of the leaf x
if j /= OM then -- look for target node of insertion, if any
if srch_on = 2 then
iofj := int_of_12(j);
pass("wix_in_nend2");
else
pass("wix_in_nend1");
end if;
if exists ix in [1..ncr] |
if srch_on = 1 then (cum := get_ch_cum(rec,ix)) >= j else
int_of_12(cum := get_ch_cum2(rec,ix)) >= iofj end if then
result := [ix,if ix = 1 then 0 else get_ch_cum(rec,ix - 1) end if,cum];
if ic then nd := voc(rec,ix); end if;
pass("wix_in_nend3");
else
pass("wix_in_end");
result := OM;
end if;
end if;
if result = OM then -- we have insertion at the very end
if not ic then -- simply append to vector
pass("wix_in_end_nc");
set_num_childr(rec,nvc := ncr + 1);
-- add the cumulant of x to the present cumulant of this tree
set_ch_cum(rec,nvc,the_cum := the_cum + x_cum);
set_ch_cum2(rec,nvc,x_cum2);
--print("num_childr(rec): ",nvc," ",num_childr(rec)," ",ncr," ",wix_hi_lim + 1);
if tree_level > 0 or nvc <= wixnc_hi_lim then -- no need to split
pass("wix_in_end_nc_nos");
return;
end if;
-- otherwise we must split, and becomes compound
pass("wix_in_end_nc_split");
-- note that in this case we are at the very top of the tree
th := two_halves(rec); -- get the two halves
set_is_compound(rec,true); -- note that it is indeed compound
set_vect_of_children(rec,th); -- split into 2 non-compound subtrees
set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum);
set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2)));
return; -- done with this case
end if; -- otherwise we have insertion at the very end of a compound vector
pass("wix_in_end_comp");
last_child:= voc(rec,nvc := num_childr(rec)); -- get the last child
tree_level +:= 1; -- down a level
insert_in(last_child,srch_on,OM,x); -- insert at the end of this last child
tree_level -:= 1; -- back up a level
set_voc(rec,ncr,last_child); -- insert the possibly modified child back into vect_of_children
-- add the cumulant of x to the present cumulant of this tree
set_ch_cum(rec,ncr,the_cum := the_cum + x_cum);
set_ch_cum2(rec,ncr,x_cum2);
if (if dr_is_compound(last_child) then wix_hi_lim else wixnc_hi_lim end if) >= num_childr(last_child) then
pass("wix_in_end_comp_nos");
return; -- no need to split the child
end if;
-- otherwise we must split the last child
pass("wix_in_end_comp_split");
split_node(rec,nvc); -- split the nvc-th node into two. we insert an empty node to the right
-- of node nvc, and then move half the children of the nvc-th node into the new node
if tree_level > 0 or (nvc := num_childr(rec)) <= wix_hi_lim then -- no need to split this node
pass("wix_in_end_comp_nosthis");
return; -- done with this case
end if;
-- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
pass("wix_in_end_comp_splitthis");
--print("set_vect_of_children: insertion A: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if);
set_vect_of_children(rec,two_halves(rec));
set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum);
set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2)));
return; -- done with this case
end if; -- end of case of insertion at the very end
-- in the remaining cases we have an insertion before one of our nodes
[ix,prev_cum,cum] := result; -- decode the result returned, geting the insertion result and the preceding cumulant
if not ic then -- insertion at appropriate position in non-compound vector
pass("wix_in_nend_nc");
the_cums := vect_of_cums(rec); the_cums2 := vect_of_cums2(rec);
the_cums(5 * ix - 4..5 * ix - 5) :=
if ix = 1 then "\x00\x00\x00\x00\x00" else the_cums(5 * ix - 9..5 * ix - 5) end if;
the_cums2(12 * ix - 11..12 * ix - 12) := x_cum2 + (12 - #x_cum2) * "\x00";
--print("#the_cums: ",#the_cums); if #the_cums > 70 then stop; end if;
set_vect_of_cums(rec,the_cums); -- make insertion into list of cums
set_vect_of_cums2(rec,the_cums2); -- make insertion into list of cums
set_num_childr(rec,nvc := ncr + 1); -- there is one more child
update_cums(rec,ix,x_cum); -- adjust the given and following cumulants
--print("inserted at position: ",ix," ",x," ",hexify(dr_load(rec)));
if tree_level > 0 or nvc <= wixnc_hi_lim then -- no need to split
pass("wix_in_nend_nc_nos");
return; -- done with this case
end if; -- otherwise we must split, and becomes compound
pass("wix_in_nend_nc_split");
-- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
th := two_halves(rec); -- get the two halves
set_is_compound(rec,true); -- note that it is indeed compound
--print("set_vect_of_children: insertion B: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if);
set_vect_of_children(rec,th); -- split into 2 non-compound subtrees
set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum);
set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2)));
--print("just became compound: ",dr_is_compound(rec)," ",wix_dump(rec)); stop;
return; -- done with this case
end if; -- otherwise we deal with the compound case
pass("wix_in_nend_comp");
c := voc(rec,ix); -- get the child into which the insertion will now be made
-- and get the position in this child at which the insertion will be made
tree_level +:= 1; -- down a level
insert_in(c,srch_on,if srch_on = 1 then j - prev_cum else j end if,x); -- insert x into this child
tree_level -:= 1; -- back up a level
set_voc(rec,ix,c); -- put the possibly revised child back into position
update_cums(rec,ix,x_cum); -- adjust the given and following cumulants
set_ch_cum2(rec,ix,wix_get_cum2(c)); -- adjust the second cumulant on record for the child
-- which may have changed if the insertion was at the end of the child
if (if dr_is_compound(c) then wix_hi_lim else wixnc_hi_lim end if) >= num_childr(c) then
pass("wix_in_nend_comp_nos");
return; -- no need to split the child
end if;
pass("wix_in_nend_comp_split");
-- otherwise we must split the child
split_node(rec,ix); -- split the child into two. we insert an empty node to the right
-- of node ix, and then move half the nodes into it
if tree_level > 0 or (nvc := num_childr(rec)) <= wix_hi_lim then -- no need to split this node
pass("wix_in_nend_comp_nosthis");
return; -- done with this case
end if; -- otherwise we are at the very top of the tree, so we must split it into two parts and create a new tree level
--print("set_vect_of_children: insertion C: ",dr_is_compound(rec), if not dr_is_compound(rec) then str(wix_dump(rec)) else "" end if);
pass("wix_in_nend_comp_splitthis");
set_vect_of_children(rec,two_halves(rec));
set_ch_cum(rec,1,wix_get_cum(voc(rec,1))); set_ch_cum(rec,2,the_cum + x_cum);
set_ch_cum2(rec,1,wix_get_cum2(voc(rec,1))); set_ch_cum2(rec,2,wix_get_cum2(voc(rec,2)));
end insert_in;
procedure two_halves(rec); -- split this tree into two halves
-- the two nodes created share the children of the original tree, so that the refcounts of
-- these children need no adjustment. The nodes created each have a refcount of 1.
-- Note that this routine is only called if rec has no parent.
the_type := if (ic := dr_is_compound(rec)) then wd_index_node_record else wd_index_node_ncr end if;
u1 := dr_new_rec(); u2 := dr_new_rec(); -- make and initialize two subtrees
set_type(u1,the_type); set_type(u2,the_type); -- the halves are compound iff this tree is compound
-- we must subtract the cumulant of the last retained child from all the children that move
-- to get the cumulant of the left node
hnvc := (nvc := num_childr(rec))/2; cum_last_retained := get_ch_cum(rec,hnvc);
for j in [hnvc + 1..nvc] loop -- subtract this from the cumulant of each child that will move
set_ch_cum(rec,j,get_ch_cum(rec,j) - cum_last_retained);
end loop;
vocums := vect_of_cums(rec); -- get the vector of cums
vocums2 := vect_of_cums2(rec); -- get the vector of cums
if ic then
pass("wix_halves_nc");
set_vect_of_children(u1,(voch := vect_of_children(rec))(1..4 * hnvc));
else
pass("wix_halves_comp");
set_num_childr(u1,hnvc); -- set the number of children of u1
end if;
set_vect_of_cums(u1,vocums(1..5 * hnvc)); -- u1 gets half the children and cums
set_vect_of_cums2(u1,vocums2(1..12 * hnvc)); -- u1 gets half the children and cums
set_vect_of_cums(u2,vocums(5 * hnvc + 1..)); -- the second half inherits adjusted cumulants from the original tree
set_vect_of_cums2(u2,vocums2(12 * hnvc + 1..)); -- likewise for the second cums
set_num_childr(u1,hnvc); -- set the number of children of u1
if ic then
set_vect_of_children(u2,voch(4 * hnvc + 1..)); -- u2 gets the other half of the children
else
set_num_childr(u2,nvc - hnvc); -- set the number of children of u2
end if;
--print("two_halves: ",str(wix_dump(u1)),"\n",str(wix_dump(u2)),"\n",hexify(voch(4 * hnvc + 1..)),"\n",hexify(dr_load(u2)));
return u1 + u2;
-- assemble the two nodes of the new compound tree; return as a string
end two_halves;
procedure hpull_from_left(rec,k); -- split children with left sibling
if k = 1 or num_childr(voc(rec,k - 1)) <= wix_low_lim then
return false;
end if;
pass("wix_pull_left");
share_right(rec,k - 1);
return true;
end hpull_from_left;
procedure hpull_from_right(rec,k); -- split children with right sibling
if k >= (nrv := num_childr(rec)) or num_childr(voc(rec,k + 1)) <= wix_low_lim then
return false;
end if;
pass("wix_pull_right");
share_right(rec,k);
return true;
end hpull_from_right;
procedure hshare_right(rec,k); -- share children with right-hand sibling
-- we divide the children of the k-th node, together with those of the k+1'st,
-- into two roughly equal groups, and make these the k-th and k+1'st nodes. The
-- cumulative totals must be adjusted in the k-th node, in the
-- children moved between nodes, and in the children of the k+1'st node
-- this routine must first copy the nodes among which children will move, if they have more than 1 reference.
-- but it does not change the number of references to the children, so that their refcounts need no adjustment
--print("share_right: ",k," ",str(wix_dump(rec)));
if (ic := dr_is_compound(ndk := voc(rec,k))) then -- a compound node is involved in the sharing
pass("wix_share_nc");
nchkp1 := #(rkp1 := vect_of_children(ndkp1 := voc(rec,k + 1)))/4;
nchk := #(rk := vect_of_children(ndk))/4; -- get the two groups of children, and their lengths
else
pass("wix_share_comp");
nchkp1 := num_childr(ndkp1 := voc(rec,k + 1)); nchk := num_childr(ndk);
end if;
numleft := (nchk + nchkp1)/2; -- half the children; the left-hand will get this number of children
if refcount(int_of_4(ndkp1)) > 1 then -- must copy
pass("wix_share_copy");
stg := dr_load(ndkp1); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r);
incref(ndkp1,-1); ndkp1 := new_r; -- substitute copy for original
increfs(new_r,1); set_voc(rec,k + 1,new_r);
end if;
if refcount(int_of_4(ndk)) > 1 then -- must copy
pass("wix_share_copy2");
stg := dr_load(ndk); new_r := dr_new_rec(); dr_setrecbuf(new_r,stg);dr_dirtify(new_r);
incref(ndk,-1); ndk := new_r; -- substitute copy for original
increfs(new_r,1); set_voc(rec,k,new_r);
end if;
if numleft > nchk then -- children will move left
pass("wix_share_move_left");
num_mov := numleft - nchk; -- the number that will move left
--print("move left: ",num_mov," ",hexify(dr_load(ndk)),"\n",hexify(dr_load(ndkp1)));
-- we must subtract the cumulant of the last child moving left
-- from the cumulants of all the right-hand children which do not move,
-- and must add this to the cumulant of the k-th node. We must also
-- add the cumulant of the last left-hand child of ndk to the cumulants of all
-- the right-hand children which do move.
ndk_cum := get_ch_cum(rec,k); -- get the cumulant of ndk
right_cum := wix_get_cum(ndk); -- cumulant of the last child of ndk
moved_cum := get_ch_cum(ndkp1,num_mov); -- cumulant of the last child of ndkp1 that moves
set_ch_cum(rec,k,moved_cum + ndk_cum); -- add moved_cum to the cumulant of the k-th node
set_ch_cum2(rec,k,get_ch_cum2(ndkp1,num_mov)); -- correct the second cumulant of the k-th node
for j in [1..num_mov] loop
set_ch_cum(ndkp1,j,right_cum + get_ch_cum(ndkp1,j));
end loop;
for j in [num_mov + 1..nchkp1] loop
set_ch_cum(ndkp1,j,get_ch_cum(ndkp1,j) - moved_cum);
end loop;
-- and now we must move the corresponding cums
cumvp1 := vect_of_cums(ndkp1); -- second vector of cums
--print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop;
set_vect_of_cums(ndk,vect_of_cums(ndk) + cumvp1(1..num_mov * 5)); -- move the cums in
set_vect_of_cums(ndkp1,cumvp1(num_mov * 5 + 1..)); -- move the cums out
-- since the number of cums of ndkp1 is defined by its number of children, we need not edit that list
cumvp1 := vect_of_cums2(ndkp1); -- second vector of second cums
--print("ndkp1: ",hexify(ndkp1)," ",hexify(dr_load(ndkp1))); stop;
set_vect_of_cums2(ndk,vect_of_cums2(ndk) + cumvp1(1..num_mov * 12)); -- move the cums in
set_vect_of_cums2(ndkp1,cumvp1(num_mov * 12 + 1..)); -- move the cums out
if ic then -- if the nodes are compound we must move children in addition to cumulants
set_vect_of_children(ndk,rk + rkp1(1..4 * num_mov)); -- now actually move the children
set_vect_of_children(ndkp1,rkp1(4 * num_mov + 1..));
else -- not compound; must adjust the number of children
set_num_childr(ndk,nchk + num_mov); set_num_childr(ndkp1,nchkp1 - num_mov);
end if;
--print("move the children: ",rk," ",rkp1," ",num_mov);
else -- children will move right
pass("wix_share_move_right");
-- we must subtract the cumulant of the last remaining child
-- from that of each of the children moving right. The remaining cumulant of
-- the last child moving right must then be added to the cumulants of
-- all the original children of the (k + 1)-st node
--print("numleft: ",numleft);
rem_left_cum := get_ch_cum(ndk,numleft); -- cumulant of the last remaining child of ndk
for j in [numleft + 1..nchk] loop -- subtract this from the cum of all the nodes moving right
set_ch_cum(ndk,j,get_ch_cum(ndk,j) - rem_left_cum);
end loop;
total_moved_cum := get_ch_cum(ndk,nchk); -- get the cumulant of the last node moving right
for j in [1..nchkp1] loop -- add this to the cum of all the children of the (k + 1)-st node
set_ch_cum(ndkp1,j,total_moved_cum + get_ch_cum(ndkp1,j));
end loop;
set_ch_cum(rec,k,get_ch_cum(rec,k) - total_moved_cum);
-- the cumulant of the last node moving right must be subtracted from the cumulant of the k-th node
set_ch_cum2(rec,k,get_ch_cum2(ndk,numleft)); -- update the second cum of the moved node
-- and now we must move the corresponding cums
cumv := vect_of_cums(ndk); -- first vector of cums
cumvp1 := vect_of_cums(ndkp1); -- second vector of cums
set_vect_of_cums(ndkp1,cumv(5 * numleft + 1..5 * nchk) + vect_of_cums(ndkp1)); -- move the cums
-- since the number of cums of ndk is defined by its number of children, we need not edit that list
cumv := vect_of_cums2(ndk); -- first vector of cums
cumvp1 := vect_of_cums2(ndkp1); -- second vector of cums
set_vect_of_cums2(ndkp1,cumv(12 * numleft + 1..12 * nchk) + vect_of_cums2(ndkp1)); -- move the cums
if ic then -- if the nodes are compound we must move children in addition to cumulants
set_vect_of_children(ndk,rk(1..4 * numleft)); -- now actually move the children
set_vect_of_children(voc(rec,k + 1),rk(4 * numleft + 1..) + rkp1);
else -- not compound; must adjust the number of children
set_num_childr(ndk,numleft); set_num_childr(ndkp1,nchk + nchkp1 - numleft);