$<PDPRSX
NEEDS "SYSTEM"
NEEDS "TITLE"
NEEDS "IOERROR"

GET "LIBHDR"
GET "SYSHDR"
GET "SPEHDR"
$>PDPRSX

$<CAP1
GET "LIBHDR"
GET "SYSHDR"
GET ".progs.spehdr"
$>CAP1


LET start() BE
$(
  LET root = 0
  LET seeds = TABLE 17,
		1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
  LET sorted.input = ?			// stream returned by MERGE
  LET dictwordvec1 = VEC wordsperword+2
  LET dictwordvec2 = VEC wordsperword+2
  LET dictwordvec3 = VEC wordsperword
  LET dictwordvec4 = VEC wordsperword

$<PDPRSX
  LET fvec = VEC 150
  LET firstversion = ?

    big.global := MAXINT		// to get a lot of globals
    findtvec := fvec
$>PDPRSX

$<CAP1
  LET sortfilevector =  VEC maxsortversion
  LET Cap.selectinput( stream )  BE UNLESS stream = 0 DO Old.selectinput( stream )
  LET Cap.selectoutput( stream ) BE UNLESS stream = 0 DO Old.selectoutput( stream )

  $<LOG
    start.time := COMPTIME()
  $>LOG
    sort.file.cap := sortfilevector	// vector for capabitities for anon sort files

    Old.selectinput  := SELECTINPUT	// Set up non-faulting routines
    Old.selectoutput := SELECTOUTPUT
    SELECTINPUT := Cap.selectinput
    SELECTOUTPUT:= Cap.selectoutput
    SYSOUT      := MSTREAM		// and the system output stream
$>CAP1

    syslexword, mylexword := dictwordvec1+2, dictwordvec2+2
    syslexword!-1, syslexword!-2 := FALSE, dictwordvec3
    mylexword!-1,  mylexword!-2  := FALSE, dictwordvec4
					// seed words.  Selected to be common,
					// a balanced start and spelt rite.
    seeds!1, seeds!2, seeds!3 := "the", "in", "we"
    seeds!4, seeds!5, seeds!6 := "be", "now", "to"
    seeds!7, seeds!8, seeds!9 := "will", "a", "for"
    seeds!10, seeds!11, seeds!12 := "line", "The", "this"
    seeds!13, seeds!14, seeds!15 := "use", "which", "with"
    seeds!16, seeds!17           := "system", "user"

$<LOG
  $<PDPRSX
    log := FINDOUTPUT("SORT.LOG")
  $>PDPRSX
  $<CAP1
    log	:= FINDOUTPUT(".SORTLOG")
  $>CAP1
$>LOG

    distinctwordcount := 0		// how many different words
    wordcount	      := 0		// how many in total
    correctionstodo   := 0		// how many to do
    errorcount        := 0		// how many mistakes
    firstquestion     := TRUE		// until we ask the first question
    can.correct.file  := TRUE		// Assume we can unless find otherwise
    linelength	      := MAXLINELENGTH	// in new lexicon
    correction.list   := 0		// none to do yet

    findio()				// get IO streams

$<PDPRSX
    firstversion := version
    heapvecsize := MAXVEC()		// and the pseudo heap
    heapvec     := GETVEC( heapvecsize )
    heapvecpointer := 0			// none allocated yet
    Set.priority( Low.priority )	// for sort phase
$>PDPRSX

$<CAP1

    // on CAP we get the pseudo heap as a new segment, reasonable size to start
    // with (7K). Will be expanded later in 4K increments
    heapvecsize := 7*1024

  $<HEAPTEST
    // The size of the heap is obtained from the HEAPSIZE parameter from
    // the command line with 7K used as the default.
    // Temporarily uses heapvecsize to hold return code.
    heapvecsize := KEYARG( "HEAPSIZE",PARMS.INTEGER)
    TEST heapvecsize = 0 & PARMS.RESULTS!2 < 1024*32
    THEN heapvecsize := PARMS.RESULTS!2
    ELSE
    $(
	UNLESS heapvecsize = PARMS.MISSING DO
	$(
	    LET o = OUTPUT()
	    SELECTOUTPUT(MSTREAM)
	    WRITEF("*NError reading HEAPSIZE - %S - Default used*N",
                   heapvecsize = 0 -> "Number too big",FAULT(ERRORCODE))
	    UNLESS o = 0 DO SELECTOUTPUT( o )
	$)
	heapvecsize := 7*1024
    $)
  $>HEAPTEST

    heapvec := GETSLOT()		// slot for pseudo heap capability
    NEWSEG( heapvec , heapvecsize , R.ACCESS | W.ACCESS )
    heapvecpointer := 0			// none allocated yet
    heapvecsize := heapvecsize - 1	// because BCPL vectors go from 0 (telegraph pole problem!)
$>CAP1

    seedset  := seeds			// setup the global

    oldnode  := getnode( l.string+1 )	// so it can be compared with
    (oldnode+l.string)%0 := 0		// has nothing in it

    build.tree (@root)			// build and write lots of trees
    write.tree( @root )			// write the partial left-over

$<PDPRSX
    ENDWRITE()				// close that
    FREEVEC( heapvec )			// done with trees

    sorted.input := merge()		// now merge all them
    version := version-firstversion - 1	// calculate how many we used
$>PDPRSX

$<CAP1
    sort.file.cap!version := GETSLOT()	// close that, saving capability in vector
    ENDWRITE()
    MOVECAP( K.N0 , sort.file.cap!version )
    FREESLOT( heapvec )			// finished with trees

    sorted.input := merge()		// now merge all them
    version := version+1		// Number of intermediate sort files used
$>CAP1

$<LOG
    SELECTOUTPUT( log )
  $<PDPRSX
    Writef("Done %N*N",time())
  $>PDPRSX
  $<CAP1
    WRITEF("Sorting finished after %N*N", COMPTIME() - start.time)
  $>CAP1
$>LOG

    open.lexicons()			// input and output, and setup

$<PDPRSX
    errors := FINDOUTFILE( findtvec!2 )	// find error output

//  ATTACH( SYSIN )			// done when we ask the first question
     set.priority( 0 )			// ie max allowed
       check( sorted.input )		// ask all the questions
     set.priority( Low.priority )	// back down
    DETACH( SYSIN )
$>PDPRSX

$<CAP1

    // If we have an ERRORS keyed arg then we use that file to report
    // errors if we can open it. If this fails to open tell them.
    // If fails or not given, don't bother to report errors.
    // If error file given save its name in errorname so can delete it if quit

    error.name := GETSLOT()
    errors := FINDOUTPUT("ERRORS")
    TEST errors=0 THEN
    $(
	UNLESS ERRORCODE = PARMS.MISSING DO
   	$(
	    LET error.error = FAULT( ERRORCODE )
	    LET o = OUTPUT()
	    SELECTOUTPUT(MSTREAM)
	    WRITEF("*N******Warning - Error file not opened - %S*N",error.error)
	    UNLESS o = 0 DO SELECTOUTPUT( o )
	$)
	FREESLOT( error.name )
    $)
    ELSE MOVECAP( K.N0 , error.name )

    sysin :=  findinfile("/M")
    check( sorted.input )		// ask all the questions
$>CAP1

    close.lexicons()			// and windup through them
    close.outputs()			// and do corrections
$<PDPRSX    Set.priority( 0 )		// raise for report stage	$>PDPRSX
    report()				// tell him analysis
$)

$<PDPRSX
AND set.priority( n ) BE
    SYSTEM(S.ALTP, 0,0, n)

AND STREAMBLOCK(stream) = ((G0>>1)!3 >> 1)!Stream >> 1
$>PDPRSX

AND report() BE
$(
  LET plural( n ) = [n NE 1->"s",""]
    Selectoutput(SYSOUT)

    writef ("%N words read (%N distinct) in %N line%S*N",
                wordcount, distinctwordcount, linecount, plural(linecount) )
    TEST errorcount=0 THEN Writes("No")
		      ELSE Writen(errorcount)
    writef (" error%S", plural( errorcount ) )


    UNLESS errorcount=0 DO
        TEST can.correct.file & (correctionstodo>0)
       	    THEN writef(", %N corrected",correctionstodo)
	    ELSE writes(", none corrected")

    Newline()
    Writef("Sorting used %N intermediate sort file%S",
		      version,             plural( version ) )
$)


AND close.outputs() BE
$(

$<PDPRSX
    Selectoutput(errors)
    TEST errorcount=0
	THEN Deleteoutput()
	ELSE Endwrite()

    IF correctionstodo > 0
    THEN $(
	    LET in = ?
	    in := FINDINFILE( findtvec!4 )
	    Selectinput( in )
	    newoutput := FINDOUTFILE( findtvec!0 )
	    TEST newoutput=0
	    THEN ENDREAD()
	    ELSE
	    $(
		Selectoutput( newoutput )
              	do.corrections ()
		ENDREAD()
		ENDWRITE()
	    $)
$>PDPRSX

$<CAP1
    UNLESS errors = 0 DO
    $(
	SELECTOUTPUT( errors )
	IF errorcount = 0 THEN WRITES("No errors reported")
	ENDWRITE()				// If had one, close error file
    $)

    IF ( correctionstodo > 0 ) & can.correct.file	// corrections and can do them
    THEN $(

	    // If there was a second serial argument on the command line, we
	    // send the corrected output to that.
	    // Otherwise we send it to an anon file, which we attempt to
	    // rename to the input-text-filename (i.e. 1st. serial arg)

	    LET using.anon.file,rc = ?,?
	    LET infile = GETSLOT()
	    SELECTINPUT( findinfile ("1"))	// Reopen input file for correction
	    MOVECAP( K.N0 , infile )		// and save its name
	    newoutput := findoutfile ("2|/A")
	    using.anon.file := EQSTRING( K.N0 , "/A" )
	    SELECTOUTPUT( newoutput )		// Open approp file
	    do.corrections()			// and correct it
	    ENDREAD()
	    ENDWRITE()
	    IF using.anon.file THEN		// Have to rename to input filename
	    $(
		LET rc = ?
		LET s = GETSLOT()
		MOVECAP( K.N0 , s )
		rc := PRESERVE( s , infile , DEFAULT.SEG.AM )
		IF rc < 0 THEN
		$(
		    can.correct.file := FALSE
		    SELECTOUTPUT(MSTREAM)
		    WRITEF("*NCannot correct input file - %S*N",FAULT(ERRORCODE))
		$)
		FREESLOT(s)			// not re-used, so release
	    $)
	    FREESLOT( infile )
$>CAP1

	$)
$)

AND findfile( file, op) = VALOF
$(
    LET s = ?
    IF file%0=0 RESULTIS 0
    s := op( file )

$<PDPRSX
    IF s<0 THEN
    $(
	Selectoutput( Sysout )
	Writes("SPE -- ")
	Writef(IOerror(s),s,file)
	Writes( op = FINDOUTPUT -> " (output)", " (input)")
	Newline()
	FINISH
    $)
$>PDPRSX

$<CAP1
    IF s=0 THEN
    $(						// Deal with error in findI/O
	LET find.error = FAULT(ERRORCODE)
	SELECTOUTPUT(MSTREAM)
	WRITEF("*N******FATAL ERROR - failed to open %S for %S - %S*N",
		file, op = FINDOUTPUT -> "output", "input", find.error)
	FINISH
    $)
$>CAP1

    RESULTIS s
$)

AND findoutfile( file ) = findfile( file, FINDOUTPUT)

AND findinfile( file ) = findfile( file, FINDINPUT )

AND findio() BE
$(
$<PDPRSX
    LET in, out = ?, ?
    LET outcount = ?
    LET incount  = ?

    Selectinput(Findcml("SPE"))
    $(  IF RDCH() = ENDSTREAMCH THEN STOP()
        UNRDCH()
        outcount := FINDTITLES (2, findtvec, "/.ERR/")
	incount  := RESULT2
        in       := FINDINFILE (findtvec!4)
    $)
    ENDREAD()

    syslex.name := incount<3 -> "LB:[1,1]LEXICON", findtvec!8
    mylex.name  := incount<2 -> "LEXICON"        , findtvec!6

    find.newoutput()
    version := STREAMBLOCK( newoutput )!40		// pickup first version

    can.correct.file := ( ( findtvec!0 )%0 > 0 )

    SELECTINPUT (in)
$>PDPRSX

$<CAP1

    // On CAP the setting up and maintainance of the lexicon file names is done
    // by the open.lexicons and close.lexicons routines in the LEX module

    LET infile = GETSLOT()
    LET rc = ?
    LET parms.err(file,rc) BE
    $(
	WRITEF("*N******FATAL ERROR - Illegal %S filename - %S*N",file,FAULT(rc))
	FINISH
    $)

    SELECTOUTPUT(MSTREAM)
    SELECTINPUT (findinfile ("1"))			// pickup input file
    MOVECAP( K.N0 , infile )				// saved, just in case need it

    // Do a dummy read of the args on the command line to find out if any
    // silly ones present, report if there are

    rc := SERIALARG( 2 , PARMS.FILETITLE , 0 )
    UNLESS rc = 0 | rc = PARMS.MISSING DO parms.err("output",rc)

    // If there is no output file, find out if we can update the input one
    // Warn them if we can't, further message if no errors file either as
    // they probably didn't mean it (although they just might)

    IF rc = PARMS.MISSING THEN
    $(
	MANIFEST $(
			update.access	   = #X40
			ebit.in.am	   = #X01010101
		 $)
	LET b2 , b3 , b4 , b5 = ? , ? , ? , ?
	MOVESTRING( infile , K.N0 )
	ENTER2(CURRENT.DIR, DIRMAN.DETAILS, 0, 0, 0, 0, @b2, @b3, @b4, @b5)
	IF b2 < 0 THEN					// Should never get this error
	$(
	    WRITEF("*N******FATAL ERROR on file %S = %S", infile, FAULT( b2 ))
	    FINISH
	$)
	UNLESS ((( b5 & update.access ) ~= 0 ) & (( b4 & ebit.in.am) = 0 )) DO
	$(
	    can.correct.file := FALSE
	    WRITEF("*N******Warning - Cannot update input file - %S*N",infile)
	$)
    $)

    rc := KEYARG("ERRORS", PARMS.FILETITLE , 0 )
    UNLESS rc = 0 | rc = PARMS.MISSING DO parms.err("ERRORS",rc)
    IF ( rc = PARMS.MISSING ) & (~can.correct.file) THEN
	WRITES("and no error file specified*N")

    rc := KEYARG("LEXICON", PARMS.FILETITLE , 0 )
    UNLESS rc = 0 | rc = PARMS.MISSING DO parms.err("LEXICON",rc)
    rc := KEYARG("SYSLEX", PARMS.FILETITLE , 0 )
    UNLESS rc = 0 | rc = PARMS.MISSING DO parms.err("SYSLEX",rc)

  $<HEAPTEST
    rc := KEYARG("HEAPSIZE", PARMS.INTEGER )
    UNLESS rc = 0 | rc = PARMS.MISSING DO parms.err("heapsize",rc)
  $>HEAPTEST

    UNLESS EXTRAITEMS() DO
    $(
	LET s = GETSLOT()
	MOVECAP( K.N0 , s )
	WRITEF("*N******Warning - unrecognized items on command line - %S*N",s)
	FREESLOT( s )
    $)
    FREESLOT( infile )

    find.newoutput()					// pickup first anon sort file
    version := 0					// anon file selected on exit

$>CAP1

$)
