; kwic.lsp                Gordon S. Novak Jr.              ; 26 Aug 04

; Copyright (c) 2007 Gordon S. Novak Jr. and The University of Texas at Austin.
; All rights reserved.

; Key Word In Context example

; 16 Nov 94; 05 Jan 95; 29 Mar 96; 31 Mar 96; 16 Apr 96; 02 Jan 97; 15 Sep 98
; 30 Dec 98; 07 Jan 99; 22 Jan 04; 26 May 04; 29 May 04

; To test:
; (ld bib)                 ; file bib.lsp for test data, sets *bibdata*
; (glcc 'bibentry-reader)  ; must compile first for mapcar
; (car (setq *bibd* (mapcar #'bibentry-reader *bibdata*)))
; (viewas 'avl-tree 'kwic-avl)
;    left = less, right = more, sort-value = word + Done, balance = bal
; (bb *bibd*)
;
; Second test: a simple kwic case, just a list of strings.
; (bc '("evolution of the slithy tove" "treasure trove of toves"
;       "slithy creatures of the southwest"))

; Note that there is a problem with composed-generator-generate that causes
; it to loop on an input file with blank lines at the end of the file.  *****

(defvar *string-of-words-column* 0)
(defvar *string-of-words-string* "")
(defvar *bibd*)
(defvar *noisewords*)
(setq *noisewords*
      '(a an the those that which I me my you your yours he she it
          his her hers its him they their them in on by with to for
	  and or not of through about from s some whose when where
	  system manual reference program programs introduction
	  based using like data processing programmer use approach
	  computer language non problem problems programming
	  have has had having be is was been being will are were))

(glispglobals (*noisewords* (listof symbol))
	      (*string-of-words-column* integer)
	      (*string-of-words-string* string))

(gladdprop 'string 'prop '(words (self) result string-of-words))

(glispobjects

(string-of-words-state (list (str string) (n integer))
  adj ((done      (glambda ((st string-of-words-state))
			   (or (not (stringp (str st)))
				    (>= (n st) (length (str st)))))) )
  msg ((generate  string-of-words-generate)) )

(string-of-words (z string)
  prop   ((initial-state (glambda ((self string))
			   (a string-of-words-state with str = self  n = 0))))
  msg    ((iterator      generator-iterator)) )

(gen-string (self string)
  prop   ((column      (*string-of-words-column*))
	  (orig-string (*string-of-words-string*)))
  supers (string))

(bibentry (list (author string)
		(title string)
		(year integer))
  prop   ((last-name ((string-of-words-generate
		        (a string-of-words-state with
			   str = (author self) n = 0))))
	  (title-words (title) result string-of-words) )
  msg    ((print-ad  ((progn (princ (last-name self)) (spaces 1)
			     (princ (year self)))))) )

(kwic-framework anything
  prop   ((seqview      ('prog1))         ; defaults to given object
          (titleview    ('prog1) result string-of-words)
                                          ; defaults to item as title
	  (printmsg     ('prog1))         ; how to print rest of item
	  (maxwidth     (60))             ; max width of title area
	  (noise-filter (t))              ; t to remove noise words
	  (noise-words  (*noisewords*))   ; default noise words
	  (noise-dictionary       (nil) result kwic-avl)
	  (noise-dictionary-view  ('avl-tree))
	  (interesting-filter     (nil))  ; t to keep only interesting words
	  (interesting-words      (nil))
	  (interesting-dictionary (nil) result kwic-avl)
	  (interesting-dictionary-view  ('avl-tree))
	  (key-dictionary         (nil) result kwic-avl)
	  (key-dictionary-view    ('avl-tree)))
  msg    ((kwic         generic-kwic  specialize t)) )

(kwic-avl (list (bal integer)
		(less (^ kwic-avl))
		(is   (list (word string)
			    (occurrences (listof kwic-entry))))
		(more (^ kwic-avl)))
  viewspecs ((avl-tree avl-tree (balance bal) (sort-value word)
		                (right more) (left less))) )

(kwic-entry (list (column integer)
		  (item   bibentry)) )    ; ****** needs to be parameterized

(bib-kwic-framework (listof bibentry)
  prop   ((titleview    ('title-words))
	  (printmsg     ('print-ad)) )
  supers (kwic-framework) )

(simple-kwic-framework (listof string-of-words)
  supers (kwic-framework) )

(file-kwic-framework file-generator             ; ???
  supers (kwic-framework) )

(file-generator-state (list (filename string) (stream anything) (eof boolean))
  prop ((get-stream   ((or stream (open self)))))
  adj  ((done         file-generator-done) )
  msg  ((open         file-generator-state-open)
	(generate     file-generator-generate  open t)) )

(file-generator (z string)
  prop ((initial-state (glambda ((self string))
			 (a file-generator-state
			    with filename = self  stream = nil  eof = nil))))
  msg  ((iterator      generator-iterator)) )

(composed-generator-state (list (s1 anything) (item anything) (s2 anything))
  adj  ((done          composed-generator-done  specialize t) )
  prop ((itemview      ('prog1)))          ; defaults to item of sequence
  msg  ((generate      composed-generator-generate  specialize t)
	(intermediate-item (glambda (self itm)
				    (funcall (itemview self) itm))) ) )

(composed-generator (z anything)
  prop   ((initial-state (glambda (self)
			   (a composed-generator-state
			      with s1 = (initial-state self)))))
  msg    ((iterator      generator-iterator)) )

(file-of-words (z file-generator)
  prop   ((initial-state ((a file-of-words-generator-state
			      with s1 = (initial-state z)))) )
  supers (composed-generator) )

(file-of-words-generator-state
  (list (s1   file-generator-state)
	(s2   string-of-words-state))
  prop ((itemview ('words)) )
  supers (composed-generator-state) )
  ) ; glispobjects

(gladdprop 'string 'prop  '(upper-case string-upcase result string))
(gladdprop 'string 'prop  '(lower-case string-downcase result string))
(gladdprop 'string 'views '(words string-of-words))
(gladdprop 'string 'msg   '(print-subseq string-print-subseq))
(setf (glitemtype 'string-of-words) 'gen-string)
(setf (glitemtype 'file-of-words) 'gen-string)

; 04 Nov 93; 05 Nov 93; 14 Dec 93; 31 Mar 94; 29 May 04
; Generic program to compile and print a Key Word In Context (KWIC) index
(gldefun generic-kwic ((kwic kwic-framework))
  (let ((noisetable (typeof (noise-dictionary kwic)))
        (inttable (typeof (interesting-dictionary kwic)))
	(keytable (typeof (noise-dictionary kwic)))
	entry align wd shift (str string) lng firstcol lastcol)
 ; initialize tables of noise words and interesting words
    (if (noise-filter kwic)
	(if (noise-dictionary kwic)
	    (noisetable = (noise-dictionary kwic))
	    (for nw in (noise-words kwic) do
		 (noisetable = (insert-key (avl-tree noisetable)
				     (upper-case (stringify nw)))))))
    (if (interesting-filter kwic)
	(if (interesting-dictionary kwic)
	    (inttable = (interesting-dictionary kwic))
	    (for wd in (interesting-words kwic) do
		 (inttable = (insert-key (avl-tree inttable)
					 (upper-case (stringify wd)))))))
 ; process items and put into table
    (for item in (funcall (seqview kwic) kwic) do
      (for word in (funcall (titleview kwic) item) do
	(wd = (upper-case word))
	(if (if (noise-filter kwic)
		(not (member (funcall (noise-dictionary-view kwic)
					   noisetable)
				  wd))
	        (if (interesting-filter kwic)
		    (member (funcall (interesting-dictionary-view
				      kwic)
				     inttable)
			    wd)
		    t))
	    (progn (keytable = (insert-key (funcall (key-dictionary-view kwic)
						    keytable)
					   wd))
	         (entry = (implementation
			      (^. (member (funcall (key-dictionary-view kwic)
						    keytable)
					  wd))))
		 ((occurrences entry) +_ (a kwic-entry with item = item
					    column = *string-of-words-column*))
		 ))))
 ; print the resulting table
    (align = (maxwidth kwic) / 2 - 5)
    (for entry in (avl-tree keytable) do
      (for occ in (occurrences entry) do
	(str = (funcall (titleview kwic) (item occ)))
	(lng = (length str))
	(shift = align - (column occ))
	(if (shift > 0)
	    (progn (spaces shift)
		   (firstcol = 0))
	    (firstcol = (- shift)) )
	(lastcol = (min lng ((maxwidth kwic) - shift)))
	(print-subseq str firstcol lastcol)
	(spaces ((maxwidth kwic) + 2 - ((lastcol - firstcol) + (max shift 0))))
	(funcall (printmsg kwic) (item occ))
	(terpri) ))  ))

; 04 Nov 93; 31 Mar 96; 02 Apr 96; 07 Jan 99; 26 May 04; 29 May 04
; Generator to treat a string as a sequence of words.
(gldefun string-of-words-generate ((st string-of-words-state))
  (result string)
  (let (start lng res)
    (lng = (length (str st)))
    (while (and (< (n st) lng) (not (alpha-char-p (char (str st) (n st)))))
      do ((n st) _+ 1))
    (*string-of-words-column* = (n st))
    (*string-of-words-string* = (str st))   ; save string
    (start = (n st))
    (if (< (n st) lng)
	(progn (while (and (< (n st) lng) (alpha-char-p (char (str st) (n st))))
	       do ((n st) _+ 1))
	     (res = (subseq (str st) start (n st)))
	     (while (and (< (n st) lng)
			 (not (alpha-char-p (char (str st) (n st)))))
	       do ((n st) _+ 1))
	     res)
        "") ))

; 05 Nov 93
; Print a subsequence of the characters of a string
(defun string-print-subseq (str first last &optional stream)
  (dotimes (i (- last first)) (write-char (char str (+ i first)) stream)))

; 04 Nov 93
; Test string of words generator
(gldefun ba ((s string)) (for w in (words s) (print w)))
; (ba "now is the time")

; 04 Nov 93
(gldefun bibentry-reader (l) (result bibentry)
  (a bibentry with author = (getf (cdr l) 'author=)
                   title  = (getf (cdr l) 'title=)
		   year   = (read-from-string (getf (cdr l) 'year=)) ) )

(gldefun bb ((b bib-kwic-framework)) (kwic b))
(gldefun bc ((b simple-kwic-framework)) (kwic b))
(gldefun bd ((b file-kwic-framework)) (kwic b))        ; ???

; 29 Mar 96; 31 Mar 96
; test a file-generator-state for eof
(gldefun file-generator-done ((state file-generator-state))
  (let (char strm)
    (strm = (get-stream state))
    (char = (peek-char nil strm nil '*gl-eof-value*))
    (eq char '*gl-eof-value*) ))

; 29 Mar 96; 31 Mar 96
(gldefun file-generator-generate ((state file-generator-state))
  (read-line (get-stream state) nil '*gl-eof-value*) )

; 29 Mar 96; 29 May 04
; open the file of a file-generator-state
(gldefun file-generator-state-open ((state file-generator-state))
  (let (tmp)
    (if (tmp = (open (filename state) :direction :input
		       :if-does-not-exist nil))
	(progn ((stream state) = tmp)
	       ((eof state) = nil)))))

; 02 Apr 96
; Test file generator: given a file name string, it prints lines
(gldefun fg1 ((f file-generator)) (for s in f (print s)))

; 29 Mar 96; 31 Mar 96
(gldefun composed-generator-done ((state composed-generator-state))
  (and (done (s1 state)) (done (s2 state))) )

; 29 Mar 96; 31 Mar 96; 02 Apr 96; 22 Jan 04
(gldefun composed-generator-generate ((state composed-generator-state))
  (let (item)
    (while (and (or (null (s2 state)) (done (s2 state)))
		(not (done (s1 state))))
      do (item = (intermediate-item state (generate (s1 state))))
         ((s2 state) = (initial-state item)) )
    (generate (s2 state)) ))

; 02 Apr 96; 26 Aug 04
; Test file of words generator: given a file name string, it prints words
(gldefun fg2 ((f file-of-words)) (for s in f (print s)))

; 22 Jan 04; 26 Aug 04
; Count words in a file.  Arg is file name string
(gldefun wordcount ((f file-of-words))
  (let (timestamp word wordlist)
    (timestamp = (get-universal-time))
    (for s in f when (> (length s) 1)
      (word = (intern (string-upcase s)))
      (if (not (equal (get word 'wordcounttime) timestamp))
	  (progn (setf (get word 'wordcounttime) timestamp)
		 (setf (get word 'wordcount) 0)
		 (push word wordlist)))
      (incf (get word 'wordcount) ) )
    (for wd in wordlist collect (list wd (get wd 'wordcount))) ))
