; xml.lsp             Gordon S. Novak Jr.         ; 07 Jan 11

; Copyright (c) 2011 Gordon S. Novak Jr. and The University of Texas at Austin.

; 12 Oct 06; 13 Oct 06; 16 Oct 06; 17 Oct 06; 20 Oct 06; 22 Oct 06; 23 Oct 06
; 24 Oct 06; 26 Oct 06; 12 Nov 07; 27 Nov 07; 06 Mar 08; 12 Mar 08; 14 Mar 08
; 20 Mar 08; 06 Jan 11

; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.

; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.

; You should have received a copy of the GNU General Public License
; along with this program; if not, see <http://www.gnu.org/licenses/>.

(defvar *xmlstrs* nil)
(defvar *xmlkeynumber*)
(defvar *xmlstr* nil)
(defvar *xmldata* nil)
(setq *xmlkeynumber* (random 1000000))
(defvar *weekdays* '("Sunday" "Monday" "Tuesday" "Wednesday" "Thursday"
                     "Friday" "Saturday"
                     "Sun" "Mon" "Tues" "Wed" "Thurs"
                     "Fri" "Sat"))
(defvar *monthnames* '("January" "February" "March" "April" "May"
                       "June" "July" "August" "September" "October"
                       "November" "December"
                       "Jan" "Feb" "Mar" "Apr" "Jun" "Jul" "Aug" "Sep"
                       "Sept" "Oct" "Nov" "Dec"))
(defvar *monthtable* '((January 1) (February 2) (March 3) (April 4) (May 5)
                       (June 6) (July 7) (August 8) (September 9) (October 10)
                       (November 11) (December 12)
                       (Jan 1) (Feb 2) (Mar 3) (Apr 4) (Jun 6) (Jul 7) (Aug 8)
                       (Sep 9) (Sept 9) (Oct 10) (Nov 11) (Dec 12)))

(defvar *statedata* '(
("Alabama"        "AL" "Montgomery"     4369862  50750)
("Alaska"         "AK" "Juneau"          619500 570373)
("Arizona"        "AZ" "Phoenix"        4778332 113642)
("Arkansas"       "AR" "LittleRock"     2551373  52075)
("California"     "CA" "Sacramento"    33145121 155973)
("Colorado"       "CO" "Denver"         4056133 103730)
("Connecticut"    "CT" "Hartford"       3282031   4845)
("Delaware"       "DE" "Dover"           753538   1955)
("Florida"        "FL" "Tallahassee"   15111244  53997)
("Georgia"        "GA" "Atlanta"        7788240  57919)
("Hawaii"         "HI" "Honolulu"       1185497   6423)
("Idaho"          "ID" "Boise"          1251700  82751)
("Illinois"       "IL" "Springfield"   12128370  55593)
("Indiana"        "IN" "Indianapolis"   5942901  35870)
("Iowa"           "IA" "Des Moines"     2869413  55875)
("Kansas"         "KS" "Topeka"         2654052  81823)
("Kentucky"       "KY" "Frankfort"      3960825  39732)
("Louisiana"      "LA" "Baton Rouge"    4372035  43566)
("Maine"          "ME" "Augusta"        1253040  30865)
("Maryland"       "MD" "Annapolis"      5171634   9775)
("Massachusetts"  "MA" "Boston"         6175169   7838)
("Michigan"       "MI" "Lansing"        9863775  56809)
("Minnesota"      "MN" "St.Paul"        4775508  79617)
("Mississippi"    "MS" "Jackson"        2768619  46914)
("Missouri"       "MO" "Jefferson City" 5468338  68898)
("Montana"        "MT" "Helena"          882779 145556)
("Nebraska"       "NE" "Lincoln"        1666028  76878)
("Nevada"         "NV" "Carson City"    1809253 109806)
("New Hampshire"  "NH" "Concord"        1201134   8969)
("New Jersey"     "NJ" "Trenton"        8143412   7419)
("New Mexico"     "NM" "Santa Fe"       1739844 121364)
("New York"       "NY" "Albany"        18196601  47224)
("North Carolina" "NC" "Raleigh"        7650789  48718)
("North Dakota"   "ND" "Bismarck"        633666  68994)
("Ohio"           "OH" "Columbus"      11256654  40953)
("Oklahoma"       "OK" "Oklahoma City"  3358044  68679)
("Oregon"         "OR" "Salem"          3316154  96003)
("Pennsylvania"   "PA" "Harrisburg"    11994016  44820)
("Rhode Island"   "RI" "Providence"      990819   1045)
("South Carolina" "SC" "Columbia"       3885736  30111)
("South Dakota"   "SD" "Pierre"          733133  75898)
("Tennessee"      "TN" "Nashville"      5483535  41220)
("Texas"          "TX" "Austin"        20044141 261914)
("Utah"           "UT" "Salt Lake City" 2129836  82168)
("Vermont"        "VT" "Montpelier"      593740   9249)
("Virginia"       "VA" "Richmond"       6872912  39598)
("Washington"     "WA" "Olympia"        5756361  66582)
("West Virginia"  "WV" "Charleston"     1806928  24087)
("Wisconsin"      "WI" "Madison"        5250446  54314)
("Wyoming"        "WY" "Cheyenne"        479602  97105)
))

(defvar *countrydata* '(
("United Kingdom"       "UK")
("United States"        "US")
("United States"        "USA")
("European Union"       "EU")
("Norway"               "Norway")
("Norway"               "NO")
))

; 23 Oct 06
; Read XML data from a file or url
(defun xmlread (filestring)
  (let (file res)
    (if (glurlstrp filestring)
        (if (setq file (ftpcurl nil nil filestring "" (genfilename)))
            (progn (setq res (parsefile file '(xml)))
                   (system (concatenate 'string "rm " file))
                   res)
            "Failed.")
        (parsefile filestring '(xml)) ) ))

; 12 Oct 06; 17 Oct 06; 20 Oct 06
; Convert Lisp data to XML via GLISP structure description
; (gltoxml '((3 4) (5 6)) 'region)
(defun gltoxml (data &optional str (outfile t))
  (let (strb)
    (if (and (null str)          ; infer type of data if possible
             (consp data)
             (symbolp (car data))
             (setq strb (glgetxmlstr (car data))) )
        (setq str (list (car data) (car data))))
    (if (and str (symbolp str))
        (if (xmlbasictypep str)
            (princ data outfile)
            (if (and data str (glgetxmlstr str))
                (progn (push str *xmlstrs*)
                       (terpri outfile)
                       (gltoxmltag str outfile nil)
                       (gltoxml data (car (glgetxmlstr str)) outfile)
                       (gltoxmltag str outfile t)
                       (pop *xmlstrs*))
                (princ data outfile)))
        (if (consp str)
            (if (member (car str)
                        '(cons list record crecord listof arrayof alist
                               atom symbol object proplist listobject
                               atomobject tuple))
                (case (car str)
                  (cons (when (consp data)
                          (gltoxml (car data) (cadr str) outfile)
                          (gltoxml (cdr data) (caddr str) outfile)))
                  (list (when (consp data)
                          (mapc #'(lambda (d s) (gltoxml d s outfile))
                                data
                                (cdr str))))
                  (listof (when (consp data)
                            (dolist (d data) (gltoxml d (cadr str) outfile))))
                  (arrayof (when (arrayp data)
                             (dotimes (i (array-dimension data 0))
                               (gltoxml (aref data i) (cadr str) outfile))))
                  ((atom symbol)
                     (if (and (symbolp data)
                              (consp (cadr str))
                              (eq (caadr str) 'proplist))
                         (dolist (item (cdadr str))
                           (if (and (consp item) (get data (car item)))
                               (gltoxml (get data (car item)) item outfile)))))
                  (listobject
                     (if (and (consp (cadr str))
                              (eq (caadr str) 'text)
                              (eq (cadadr str) 'string))
                         (progn (princ (cadr data) outfile)
                                (mapc #'(lambda (d s) (gltoxml d s outfile))
                                      (cddr data)
                                      (cddr str)))
                         (mapc #'(lambda (d s) (gltoxml d s outfile))
                               (cdr data)
                               (cdr str))))
                  (t (error "unimplemented str: ~A~%" str)) )
                (progn (gltoxmltag (car str) outfile nil)
                       (gltoxml data (cadr str) outfile)
                       (gltoxmltag (car str) outfile t)) ) ) ) ))

; 12 Oct 06
; Make an XML tag
(defun gltoxmltag (tag outfile close)
  (let (tagstring)
    (setq tagstring (or (cdr (assoc tag (get (car *xmlstrs*) 'xmltags)))
                        (symbol-name tag)))
    (princ "<" outfile)
    (if close (princ "/" outfile))
    (princ tagstring outfile)
    (princ ">" outfile) ))

; 12 Oct 06; 13 Oct 06; 23 Oct 06; 20 Mar 08
; Infer what is represented by a text string
(defun glinferstringtype (str)
  (let (lng val)
    (cond ((<= (setq lng (length str)) 0) nil)
          ((glnumberstringp str) 'integer)
          ((glcurrencystrp str) 'currency)
          ((glphonestrp str) 'phone-string)
          ((glfloatstrp str) 'real)
          ((every #'alpha-char-p str)
             (cond ((member str *weekdays* :test #'string-equal)
                     'weekday)
                   ((member str *monthnames* :test #'string-equal)
                     'monthname)
                   ((find str *statedata* :key #'cadr :test #'string-equal)
                     'statecode)
                   ((assoc str *statedata* :test #'string-equal)
                     'statename)
                   ((find str *countrydata* :key #'cadr :test #'string-equal)
                     'countrycode)
                   ((assoc str *countrydata* :test #'string-equal)
                     'countryname)
                   (t 'symbol)))
          ((glalphastringp str)
            (cond ((assoc str *statedata* :test #'string-equal)
                     'statename)
                   ((find str *countrydata* :key #'cadr :test #'string-equal)
                     'countrycode)
                   ((assoc str *countrydata* :test #'string-equal)
                     'countryname)
                   (t 'string)))
          ((glurlstrp str) 'url)
          (t 'string)) ))

; Test for all alpha or blank
(defun glalphastringp (str)
  (every #'(lambda (c) (or (alpha-char-p c) (char= c #\Space))) str))

; 05 Jan 11
(defun glnumberstringp (str)
  (or (every #'(lambda (x) (or (digit-char-p x)
                               (char= x #\Space)
                               (char= x #\,)))
             str)
      (and (or (char= (char str 0) #\+)
               (char= (char str 0) #\-))
           (every #'(lambda (x) (or (digit-char-p x) (char= x #\Space)))
                  (subseq str 1)))) )

; Test for a legal floating-point number
(defun glfloatstrp (str)
  (let ((lng (length str)) (i 0) c (state 'start) gotnum)
    (while (and (< i lng) (not (eq state 'bad)))
      (setq c (char str i))
      (incf i)
      (case state
        (start     (if (or (char= c #\+) (char= c #\-) (digit-char-p c))
                       (progn (setq state 'beforedec)
                              (if (digit-char-p c) (setq gotnum t)))
                       (if (char= c #\.)
                           (setq state 'afterdec)
                           (setq state 'bad))))
        (beforedec (if (char= c #\.)
                       (setq state 'afterdec)
                       (if (digit-char-p c)
                           (setq gotnum t)
                           (setq state 'bad))))
        (afterdec  (if (or (char= c #\e) (char= c #\E))
                       (setq state 'aftere)
                       (if (digit-char-p c)
                           (setq gotnum t)
                           (setq state 'bad))))
        (aftere    (if (or (char= c #\+) (char= c #\-))
                       (setq state 'afteresign)
                       (if (digit-char-p c)
                           (setq state 'end)
                           (setq state 'bad))))
        (afteresign (if (digit-char-p c)
                           (setq state 'end)
                           (setq state 'bad)))
        (end        (if (digit-char-p c)
                           (setq state 'end)
                           (setq state 'bad))) ))
    (and (or (eq state 'afterdec) (eq state 'end))
         gotnum) ))

; 13 Oct 06; 05 Jan 11
; Test for a legal currency number
(defun glcurrencystrp (str)
  (let ((lng (length str)) (i 0) c (state 'start) gotnum)
    (while (and (< i lng) (not (eq state 'bad)))
      (setq c (char str i))
      (incf i)
      (case state
        (start     (if (or (char= c #\$) (digit-char-p c))
                       (progn (setq state 'beforedec)
                              (if (digit-char-p c) (setq gotnum t)))
                       (if (char= c #\.)
                           (setq state 'afterdec)
                           (setq state 'bad))))
        (beforedec (if (char= c #\.)
                       (setq state 'afterdec)
                       (if (digit-char-p c)
                           (setq gotnum t)
                           (if (not (char= c #\,))
                               (setq state 'bad)))))
        (afterdec  (if (digit-char-p c)
                       (setq state 'dec1)
                       (setq state 'bad)))
        (dec1      (if (digit-char-p c)
                       (setq state 'end)
                       (setq state 'bad)))
        (end           (setq state 'bad)) ) )
    (member state '(end beforedec afterdec)) ))

; 13 Oct 06
; Test for a phone number string
(defun glphonestrp (str)
  (and (= (length str) 8)
       (char= (char str 3) #\-)
       (every #'(lambda (i) (digit-char-p (char str i)))
              '(0 1 2 4 5 6 7))))

; 22 Oct 06
; compare strings: first arg is constant
(defun str= (str1 str2)
  (let ((n (length str1)))
    (and (>= (length str2) n)
         (string= str1 str2 :end2 n))))

; 22 Oct 06; 12 Mar 08; 07 Apr 17
; test for a URL
(defun glurlstrp (str)
  (and (> (length str) 7)
       (or (str= "ftp://" str)
           (str= "http://" str)
           (str= "https://" str)
           (str= "FTP://" str)
           (str= "HTTP://" str) ) ) )

; 06 Mar 08
(defun gldatestrp (str)
  (let (res month day year)
    (if (setq res   ; 2 April 2003
              (parsed str '(seq (skipws) (int) (skipws) (sym) (skipws) (int))))
        (progn (setq day (car res))
               (setq month (cadr (assoc (cadr res) *monthtable*)))
               (setq year (caddr res)))
        (if (setq res   ; 4/02/03
              (parsed str '(seq (skipws) (int) (skipws) "/" (skipws)
                                (int) (skipws) "/" (skipws) (int))))
              (progn (setq day (cadr res))
                     (setq month (car res))
                     (setq year (caddr res))) ))
    (if (and month day year)
        (list (if (< year 100)
                  (if (< year 20) (+ year 2000) (+ year 1900))
                  year)
              month day) ) ))

; 13 Oct 06; 16 Oct 06; 20 Mar 08; 05 Jan 11
; Determine GLISP type for a field of data based on example string
; name = interned field name
; data  = string value
; Output is type for basic data
(defun glgrokbasic (name data)
  (let (type)
    (setq type (glinferstringtype data))
    (case type
      (integer
        (setq val (read-from-string data))
        (case name
          ((MONTH |Month| |month|)
            (if (and (> val 0) (<= val 12)) 'monthnumber 'integer))
          ((DAY |Day| |day|)
            (if (and (> val 0) (<= val 31)) 'daynumber 'integer))
          ((YEAR |Year| |year|)
            (if (and (> val 1000) (<= val 2201)) 'year 'integer))
          ((ZIP |Zip| |zip| ZIPCODE |Zipcode| |zipcode|)
            (if (and (> val 0) (<= val 99999)) 'zipcode 'integer))
          ((SSN |ssn| SS |ss|)
            (if (and (> val 100000000) (<= val 999999999)) 'ssn 'integer))
          ((AREA |Area| |area| AREACODE |Areacode| |areacode|)
            (if (and (> val 100) (<= val 999)) 'areacode 'integer))
          ((PHONE |Phone| |phone|)
            (if (and (> val 2000000) (<= val 9999999)) 'phonenumber 'integer))
          (T 'integer)))
      (currency
        (if (char= (char data 0) #\$)
            'currency
            (case name
              ((COST |Cost| |cost| PRICE |Price| |price|)
                'currency)
              (t 'real))))
      ((real weekday monthname statecode statename
             countryname countrycode phone-number url) type)
      (t 'string) ) ))

; 05 Jan 11
; infer likely subtype of an integer
(defun glgrokinteger (str)
  (let (val)
    (setq val (glreadnumber str))
    (if (and (> val 0) (<= val 12)) 'monthnumber
      (if (and (> val 0) (<= val 31)) 'daynumber
        (if (and (= (length str) 4) (> val 1000) (<= val 2201)) 'year
          (if (and (= (length str) 5) (> val 0) (<= val 99999)) 'zipcode
            (if (and (= (length str) 9)
                     (> val 100000000) (<= val 999999999)) 'ssn
              (if (and (= (length str) 3) (> val 100) (<= val 999))
                  'areacode
                (if (and (= (length str) 7) (> val 2000000)
                         (<= val 9999999)) 'phonenumber
                  'integer))))))) ))

; 05 Jan 11
; read a number that may contain extra stuff such as $ and ,
(defun glreadnumber (str)
  (let (chars ch)
    (dotimes (i (length str))
      (setq ch (char str i))
      (if (or (digit-char-p ch)
              (char= ch #\.))
          (push ch chars)) )
    (read-from-string (if (= (length str) (length chars))
                          str
                          (coerce (nreverse chars) 'string))) ))

; 23 Oct 06
; Determine GLISP type for a Lisp version of XML data
(defun glgrok (data &optional glstrflg)
  (setq *xmlkeynumber* (random 1000000))
  (glgrokstr data glstrflg))

; 16 Oct 06; 17 Oct 06; 22 Oct 06; 23 Oct 06; 26 Oct 06
; Determine GLISP type for a Lisp version of XML data
; Stores result on 'glxmlstr and under 'glstructure if glstrflg is set.
(defun glgrokstr (data glstrflg)
  (let ((name (car data)) type field fields sets tmp res (dt (cdr data)) str)
    (if (and (null (cddr data))
             (stringp (cadr data)))
        (list name (glgrokbasic name (cadr data)))
      (if (and (null (cddr data))
               (consp (cadr data))
               (eq (caadr data) 'text)
               (stringp (cadadr data)))
          (list name (glgrokbasic name (cadadr data)))
        (progn
          (if (and (consp (cadr data))
                   (eq (caadr data) 'text)
                   (stringp (cadadr data)))
              (progn (push (list 'text 'string) fields)
                     (pop dt)))
          (dolist (x dt)
            (setq field (glgrokstr x glstrflg))
            (if (not (assoc (car field) sets))
                (if (setq tmp (assoc (car field) fields))
                    (progn (setq fields (remove tmp fields))
                           (push tmp sets))
                    (push field fields))) )
          (dolist (x sets)
            (push (list (intern (glpluralize (symbol-name (car x))))
                        (list 'listof (cadr x)))
                  fields))
          (setq str
                (list (cons 'listobject
                       (if (equal (get name 'glxmlkeynumber) *xmlkeynumber*)
                           (glgrokunify fields (cdar (get name 'glxmlstr)))
                           (reverse fields)))))
          (setf (get name 'glxmlstr) str)
          (if (and glstrflg (fboundp 'gldefstr))
              (gldefstr (cons name str) nil))
          (setf (get name 'glxmlkeynumber) *xmlkeynumber*)
          (list name name) ) ) ) ))

; 16 Oct 06
; Make plural form of a string
(defun glpluralize (name)
  (let (lastc lng)
    (setq lng (length name))
    (setq lastc (char name (1- lng)))
    (if (and (char= lastc #\Y)
             (> lng 1)
             (not (glvowelp (char name (- lng 2)))))
        (concatenate 'string (subseq name 0 (1- lng)) "IES")
        (if (and (char= lastc #\y)
                 (> lng 1)
                 (not (glvowelp (char name (- lng 2)))))
            (concatenate 'string (subseq name 0 (1- lng)) "ies")
            (concatenate 'string name (if (lower-case-p lastc) "s" "S")))) ))

; test if a character is a vowel
(defun glvowelp (ch)
  (member ch '(#\a #\e #\i #\o #\u #\y #\A #\E #\I #\O #\U #\Y)
          :test #'char=))

; 23 Oct 06
; Unify two structures by taking union of fields.
; Each arg is a list of (name type) items
(defun glgrokunify (new old)
  (let (both field type)
    (dolist (item old)
      (if (setq field (assoc (car item) new))
          (progn (setq type (glgrokunifyleaf (cadr field) (cadr item)))
                 (setq new (remove field new))
                 (if (eq type (cadr item))
                     (push item both)
                     (push (list (car field) type) both)))
          (push item both)))
    (dolist (item new) (push item both))
    (nreverse both) ))

; 23 Oct 06
; Unify two leaf structures
(defun glgrokunifyleaf (new old)
  (if (equal new old)
      old
      (if (or (eq new 'string) (eq old 'string))
          'string
          (if (eq new 'integer)
              old
              (if (eq old 'integer)
                  new
                  (progn (format t "glgrokunifyleaf  ~A ~A ~%" new old)
                         old))))))

; 16 Oct 06; 17 Oct 06; 20 Oct 06; 23 Oct 06; 24 Oct 06; 05 Jan 11
; Read in XML data from Lisp form into data structure form
;    test: chargecontrol.xml
(defun glxmlread (data &optional type)
  (let (colls fields nm fld res str flat flds)
    (if (stringp data)
        (case type
          ((integer real number monthnumber daynumber year zipcode)
            (if (or (glnumberstringp data)
                    (glfloatstrp data))
                (read-from-string data)
                data))
          (currency (glreadnumber data))
          (t data))
      (if (not (consp data))
          (error "bad data to glxmlread: ~A ~A~%" data type)
        (if (and (consp (cdr data)) (stringp (cadr data)))
            (case type
              ((integer real number monthnumber daynumber year zipcode)
               (if (or (glnumberstringp (cadr data))
                       (glfloatstrp (cadr data)))
                   (read-from-string (cadr data))
                   (cadr data)))
              (currency (glreadnumber (cadr data)))
              (t (cadr data)))
          (if (and (consp (cdr data)) (consp (cadr data))
                   (eq (caadr data) 'text) (stringp (cadadr data)))
              (case type
                ((integer real number monthnumber daynumber year zipcode)
                  (if (or (glnumberstringp (cadadr data))
                          (glfloatstrp (cadadr data)))
                      (read-from-string (cadadr data))
                      (cadadr data)))
                (currency (glreadnumber (cadadr data)))
                (t (cadadr data)))
            (if (setq str (if (consp type)
                            type
                            (car (glgetxmlstr (or type (car data))))))
              (progn
                (setq flat (glflatfields str))
                (dolist (x flat)
                  (if (and (consp x) (consp (cadr x))
                           (member (caadr x) '(listof arrayof)))
                      (push (list (cadadr x) (car x) (cadr x) nil) colls)
                      (push x flds)))
                (dolist (x (cdr data))
                  (setq nm (car x))
                  (if (setq fld (assoc nm colls))
                      (push (glxmlread x nm) (fourth fld))
                      (if (setq fld (assoc nm flds))
                          (push (list nm (glxmlread x (cadr fld)))
                                fields)
                          (format t "Warning: ~A lost.~%" x))))
                (glxmlbuild fields flds colls (or type (car data)) str)))))))))

; 20 Oct 06
; build a structure from items and structure description
(defun glxmlbuild (fields flds colls type str)
  (let (pairs res)
    (if (and (consp str)
             (eq (car str) 'listobject))  ; do listobject here
        (progn
          (dolist (x (cdr str))
            (push (if (setq fld (assoc (car x) fields))
                      (cadr fld)
                      (if (setq fld (some #'(lambda (y)
                                              (if (eq (car x) (cadr y))
                                                  y))
                                          colls))
                          (reverse (fourth fld))))
                  res))
          (cons type (reverse res)) )
        (if (fboundp 'glgetdefaults)
            (progn
              (dolist (x fields)
                (if (setq tmp (assoc (car x) flds))
                    (push (list (car x) (kwote (cadr x)) (cadr tmp))
                          pairs)
                    (if (setq tmp (assoc (car x) colls))
                        (push (list (car x) (kwote (nreverse (fourth tmp)))
                                    (third tmp))
                              pairs))))
              (setq pairs (glgetdefaults type pairs))
              (glmkstr str type pairs nil) )))))

; 19 Oct 06; 24 Oct 06; 26 Oct 06; 12 Nov 07; 27 Nov 07; 12 Mar 08; 20 Mar 08
; Read data from a file, try to grok it, read into Lisp form
; (groknroll "https://www.w3schools.com/xml/cd_catalog.xml")
(defun groknroll (filestring &optional (glstrflg t))
  (let (data type)
    (setq data (xmlread (fixurl filestring)))
    (setq *xmlkeynumber* (random 1000000))
    (setq *xmlstr* (glgrokstr data glstrflg))
    (setq *xmldata* (glxmlread data))
    (setq type (or (and (consp *xmlstr*)
                        (consp (cdr *xmlstr*))
                        (cadr *xmlstr*))
                   *xmlstr*) )
    (pushnew type *gp-types*)
    type))

; 12 Mar 08
(defun fixurl (str)
  (if (glurlstrp str)
      str
      (concatenate 'string "http://" str)))

; 20 Oct 06; 24 Oct 06
; Get XML type for a name: inferred xml type or glisp type
(defun glgetxmlstr (sym)
  (and sym (symbolp sym)
       (or (get sym 'glxmlstr)
           (and (fboundp 'glstr) (glstr sym))) ))

; 20 Oct 06; 24 Oct 06
; Get a flat list of fields for a GLISP type
(defun glflatfields (str)
  (and (consp str)
       (case (car str)
         ((list cons atomobject listobject alist proplist tuple record)
          (mapcan #'(lambda (x) (glflatfields x))
                  (cdr str)))
         ((atom symbol)
          (if (and (consp (cadr str)) (eq (caadr str) 'proplist))
              (mapcan #'(lambda (x) (glflatfields x))
                      (cdadrr str))))
         (crecord (mapcan #'(lambda (x) (glflatfields x))
                          (cddr str)))
         ((listof arrayof))
         (t (list str)) ) ))

(defun xmlbasictypep (str)
  (member str '(atom symbol integer real number string boolean character)))

; 12 Nov 07
; Make a tree struction synopsis of a type, omitting basic types
(defun glstrtree (type) (car (glstrtreer type nil)))
(defun glstrtreer (type prev)
  (let (res)
    (if (member type prev)
        (list type)
        (if (consp type)
            (if (member (car type) *gltypenames*)
                (mapcan #'(lambda (x) (glstrtreer x prev)) (cdr type))
                (glstrtreer (cadr type) prev))
            (if (or (not (symbolp type))
                    (eq type 'anything)
                    (xmlbasictypep type)
                    (xmlbasictypep (glxtrtypeg type)))
                nil
                (progn
                  (setq res (glstrtreer (car (glstr type))
                                       (cons type prev)))
                  (if res
                      (list (cons type res))
                      (list type))))))))

; 05 Jan 11; 07 Jan 11
; (glhtmltable (xmlread "testb.html"))
; parse an html table
; result is (value type)
(defun glhtmltable (data)
  (let (rows ncols headers type (n 0) head it)
    (setq rows (glhtmltablerows data))
    (setq ncols (length (rest (first rows))))
    (if (and (consp (car rows))
             (eq (caar rows) 'tr)
             (consp (cadar rows))
             (eq (caadar rows) 'th))
        (progn (setq headers (glhtmlheaders (car rows)))
               (pop rows))
        (if (setq head (assoc 'thead (cdr data)))
            (setq headers (glhtmlheaders (cadr head)))
            (dotimes (i ncols)
              (push (intern (cstr "COL" (princ-to-string (- ncols i))))
                    headers)) ) )
    (dolist (h headers)
      (push (glhtmltype (intern (string-upcase h))
                        (mapcar #'(lambda (row)
                                    (if (setq it (glhtmlitemdata
                                                  (nth n (cdr row))))
                                        (glgrokbasic nil it)))
                                rows))
            type)
      (incf n))
    (setq type (nreverse type))
    (list (mapcar #'(lambda (row)
                      (mapcar #'(lambda (item tp)
                                  (if (setq it (glhtmlitemdata item))
                                      (glxmlread it (cadr tp))))
                              (cdr row) type))
                  rows)
          (list 'listof (cons 'list type))) ))

; 06 Jan 11
; infer type of a column based on name and list of item types
(defun glhtmltype (name types)
  (let (histo type)
    (setq histo (glhtmltypehisto types))
    (setq type (caar histo))
    (if (eq type 'integer)
        (dolist (spec '(currency))
          (if (assoc spec histo) (setq type spec) ) ) )
    (list name type) ))

; histogram sorted by most frequent
(defun glhtmltypehisto (arg140)
  (let (acc)
    (dolist (x arg140)
      (let (rec)
        (setq rec (assoc x acc))
        (when (null rec)
          (push (cons x 0) acc)
          (setq rec (assoc x acc))
          (setf (cdr rec) 0))
        (incf (cdr rec))))
    (sort acc #'(lambda (x y) (> (cdr x) (cdr y)))) ))


; 05 Jan 11
; extract rows data from a table
(defun glhtmltablerows (data)
  (and (consp data)
       (if (consp (car data))
           (if (eq (caar data) 'tr)
               data
               (if (eq (caar data) 'tbody)
                   (glhtmltablerows (car data))
                   (glhtmltablerows (cdr data)) ))
           (glhtmltablerows (cdr data)) ) ) )

; 05 Jan 11; 06 Jan 11; 07 Jan 11
; extract item data from an item
(defun glhtmlitemdata (data)
  (and (consp data)
       (if (stringp (car data))
           (car data)
           (if (and (consp (car data))
                    (eq (caar data) 'text))
               (cadar data)
               (if (and (consp (car data))
                        (member (caar data) '(b i tt)))
                   (or (glhtmlitemdata (cadar data))
                       (glhtmlitemdata (cdr data)))
                   (if (and (consp (car data))
                            (member (caar data) '(a)))
                       (or (glhtmlitemdata (cdar data))
                           (glhtmlitemdata (cdr data)))
                       (glhtmlitemdata (cdr data))) ) ) ) ) )

; 05 Jan 11
; extract headers from a row
(defun glhtmlheaders (row)
  (let (headers)
    (setq headers (mapcar #'glhtmlitemdata (cdr row)))
    (mapcar #'(lambda (x)
                (if (glnumberstringp x)
                    (cstr "Y" x)
                    x))
            headers) ))

; 06 Jan 11
; Find the largest table (in number of lines) in an html file
; Returns line number where found, or nil.
(defun glfindhtmltable (filenm)
  (let (start end new save)
    (with-open-file (infile filenm :direction :input
                            :if-does-not-exist nil)
      (setq *parsedfile* infile)
      (setq *linenumber* 0)
      (setq *ptr* 0)
      (setq *lng* 0)
      (setq new (parseg '(skipto "<table")))
      (when (eq new 'zzomit)
        (setq start *linenumber*)
        (setq new (parseg '(skipto "</table")))
        (if (eq new 'zzomit) (setq end *linenumber*))
        (while *parsedfile*
          (setq new (parseg '(skipto "<table")))
          (if (eq new 'zzomit) (setq save *linenumber*))
          (setq new (parseg '(skipto "</table")))
          (if (and (eq new 'zzomit)
                   (> (- *linenumber* save)
                      (- end start)))
              (progn (setq start save)
                     (setq end *linenumber*))) ) ) )
    start))
