#!/usr/local/bin/python
# Copyright 1998 Joseph Koshy.
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgment:
#        ``This product includes software developed by Joseph Koshy.''
# 4. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#	From: cvsupchk,v 1.1 1999/01/17 14:31:02 koshy Exp koshy
#	$Id: cvsupchk,v 1.1.1.1 1999/02/09 18:15:11 jdp Exp $

#
# check a CVS repository against a CVSup "checkouts" file
#

import sys, os, string, getopt, re


###
# Variables
###

# NOTE(review): 'debug' is not read anywhere in the visible code.
debug           = 0                     # debug off by default
verbose         = 0                     # set to 1 by '-v'; enables progress output
checkouts_mode  = 0                     # set to 1 by '-c'; 0 means repository mode
# Taken from $CVSROOT, overridden by '-d'.  It is prepended (by plain
# string concatenation) to every pathname read from the checkouts file.
cvs_root        = ''                    # root directory being checked
options         = 'cd:r:v'              # getopt option string.  See usage() below
# Each entry is a (directory, hash) pair saved when a directory is
# entered; the most recently entered directory is at index 0.
stack           = []                    # LIFO stack of directories
restrictions    = []                    # compiled regexes given with '-r'
directory       = ''                    # directory currently being processed
# Maps pathname -> code letter ('C', 'c', 'V', 'v' or 'D') for the
# entries of the current directory.  Note: shadows the builtin hash().
hash            = {}                    # current hash table
# Maps directory pathname -> 0/1.  Every directory entered gets an
# entry (initially 0); it becomes 1 once something live is found in it.
livedirs        = {}                    # liveness flag of each dir encountered
msg_record    = {}                      # pathnames already reported once
cfn             = ''                    # name of the checkouts file being read
cl              = 0                     # line number within that file



###
# Helper Functions
###

def diag_err(m):
  '''Print an error message.

  "m" is the message text (or any object that formats with '%s').  It
  is written to standard output prefixed with 'ERROR: '.
  '''
  # A parenthesised, single-argument print is accepted both as a
  # Python 2 print statement and as a Python 3 function call.
  print('ERROR: %s' % m)


# Some file system inconsistencies can cause multiple messages to be
# generated per file.  The diagnostic routines below print only one
# message per file in an attempt to reduce the clutter.

def _diag_once(tag, file):
  '''Print "<tag>: <file>" unless "file" has already been reported.

  The global "msg_record" remembers every pathname that has been
  reported, whatever the tag was, so a later diagnostic of a different
  kind for the same pathname is suppressed as well.
  '''
  if file not in msg_record:
    print('%s: %s' % (tag, file))
    msg_record[file] = 1

def diag_unexpected(file):
  '''Report "file" as UNEXPECTED (first report per pathname only).'''
  _diag_once('UNEXPECTED', file)

def diag_missing(file):
  '''Report "file" as MISSING (first report per pathname only).'''
  _diag_once('MISSING', file)

def diag_extra(file):
  '''Report "file" as EXTRA (first report per pathname only).'''
  _diag_once('EXTRA', file)

def diag_not_a_directory(file):
  '''Report "file" as NOT A DIRECTORY (first report per pathname only).'''
  _diag_once('NOT A DIRECTORY', file)

def diag_fatal(m):
  '''Print a message describing a fatal error.  Do not return to caller.

  When a checkouts file is being read (global "cfn" is non-empty) the
  message is preceded by "(<file>,<line>):" built from the globals
  "cfn" and "cl".  The process then exits with status 1 (SystemExit).
  '''
  if cfn != '':
    prefix = '(%s,%d): ' % (cfn, cl)
  else:
    prefix = ''
  # One print call producing the same text the two-step print statement
  # (with trailing comma) produced; valid on Python 2 and Python 3.
  print('%sFATAL: %s' % (prefix, m))
  sys.exit(1)

#
# Other helpers
#

def nextline(f):
  '''Return the next line of file "f", counting it in the global "cl".

  The counter is advanced before the read, so it is also bumped for
  the final read that returns '' at end of file.
  '''
  global cl
  cl += 1
  line = f.readline()
  return line
  
def check_first_line(f):
  '''Verify that the checkouts file "f" begins with an 'F' line.

  Reads one line through nextline().  If the file is empty or the line
  does not start with 'F', diag_fatal() is called (which exits).  Only
  the leading letter is examined.
  '''
  first = nextline(f)
  if first[:1] != 'F':                  # also true for an empty file
    diag_fatal('Invalid format for initial line: "%s"' % first[0:-1])

def check_is_directory(d):
  '''Return 1 if "d" is an existing directory, otherwise diagnose and return 0.

  A path that does not exist is reported with diag_missing(); a path
  that exists but is not a directory with diag_not_a_directory().
  '''
  if os.path.isdir(d):
    return 1

  # Not a directory: tell "absent" apart from "something else is there".
  if os.path.exists(d):
    diag_not_a_directory(d)
  else:
    diag_missing(d)
  return 0


#
# Usage message
#

def usage():
  '''Print a usage string on standard output and exit with status 1.'''
  program = os.path.basename(sys.argv[0])
  # The environment variable consulted by this script is CVSROOT (see
  # the lookup in the main section), so that is the name shown here.
  print('''Usage: %s [options]
        OPTION            DESCRIPTION          DEFAULT
  	-c		: checkouts mode       [repository mode]
  	-d directory	: specify CVS Root     [$CVSROOT in environment]
  	-r regexp	: specify restrictions [no restrictions]
  	-v 		: verbose output       [not verbose]''' % program)

  sys.exit(1)


##
# process_enter_directory
#
# Check if the directory is actually present if in repository mode.
# In checkouts mode, we need to verify whether the directory is actually
# live before we can flag a missing directory as an error.  Mark the
# given file name as a directory in the current hash, and push the
# current hash and directory name on the stack.  Set the liveness of
# the current directory to '0' (default).
##

def process_enter_directory(d):
  """Handle a 'D' line in the checkouts file: descend into directory 'd'.

  Records 'd' as a directory in the hash of the directory being left,
  saves that directory and its hash on the stack, and starts an empty
  hash for 'd' with its liveness set to 0.
  """

  global directory, hash

  # Repository mode expects the directory on disk right away.  In
  # checkouts mode a directory that is not 'live' may be absent, so the
  # presence check is deferred until the directory is left again.
  if not checkouts_mode:
    check_is_directory(d)

  # The enclosing directory remembers 'd' as one of its entries ...
  enclosing_entries = hash
  enclosing_entries[d] = 'D'

  # ... and is saved at the top of the stack (index 0).
  stack.insert(0, (directory, enclosing_entries))

  # Start afresh for the directory just entered.
  livedirs[d] = 0                       # assume not live
  directory, hash = d, {}
  

##
# process_exit_directory
#
# Try to determine if the current directory is live.  Take action as
# follows:
#
# In checkouts mode:
#   If the directory is live, it must be present; otherwise it must
#   not be present.  If the directory is present, get a list of files
#   in the directory and look inside the current hash table for the
#   presence of each of these files (note that the hash records the
#   RCS file names for normal files and the regular name for directories).
#   The file codes in the hash should be either a 'C' (live file) or
#   'D' (directory) else issue a diagnostic.
#   Delete each file found from the hash table.
#
# If in repository mode:
#   The named directory must be present (all dirs are live).  We
#   check for an 'Attic' directory if the checkouts file had any
#   'v' coded entries.  For all files in the Attic and current
#   directories, remove the entries named by the files from the hash
#   checking if the codes match ('D' for directories, 'V' for regular
#   files and 'v' for Attic'ized files).
#
# Whatever remains in the hash are files that should have been seen in
# the current directory but are missing.  Issue diagnostics as required.
##

def _scan_checkout_directory(d, dir_is_live):
  '''Checkouts mode: compare directory "d" on disk with the current hash.

  A live directory must exist; a directory that is not live must not.
  Every entry found on disk is removed from the global "hash";
  entries that are unknown, or known under a code other than 'C'/'D',
  are reported as EXTRA.  Returns 1 if "d" was found to be a directory
  (only checked when it is live), else 0.
  '''
  dir_present = 0
  if dir_is_live:                       # if live, the directory should be
    dir_present = check_is_directory(d) # present
  elif os.path.exists(d):               # not live, should not be present
    diag_extra(d)

  if dir_present:
    for name in os.listdir(d):
      path = d + '/' + name

      # The hash records directories under their plain name and
      # regular files under the name of their RCS file.
      if os.path.isdir(path):
        hashpath = path
      else:
        hashpath = path + ",v"

      if hashpath in hash:
        if hash[hashpath] not in ('C', 'D'):
          diag_extra(hashpath)
        del hash[hashpath]
      else:
        diag_extra(path)

  return dir_present


def _scan_repository_directory(d):
  '''Repository mode: compare "d" and "d/Attic" on disk with the current hash.

  "d" itself must be a directory.  The Attic must exist if it was
  marked live in "livedirs" and must not exist otherwise.  Entries
  found on disk are removed from the global "hash"; Attic files are
  expected under code 'v', other entries under 'V' or 'D', anything
  else is reported as EXTRA.  Returns 1 if "d" is a directory, else 0.
  '''
  dir_present = check_is_directory(d)

  # look for an Attic directory; 'Attic' not in livedirs is ok
  attic_directory = d + '/Attic'
  attic_present = 0
  if livedirs.get(attic_directory, 0):
    attic_present = check_is_directory(attic_directory)
  elif os.path.exists(attic_directory): # if not live, should not be present
    diag_extra(attic_directory)

  # every file in the attic directory should be present in the hash
  # and marked with code 'v'
  if attic_present:
    for name in os.listdir(attic_directory):
      hashpath = d + '/' + name         # the hash doesn't use '/Attic/'
      if hashpath in hash:
        if hash[hashpath] != 'v':       # is not marked dead
          diag_extra(hashpath)
        del hash[hashpath]
      else:                             # not in hash => is an extra file
        diag_extra(attic_directory + '/' + name)

  # look in the current directory
  if dir_present:
    for name in os.listdir(d):
      if name == 'Attic':               # we already processed the Attic
        continue

      hashpath = d + '/' + name
      if hashpath in hash:              # is the plain file known?
        if hash[hashpath] not in ('V', 'D'):  # not a live RCS file or dir
          diag_extra(hashpath)
        del hash[hashpath]
      else:                             # not in hash => extra
        diag_extra(hashpath)

  return dir_present


def _report_missing_entries():
  '''Report whatever is left in the global "hash" as missing from disk.

  'c' entries are expected to be absent.  'v' entries are reported
  under their Attic pathname.  'D' entries are reported unless, in
  checkouts mode, the directory is not live.  Any other code is fatal.
  '''
  for path, code in list(hash.items()):
    if code == 'c':                     # as it should be
      continue

    if code == 'v':                     # dead RCS file
      head, tail = os.path.split(path)  # construct the full filename
      diag_missing(head + '/Attic/' + tail)

    elif code in ('C', 'V'):
      diag_missing(path)

    elif code == 'D':
      if not checkouts_mode or livedirs[path]:
        diag_missing(path)

    else:
      diag_fatal('Unexpected code: "%s" for %s' % (code, path))


def process_exit_directory(d):
  """Handle a 'U' line in the checkouts file: leave directory 'd'.

  'd' must be the directory currently being processed, and must have a
  liveness entry in 'livedirs'; otherwise diag_fatal() is called.  The
  directory on disk is compared with the entries collected in the
  current hash (see the mode-specific helpers), leftovers are reported
  as missing when the directory itself was present, and finally the
  enclosing directory and its hash are restored from the stack.  A
  live directory makes its enclosing directory live as well.
  """

  global directory, hash

  if d != directory:                    # stacking error in checkouts file
    diag_fatal('Mismatched directories: expected "%s", actual "%s"' %
             (d, directory))

  if d not in livedirs:
    diag_fatal('No liveness recorded for directory "%s"' % d)
  dir_is_live = livedirs[d]

  if checkouts_mode:
    dir_present = _scan_checkout_directory(d, dir_is_live)
  else:                                 # Repository mode
    dir_present = _scan_repository_directory(d)

  #
  # Files that remain in the hash are missing from the current directory
  # these are worth reporting only if the parent directory was present ...
  #
  if dir_present:
    _report_missing_entries()

  # pop the stack
  (directory, hash) = stack[0]
  del stack[0]

  if dir_is_live:                       # if this directory was live
    livedirs[directory] = 1             # mark parent as live


##
# process_file
#
# For 'V' and 'C' files mark the parent directory as live.  'v' files
# mark the parent's Attic directory as live.  'V', 'C', 'v' files should
# be actually present.  'c' files should not be present.
#
# Add this file to the current hash table.
##

def process_file(n, c):
  """Handle a file entry: pathname 'n' (an RCS ',v' name) with code 'c'.

  'V' / 'C' : live RCS file / live checked out file.  It must exist
              ('C' is looked up without the trailing ',v'); the
              current directory becomes live.
  'v'       : dead RCS file.  It must exist in the 'Attic'
              subdirectory, which becomes live.
  'c'       : dead checked out file.  It must not exist.
  Any other code is fatal.  The code is recorded in the current hash
  under 'n'.
  """
  if c == 'V' or c == 'C':
    if c == 'V':
      expected = n                      # the RCS file itself
    else:
      expected = n[0:-2]                # strip trailing ',v'
    if not os.path.exists(expected):
      diag_missing(expected)
    livedirs[directory] = 1             # containing directory is live

  elif c == 'v':
    parent, base = os.path.split(n)
    attic_dir = parent + '/Attic'
    attic_file = attic_dir + '/' + base
    if not os.path.exists(attic_file):
      diag_missing(attic_file)
    livedirs[attic_dir] = 1             # mark Attic as live

  elif c == 'c':
    working_file = n[0:-2]              # strip trailing ',v'
    if os.path.exists(working_file):    # shouldn't exist
      diag_extra(working_file)

  else:
    diag_fatal('Unknown code: "%s" for "%s"' % (c, n))

  # keep track of the code
  hash[n] = c


##
# process_checkouts
#
# For each line from the checkouts file, dispatch the appropriate
# handler function if the pathname is matched by the restriction
# regexes specified by the user.
##
  
def process_checkouts(cf):
  """Process a CVSup checkouts file.

  'cf' is an open file object.  The first line is validated with
  check_first_line(); every following line must consist of a code
  letter ('C', 'c', 'D', 'F', 'U', 'V' or 'v') followed, except for
  'F' lines, by a pathname.  Lines whose pathname matches none of the
  user-supplied restriction regexes are skipped; the others are handed
  to the handler for their code with 'cvs_root' prepended to the
  pathname.  Malformed lines (including blank ones) are fatal.
  """

  if verbose:
    print('Processing "%s" ...' % cfn)

  check_first_line(cf)                  # sanity check

  # operate on the checkouts.cvs file line by line
  while 1:
    l = nextline(cf)                    # next line
    if l == "":                         # EOF
      break

    fields = l.split()

    # A blank line has no fields at all; treat it like any other
    # unrecognized line instead of failing on fields[0].
    if not fields or fields[0] not in ('C', 'c', 'D', 'F', 'U', 'V', 'v'):
      diag_fatal('Unrecognized line: "%s"' % l.rstrip('\n'))
    code = fields[0]

    if code == 'F':                     # new 'F' line ...
      continue                          # ... carries nothing to check

    if len(fields) < 2:                 # code letter without a pathname
      diag_fatal('Unrecognized line: "%s"' % l.rstrip('\n'))
    path = fields[1]

    # check if one of the given regexes matches this line
    if restrictions:
      matched = 0
      for r in restrictions:            # match entry against regexes
        if r.search(path):
          matched = 1
          break
      if not matched:
        continue

    # process entry
    target = cvs_root + path
    if code == 'D':                     # enter a directory
      process_enter_directory(target)
    elif code == 'U':                   # leave a directory
      process_exit_directory(target)
    else:                               # 'V', 'v', 'C' or 'c'
      process_file(target, code)



###
# MAIN
###

# Attempt to get a default for 'cvs_root' from the environment
cvs_root = os.environ.get('CVSROOT', '')

# Parse options
try:
  optlist, args = getopt.getopt(sys.argv[1:], options)
except getopt.error as o:
  diag_err(o)
  usage()

for option, argument in optlist:
  if   option == '-d':                  # cvs root
    cvs_root = argument
  elif option == '-r':                  # path restriction regex
    try:
      r = re.compile(argument)
    except re.error as message:
      diag_fatal('Regex compiled failed: %s' % message)
    restrictions.insert(0, r)
  elif option == '-v':                  # verbose
    verbose = 1
  elif option == '-c':                  # checkouts mode
    checkouts_mode = 1
  else:
    usage()

# Pathnames from the checkouts file are appended to 'cvs_root' by plain
# concatenation, so the root needs a trailing slash whether it came
# from '-d' or from the environment.
if cvs_root and not cvs_root.endswith('/'):
  cvs_root = cvs_root + '/'

if not cvs_root:
  diag_fatal("""CVS root directory not specified.
  Use the '-d' option to specify the CVS root or set the
  environment variable 'CVSROOT'.""")
elif not os.path.isdir(cvs_root):
  diag_fatal('Specified repository root "%s" is not a directory' % cvs_root)

# add a sentinel entry as top level stack element; same
# (directory, hash) shape as the entries pushed while processing
stack = [(cvs_root, {})]

# if no files were specified on the command line, read standard input
# otherwise process each file in turn
try:
  if not args:
    cfn = "(stdin)"
    cl = 0
    process_checkouts(sys.stdin)
  else:
    for file in args:
      # Set the position first so that a failure to open the file is
      # reported against that file by diag_fatal().
      cl  = 0
      cfn = file
      try:
        cf = open(file, 'r')
      except IOError as message:
        diag_fatal('Cannot open "%s": %s' % (file, message.strerror))

      try:
        process_checkouts(cf)
      finally:
        cf.close()

except KeyboardInterrupt:               # quit on SIGINT
  pass

# Local Variables:
# Mode: python
# End:
