from ROOT import TFile
import sets
import sys

# Class names of the ROOT objects that are counted and compared as histograms.
histos = [
  'TH1D',
  'TH2D',
  'TProfile',
]
# Tags marking whether a (type, dict) tuple describes the serial or the parallel file.
ser, par = 'SERIAL', 'PARALL'

# =================================================================================================
# Method   : rec( o, path=None, lst=None )
# 
# @param o    : a ROOT object
# @param path : a string like a transient store path; ie '/stat/CaloPIDs/ECALPIDE'
# @param lst  : a list to hold (path, object) tuples
# 
# function    : recursively pull apart a ROOT file, making a list of (path, TObject) tuples
#               This is done by GetListOfKeys method, which lets one work down through directories
#               until you hit the Histo at the end of the path.  The list of tuples is returned
# 
def rec( o, path=None, lst=None ) :
  """Recursively flatten a ROOT object tree into a list of (path, object) tuples.

  @param o    : the object to record; anything exposing GetListOfKeys() is
                treated as a directory and descended into via Get( name )
  @param path : path of the parent object; when empty/None, o is taken to be
                the root and is recorded under '/stat'
  @param lst  : list to append the (path, object) tuples to; a new list is
                created when None
  @return     : the list holding every (path, object) tuple that was visited
  """
  # The accumulator default is handled separately from the root-path default,
  # so a caller-supplied list is honoured and a call with a path but no list works.
  if lst is None : lst = []
  if not path : path = '/stat'
  else        : path = path + '/' + o.GetName()
  lst.append( (path, o) )
  # Only directory-like objects provide GetListOfKeys; everything else is a leaf.
  if hasattr( o, 'GetListOfKeys' ) :
    for k in o.GetListOfKeys() :
      rec( o.Get( k.GetName() ), path, lst )
  return lst
# =================================================================================================

# =================================================================================================
# Method   : composition( t )
# 
# @param t : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
#            and d is a dictionary of ROOT objects, with each key = ROOT path 
# 
# function : deduce the composition, (objects/histos) counts
#
def composition( t ) :
  """Count how many entries of a (type, dict) tuple are histograms and how many are not.

  @param t : tuple of ( type, d ); only d, the dictionary of ROOT objects, is inspected
  @return  : ( objs, hists ) - number of non-histogram objects, number of histograms,
             where a histogram is any object whose class name is listed in histos
  """
  _, objects = t
  nHistos = sum( 1 for obj in objects.values() if obj.__class__.__name__ in histos )
  # everything that is not a histogram counts as a plain object
  return len( objects ) - nHistos, nHistos
# =================================================================================================
  
# =================================================================================================
# Method        : comparePaths( t1, t2 )
# 
# @param t1, t2 : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
#                 and d is a dictionary of ROOT objects, with each key = ROOT path 
# 
# function      : compare the paths between the two histo files.  If the files are identical, they
#                 should have the same set of paths.  The Parallel file should definitely have the
#                 same paths as the Serial.  Perhaps the Serial file will have some more paths due
#                 to extra histos added as part of Application Sequencer finalisation
#                 Arguments t1 and t2 are checked and the parallel/serial auto-detected
#                 Uses set operations (intersection, difference) to compare the paths.
# 
def comparePaths( t1, t2 ) : 
  """Compare the object paths of a serial and a parallel ROOT file and print a report.

  @param t1, t2 : tuples of ( type, d ); the one tagged 'SERIAL' is taken as the
                  serial file and the other one as the parallel file
  @return       : ( ((serial objs, serial histos), (parall objs, parall histos)),
                    (paths unique to serial,   number of histos among them),
                    (paths unique to parallel, number of histos among them),
                    number of histos among the paths common to both files ),
                  or None (after printing a message) when neither tuple is tagged serial
  """
  if   t1[0]  == ser : ds = t1[1] ; dp = t2[1]
  elif t2[0]  == ser : ds = t2[1] ; dp = t1[1]
  else :
    print('Neither tuple is Serial Root file reference?')
    return

  def countHistos( d, paths ) :
    # number of the given paths whose object in d is one of the histogram classes
    return sum( 1 for n in paths if d[n].__class__.__name__ in histos )

  # built-in set replaces the deprecated sets.Set; same operations, no extra module
  sset = set( ds.keys() )
  pset = set( dp.keys() )
  sObjs, sHists = composition( (ser, ds) )
  pObjs, pHists = composition( (par, dp) )
  print('\n' + '='*80)
  print('Comparison of Paths : Serial vs Parallel ROOT files')
  print('-'*80)
  print('Number of paths in Serial file : %i (objects, histos) = ( %i, %i )'%( len(ds), sObjs, sHists ))
  print('Number of paths in Parall file : %i (objects, histos) = ( %i, %i )'%( len(dp), pObjs, pHists ))

  # paths present in both files; the histos among them are the ones worth testing
  matching = sset & pset
  matchingHistos = countHistos( ds, matching )
  print('\nMatching paths                 : %i'%( len(matching) ))

  # paths (and histos) found only in the serial file
  uSer = sset - pset
  uniqueSerialHistos = countHistos( ds, uSer )
  print('Paths unique to Serial file : %i ( %i Histos )'%( len(uSer), uniqueSerialHistos ))
  # sorted so that the listing is the same from run to run
  for n in sorted( uSer ) : print('\t%s : \t%s'%( ds[n], n ))

  # paths (and histos) found only in the parallel file
  uPar = pset - sset
  uniqueParallHistos = countHistos( dp, uPar )
  print('Paths unique to Parall file : %i ( %i Histos )'%( len(uPar), uniqueParallHistos ))
  for n in sorted( uPar ) : print('\t%s : \t%s'%( dp[n], n ))

  print('Matching Histos to test : %i'%( matchingHistos ))
  print('='*80 + '\n')
  return ( ((sObjs,sHists),(pObjs,pHists)), (uSer, uniqueSerialHistos), (uPar, uniqueParallHistos), matchingHistos )
# =================================================================================================  

# =================================================================================================
# Method        : compareHistos( t1, t2, state )
# 
# @param t1, t2 : a tuple of ( type, d ) where type is either 'SERIAL' or 'PARALL'
#                 and d is a dictionary of ROOT objects, with each key = ROOT path 
# @param state  : the tuple returned by comparePaths( t1, t2 ); used for the final summary
# 
# function      : compare the histograms in Serial/Parallel ROOT files.  First, go through each 
#                 dict to collect the histos (ignore TDirectory objects, etc).  Then the histos
#                 in the parallel file (experimental) are compared to their equivalents in the 
#                 serial file (definitely correct) using up to 3 checks.
#                 1) The entries are checked, they should be equal
#                 2) If entries are equal and both histos have non-zero integrals and non-zero
#                    summed bin errors, check the KolmogorovTest() ; should be 1
#                 3) Otherwise, check the Integral() ; should be equal
#                 Arguments t1 and t2 are checked and the parallel/serial auto-detected
# 
def compareHistos(t1, t2, state) :
  """Compare the histograms common to a serial and a parallel ROOT file and print a report.

  Every histogram of the serial file that also exists in the parallel file is checked:
    1) the number of entries must be equal; if not, it is reported and no further test is made
    2) if both integrals and both summed bin errors are non-zero, KolmogorovTest() is run
       and must truncate to a non-zero integer to pass
    3) otherwise the two Integral() values must be equal

  @param t1, t2 : tuples of ( type, d ); the one tagged 'SERIAL' is taken as the serial
                  file and the other one as the parallel file
  @param state  : the tuple returned by comparePaths, used for the final summary
  @return       : None; all results are printed
  """
  ( ((serialObjects, serialHistos), (parallObjects, parallHistos)),
    (uniqueSerPaths, uniqueSerHistos),
    (uniqueParPaths, uniqueParHistos),
    mh ) = state

  # deduce which one is parallel, which serial
  if   t1[0]  == ser : ds = t1[1] ; dp = t2[1]
  elif t2[0]  == ser : ds = t2[1] ; dp = t1[1]
  else :
    print('Neither tuple is Serial Root file reference?')
    return

  # Paths to leave out of the comparison.  Currently none; two entries that were
  # listed here and then disabled:
  #   '/stat/Brunel/MemoryTool/Virtual mem, all entries'
  #   '/stat/Brunel/MemoryTool/Virtual mem, downscaled'
  omit = []

  # collect the histogram paths of each file; the parallel ones go in a set because
  # they are only used for membership tests
  sHistos = [ k for k in ds.keys() if k not in omit and ds[k].__class__.__name__ in histos ]
  pHistos = set( k for k in dp.keys() if k not in omit and dp[k].__class__.__name__ in histos )

  def binErrorSum( h ) :
    # Sum the errors of the in-range x bins 1..NbinsX (bin 0 is the underflow bin).
    # NOTE(review): for 2D histograms GetBinError(i) is presumably given a global bin
    # index, so this may not address the in-range cells - confirm.
    return sum( h.GetBinError(i) for i in range( 1, h.GetNbinsX()+1 ) )

  cEntries  = 0 ; xEntries   = 0 ; diffEntries   = []
  kTested   = 0 ; failedKol  = 0 ; diffKols      = []
  otherTest = 0 ; xIntegrals = 0 ; diffIntegrals = []
  for h in sHistos :
    if h not in pHistos :
      print('not found?  %s'%( h ))
      continue
    # matching histos to check
    cEntries += 1
    sh = ds[h] ; ph = dp[h]
    # first check entries
    if sh.GetEntries() != ph.GetEntries() :
      diffEntries.append( h ) ; xEntries += 1
      continue
    sint = sh.Integral() ; pint = ph.Integral()
    # the K-Test is only made with non-zero integrals and non-zero summed bin errors
    if sint and pint and binErrorSum( sh ) > 0 and binErrorSum( ph ) > 0 :
      kTested += 1
      kTest = sh.KolmogorovTest( ph )
      # int() truncates, so any result below 1 counts as a failure
      if not int( kTest ) :
        failedKol += 1 ; diffKols.append( (h, kTest) )
    else :
      # fall back to comparing the integrals
      otherTest += 1
      if sint != pint :
        diffIntegrals.append( h ) ; xIntegrals += 1

  # report on Failed Entry-Checks
  print('\n\n'+'-'*80)
  print('Summary of histos with different Entries')
  print('-'*80)
  for e in sorted( diffEntries ) :
    print('\t\t\t%s:\t%i != %i'%( e, int(ds[e].GetEntries()), int(dp[e].GetEntries()) ))
  print('-'*80)

  # report on Failed Kolmogorov Tests (reusing the results obtained above)
  print('\n\n'+'-'*60)
  print('Summary of histos which failed Kolmogorov Test')
  print('-'*60)
  for e, result in sorted( diffKols ) :
    print('%s\t\t%s :\tK-Test Result :\t %5.16f'%( type(ds[e]), e, result ))
  print('-'*60)

  # report on Failed Integral Checks
  print('\n\n'+'-'*60)
  print('Summary of histos which failed Integral Check')
  print('-'*60)
  for e in sorted( diffIntegrals ) :
    sInt = ds[e].Integral()
    diff = dp[e].Integral() - sInt
    # a zero serial integral has no meaningful percentage and must not divide
    if sInt : pc = '%5.6f'%( (diff*100)/sInt )
    else    : pc = 'n/a'
    print('%s\t\t%s:\t Diff = %5.6f\tPercent Diff to Serial : %s '%( type(ds[e]), e, diff, pc ))
  print('-'*60 + '\n')
  print('='*80 + '\n')

  # overall summary
  print('\n' + '='*80)
  print('Comparison : Serial/Parallel ROOT Histo files')
  print('\n\t\tSerial\tParall')
  print('\tObjects : %i\t%i\t\t( p-s = %i )'%( serialObjects, parallObjects, parallObjects-serialObjects ))
  print('\tHistos  : %i\t%i\t\t( p-s = %i )'%( serialHistos,  parallHistos,  parallHistos-serialHistos  ))
  print('\t          __________')
  print('\tTotal   : %i\t%i\n'%( serialHistos+serialObjects,  parallHistos+parallObjects  ))
  print('Objects/Histos unique to Serial File : %i / %i'%( len(uniqueSerPaths)-uniqueSerHistos, uniqueSerHistos ))
  print('Objects/Histos unique to Parall File : %i / %i'%( len(uniqueParPaths)-uniqueParHistos, uniqueParHistos ))
  print('\nMatching Histograms valid for Comparison : %i'%( mh ))
  print('\nOmissions : ')
  for entry in omit : print('\t%s'%( entry ))
  print('\nHistograms for Comparison (after Omissions) : %i'%( mh-len(omit) ))
  print('\n\tHISTOGRAM TESTS : ')
  print('\t\tKOLMOGOROV TEST      : %i'%( kTested ))
  print('\t\tINTEGRAL TEST        : %i'%( otherTest ))
  print('\t\tENTRIES TEST         : %i'%( xEntries ))
  print('\t\t                       ____')
  print('\t\tTested               : %i'%( cEntries ))

  print('\n\tDISCREPANCIES : ')
  print('\t\tK-Test      : %i'%( failedKol  ))
  print('\t\tIntegrals   : %i'%( xIntegrals ))
  print('\t\tEntries     : %i'%( xEntries   ))
  print('\n'+'='*80)

# =================================================================================================

if __name__ == '__main__' : 
  # Exactly two file names are expected after the script name:
  # the parallel file first, the serial file second.
  args = sys.argv[1:]
  if len(args) != 2 :
    print('*'*80)
    print('Wrong count of arguments? > python compareRootHistos.py someParallelFile.root someSerialFile.root')
    print('*'*80)
    # a usage error is a failure, so do not report success to the caller
    sys.exit(1)
  pFile, sFile = args

  # The files are only read, so open them with the explicit read-only mode
  # ('REC' is not one of TFile's open modes).
  tfs = TFile( sFile, 'READ' ) ; print('opening Serial File : %s'%( sFile ))
  tfp = TFile( pFile, 'READ' ) ; print('opening Parall File : %s'%( pFile ))
  # a TFile that could not be opened is flagged as a zombie; stop before walking it
  for label, tf, fname in ( ('Serial', tfs, sFile), ('Parall', tfp, pFile) ) :
    if tf.IsZombie() :
      print('Could not open %s File : %s'%( label, fname ))
      sys.exit(1)
  
  # get structure of TFiles in a list of (path, object) tuples
  lser = rec(tfs) ; lpar = rec(tfp)
  # make a dictionary of lser and lpar.  keys=paths
  dserial = dict( lser )
  dparall = dict( lpar )
  # make a tuple of (type, dict) where type is either 'serial' or 'parallel'
  ts = ( ser, dserial )  ; tp = ( par, dparall )
  
  # compare paths from each file
  state = comparePaths( ts, tp )
  
  # compare histos from each file
  compareHistos( ts, tp, state )
  
#   # finished with TFiles
#   tfs.Close()     ; tfp.Close()  
#