#!/usr/bin/env python2.4

# Based on a transaction analyzer by Matt Kromer.

import pickle
import re
import sys
import types
from ZODB.FileStorage import FileStorage
from cStringIO import StringIO

class FakeError(Exception):
    """Raised by FakeUnpickler instead of resolving a pickled global.

    The requested global is recorded on the instance as ``module`` and
    ``name``.  No arguments are forwarded to ``Exception``, so ``args``
    stays empty.
    """

    def __init__(self, module, name):
        # Remember which global the pickle asked for.
        self.module, self.name = module, name
        Exception.__init__(self)

class FakeUnpickler(pickle.Unpickler):
    """Unpickler that refuses to resolve any global.

    Every class lookup raises FakeError carrying the requested module and
    name, so a caller can learn which class a pickle refers to without
    importing it.
    """

    def find_class(self, module, name):
        # Never import anything: report the request through the exception.
        refusal = FakeError(module, name)
        raise refusal

class Report:
    """Accumulator for the statistics gathered while scanning a storage."""

    def __init__(self):
        # Per-oid tables.
        self.OIDMAP = {}    # oid -> type name recorded on first sight
        self.USEDMAP = {}   # oid -> size of the latest record seen
        self.FREEMAP = {}   # oid -> summed size of superseded records

        # Per-type tables.
        self.TYPEMAP = {}   # type name -> number of records
        self.TYPESIZE = {}  # type name -> summed record data size

        # Whole-storage counters: transactions, records, data bytes.
        self.TIDS = self.OIDS = self.DBYTES = 0
        # Distinct oids seen and the bytes of their latest records.
        self.COIDS = self.CBYTES = 0
        # Superseded records and the bytes they occupied.
        self.FOIDS = self.FBYTES = 0

def shorten(s, n):
    """Return a form of the dotted name *s* that is at most *n* chars long.

    A string that already fits is returned unchanged.  Otherwise leading
    dot-separated components are dropped until what remains, prefixed with
    "...", fits.  If no dot is left before that happens, the rightmost *n*
    characters of the remainder are returned without a "..." prefix.

    A non-positive *n* yields "" (unless *s* already fits).
    """
    if len(s) <= n:
        return s
    if n <= 0:
        # s[-0:] would be the whole string and a negative n would slice
        # from the front, so handle "no room at all" explicitly.
        return ""
    while len(s) + 3 > n:  # account for ...
        i = s.find(".")
        if i == -1:
            # In the worst case, just return the rightmost n bytes
            return s[-n:]
        s = s[i + 1:]
    return "..." + s

def _ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 for a zero divisor."""
    if not denominator:
        return 0.0
    return numerator * 1.0 / denominator


def report(rep):
    """Print a summary of the statistics collected in *rep* to stdout.

    *rep* supplies the counters TIDS, OIDS, DBYTES, COIDS, CBYTES, FOIDS
    and FBYTES and the per-type tables TYPEMAP and TYPESIZE.  Output is a
    short header, one line per type sorted by type name, and totals.

    Averages and percentages whose divisor is zero (an empty storage, or
    one whose records carry no data) are shown as 0 instead of raising
    ZeroDivisionError.
    """
    # Each print takes one parenthesized expression, so it reads the same
    # whether print is a statement or a function.
    avg_record = _ratio(rep.DBYTES, rep.OIDS)
    avg_txn = _ratio(rep.DBYTES, rep.TIDS)

    print("Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS))
    print("Average record size is %7.2f bytes" % avg_record)
    print("Average transaction size is %7.2f bytes" % avg_txn)

    print("Types used:")
    fmt = "%-46s %7s %9s %6s %7s"
    fmtp = "%-46s %7d %9d %5.1f%% %7.2f"  # per-class format
    fmts = "%46s %7d %8dk %5.1f%% %7.2f"  # summary format
    print(fmt % ("Class Name", "Count", "TBytes", "Pct", "AvgSize"))
    print(fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7))
    cumpct = 0.0
    for t in sorted(rep.TYPEMAP):
        count = rep.TYPEMAP[t]
        size = rep.TYPESIZE[t]
        pct = _ratio(size * 100.0, rep.DBYTES)
        cumpct += pct
        print(fmtp % (shorten(t, 46), count, size, pct, _ratio(size, count)))

    print(fmt % ('='*46, '='*7, '='*9, '='*5, '='*7))
    print("%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
                                       ' ', avg_txn / 1024.0))
    print(fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
                  avg_record))

    print(fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
                  _ratio(rep.CBYTES * 100.0, rep.DBYTES),
                  _ratio(rep.CBYTES, rep.COIDS)))
    if rep.FOIDS:
        print(fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
                      _ratio(rep.FBYTES * 100.0, rep.DBYTES),
                      _ratio(rep.FBYTES, rep.FOIDS)))

def analyze(path):
    """Scan every transaction of the FileStorage at *path*.

    The storage is opened read-only.  Returns a Report holding the
    accumulated statistics.  The storage and its iterator are closed
    before returning, including when the scan raises.
    """
    # Named "rep" so the module-level report() function is not shadowed.
    rep = Report()
    fs = FileStorage(path, read_only=1)
    try:
        fsi = fs.iterator()
        try:
            for txn in fsi:
                analyze_trans(rep, txn)
        finally:
            # Closed separately from the storage in case the iterator
            # holds a file handle of its own.
            fsi.close()
    finally:
        fs.close()
    return rep

def analyze_trans(report, txn):
    """Count one transaction and fold each of its records into *report*."""
    report.TIDS = report.TIDS + 1
    for record in txn:
        analyze_rec(report, record)

def get_type(record):
    """Return the type name found at the start of *record*'s pickle.

    The data is loaded with FakeUnpickler.  If loading asks for a global,
    the result is "module.name" for that global.  Otherwise the first item
    of the loaded value is used: a tuple is unpacked as (module, class) and
    joined with a dot; anything else is passed through str().
    """
    stream = StringIO(record.data)
    try:
        loaded = FakeUnpickler(stream).load()
    except FakeError:
        # The pickle referenced a global; report what it asked for.
        refused = sys.exc_info()[1]
        return "%s.%s" % (refused.module, refused.name)

    head = loaded[0]
    if not isinstance(head, tuple):
        return str(head)
    module_name, class_name = head
    return "%s.%s" % (module_name, class_name)

def analyze_rec(report, record):
    """Fold one data record into the counters and tables of *report*.

    Every record bumps OIDS.  A record without data contributes nothing
    else.  Otherwise its size is added to DBYTES and to the per-type
    tables; the first record for an oid counts as a current object, and a
    later record for the same oid moves the previous size to the "old"
    counters.  Any exception raised while updating is printed and
    swallowed.
    """
    oid = record.oid
    report.OIDS += 1
    if record.data is None:
        # No pickle -- aborted version or undo of object creation.
        return
    try:
        nbytes = len(record.data)  # Ignores various overhead
        report.DBYTES += nbytes
        if oid in report.OIDMAP:
            # A newer record for a known oid: the size recorded earlier
            # becomes "old" and the new size takes its place.
            kind = report.OIDMAP[oid]
            stale = report.USEDMAP[oid]
            report.FREEMAP[oid] = report.FREEMAP.get(oid, 0) + stale
            report.USEDMAP[oid] = nbytes
            report.FOIDS += 1
            report.FBYTES += stale
            report.CBYTES += nbytes - stale
        else:
            # First record seen for this oid.
            kind = get_type(record)
            report.OIDMAP[oid] = kind
            report.USEDMAP[oid] = nbytes
            report.COIDS += 1
            report.CBYTES += nbytes
        report.TYPEMAP[kind] = report.TYPEMAP.get(kind, 0) + 1
        report.TYPESIZE[kind] = report.TYPESIZE.get(kind, 0) + nbytes
    except Exception:
        print(sys.exc_info()[1])

def main():
    """Analyze the storage named by the first command-line argument.

    Prints the resulting report to stdout.  Raises SystemExit with a usage
    message when no path argument was supplied.
    """
    if len(sys.argv) < 2:
        # Without this check a missing argument surfaces as a bare
        # IndexError traceback.
        sys.exit("usage: pass the path to a FileStorage file "
                 "as the first argument")
    path = sys.argv[1]
    report(analyze(path))