__init__.py 4.27 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
#!/usr/bin/env python2.4

# Based on a transaction analyzer by Matt Kromer.

import pickle
import re
import sys
import types
from ZODB.FileStorage import FileStorage
from cStringIO import StringIO

class FakeError(Exception):
    def __init__(self, module, name):
        Exception.__init__(self)
        self.module = module
        self.name = name

class FakeUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        raise FakeError(module, name)

class Report:
    def __init__(self):
        self.OIDMAP = {}
        self.TYPEMAP = {}
        self.TYPESIZE = {}
        self.FREEMAP = {}
        self.USEDMAP = {}
        self.TIDS = 0
        self.OIDS = 0
        self.DBYTES = 0
        self.COIDS = 0
        self.CBYTES = 0
        self.FOIDS = 0
        self.FBYTES = 0

def shorten(s, n):
    l = len(s)
    if l <= n:
        return s
    while len(s) + 3 > n: # account for ...
        i = s.find(".")
        if i == -1:
            # In the worst case, just return the rightmost n bytes
            return s[-n:]
        else:
            s = s[i + 1:]
            l = len(s)
    return "..." + s

def report(rep):
    print "Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS)
    print "Average record size is %7.2f bytes" % (rep.DBYTES * 1.0 / rep.OIDS)
    print ("Average transaction size is %7.2f bytes" %
           (rep.DBYTES * 1.0 / rep.TIDS))

    print "Types used:"
    fmt = "%-46s %7s %9s %6s %7s"
    fmtp = "%-46s %7d %9d %5.1f%% %7.2f" # per-class format
    fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
    print fmt % ("Class Name", "Count", "TBytes", "Pct", "AvgSize")
    print fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7)
    typemap = rep.TYPEMAP.keys()
    typemap.sort()
    cumpct = 0.0
    for t in typemap:
        pct = rep.TYPESIZE[t] * 100.0 / rep.DBYTES
        cumpct += pct
        print fmtp % (shorten(t, 46), rep.TYPEMAP[t], rep.TYPESIZE[t],
                      pct, rep.TYPESIZE[t] * 1.0 / rep.TYPEMAP[t])

    print fmt % ('='*46, '='*7, '='*9, '='*5, '='*7)
    print "%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
        ' ', rep.DBYTES * 1.0 / rep.TIDS / 1024.0)
    print fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
                  rep.DBYTES * 1.0 / rep.OIDS)

    print fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
                  rep.CBYTES * 100.0 / rep.DBYTES,
                  rep.CBYTES * 1.0 / rep.COIDS)
    if rep.FOIDS:
        print fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
                      rep.FBYTES * 100.0 / rep.DBYTES,
                      rep.FBYTES * 1.0 / rep.FOIDS)

def analyze(path):
    fs = FileStorage(path, read_only=1)
    fsi = fs.iterator()
    report = Report()
    for txn in fsi:
        analyze_trans(report, txn)
    return report

def analyze_trans(report, txn):
    report.TIDS += 1
    for rec in txn:
        analyze_rec(report, rec)

def get_type(record):
    try:
        unpickled = FakeUnpickler(StringIO(record.data)).load()
    except FakeError, err:
        return "%s.%s" % (err.module, err.name)
    except:
        raise
    classinfo = unpickled[0]
    if isinstance(classinfo, types.TupleType):
        mod, klass = classinfo
        return "%s.%s" % (mod, klass)
    else:
        return str(classinfo)

def analyze_rec(report, record):
    oid = record.oid
    report.OIDS += 1
    if record.data is None:
        # No pickle -- aborted version or undo of object creation.
        return
    try:
        size = len(record.data) # Ignores various overhead
        report.DBYTES += size
        if oid not in report.OIDMAP:
            type = get_type(record)
            report.OIDMAP[oid] = type
            report.USEDMAP[oid] = size
            report.COIDS += 1
            report.CBYTES += size
        else:
            type = report.OIDMAP[oid]
            fsize = report.USEDMAP[oid]
            report.FREEMAP[oid] = report.FREEMAP.get(oid, 0) + fsize
            report.USEDMAP[oid] = size
            report.FOIDS += 1
            report.FBYTES += fsize
            report.CBYTES += size - fsize
        report.TYPEMAP[type] = report.TYPEMAP.get(type, 0) + 1
        report.TYPESIZE[type] = report.TYPESIZE.get(type, 0) + size
    except Exception, err:
        print err

141
def main():
142 143
    path = sys.argv[1]
    report(analyze(path))