Hook in Nexedi's zodbanalyze

This is an enhanced version of the original ZODB scripts/analyze.py. The Nexedi version was originally developed as part of ERP5 here: https://lab.nexedi.com/nexedi/erp5/commits/master/erp5/util/zodbanalyze . It was agreed to move zodbanalyze to zodbutils (see nexedi/slapos!116).

Hook in Nexedi's zodbanalyze
This is an enhanced version of the original ZODB scripts/analyze.py. The Nexedi version was originally developed as part of ERP5 here: https://lab.nexedi.com/nexedi/erp5/commits/master/erp5/util/zodbanalyze . It was agreed to move zodbanalyze to zodbutils (see nexedi/slapos!116).
ab17cf2d · Kirill Smelkov · e30f2c94 · f33abff0 · ab17cf2d
Commit ab17cf2d authored Nov 17, 2016 by Kirill Smelkov
Hide whitespace changes
Inline Side-by-side

Showing with 202 additions and 0 deletions

zodbtool/zodbanalyze.py zodbtool/zodbanalyze.py +202 -0

No files found.
--- a/zodbtool/zodbanalyze.py
+++ b/zodbtool/zodbanalyze.py
+#!/usr/bin/env python2.4
+
+# Based on a transaction analyzer by Matt Kromer.
+
+import sys
+import os
+import getopt
+import anydbm as dbm
+import tempfile
+import shutil
+from ZODB.FileStorage import FileStorage
+from ZODB.utils import get_pickle_metadata
+
+class Report:
+    """Accumulator for the statistics gathered while scanning a storage.
+
+    With use_dbm true, the two per-oid maps are opened as DBM files inside a
+    freshly created temporary directory (remembered in self.temp_dir);
+    otherwise they are plain in-memory dicts and temp_dir is not set.
+    """
+
+    def __init__(self, use_dbm=False):
+        self.use_dbm = use_dbm
+
+        # Per-oid maps: OIDMAP holds the type name recorded for each oid,
+        # USEDMAP the size of the last record seen for it.
+        if not use_dbm:
+            self.OIDMAP = {}
+            self.USEDMAP = {}
+        else:
+            self.temp_dir = tempfile.mkdtemp()
+            oidmap_path = os.path.join(self.temp_dir, 'oidmap.db')
+            self.OIDMAP = dbm.open(oidmap_path, 'nf')
+            usedmap_path = os.path.join(self.temp_dir, 'usedmap.db')
+            self.USEDMAP = dbm.open(usedmap_path, 'nf')
+
+        # Global counters: transactions, records and data bytes seen ...
+        self.TIDS = 0
+        self.OIDS = 0
+        self.DBYTES = 0
+        # ... count/bytes of current (C*) and superseded (F*) records.
+        self.COIDS = 0
+        self.CBYTES = 0
+        self.FOIDS = 0
+        self.FBYTES = 0
+
+        # The same figures broken down by type name.
+        self.TYPEMAP = {}
+        self.TYPESIZE = {}
+        self.COIDSMAP = {}
+        self.CBYTESMAP = {}
+        self.FOIDSMAP = {}
+        self.FBYTESMAP = {}
+
+def shorten(s, n):
+    """Return s cut down to at most n characters for display.
+
+    Strings that already fit are returned unchanged.  Otherwise leading
+    dot-separated components are dropped until the remainder plus a "..."
+    prefix fits; if no dot is left to split on, the rightmost n characters
+    of what remains are returned instead.
+    """
+    if len(s) <= n:
+        return s
+    # Keep room for the three characters of the "..." prefix.
+    while len(s) + 3 > n:
+        dot = s.find(".")
+        if dot < 0:
+            # In the worst case, just return the rightmost n bytes
+            return s[-n:]
+        s = s[dot + 1:]
+    return "..." + s
+
+def _ratio(numerator, denominator):
+    # Float division that yields 0.0 instead of raising on a zero denominator.
+    if not denominator:
+        return 0.0
+    return numerator * 1.0 / denominator
+
+
+def report(rep, csv=False):
+    """Print the statistics collected in rep to standard output.
+
+    rep -- object carrying the counters and per-type maps read here:
+           TIDS, OIDS, DBYTES, COIDS, CBYTES, FOIDS, FBYTES, TYPEMAP,
+           TYPESIZE, COIDSMAP, CBYTESMAP, FOIDSMAP and FBYTESMAP.
+    csv -- when true, print only a comma-separated header and one row per
+           type; otherwise print a human-readable table followed by totals.
+
+    Rows are ordered by ascending total size of the type.  Ratios whose
+    denominator is zero (for example for an empty storage) are reported
+    as 0 instead of raising ZeroDivisionError.
+    """
+    if csv:
+        fmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s"
+        fmtp = "%s,%d,%d,%f%%,%f,%d,%d,%d,%d" # per-class format
+    else:
+        print("Processed %d records in %d transactions" % (rep.OIDS, rep.TIDS))
+        print("Average record size is %7.2f bytes" %
+              _ratio(rep.DBYTES, rep.OIDS))
+        print("Average transaction size is %7.2f bytes" %
+              _ratio(rep.DBYTES, rep.TIDS))
+
+        print("Types used:")
+        fmt = "%-46s %7s %9s %6s %7s %7s %9s %7s %9s"
+        fmtp = "%-46s %7d %9d %5.1f%% %7.2f %7d %9d %7d %9d" # per-class format
+    fmts = "%46s %7d %8dk %5.1f%% %7.2f" # summary format
+
+    print(fmt % ("Class Name", "T.Count", "T.Bytes", "Pct", "AvgSize",
+                 "C.Count", "C.Bytes", "O.Count", "O.Bytes"))
+    if not csv:
+        print(fmt % ('-'*46, '-'*7, '-'*9, '-'*5, '-'*7, '-'*7, '-'*9, '-'*7, '-'*9))
+
+    cumpct = 0.0
+    for t in sorted(rep.TYPEMAP, key=lambda name: rep.TYPESIZE[name]):
+        pct = _ratio(100 * rep.TYPESIZE[t], rep.DBYTES)
+        cumpct += pct
+        if csv:
+            # CSV consumers get the full type name; only the table truncates.
+            t_display = t
+        else:
+            t_display = shorten(t, 46)
+        print(fmtp % (t_display, rep.TYPEMAP[t], rep.TYPESIZE[t],
+                      pct, _ratio(rep.TYPESIZE[t], rep.TYPEMAP[t]),
+                      rep.COIDSMAP[t], rep.CBYTESMAP[t],
+                      rep.FOIDSMAP.get(t, 0), rep.FBYTESMAP.get(t, 0)))
+
+    if csv:
+        # The totals below are part of the human-readable layout only.
+        return
+
+    print(fmt % ('='*46, '='*7, '='*9, '='*5, '='*7, '='*7, '='*9, '='*7, '='*9))
+    print("%46s %7d %9s %6s %6.2fk" % ('Total Transactions', rep.TIDS, ' ',
+        ' ', _ratio(rep.DBYTES, rep.TIDS) / 1024.0))
+    print(fmts % ('Total Records', rep.OIDS, rep.DBYTES / 1024.0, cumpct,
+                  _ratio(rep.DBYTES, rep.OIDS)))
+
+    print(fmts % ('Current Objects', rep.COIDS, rep.CBYTES / 1024.0,
+                  _ratio(100 * rep.CBYTES, rep.DBYTES),
+                  _ratio(rep.CBYTES, rep.COIDS)))
+    if rep.FOIDS:
+        print(fmts % ('Old Objects', rep.FOIDS, rep.FBYTES / 1024.0,
+                      _ratio(100 * rep.FBYTES, rep.DBYTES),
+                      _ratio(rep.FBYTES, rep.FOIDS)))
+
+def analyze(path, use_dbm):
+    """Scan every transaction of the FileStorage at path and return a Report.
+
+    path    -- location of the Data.fs file; it is opened read-only.
+    use_dbm -- passed to Report; when true its per-oid maps live in DBM
+               files inside a temporary directory.
+
+    The storage is closed and, in DBM mode, the temporary directory is
+    removed before returning, even if the scan raises.
+    """
+    fs = FileStorage(path, read_only=1)
+    try:
+        # Named "stats" so the module-level report() function is not shadowed.
+        stats = Report(use_dbm)
+        try:
+            for txn in fs.iterator():
+                analyze_trans(stats, txn)
+        finally:
+            if use_dbm:
+                shutil.rmtree(stats.temp_dir)
+    finally:
+        fs.close()
+    return stats
+
+def analyze_trans(report, txn):
+    """Count one transaction and pass each of its records to analyze_rec."""
+    report.TIDS = report.TIDS + 1
+    for record in txn:
+        analyze_rec(report, record)
+
+def get_type(record):
+    """Return the "module.class" name read from the record's pickle data."""
+    module_name, class_name = get_pickle_metadata(record.data)
+    dotted = "%s.%s" % (module_name, class_name)
+    return dotted
+
+def analyze_rec(report, record):
+    """Fold one data record into the running statistics held by report.
+
+    report -- the accumulator being updated (see the attributes touched
+              below).
+    record -- object with .oid and .data; a record whose data is None is
+              counted in OIDS and otherwise ignored.
+
+    The first record seen for an oid is counted as a current object.  A
+    later record for the same oid replaces it: the previously stored size
+    moves to the "old" (F*) counters and the new size takes its place in
+    the "current" (C*) ones.
+
+    Any exception raised while processing the record is written to
+    standard error and swallowed, so a single bad record does not abort
+    the scan and the message does not mix with the report on stdout.
+    """
+    oid = record.oid
+    report.OIDS += 1
+    if record.data is None:
+        # No pickle -- aborted version or undo of object creation.
+        return
+    try:
+        size = len(record.data) # Ignores various overhead
+        report.DBYTES += size
+        # Sizes are kept as strings when the maps are DBM-backed and as
+        # plain ints otherwise; int() below reads back either form.
+        if report.use_dbm:
+            stored_size = str(size)
+        else:
+            stored_size = size
+        if oid not in report.OIDMAP:
+            type_name = get_type(record)
+            report.OIDMAP[oid] = type_name
+            report.USEDMAP[oid] = stored_size
+            report.COIDS += 1
+            report.CBYTES += size
+            report.COIDSMAP[type_name] = report.COIDSMAP.get(type_name, 0) + 1
+            report.CBYTESMAP[type_name] = (
+                report.CBYTESMAP.get(type_name, 0) + size)
+        else:
+            type_name = report.OIDMAP[oid]
+            fsize = int(report.USEDMAP[oid])
+            report.USEDMAP[oid] = stored_size
+            report.FOIDS += 1
+            report.FBYTES += fsize
+            report.CBYTES += size - fsize
+            report.FOIDSMAP[type_name] = report.FOIDSMAP.get(type_name, 0) + 1
+            report.FBYTESMAP[type_name] = (
+                report.FBYTESMAP.get(type_name, 0) + fsize)
+            report.CBYTESMAP[type_name] = (
+                report.CBYTESMAP.get(type_name, 0) + size - fsize)
+        report.TYPEMAP[type_name] = report.TYPEMAP.get(type_name, 0) + 1
+        report.TYPESIZE[type_name] = report.TYPESIZE.get(type_name, 0) + size
+    except Exception:
+        # Best effort: note the problem and carry on with the next record.
+        # sys.exc_info() is used so the clause parses on old and new Pythons.
+        sys.stderr.write("%s\n" % (sys.exc_info()[1],))
+
+# Help text printed by usage(); the %(program)s placeholders are filled in
+# there with the base name of the running script.
+__doc__ = """%(program)s: Data.fs analyzer
+
+usage: %(program)s [options] /path/to/Data.fs
+
+Options:
+  -h, --help                 this help screen
+  -c, --csv                  output CSV
+  -d, --dbm                  use DBM as temporary storage to limit memory usage
+"""
+
+def usage(stream, msg=None):
+    """Write the help text to stream, preceded by msg when one is given.
+
+    stream -- file-like object the text is printed to.
+    msg    -- optional message shown first and followed by a blank line.
+    """
+    program_name = os.path.basename(sys.argv[0])
+    if msg:
+        # The embedded newline plus the one print adds give the blank line.
+        print >>stream, "%s\n" % (msg,)
+    help_text = __doc__ % {"program": program_name}
+    print >>stream, help_text
+
+
+def main():
+    """Command-line entry point: parse options, analyze, print the report.
+
+    Recognized options are -h/--help, -c/--csv and -d/--dbm, followed by
+    the path to the Data.fs file.  Help is honoured even when no path is
+    given.  An unknown option or a missing path prints the usage text to
+    standard error and exits with status 2.
+    """
+    try:
+        opts, args = getopt.getopt(sys.argv[1:],
+                                   'hcd', ['help', 'csv', 'dbm'])
+    except getopt.GetoptError:
+        # sys.exc_info() is used so the clause parses on old and new Pythons.
+        usage(sys.stderr, sys.exc_info()[1])
+        sys.exit(2)
+
+    csv = False
+    use_dbm = False
+    # The option values are unused (all options are flags); a separate name
+    # keeps the positional "args" list intact for the path lookup below.
+    for opt, _value in opts:
+        if opt in ('-h', '--help'):
+            usage(sys.stdout)
+            sys.exit()
+        if opt in ('-c', '--csv'):
+            csv = True
+        if opt in ('-d', '--dbm'):
+            use_dbm = True
+
+    # Checked only after the options so that a bare -h/--help still works.
+    if not args:
+        usage(sys.stderr, "missing path to Data.fs")
+        sys.exit(2)
+    path = args[0]
+    report(analyze(path, use_dbm), csv)
+
+# Run the command-line interface only when executed as a script, not when
+# the module is imported.
+if __name__ == "__main__":
+    main()