Many little bugfixes and improvements in stats.py.

This has survived several hundred MB of trace files I generated over the last few days, so it's solid now, if not necessarily perfect. Replaced simul.py with the much broader-ranging code Jeremy and I were working on a couple of years ago, although it can't work with the current trace file format (no real loss there -- the simul.py it's replacing can't work with the current format either).

Many little bugfixes and improvements in stats.py.
This has survived several hundred MB of trace files I generated over the last few days, so it's solid now, if not necessarily perfect. Replaced simul.py with the much broader-ranging code Jeremy and I were working on a couple of years ago, although it can't work with the current trace file format (no real loss there -- the simul.py it's replacing can't work with the current format either).
94a7da6f · Tim Peters · 2f0cf584 · 94a7da6f · 94a7da6f · 94a7da6f
Commit 94a7da6f authored Jul 22, 2005 by Tim Peters
Expand all Show whitespace changes
Inline Side-by-side

Showing 3 changed files with 1037 additions and 86 deletions

doc/ZEO/trace.txt doc/ZEO/trace.txt +1 -1

src/ZEO/simul.py src/ZEO/simul.py +996 -36

src/ZEO/stats.py src/ZEO/stats.py +40 -49

No files found.
--- a/doc/ZEO/trace.txt
+++ b/doc/ZEO/trace.txt
@@ -24,7 +24,7 @@ application server) must be restarted.
 The trace file can grow pretty quickly; on a moderately loaded server, we
 observed it growing by 5 MB per hour.  The file consists of binary records,
-each 26 bytes long if 8-byte oids are in use; a detailed description of the
+each 34 bytes long if 8-byte oids are in use; a detailed description of the
 record lay-out is given in stats.py.  No sensitive data is logged:  data
 record sizes and binary object and transaction ids are logged, but no
 information about object types or names, user names, version names,

--- a/src/ZEO/simul.py
+++ b/src/ZEO/simul.py
--- a/src/ZEO/stats.py
+++ b/src/ZEO/stats.py
@@ -26,7 +26,7 @@ Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
 """File format:
-Each record is 18 bytes, plus a variable number of bytes to store an oid,
+Each record is 26 bytes, plus a variable number of bytes to store an oid,
 with the following layout.  Numbers are big-endian integers.
 Offset  Size  Contents
@@ -35,8 +35,9 @@ Offset  Size  Contents
 4       3     data size, in 256-byte increments, rounded up
 7       1     code (see below)
 8       2     object id length
-10      8     serial number
+10      8     start tid
-18  variable  object id
+18      8     end tid
+26  variable  object id
 The code at offset 7 packs three fields:
@@ -131,74 +132,66 @@ def main():
            print >> sys.stderr, "can't open %s: %s" % (filename, msg)
            return 1
-    # Read file, gathering statistics, and printing each record if verbose
    rt0 = time.time()
-    # bycode -- map code to count of occurrences
+    bycode = {}     # map code to count of occurrences
-    bycode = {}
+    byinterval = {} # map code to count in current interval
-    # records -- number of records
+    records = 0     # number of trace records read
-    records = 0
+    versions = 0    # number of trace records with versions
-    # version -- number of records with versions
+    datarecords = 0 # number of records with dlen set
-    versions = 0
+    datasize = 0L   # sum of dlen across records with dlen set
-    t0 = te = None
+    oids = {}       # map oid to number of times it was loaded
-    # datarecords -- number of records with dlen set
+    bysize = {}     # map data size to number of loads
-    datarecords = 0
+    bysizew = {}    # map data size to number of writes
-    datasize = 0L
-    # oids -- maps oid to number of times it was loaded
-    oids = {}
-    # bysize -- maps data size to number of loads
-    bysize = {}
-    # bysize -- maps data size to number of writes
-    bysizew = {}
    total_loads = 0
-    byinterval = {}
+    t0 = None       # first timestamp seen
-    thisinterval = None
+    te = None       # most recent timestamp seen
-    h0 = he = None
+    h0 = None       # timestamp at start of current interval
-    offset = 0
+    he = None       # timestamp at end of current interval
+    thisinterval = None  # generally te//interval
    f_read = f.read
    struct_unpack = struct.unpack
+    # Read file, gathering statistics, and printing each record if verbose.
    try:
        while 1:
-            r = f_read(8)
+            r = f_read(8) # timestamp:4 code:4
            if len(r) < 8:
                break
-            offset += 8
            ts, code = struct_unpack(">ii", r)
            if ts == 0:
-                # Must be a misaligned record caused by a crash
+                # Must be a misaligned record caused by a crash.
                if not quiet:
-                    print "Skipping 8 bytes at offset", offset-8
+                    print "Skipping 8 bytes at offset", f.tell() - 8
                continue
-            r = f_read(18)
+            r = f_read(18) # oidlen:2 starttid:8 endtid:8
-            if len(r) < 10:
+            if len(r) < 18:
                break
-            offset += 10
-            records += 1
            oidlen, start_tid, end_tid = struct_unpack(">H8s8s", r)
            oid = f_read(oidlen)
-            if len(oid) != oidlen:
+            if len(oid) < oidlen:
                break
-            offset += oidlen
+            records += 1
            if t0 is None:
                t0 = ts
-                thisinterval = t0 / interval
+                thisinterval = t0 // interval
                h0 = he = ts
            te = ts
-            if ts / interval != thisinterval:
+            if ts // interval != thisinterval:
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
-                thisinterval = ts / interval
+                thisinterval = ts // interval
                h0 = ts
            he = ts
            dlen, code = code & 0x7fffff00, code & 0xff
            if dlen:
                datarecords += 1
                datasize += dlen
-            version = '-'
            if code & 0x80:
                version = 'V'
                versions += 1
-            code = code & 0x7e
+            else:
+                version = '-'
+            code &= 0x7e
            bycode[code] = bycode.get(code, 0) + 1
            byinterval[code] = byinterval.get(code, 0) + 1
            if dlen:
@@ -220,11 +213,11 @@ def main():
            if code & 0x70 == 0x20:
                oids[oid] = oids.get(oid, 0) + 1
                total_loads += 1
-            if code == 0x00:
+            if code == 0x00:    # restart
                if not quiet:
                    dumpbyinterval(byinterval, h0, he)
                byinterval = {}
-                thisinterval = ts / interval
+                thisinterval = ts // interval
                h0 = he = ts
                if not quiet:
                    print time.ctime(ts)[4:-5],
@@ -232,6 +225,7 @@ def main():
    except KeyboardInterrupt:
        print "\nInterrupted.  Stats so far:\n"
+    end_pos = f.tell()
    f.close()
    rte = time.time()
    if not quiet:
@@ -245,8 +239,8 @@ def main():
    # Print statistics
    if dostats:
        print
-        print "Read %s records (%s bytes) in %.1f seconds" % (
+        print "Read %s trace records (%s bytes) in %.1f seconds" % (
-            addcommas(records), addcommas(records*24), rte-rt0)
+            addcommas(records), addcommas(end_pos), rte-rt0)
        print "Versions:   %s records used a version" % addcommas(versions)
        print "First time: %s" % time.ctime(t0)
        print "Last time:  %s" % time.ctime(te)
@@ -309,9 +303,8 @@ def dumpbysize(bysize, how, how2):
                                loads)
 def dumpbyinterval(byinterval, h0, he):
-    loads = 0
+    loads = hits = 0
-    hits = 0
+    for code in byinterval:
-    for code in byinterval.keys():
        if code & 0x70 == 0x20:
            n = byinterval[code]
            loads += n
@@ -328,8 +321,7 @@ def dumpbyinterval(byinterval, h0, he):
        addcommas(loads), addcommas(hits), hr)
 def hitrate(bycode):
-    loads = 0
+    loads = hits = 0
-    hits = 0
    for code in bycode:
        if code & 0x70 == 0x20:
            n = bycode[code]
@@ -389,7 +381,6 @@ explain = {
    0x50: "store (version)",
    0x52: "store (current, non-version)",
    0x54: "store (non-current)",
    }
 if __name__ == "__main__":