Commit 94a7da6f authored by Tim Peters

Many little bugfixes and improvements in stats.py.

This has survived several 100 MB of trace files
I generated over the last few days, so it's solid
now if not necessarily perfect.

Replaced simul.py with the much broader-ranging code
Jeremy and I were working on a couple years ago,
although it can't work with the current trace file
format (no real loss there -- the simul.py it's
replacing can't work with the current format either).
parent 2f0cf584
...@@ -24,7 +24,7 @@ application server) must be restarted. ...@@ -24,7 +24,7 @@ application server) must be restarted.
The trace file can grow pretty quickly; on a moderately loaded server, we The trace file can grow pretty quickly; on a moderately loaded server, we
observed it growing by 5 MB per hour. The file consists of binary records, observed it growing by 5 MB per hour. The file consists of binary records,
each 26 bytes long if 8-byte oids are in use; a detailed description of the each 34 bytes long if 8-byte oids are in use; a detailed description of the
record lay-out is given in stats.py. No sensitive data is logged: data record lay-out is given in stats.py. No sensitive data is logged: data
record sizes and binary object and transaction ids are logged, but no record sizes and binary object and transaction ids are logged, but no
information about object types or names, user names, version names, information about object types or names, user names, version names,
......
This diff is collapsed.
...@@ -26,7 +26,7 @@ Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile ...@@ -26,7 +26,7 @@ Usage: stats.py [-h] [-i interval] [-q] [-s] [-S] [-v] [-X] tracefile
"""File format: """File format:
Each record is 18 bytes, plus a variable number of bytes to store an oid, Each record is 26 bytes, plus a variable number of bytes to store an oid,
with the following layout. Numbers are big-endian integers. with the following layout. Numbers are big-endian integers.
Offset Size Contents Offset Size Contents
...@@ -35,8 +35,9 @@ Offset Size Contents ...@@ -35,8 +35,9 @@ Offset Size Contents
4 3 data size, in 256-byte increments, rounded up 4 3 data size, in 256-byte increments, rounded up
7 1 code (see below) 7 1 code (see below)
8 2 object id length 8 2 object id length
10 8 serial number 10 8 start tid
18 variable object id 18 8 end tid
26 variable object id
The code at offset 7 packs three fields: The code at offset 7 packs three fields:
...@@ -131,74 +132,66 @@ def main(): ...@@ -131,74 +132,66 @@ def main():
print >> sys.stderr, "can't open %s: %s" % (filename, msg) print >> sys.stderr, "can't open %s: %s" % (filename, msg)
return 1 return 1
# Read file, gathering statistics, and printing each record if verbose
rt0 = time.time() rt0 = time.time()
# bycode -- map code to count of occurrences bycode = {} # map code to count of occurrences
bycode = {} byinterval = {} # map code to count in current interval
# records -- number of records records = 0 # number of trace records read
records = 0 versions = 0 # number of trace records with versions
# version -- number of records with versions datarecords = 0 # number of records with dlen set
versions = 0 datasize = 0L # sum of dlen across records with dlen set
t0 = te = None oids = {} # map oid to number of times it was loaded
# datarecords -- number of records with dlen set bysize = {} # map data size to number of loads
datarecords = 0 bysizew = {} # map data size to number of writes
datasize = 0L
# oids -- maps oid to number of times it was loaded
oids = {}
# bysize -- maps data size to number of loads
bysize = {}
# bysize -- maps data size to number of writes
bysizew = {}
total_loads = 0 total_loads = 0
byinterval = {} t0 = None # first timestamp seen
thisinterval = None te = None # most recent timestamp seen
h0 = he = None h0 = None # timestamp at start of current interval
offset = 0 he = None # timestamp at end of current interval
thisinterval = None # generally te//interval
f_read = f.read f_read = f.read
struct_unpack = struct.unpack struct_unpack = struct.unpack
# Read file, gathering statistics, and printing each record if verbose.
try: try:
while 1: while 1:
r = f_read(8) r = f_read(8) # timestamp:4 code:4
if len(r) < 8: if len(r) < 8:
break break
offset += 8
ts, code = struct_unpack(">ii", r) ts, code = struct_unpack(">ii", r)
if ts == 0: if ts == 0:
# Must be a misaligned record caused by a crash # Must be a misaligned record caused by a crash.
if not quiet: if not quiet:
print "Skipping 8 bytes at offset", offset-8 print "Skipping 8 bytes at offset", f.tell() - 8
continue continue
r = f_read(18) r = f_read(18) # oidlen:2 starttid:8 endtid:8
if len(r) < 10: if len(r) < 18:
break break
offset += 10
records += 1
oidlen, start_tid, end_tid = struct_unpack(">H8s8s", r) oidlen, start_tid, end_tid = struct_unpack(">H8s8s", r)
oid = f_read(oidlen) oid = f_read(oidlen)
if len(oid) != oidlen: if len(oid) < oidlen:
break break
offset += oidlen records += 1
if t0 is None: if t0 is None:
t0 = ts t0 = ts
thisinterval = t0 / interval thisinterval = t0 // interval
h0 = he = ts h0 = he = ts
te = ts te = ts
if ts / interval != thisinterval: if ts // interval != thisinterval:
if not quiet: if not quiet:
dumpbyinterval(byinterval, h0, he) dumpbyinterval(byinterval, h0, he)
byinterval = {} byinterval = {}
thisinterval = ts / interval thisinterval = ts // interval
h0 = ts h0 = ts
he = ts he = ts
dlen, code = code & 0x7fffff00, code & 0xff dlen, code = code & 0x7fffff00, code & 0xff
if dlen: if dlen:
datarecords += 1 datarecords += 1
datasize += dlen datasize += dlen
version = '-'
if code & 0x80: if code & 0x80:
version = 'V' version = 'V'
versions += 1 versions += 1
code = code & 0x7e else:
version = '-'
code &= 0x7e
bycode[code] = bycode.get(code, 0) + 1 bycode[code] = bycode.get(code, 0) + 1
byinterval[code] = byinterval.get(code, 0) + 1 byinterval[code] = byinterval.get(code, 0) + 1
if dlen: if dlen:
...@@ -220,11 +213,11 @@ def main(): ...@@ -220,11 +213,11 @@ def main():
if code & 0x70 == 0x20: if code & 0x70 == 0x20:
oids[oid] = oids.get(oid, 0) + 1 oids[oid] = oids.get(oid, 0) + 1
total_loads += 1 total_loads += 1
if code == 0x00: if code == 0x00: # restart
if not quiet: if not quiet:
dumpbyinterval(byinterval, h0, he) dumpbyinterval(byinterval, h0, he)
byinterval = {} byinterval = {}
thisinterval = ts / interval thisinterval = ts // interval
h0 = he = ts h0 = he = ts
if not quiet: if not quiet:
print time.ctime(ts)[4:-5], print time.ctime(ts)[4:-5],
...@@ -232,6 +225,7 @@ def main(): ...@@ -232,6 +225,7 @@ def main():
except KeyboardInterrupt: except KeyboardInterrupt:
print "\nInterrupted. Stats so far:\n" print "\nInterrupted. Stats so far:\n"
end_pos = f.tell()
f.close() f.close()
rte = time.time() rte = time.time()
if not quiet: if not quiet:
...@@ -245,8 +239,8 @@ def main(): ...@@ -245,8 +239,8 @@ def main():
# Print statistics # Print statistics
if dostats: if dostats:
print print
print "Read %s records (%s bytes) in %.1f seconds" % ( print "Read %s trace records (%s bytes) in %.1f seconds" % (
addcommas(records), addcommas(records*24), rte-rt0) addcommas(records), addcommas(end_pos), rte-rt0)
print "Versions: %s records used a version" % addcommas(versions) print "Versions: %s records used a version" % addcommas(versions)
print "First time: %s" % time.ctime(t0) print "First time: %s" % time.ctime(t0)
print "Last time: %s" % time.ctime(te) print "Last time: %s" % time.ctime(te)
...@@ -309,9 +303,8 @@ def dumpbysize(bysize, how, how2): ...@@ -309,9 +303,8 @@ def dumpbysize(bysize, how, how2):
loads) loads)
def dumpbyinterval(byinterval, h0, he): def dumpbyinterval(byinterval, h0, he):
loads = 0 loads = hits = 0
hits = 0 for code in byinterval:
for code in byinterval.keys():
if code & 0x70 == 0x20: if code & 0x70 == 0x20:
n = byinterval[code] n = byinterval[code]
loads += n loads += n
...@@ -328,8 +321,7 @@ def dumpbyinterval(byinterval, h0, he): ...@@ -328,8 +321,7 @@ def dumpbyinterval(byinterval, h0, he):
addcommas(loads), addcommas(hits), hr) addcommas(loads), addcommas(hits), hr)
def hitrate(bycode): def hitrate(bycode):
loads = 0 loads = hits = 0
hits = 0
for code in bycode: for code in bycode:
if code & 0x70 == 0x20: if code & 0x70 == 0x20:
n = bycode[code] n = bycode[code]
...@@ -389,7 +381,6 @@ explain = { ...@@ -389,7 +381,6 @@ explain = {
0x50: "store (version)", 0x50: "store (version)",
0x52: "store (current, non-version)", 0x52: "store (current, non-version)",
0x54: "store (non-current)", 0x54: "store (non-current)",
} }
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment