Commit 15bb2e12 authored by Tim Peters

More steps on the way toward having useful cache simulation.

I think the primary hangup now is that simul.py doesn't know
anything about MVCC.  As a result, it thinks there's significantly
more free space in the cache than there really is, and that
probably accounts for it predicting significantly higher hit
rates than I actually see.
parent 7d19c28b
...@@ -174,6 +174,7 @@ class ClientCache(object): ...@@ -174,6 +174,7 @@ class ClientCache(object):
if version: if version:
p = self.version.get(oid) p = self.version.get(oid)
if p is None: if p is None:
self._trace(0x20, oid, version)
return None return None
elif p[0] == version: elif p[0] == version:
tid = p[1] tid = p[1]
...@@ -187,6 +188,7 @@ class ClientCache(object): ...@@ -187,6 +188,7 @@ class ClientCache(object):
return None return None
o = self.fc.access((oid, tid)) o = self.fc.access((oid, tid))
if o is None: if o is None:
self._trace(0x20, oid, version)
return None return None
self._trace(0x22, oid, version, o.start_tid, o.end_tid, len(o.data)) self._trace(0x22, oid, version, o.start_tid, o.end_tid, len(o.data))
return o.data, tid, o.version return o.data, tid, o.version
...@@ -389,7 +391,6 @@ class ClientCache(object): ...@@ -389,7 +391,6 @@ class ClientCache(object):
## ##
# Generates (oid, serial, version) triples for all objects in the # Generates (oid, serial, version) triples for all objects in the
# cache. This generator is used by cache verification. # cache. This generator is used by cache verification.
def contents(self): def contents(self):
# May need to materialize list instead of iterating; # May need to materialize list instead of iterating;
# depends on whether the caller may change the cache. # depends on whether the caller may change the cache.
...@@ -461,8 +462,6 @@ class ClientCache(object): ...@@ -461,8 +462,6 @@ class ClientCache(object):
time_time=time.time, struct_pack=struct.pack): time_time=time.time, struct_pack=struct.pack):
# The code argument is two hex digits; bits 0 and 7 must be zero. # The code argument is two hex digits; bits 0 and 7 must be zero.
# The first hex digit shows the operation, the second the outcome. # The first hex digit shows the operation, the second the outcome.
# If the second digit is in "02468" then it is a 'miss'.
# If it is in "ACE" then it is a 'hit'.
# This method has been carefully tuned to be as fast as possible. # This method has been carefully tuned to be as fast as possible.
# Note: when tracing is disabled, this method is hidden by a dummy. # Note: when tracing is disabled, this method is hidden by a dummy.
if version: if version:
......
...@@ -120,16 +120,19 @@ def main(): ...@@ -120,16 +120,19 @@ def main():
# Read trace file, simulating cache behavior. # Read trace file, simulating cache behavior.
f_read = f.read f_read = f.read
unpack = struct.unpack unpack = struct.unpack
FMT = ">iiH8s8s"
FMT_SIZE = struct.calcsize(FMT)
assert FMT_SIZE == 26
while 1: while 1:
# Read a record and decode it. # Read a record and decode it.
r = f_read(26) r = f_read(FMT_SIZE)
if len(r) < 26: if len(r) < FMT_SIZE:
break break
ts, code, oidlen, start_tid, end_tid = unpack(">iiH8s8s", r) ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
if ts == 0: if ts == 0:
# Must be a misaligned record caused by a crash; skip 8 bytes # Must be a misaligned record caused by a crash; skip 8 bytes
# and try again. Why 8? Lost in the mist of history. # and try again. Why 8? Lost in the mist of history.
f.seek(f.tell() - 18) f.seek(f.tell() - FMT_SIZE + 8)
continue continue
oid = f_read(oidlen) oid = f_read(oidlen)
if len(oid) < oidlen: if len(oid) < oidlen:
...@@ -149,8 +152,7 @@ def main(): ...@@ -149,8 +152,7 @@ def main():
# Exit code from main(). # Exit code from main().
return 0 return 0
class Simulation: class Simulation(object):
"""Base class for simulations. """Base class for simulations.
The driver program calls: event(), printheader(), finish(). The driver program calls: event(), printheader(), finish().
...@@ -163,7 +165,7 @@ class Simulation: ...@@ -163,7 +165,7 @@ class Simulation:
def __init__(self, cachelimit): def __init__(self, cachelimit):
self.cachelimit = cachelimit self.cachelimit = cachelimit
# Initialize global statistics # Initialize global statistics.
self.epoch = None self.epoch = None
self.total_loads = 0 self.total_loads = 0
self.total_hits = 0 # subclass must increment self.total_hits = 0 # subclass must increment
...@@ -172,11 +174,11 @@ class Simulation: ...@@ -172,11 +174,11 @@ class Simulation:
if not hasattr(self, "extras"): if not hasattr(self, "extras"):
self.extras = (self.extraname,) self.extras = (self.extraname,)
self.format = self.format + " %7s" * len(self.extras) self.format = self.format + " %7s" * len(self.extras)
# Reset per-run statistics and set up simulation data # Reset per-run statistics and set up simulation data.
self.restart() self.restart()
def restart(self): def restart(self):
# Reset per-run statistics # Reset per-run statistics.
self.loads = 0 self.loads = 0
self.hits = 0 # subclass must increment self.hits = 0 # subclass must increment
self.invals = 0 # subclass must increment self.invals = 0 # subclass must increment
...@@ -191,21 +193,22 @@ class Simulation: ...@@ -191,21 +193,22 @@ class Simulation:
self.epoch = ts self.epoch = ts
self.ts1 = ts self.ts1 = ts
# Simulate cache behavior. # Simulate cache behavior. Caution: the codes in the trace file
# record whether the actual cache missed or hit on each load, but
# that bears no relationship to whether the simulated cache will
# hit or miss.
action = code & 0x70 # ignore high bit (version flag) action = code & 0x70 # ignore high bit (version flag)
if dlen: if action == 0x20:
if action == 0x20: # Load.
# Load. self.loads += 1
self.loads += 1 self.total_loads += 1
self.total_loads += 1 assert (dlen == 0) == (code in (0x20, 0x24))
if dlen:
self.load(oid, dlen) self.load(oid, dlen)
elif action == 0x50: elif action == 0x50:
# Store. # Store.
self.writes += 1 assert dlen
self.total_writes += 1 self.write(oid, dlen)
self.write(oid, dlen)
else:
assert False, (hex(code), dlen)
elif action == 0x10: elif action == 0x10:
# Invalidate. # Invalidate.
self.inval(oid) self.inval(oid)
...@@ -213,6 +216,8 @@ class Simulation: ...@@ -213,6 +216,8 @@ class Simulation:
# Restart. # Restart.
self.report() self.report()
self.restart() self.restart()
else:
raise ValueError("unknown trace code 0x%x" % code)
def write(self, oid, size): def write(self, oid, size):
pass pass
...@@ -966,6 +971,9 @@ class OracleSimulation(LRUCacheSimulation): ...@@ -966,6 +971,9 @@ class OracleSimulation(LRUCacheSimulation):
print "Scanned file, %d unique oids, %d repeats" % ( print "Scanned file, %d unique oids, %d repeats" % (
all, len(self.count)) all, len(self.count))
from ZEO.cache import ZEC3_HEADER_SIZE
class CircularCacheSimulation(Simulation): class CircularCacheSimulation(Simulation):
# The cache is managed as a single file with a pointer that # The cache is managed as a single file with a pointer that
# goes around the file, circularly, forever. New objects # goes around the file, circularly, forever. New objects
...@@ -975,15 +983,21 @@ class CircularCacheSimulation(Simulation): ...@@ -975,15 +983,21 @@ class CircularCacheSimulation(Simulation):
extras = "evicts", "inuse" extras = "evicts", "inuse"
def __init__(self, cachelimit): def __init__(self, cachelimit):
from ZEO import cache
Simulation.__init__(self, cachelimit) Simulation.__init__(self, cachelimit)
self.total_evicts = 0 self.total_evicts = 0
# Current offset in file. # Current offset in file.
self.offset = 0 self.offset = ZEC3_HEADER_SIZE
# Map offset in file to (size, oid) pair. # Map offset in file to (size, oid) pair.
self.filemap = {0: (self.cachelimit, None)} self.filemap = {ZEC3_HEADER_SIZE: (self.cachelimit - ZEC3_HEADER_SIZE,
None)}
# Map oid to file offset. # Map oid to file offset.
self.oid2ofs = {} self.oid2ofs = {}
self.overhead = (cache.Object.TOTAL_FIXED_SIZE +
cache.OBJECT_HEADER_SIZE)
def restart(self): def restart(self):
Simulation.restart(self) Simulation.restart(self)
self.evicts = 0 self.evicts = 0
...@@ -992,10 +1006,30 @@ class CircularCacheSimulation(Simulation): ...@@ -992,10 +1006,30 @@ class CircularCacheSimulation(Simulation):
if oid in self.oid2ofs: if oid in self.oid2ofs:
self.hits += 1 self.hits += 1
self.total_hits += 1 self.total_hits += 1
else: elif size:
self.writes += 1
self.total_writes += 1
self.add(oid, size)
# Else it was a load miss in the trace file, and a load miss here too.
def inval(self, oid):
pos = self.oid2ofs.pop(oid, None)
if pos is None:
return
self.invals += 1
self.total_invals += 1
size, _oid = self.filemap[pos]
assert oid == _oid
self.filemap[pos] = size, None
def write(self, oid, size):
if oid not in self.oid2ofs:
self.writes += 1
self.total_writes += 1
self.add(oid, size) self.add(oid, size)
def add(self, oid, size): def add(self, oid, size):
size += self.overhead
avail = self.makeroom(size) avail = self.makeroom(size)
assert oid not in self.oid2ofs assert oid not in self.oid2ofs
self.filemap[self.offset] = size, oid self.filemap[self.offset] = size, oid
...@@ -1009,7 +1043,7 @@ class CircularCacheSimulation(Simulation): ...@@ -1009,7 +1043,7 @@ class CircularCacheSimulation(Simulation):
def makeroom(self, need): def makeroom(self, need):
# Evict enough objects to make the necessary space available. # Evict enough objects to make the necessary space available.
if self.offset + need > self.cachelimit: if self.offset + need > self.cachelimit:
self.offset = 0 self.offset = ZEC3_HEADER_SIZE
pos = self.offset pos = self.offset
while need > 0: while need > 0:
assert pos < self.cachelimit assert pos < self.cachelimit
...@@ -1028,25 +1062,6 @@ class CircularCacheSimulation(Simulation): ...@@ -1028,25 +1062,6 @@ class CircularCacheSimulation(Simulation):
pos += size pos += size
return pos - self.offset # total number of bytes freed return pos - self.offset # total number of bytes freed
def inval(self, oid):
pos = self.oid2ofs.pop(oid, None)
if pos is None:
return
self.invals += 1
self.total_invals += 1
size, _oid = self.filemap[pos]
assert oid == _oid
self.filemap[pos] = size, None
def write(self, oid, size):
if oid in self.oid2ofs:
# Delete the current record.
pos = self.oid2ofs.pop(oid)
size, _oid = self.filemap[pos]
assert oid == _oid
self.filemap[pos] = size, None
self.add(oid, size)
def report(self): def report(self):
self.check() self.check()
free = used = total = 0 free = used = total = 0
...@@ -1062,7 +1077,8 @@ class CircularCacheSimulation(Simulation): ...@@ -1062,7 +1077,8 @@ class CircularCacheSimulation(Simulation):
Simulation.report(self) Simulation.report(self)
def check(self): def check(self):
pos = oidcount = 0 oidcount = 0
pos = ZEC3_HEADER_SIZE
while pos < self.cachelimit: while pos < self.cachelimit:
size, oid = self.filemap[pos] size, oid = self.filemap[pos]
if oid: if oid:
......
...@@ -150,18 +150,21 @@ def main(): ...@@ -150,18 +150,21 @@ def main():
thisinterval = None # generally te//interval thisinterval = None # generally te//interval
f_read = f.read f_read = f.read
unpack = struct.unpack unpack = struct.unpack
FMT = ">iiH8s8s"
FMT_SIZE = struct.calcsize(FMT)
assert FMT_SIZE == 26
# Read file, gathering statistics, and printing each record if verbose. # Read file, gathering statistics, and printing each record if verbose.
try: try:
while 1: while 1:
r = f_read(26) r = f_read(FMT_SIZE)
if len(r) < 26: if len(r) < FMT_SIZE:
break break
ts, code, oidlen, start_tid, end_tid = unpack(">iiH8s8s", r) ts, code, oidlen, start_tid, end_tid = unpack(FMT, r)
if ts == 0: if ts == 0:
# Must be a misaligned record caused by a crash. # Must be a misaligned record caused by a crash.
if not quiet: if not quiet:
print "Skipping 8 bytes at offset", f.tell() - 26 print "Skipping 8 bytes at offset", f.tell() - FMT_SIZE
f.seek(f.tell() - 18) f.seek(f.tell() - FMT_SIZE + 8)
continue continue
oid = f_read(oidlen) oid = f_read(oidlen)
if len(oid) < oidlen: if len(oid) < oidlen:
...@@ -210,7 +213,7 @@ def main(): ...@@ -210,7 +213,7 @@ def main():
if code & 0x70 == 0x20: if code & 0x70 == 0x20:
oids[oid] = oids.get(oid, 0) + 1 oids[oid] = oids.get(oid, 0) + 1
total_loads += 1 total_loads += 1
if code == 0x00: # restart elif code == 0x00: # restart
if not quiet: if not quiet:
dumpbyinterval(byinterval, h0, he) dumpbyinterval(byinterval, h0, he)
byinterval = {} byinterval = {}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment