Commit a5ad6b94 authored by Jim Fulton

Made a small improvement to the client cache that makes it slightly

MRUish.

In some experiments, this led to significant reductions in cache miss
rates.
parent 83769d62
...@@ -152,7 +152,7 @@ class ClientCache(object): ...@@ -152,7 +152,7 @@ class ClientCache(object):
# default of 20MB. The default here is misleading, though, since # default of 20MB. The default here is misleading, though, since
# ClientStorage is the only user of ClientCache, and it always passes an # ClientStorage is the only user of ClientCache, and it always passes an
# explicit size of its own choosing. # explicit size of its own choosing.
def __init__(self, path=None, size=200*1024**2): def __init__(self, path=None, size=200*1024**2, rearrange=.8):
# - `path`: filepath for the cache file, or None (in which case # - `path`: filepath for the cache file, or None (in which case
# a temp file will be created) # a temp file will be created)
...@@ -163,6 +163,11 @@ class ClientCache(object): ...@@ -163,6 +163,11 @@ class ClientCache(object):
size = max(size, ZEC_HEADER_SIZE) size = max(size, ZEC_HEADER_SIZE)
self.maxsize = size self.maxsize = size
# rearrange: if we read a current record and it's more than
# rearrange*size from the end, then copy it forward to keep it
# from being evicted.
self.rearrange = rearrange * size
# The number of records in the cache. # The number of records in the cache.
self._len = 0 self._len = 0
...@@ -497,6 +502,7 @@ class ClientCache(object): ...@@ -497,6 +502,7 @@ class ClientCache(object):
size, saved_oid, tid, end_tid, lver, ldata = unpack( size, saved_oid, tid, end_tid, lver, ldata = unpack(
">I8s8s8sHI", read(34)) ">I8s8s8sHI", read(34))
assert saved_oid == oid, (ofs, self.f.tell(), oid, saved_oid) assert saved_oid == oid, (ofs, self.f.tell(), oid, saved_oid)
assert end_tid == z64, (ofs, self.f.tell(), oid, tid, end_tid)
assert lver == 0, "Versions aren't supported" assert lver == 0, "Versions aren't supported"
data = read(ldata) data = read(ldata)
...@@ -508,6 +514,25 @@ class ClientCache(object): ...@@ -508,6 +514,25 @@ class ClientCache(object):
self._n_accesses += 1 self._n_accesses += 1
self._trace(0x22, oid, tid, end_tid, ldata) self._trace(0x22, oid, tid, end_tid, ldata)
ofsofs = self.currentofs - ofs
if ofsofs < 0:
ofsofs += self.maxsize
if (ofsofs > self.rearrange and
self.maxsize > 10*len(data) and
size > 4):
# The record is far back and might get evicted, but it's
# valuable, so move it forward.
# Remove from old loc:
del self.current[oid]
self.f.seek(ofs)
self.f.write('f'+pack(">I", size))
# Write to new location:
self._store(oid, tid, None, data, size)
return data, tid return data, tid
## ##
...@@ -599,6 +624,16 @@ class ClientCache(object): ...@@ -599,6 +624,16 @@ class ClientCache(object):
self._n_added_bytes += size self._n_added_bytes += size
self._len += 1 self._len += 1
self._store(oid, start_tid, end_tid, data, size)
if end_tid:
self._trace(0x54, oid, start_tid, end_tid, dlen=len(data))
else:
self._trace(0x52, oid, start_tid, dlen=len(data))
def _store(self, oid, start_tid, end_tid, data, size):
# Low-level store used by store and load
# In the next line, we ask for an extra to make sure we always # In the next line, we ask for an extra to make sure we always
# have a free block after the new alocated block. This free # have a free block after the new alocated block. This free
# block acts as a ring pointer, so that on restart, we start # block acts as a ring pointer, so that on restart, we start
...@@ -618,6 +653,7 @@ class ClientCache(object): ...@@ -618,6 +653,7 @@ class ClientCache(object):
extra = 'f' + pack(">I", excess) extra = 'f' + pack(">I", excess)
ofs = self.currentofs ofs = self.currentofs
seek = self.f.seek
seek(ofs) seek(ofs)
write = self.f.write write = self.f.write
...@@ -639,13 +675,12 @@ class ClientCache(object): ...@@ -639,13 +675,12 @@ class ClientCache(object):
if end_tid: if end_tid:
self._set_noncurrent(oid, start_tid, ofs) self._set_noncurrent(oid, start_tid, ofs)
self._trace(0x54, oid, start_tid, end_tid, dlen=len(data))
else: else:
self.current[oid] = ofs self.current[oid] = ofs
self._trace(0x52, oid, start_tid, dlen=len(data))
self.currentofs += size self.currentofs += size
## ##
# If `tid` is None, # If `tid` is None,
# forget all knowledge of `oid`. (`tid` can be None only for # forget all knowledge of `oid`. (`tid` can be None only for
...@@ -660,7 +695,6 @@ class ClientCache(object): ...@@ -660,7 +695,6 @@ class ClientCache(object):
# - oid object id # - oid object id
# - tid the id of the transaction that wrote a new revision of oid, # - tid the id of the transaction that wrote a new revision of oid,
# or None to forget all cached info about oid. # or None to forget all cached info about oid.
@locked @locked
def invalidate(self, oid, tid): def invalidate(self, oid, tid):
ofs = self.current.get(oid) ofs = self.current.get(oid)
......
...@@ -19,6 +19,7 @@ Usage: simul.py [-s size] tracefile ...@@ -19,6 +19,7 @@ Usage: simul.py [-s size] tracefile
Options: Options:
-s size: cache size in MB (default 20 MB) -s size: cache size in MB (default 20 MB)
-i: summarizing interval in minutes (default 15; max 60) -i: summarizing interval in minutes (default 15; max 60)
-r: rearrange factor
Note: Note:
...@@ -52,18 +53,22 @@ def main(args=None): ...@@ -52,18 +53,22 @@ def main(args=None):
# Parse options. # Parse options.
MB = 1<<20 MB = 1<<20
cachelimit = 20*MB cachelimit = 20*MB
rearrange = 0.8
simclass = CircularCacheSimulation simclass = CircularCacheSimulation
interval_step = 15 interval_step = 15
try: try:
opts, args = getopt.getopt(args, "s:i:") opts, args = getopt.getopt(args, "s:i:r:")
except getopt.error, msg: except getopt.error, msg:
usage(msg) usage(msg)
return 2 return 2
for o, a in opts: for o, a in opts:
if o == '-s': if o == '-s':
cachelimit = int(float(a)*MB) cachelimit = int(float(a)*MB)
elif o == '-i': elif o == '-i':
interval_step = int(a) interval_step = int(a)
elif o == '-r':
rearrange = float(a)
else: else:
assert False, (o, a) assert False, (o, a)
...@@ -103,8 +108,8 @@ def main(args=None): ...@@ -103,8 +108,8 @@ def main(args=None):
return 1 return 1
# Create simulation object. # Create simulation object.
sim = simclass(cachelimit) sim = simclass(cachelimit, rearrange)
interval_sim = simclass(cachelimit) interval_sim = simclass(cachelimit, rearrange)
# Print output header. # Print output header.
sim.printheader() sim.printheader()
...@@ -141,6 +146,8 @@ def main(args=None): ...@@ -141,6 +146,8 @@ def main(args=None):
if last_interval is not None: if last_interval is not None:
interval_sim.report() interval_sim.report()
interval_sim.restart() interval_sim.restart()
if not interval_sim.warm:
sim.restart()
last_interval = this_interval last_interval = this_interval
sim.event(ts, dlen, version, code, oid, start_tid, end_tid) sim.event(ts, dlen, version, code, oid, start_tid, end_tid)
interval_sim.event(ts, dlen, version, code, oid, start_tid, end_tid) interval_sim.event(ts, dlen, version, code, oid, start_tid, end_tid)
...@@ -160,10 +167,12 @@ class Simulation(object): ...@@ -160,10 +167,12 @@ class Simulation(object):
finish() method also calls report(). finish() method also calls report().
""" """
def __init__(self, cachelimit): def __init__(self, cachelimit, rearrange):
self.cachelimit = cachelimit self.cachelimit = cachelimit
self.rearrange = rearrange
# Initialize global statistics. # Initialize global statistics.
self.epoch = None self.epoch = None
self.warm = False
self.total_loads = 0 self.total_loads = 0
self.total_hits = 0 # subclass must increment self.total_hits = 0 # subclass must increment
self.total_invals = 0 # subclass must increment self.total_invals = 0 # subclass must increment
...@@ -284,20 +293,19 @@ class Simulation(object): ...@@ -284,20 +293,19 @@ class Simulation(object):
# For use in CircularCacheSimulation. # For use in CircularCacheSimulation.
class CircularCacheEntry(object): class CircularCacheEntry(object):
__slots__ = (# object key: an (oid, start_tid) pair, where __slots__ = (
# start_tid is the tid of the transaction that created # object key: an (oid, start_tid) pair, where start_tid is the
# this revision of oid # tid of the transaction that created this revision of oid
'key', 'key',
# tid of transaction that created the next revision; # tid of transaction that created the next revision; z64 iff
# z64 iff this is the current revision # this is the current revision
'end_tid', 'end_tid',
# Offset from start of file to the object's data # Offset from start of file to the object's data record; this
# record; this includes all overhead bytes (status # includes all overhead bytes (status byte, size bytes, etc).
# byte, size bytes, etc). 'offset',
'offset', )
)
def __init__(self, key, end_tid, offset): def __init__(self, key, end_tid, offset):
self.key = key self.key = key
...@@ -316,10 +324,12 @@ class CircularCacheSimulation(Simulation): ...@@ -316,10 +324,12 @@ class CircularCacheSimulation(Simulation):
extras = "evicts", "inuse" extras = "evicts", "inuse"
def __init__(self, cachelimit): evicts = 0
def __init__(self, cachelimit, rearrange):
from ZEO import cache from ZEO import cache
Simulation.__init__(self, cachelimit) Simulation.__init__(self, cachelimit, rearrange)
self.total_evicts = 0 # number of cache evictions self.total_evicts = 0 # number of cache evictions
# Current offset in file. # Current offset in file.
...@@ -348,6 +358,8 @@ class CircularCacheSimulation(Simulation): ...@@ -348,6 +358,8 @@ class CircularCacheSimulation(Simulation):
def restart(self): def restart(self):
Simulation.restart(self) Simulation.restart(self)
if self.evicts:
self.warm = True
self.evicts = 0 self.evicts = 0
self.evicted_hit = self.evicted_miss = 0 self.evicted_hit = self.evicted_miss = 0
...@@ -358,6 +370,20 @@ class CircularCacheSimulation(Simulation): ...@@ -358,6 +370,20 @@ class CircularCacheSimulation(Simulation):
if oid in self.current: # else it's a cache miss if oid in self.current: # else it's a cache miss
self.hits += 1 self.hits += 1
self.total_hits += 1 self.total_hits += 1
tid = self.current[oid]
entry = self.key2entry[(oid, tid)]
offset_offset = self.offset - entry.offset
if offset_offset < 0:
offset_offset += self.cachelimit
assert offset_offset >= 0
if offset_offset > self.rearrange * self.cachelimit:
# we haven't accessed it in a while. Move it forward
size = self.filemap[entry.offset][0]
self._remove(*entry.key)
self.add(oid, size, tid)
elif oid in self.evicted: elif oid in self.evicted:
size, e = self.evicted[oid] size, e = self.evicted[oid]
self.write(oid, size, e.key[1], z64, 1) self.write(oid, size, e.key[1], z64, 1)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment