Commit 128c3f55 authored by Tim Peters's avatar Tim Peters

New tool fsoids.py. Whew! This is the first tool with an

automated test.  There were so many oddities getting that test
to pass, and so much consequent fiddling of other code, that I
don't intend to merge this to the 3.3 branch.  Also moved to
current Python CVS doctest.py, because the test really needs
the +ELLIPSIS option (tids vary across runs).
parent cc88d72d
......@@ -5,6 +5,20 @@ Release date: DD-MMM-YYYY
Tools
-----
New tool fsoids.py, for heavy debugging of FileStorages; shows all
uses of specified oids in the entire database (e.g., suppose oid 0x345620
is missing -- did it ever exist? if so, when? who referenced it? when
was the last transaction that modified an object that referenced it?
which objects did it reference? what kind of object was it?).
What's new in ZODB3 3.3 ?
=========================
Release date: DD-MMM-YYYY
Tools
-----
FileStorage.FileIterator was confused about how to read a transaction's
user and description fields, which caused several tools to display
binary gibberish for these values.
......@@ -34,6 +48,15 @@ analyze.py produced spurious "len of unsized object" messages when
finding a data record for an object uncreation or version abort. These
no longer appear.
fsdump.py's get_pickle_metadata() function (which is used by several
tools) was confused about what to do when the ZODB pickle started with
a pickle GLOBAL opcode. It actually loaded the class then, which it
intends never to do, leading to stray messages on stdout when the class
wasn't available, and leading to a strange return value even when it was
available (the repr of the type object was returned as "the module name",
and an empty string was returned as "the class name"). This has been
repaired.
What's new in ZODB3 3.3 beta 2
==============================
......
script scripts/fsdump.py
script scripts/fsoids.py
script scripts/fsrefs.py
script scripts/fstail.py
script scripts/fstest.py
......
......@@ -141,6 +141,7 @@ packages = ["BTrees", "BTrees.tests",
]
scripts = ["src/scripts/fsdump.py",
"src/scripts/fsoids.py",
"src/scripts/fsrefs.py",
"src/scripts/fstail.py",
"src/scripts/fstest.py",
......
from cPickle import Unpickler
from cStringIO import StringIO
import md5
import struct
from ZODB.FileStorage import FileIterator
from ZODB.FileStorage.format \
import TRANS_HDR, TRANS_HDR_LEN, DATA_HDR, DATA_HDR_LEN
......@@ -5,23 +10,27 @@ from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64
from ZODB.tests.StorageTestBase import zodb_unpickle
from cPickle import Unpickler
from cStringIO import StringIO
import md5
import struct
import types
def get_pickle_metadata(data):
# ZODB's data records contain two pickles. The first is the class
# of the object, the second is the object. We're only trying to
# pick apart the first here, to extract the module and class names.
if data.startswith('(c'): # pickle MARK GLOBAL sequence
if data.startswith('(c'): # pickle MARK GLOBAL opcode sequence
global_prefix = 2
elif data.startswith('c'): # pickle GLOBAL opcode
global_prefix = 1
else:
global_prefix = 0
if global_prefix:
# Don't actually unpickle a class, because it will attempt to
# load the class. Just break open the pickle and get the
# module and class from it.
# module and class from it. The module and the class names are
# given by newline-terminated strings following the GLOBAL opcode.
modname, classname, rest = data.split('\n', 2)
modname = modname[2:] # strip leading '(c'
modname = modname[global_prefix:] # strip GLOBAL opcode
return modname, classname
# Else there are a bunch of other possible formats.
f = StringIO(data)
u = Unpickler(f)
try:
......@@ -29,8 +38,8 @@ def get_pickle_metadata(data):
except Exception, err:
print "Error", err
return '', ''
if isinstance(class_info, types.TupleType):
if isinstance(class_info[0], types.TupleType):
if isinstance(class_info, tuple):
if isinstance(class_info[0], tuple):
modname, classname = class_info[0]
else:
modname, classname = class_info
......
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
import ZODB.FileStorage
from ZODB.FileStorage.fsdump import get_pickle_metadata
from ZODB.utils import U64, p64, oid_repr, tid_repr, get_refs
from ZODB.TimeStamp import TimeStamp
# Extract module.class string from pickle.
def get_class(pickle):
    """Return the "module.class" string for a ZODB data-record pickle.

    The names come from get_pickle_metadata(), which hands back a
    (module name, class name) pair.
    """
    metadata = get_pickle_metadata(pickle)
    return "%s.%s" % metadata
# Shorten a string for display.
def shorten(s, size=50):
    """Return s abbreviated to at most size characters, for display.

    Strings that already fit are returned unchanged.  Longer strings keep
    their head and tail, with " ... " stuck in the middle, so that the
    result is exactly size characters long.

    If size is too small to hold the 5-character marker plus at least one
    character of text, the string is simply truncated to size characters.
    """
    if len(s) <= size:
        return s
    # Characters left over for real text once " ... " is accounted for.
    navail = size - 5
    if navail <= 0:
        return s[:max(size, 0)]
    nleading = navail // 2
    # The tail gets whatever the head didn't use (so it's never 0 here).
    # This used to be computed from size instead of navail, which made
    # the result 5 characters longer than requested.
    ntrailing = navail - nleading
    return s[:nleading] + " ... " + s[len(s) - ntrailing:]
class Tracer(object):
    """Trace all occurrences of a set of oids in a FileStorage.

    Create passing a path to an existing FileStorage.

    Call register_oid() one or more times to specify which oids to
    investigate.

    Call run() to do the analysis.  This isn't swift -- it has to read
    every byte in the database, in order to find all references.

    Call report() to display the results.
    """

    # Message synthesized by report() for a registered oid that produced
    # no other message at all.  report() recognizes it by identity.
    NOT_SEEN = "this oid was neither defined nor referenced"

    def __init__(self, path):
        """Remember path; raise ValueError unless it is an existing file."""
        import os
        if not os.path.isfile(path):
            raise ValueError("must specify an existing FileStorage")
        self.path = path
        # Map an interesting tid to (status, user, description, pos).
        self.tid2info = {}
        # List of messages.  Each is a tuple of the form
        #     (oid, tid, string)
        # The order in the tuple is important, because it defines the
        # sort order for grouping.
        self.msgs = []
        # The set of interesting oids, specified by register_oid() calls.
        # Maps oid to # of revisions.
        self.oids = {}
        # Maps interesting oid to its module.class name.  If a creation
        # record for an interesting oid is never seen, it won't appear
        # in this mapping.
        self.oid2name = {}

    def register_oid(self, oid):
        """
        Declare that an oid is "interesting".

        The oid can be given as a native 8-byte string, or as an
        integer.

        Info will be gathered about all appearances of this oid in the
        entire database, including references.
        """
        if isinstance(oid, str):
            assert len(oid) == 8
        else:
            oid = p64(oid)
        self.oids[oid] = 0

    def _msg(self, oid, tid, *args):
        """Record a message about oid in transaction tid.

        The remaining arguments are converted with str() and joined
        with single spaces to form the message text.
        """
        args = map(str, args)
        self.msgs.append( (oid, tid, ' '.join(args)) )

    def _emit(self, *parts):
        """Write parts to stdout, str()'ed and separated by single spaces.

        This spells out what the statement form "print a, b, c" does, so
        that report() doesn't depend on print-statement syntax.
        """
        print(" ".join(map(str, parts)))

    def _report_msgs(self):
        """Return the messages to report, sorted by oid and then tid.

        The result is a new list: self.msgs plus one NOT_SEEN entry for
        each registered oid that no message mentions.  self.msgs itself
        is left alone, so report() can be called any number of times
        without piling up duplicate NOT_SEEN entries.
        """
        msgs = list(self.msgs)
        mentioned = {}
        for entry in msgs:
            mentioned[entry[0]] = True
        # An oid can be referenced (or have its creation undone) without
        # ever getting a data record with a pickle; such an oid has
        # messages but no entry in oid2name.  So "not seen" has to be
        # decided by the absence of messages, not by oid2name.
        for oid in self.oids:
            if oid not in mentioned:
                msgs.append( (oid, None, self.NOT_SEEN) )
        msgs.sort() # oids are primary key, tids secondary
        return msgs

    def report(self):
        """Show all msgs, grouped by oid and sub-grouped by tid."""
        oids = self.oids
        oid2name = self.oid2name
        current_oid = current_tid = None
        for oid, tid, msg in self._report_msgs():
            if oid != current_oid:
                # First message for this oid:  print the oid header.
                nrev = oids[oid]
                revision = "revision"
                if nrev != 1:
                    revision += "s"
                name = oid2name.get(oid, "<unknown>")
                self._emit("oid", oid_repr(oid), name, nrev, revision)
                current_oid = oid
                current_tid = None
            if msg is self.NOT_SEEN:
                assert tid is None
                self._emit(" ", msg)
                continue
            if tid != current_tid:
                # First message for this tid under the current oid:
                # print the transaction header.
                current_tid = tid
                status, user, description, pos = self.tid2info[tid]
                self._emit(" tid %s offset=%d %s" % (tid_repr(tid),
                                                     pos,
                                                     TimeStamp(tid)))
                self._emit(" tid user=%r" % shorten(user))
                self._emit(" tid description=%r" % shorten(description))
            self._emit(" ", msg)

    # Do the analysis.
    def run(self):
        """Find all occurrences of the registered oids in the database."""
        for txn in ZODB.FileStorage.FileIterator(self.path):
            self._check_trec(txn)

    # Process next transaction record.
    def _check_trec(self, txn):
        """Check every data record in txn; save txn's info if any hit."""
        # txn has members tid, status, user, description,
        # _extension, _pos, _tend, _file, _tpos
        interesting = False
        for drec in txn:
            if self._check_drec(drec):
                interesting = True
        if interesting:
            self.tid2info[txn.tid] = (txn.status, txn.user, txn.description,
                                      txn._tpos)

    # Process next data record.  Return true iff a message is produced (so
    # the caller can know whether to save information about the tid the
    # data record belongs to).
    def _check_drec(self, drec):
        """Record messages for drec if it defines or references a traced oid."""
        # drec has members oid, tid, version, data, data_txn
        result = False
        tid, oid, pick, pos = drec.tid, drec.oid, drec.data, drec.pos
        traced = oid in self.oids
        if pick:
            oidclass = None
            if traced:
                oidclass = get_class(pick)
                self._msg(oid, tid, "new revision", oidclass,
                          "at", pos)
                result = True
                self.oids[oid] += 1
                # Only traced oids belong in oid2name; the class of an
                # untraced object that merely refers to a traced oid is
                # carried by the "referenced by" message instead.
                self.oid2name[oid] = oidclass
            for ref, klass in get_refs(pick):
                if klass is None:
                    klass = '<unknown>'
                elif isinstance(klass, tuple):
                    klass = "%s.%s" % klass
                if ref in self.oids:
                    # Fetch the referring object's class only when needed.
                    if oidclass is None:
                        oidclass = get_class(pick)
                    self._msg(ref, tid, "referenced by", oid_repr(oid),
                              oidclass, "at", pos)
                    result = True
                if traced:
                    self._msg(oid, tid, "references", oid_repr(ref), klass,
                              "at", pos)
                    result = True
        elif traced:
            # Or maybe it's a version abort.
            self._msg(oid, tid, "creation undo at", pos)
            result = True
        return result
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
r"""
fsoids test, of the workhorse fsoids.Tracer class
=================================================
Let's get a temp file path to work with first.
>>> import tempfile
>>> path = tempfile.mktemp('.fs', 'Data')
>>> print 'path:', path #doctest: +ELLIPSIS
path: ...Data...fs
More imports.
>>> import ZODB
>>> from ZODB.FileStorage import FileStorage
>>> import transaction as txn
>>> from BTrees.OOBTree import OOBTree
>>> from ZODB.FileStorage.fsoids import Tracer # we're testing this
Create an empty FileStorage.
>>> st = FileStorage(path)
There's not a lot interesting in an empty DB!
>>> t = Tracer(path)
>>> t.register_oid(0x123456)
>>> t.register_oid(1)
>>> t.register_oid(0)
>>> t.run()
>>> t.report()
oid 0x00 <unknown> 0 revisions
this oid was neither defined nor referenced
oid 0x01 <unknown> 0 revisions
this oid was neither defined nor referenced
oid 0x123456 <unknown> 0 revisions
this oid was neither defined nor referenced
That didn't tell us much, but does show that the specified oids are sorted
into increasing order.
Create a root object and try again:
>>> db = ZODB.DB(st) # yes, that creates a root object!
>>> t = Tracer(path)
>>> t.register_oid(0); t.register_oid(1)
>>> t.run(); t.report() #doctest: +ELLIPSIS
oid 0x00 persistent.mapping.PersistentMapping 1 revision
tid 0x... offset=4 ...
tid user=''
tid description='initial database creation'
new revision persistent.mapping.PersistentMapping at 52
oid 0x01 <unknown> 0 revisions
this oid was neither defined nor referenced
So we see oid 0 has been used in our one transaction, and that it was created
there, and is a PersistentMapping. 4 is the file offset to the start of the
transaction record, and 52 is the file offset to the start of the data record
for oid 0 within this transaction. Because tids are timestamps too, the
"..." parts vary across runs. The initial line for a tid actually looks like
this:
tid 0x035748597843b877 offset=4 2004-08-20 20:41:28.187000
Let's add a BTree and try again:
>>> root = db.open().root()
>>> root['tree'] = OOBTree()
>>> txn.get().note('added an OOBTree')
>>> txn.get().commit()
>>> t = Tracer(path)
>>> t.register_oid(0); t.register_oid(1)
>>> t.run(); t.report() #doctest: +ELLIPSIS
oid 0x00 persistent.mapping.PersistentMapping 2 revisions
tid 0x... offset=4 ...
tid user=''
tid description='initial database creation'
new revision persistent.mapping.PersistentMapping at 52
tid 0x... offset=168 ...
tid user=''
tid description='added an OOBTree'
new revision persistent.mapping.PersistentMapping at 207
references 0x01 <unknown> at 207
oid 0x01 BTrees._OOBTree.OOBTree 1 revision
tid 0x... offset=168 ...
tid user=''
tid description='added an OOBTree'
new revision BTrees._OOBTree.OOBTree at 363
referenced by 0x00 persistent.mapping.PersistentMapping at 207
So there are two revisions of oid 0 now, and the second references oid 1.
It's peculiar that the class shows as <unknown> in:
references 0x01 <unknown> at 207
The code that does this takes long tours through undocumented code in
cPickle.c (using cPickle features that aren't in pickle.py, and aren't even
documented as existing). Whatever the reason, ZODB/utils.py's get_refs()
function returns (oid_0x01, None) for the reference to oid 1, instead of the
usual (oid, (module_name, class_name)) form. Before I wrote this test,
I never saw a case of that before! "references" lines usually identify
the class of the object. Anyway, the correct class is given in the new
output for oid 1.
One more, storing a reference in the BTree back to the root object:
>>> tree = root['tree']
>>> tree['root'] = root
>>> txn.get().note('circling back to the root')
>>> txn.get().commit()
>>> t = Tracer(path)
>>> t.register_oid(0); t.register_oid(1); t.register_oid(2)
>>> t.run(); t.report() #doctest: +ELLIPSIS
oid 0x00 persistent.mapping.PersistentMapping 2 revisions
tid 0x... offset=4 ...
tid user=''
tid description='initial database creation'
new revision persistent.mapping.PersistentMapping at 52
tid 0x... offset=168 ...
tid user=''
tid description='added an OOBTree'
new revision persistent.mapping.PersistentMapping at 207
references 0x01 <unknown> at 207
tid 0x... offset=443 ...
tid user=''
tid description='circling back to the root'
referenced by 0x01 BTrees._OOBTree.OOBTree at 491
oid 0x01 BTrees._OOBTree.OOBTree 2 revisions
tid 0x... offset=168 ...
tid user=''
tid description='added an OOBTree'
new revision BTrees._OOBTree.OOBTree at 363
referenced by 0x00 persistent.mapping.PersistentMapping at 207
tid 0x... offset=443 ...
tid user=''
tid description='circling back to the root'
new revision BTrees._OOBTree.OOBTree at 491
references 0x00 <unknown> at 491
oid 0x02 <unknown> 0 revisions
this oid was neither defined nor referenced
Note that we didn't create any new object there (oid 2 is still unused), we
just made oid 1 refer to oid 0. Therefore there's a new "new revision" line
in the output for oid 1. Note that there's also new output for oid 0, even
though the root object didn't change: we got new output for oid 0 because
it's a traced oid and the new transaction made a new reference *to* it.
Since the Tracer constructor takes only one argument, the only sane thing
you can do to make it fail is to give it a path to a file that doesn't
exist:
>>> Tracer('/eiruowieuu/lsijflfjlsijflsdf/eurowiurowioeuri/908479287.fs')
Traceback (most recent call last):
...
ValueError: must specify an existing FileStorage
You get the same kind of exception if you pass it a path to an existing
directory (the path must be to a file, not a directory):
>>> import os
>>> Tracer(os.path.dirname(__file__))
Traceback (most recent call last):
...
ValueError: must specify an existing FileStorage
Clean up.
>>> st.close()
>>> st.cleanup() # remove .fs, .index, etc
"""
from zope.testing import doctest
def test_suite():
    """Return a suite made from the doctests in this module's docstrings."""
    suite = doctest.DocTestSuite()
    return suite
......@@ -17,13 +17,38 @@ import time
from struct import pack, unpack
from types import StringType
from binascii import hexlify
import cPickle
import cStringIO
from persistent.TimeStamp import TimeStamp
__all__ = ['z64',
't32',
'p64',
'u64',
'U64',
'cp',
'newTimeStamp',
'oid_repr',
'serial_repr',
'tid_repr',
'positive_id',
'get_refs',
]
z64 = '\0'*8
# TODO The purpose of t32 is unclear. Code that uses it is usually
# of the form:
#
# if e < 0:
# e = t32 - e
#
# Doesn't make sense (since e is negative, it creates a number larger than
# t32). If users said "e += t32", *maybe* it would make sense.
t32 = 1L << 32
assert sys.hexversion >= 0x02020000
assert sys.hexversion >= 0x02030000
# The distinction between ints and longs is blurred in Python 2.2,
# so u64() and U64() are really the same.
......@@ -77,6 +102,7 @@ def oid_repr(oid):
return repr(oid)
serial_repr = oid_repr
tid_repr = serial_repr
# Addresses can "look negative" on some boxes, some of the time. If you
# feed a "negative address" to an %x format, Python 2.3 displays it as
......@@ -100,3 +126,32 @@ def positive_id(obj):
result += 1L << 64
assert result >= 0 # else addresses are fatter than 64 bits
return result
# So full of undocumented magic it's hard to fathom.
# The existence of cPickle.noload() isn't documented, and what it
# does isn't documented either. In general it unpickles, but doesn't
# actually build any objects of user-defined classes. Despite that
# persistent_load is documented to be a callable, there's an
# undocumented gimmick where if it's actually a list, for a PERSID or
# BINPERSID opcode cPickle just appends "the persistent id" to that list.
# Also despite that "a persistent id" is documented to be a string,
# ZODB persistent ids are actually (often? always?) tuples, most often
# of the form
# (oid, (module_name, class_name))
# So the effect of the following is to dig into the object pickle, and
# return a list of the persistent ids found (which are usually nested
# tuples), without actually loading any modules or classes.
# Note that pickle.py doesn't support any of this, it's undocumented code
# only in cPickle.c.
def get_refs(pickle):
    """Return the list of persistent ids found in a data-record pickle.

    Nothing is loaded:  both halves of the record are run through
    cPickle's noload(), with a plain list standing in for
    persistent_load so that each persistent id met is appended to it.
    Entries most often have the form (oid, (module_name, class_name)).
    """
    # The pickle is in two parts.  First there's the class of the object,
    # needed to build a ghost.  See get_pickle_metadata for how complicated
    # this can get.  The second part is the state of the object.  We want
    # to find all the persistent references within both parts (although I
    # expect they can only appear in the second part).
    f = cStringIO.StringIO(pickle)
    u = cPickle.Unpickler(f)
    # The list has to be installed before either noload() call; it's
    # filled in as a side effect of unpickling.
    u.persistent_load = refs = []
    u.noload() # class info
    u.noload() # instance state info
    return refs
......@@ -27,6 +27,15 @@ transaction and one line for each data record in that transaction.
Includes time stamps, file positions, and class names.
fsoids.py -- trace all uses of specified oids in a FileStorage
For heavy debugging.
A set of oids is specified by text file listing and/or command line.
A report is generated showing all uses of these oids in the database:
all new-revision creation/modifications, all references from all
revisions of other objects, and all creation undos.
fstest.py -- simple consistency checker for FileStorage
usage: fstest.py [-v] data.fs
......
#!/usr/bin/env python2.3
##############################################################################
#
# Copyright (c) 2004 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""FileStorage oid-tracer.
usage: fsoids.py [-f oid_file] Data.fs [oid]...
Display information about all occurrences of specified oids in a FileStorage.
This is meant for heavy debugging.
This includes all revisions of the oids, all objects referenced by the
oids, and all revisions of all objects referring to the oids.
If specified, oid_file is an input text file, containing one oid per
line. oids are specified as integers, in any of Python's integer
notations (typically like 0x341a). One or more oids can also be specified
on the command line.
The output is grouped by oid, from smallest to largest, and sub-grouped
by transaction, from oldest to newest.
This will not alter the FileStorage, but running against a live FileStorage
is not recommended (spurious error messages may result).
"""
import sys
from ZODB.FileStorage.fsoids import Tracer
def usage():
    """Print this script's module docstring, which is the usage text."""
    print(__doc__)
def main():
    """Command-line driver:  fsoids.py [-f oid_file] Data.fs [oid]...

    Registers every oid given on the command line and/or listed (one per
    line) in oid_file, then traces them through the FileStorage and
    prints the report.

    Raises getopt.error for bad options, and ValueError if no FileStorage
    path is given, an oid isn't a valid integer, or no oids are specified
    at all.  For bad options or a missing FileStorage path, the usage
    text is printed once before the exception propagates.
    """
    import getopt

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'f:')
        if not args:
            # The handler below prints the usage text; printing it here
            # too would show it twice.
            raise ValueError("Must specify a FileStorage")
        path = None
        for k, v in opts:
            if k == '-f':
                path = v
    except (getopt.error, ValueError):
        usage()
        raise

    c = Tracer(args[0])
    for oid in args[1:]:
        as_int = int(oid, 0) # 0 == auto-detect base
        c.register_oid(as_int)
    if path is not None:
        f = open(path)
        try:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. at the end of the file).
                    continue
                c.register_oid(int(line, 0))
        finally:
            # Don't leave the oid file open while the (slow) trace runs.
            f.close()
    if not c.oids:
        raise ValueError("no oids specified")
    c.run()
    c.report()
if __name__ == "__main__":
main()
......@@ -63,48 +63,17 @@ revisions of objects; therefore fsrefs cannot find problems in versions or
in non-current revisions.
"""
import cPickle
import cStringIO
import traceback
import types
from ZODB.FileStorage import FileStorage
from ZODB.TimeStamp import TimeStamp
from ZODB.utils import u64, oid_repr
from ZODB.utils import u64, oid_repr, get_refs
from ZODB.FileStorage.fsdump import get_pickle_metadata
from ZODB.POSException import POSKeyError
VERBOSE = 0
# So full of undocumented magic it's hard to fathom.
# The existence of cPickle.noload() isn't documented, and what it
# does isn't documented either. In general it unpickles, but doesn't
# actually build any objects of user-defined classes. Despite that
# persistent_load is documented to be a callable, there's an
# undocumented gimmick where if it's actually a list, for a PERSID or
# BINPERSID opcode cPickle just appends "the persistent id" to that list.
# Also despite that "a persistent id" is documented to be a string,
# ZODB persistent ids are actually (often? always?) tuples, most often
# of the form
# (oid, (module_name, class_name))
# So the effect of the following is to dig into the object pickle, and
# return a list of the persistent ids found (which are usually nested
# tuples), without actually loading any modules or classes.
# Note that pickle.py doesn't support any of this, it's undocumented code
# only in cPickle.c.
def get_refs(pickle):
    """Return the list of persistent ids found in a data-record pickle.

    Nothing is loaded:  both halves of the record are run through
    cPickle's noload(), with a plain list standing in for
    persistent_load so that each persistent id met is appended to it.
    Entries most often have the form (oid, (module_name, class_name)).
    """
    # The pickle is in two parts.  First there's the class of the object,
    # needed to build a ghost.  See get_pickle_metadata for how complicated
    # this can get.  The second part is the state of the object.  We want
    # to find all the persistent references within both parts (although I
    # expect they can only appear in the second part).
    f = cStringIO.StringIO(pickle)
    u = cPickle.Unpickler(f)
    # The list has to be installed before either noload() call; it's
    # filled in as a side effect of unpickling.
    u.persistent_load = refs = []
    u.noload() # class info
    u.noload() # instance state info
    return refs
# There's a problem with oid. 'data' is its pickle, and 'serial' its
# serial number. 'missing' is a list of (oid, class, reason) triples,
# explaining what the problem(s) is(are).
......
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment