Commit afdded70 authored by Jeremy Hylton's avatar Jeremy Hylton

Simply transactionalUndo implementation and make it much less efficient.

Do not encode the file position in the transaction id used for undo.
An attacker could construct a pickle with a bogus transaction record
in its binary data, deduce the position of the pickle in the file from
the undo log, then submit an undo with a bogus file position that
caused the pickle to get written as a regular data record.  Bad stuff.

The new implementation uses a straight linear search backwards from
the most recent transaction header.
parent d64f8846
...@@ -115,7 +115,7 @@ ...@@ -115,7 +115,7 @@
# may have a back pointer to a version record or to a non-version # may have a back pointer to a version record or to a non-version
# record. # record.
# #
__version__='$Revision: 1.92 $'[11:-2] __version__='$Revision: 1.93 $'[11:-2]
import struct, time, os, string, base64, sys import struct, time, os, string, base64, sys
from struct import pack, unpack from struct import pack, unpack
...@@ -1039,30 +1039,9 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1039,30 +1039,9 @@ class FileStorage(BaseStorage.BaseStorage,
raise UndoError('Some data were modified by a later transaction') raise UndoError('Some data were modified by a later transaction')
# undoLog() returns a description dict that includes an id entry. # undoLog() returns a description dict that includes an id entry.
# The id is opaque to the client, but encodes information that # The id is opaque to the client, but contains the transaction id.
# uniquely identifies a transaction in the storage. The id is a # The transactionalUndo() implementation does a simple linear
# base64 encoded string, where the components of the string are: # search through the file (from the end) to find the transaction.
# - the transaction id
# - the packed file position of the transaction record
# - the oid of an object modified by the transaction
# The file position is sufficient in most cases, but doesn't work
# if the id is used after a pack and may not work if used with
# replicated storages. If the file position is incorrect, the oid
# can be used for a relatively efficient search for the
# transaction record. FileStorage keeps an index mapping oids to
# file positions, but do notes have a transaction id to file
# offset index. The oid index maps to the most recent revision of
# the object. Transactional undo must follow back pointers until
# it finds the correct transaction record,
# This approach fails if the transaction record has no data
# records. It's not clear if that is possible, but it may be for
# commitVersion and abortVersion.
# The file offset also supports non-transactional undo, which
# won't work after a pack and isn't supported by replicated
# storages.
def undoLog(self, first=0, last=-20, filter=None): def undoLog(self, first=0, last=-20, filter=None):
if last < 0: if last < 0:
...@@ -1082,11 +1061,9 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1082,11 +1061,9 @@ class FileStorage(BaseStorage.BaseStorage,
self._file.seek(pos) self._file.seek(pos)
h = self._file.read(TRANS_HDR_LEN) h = self._file.read(TRANS_HDR_LEN)
tid, tl, status, ul, dl, el = struct.unpack(">8s8scHHH", h) tid, tl, status, ul, dl, el = struct.unpack(">8s8scHHH", h)
if tid < self._packt: if tid < self._packt or status == 'p':
break break
if status == 'p': if status != ' ':
break
elif status != ' ':
continue continue
d = u = '' d = u = ''
if ul: if ul:
...@@ -1099,14 +1076,7 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1099,14 +1076,7 @@ class FileStorage(BaseStorage.BaseStorage,
e = loads(read(el)) e = loads(read(el))
except: except:
pass pass
next = self._file.read(8) d = {'id': base64.encodestring(tid).rstrip(),
# next is either the redundant txn length - 8, or an oid
if next == tl:
# There were no objects in this txn
id = tid + p64(pos)
else:
id = tid + p64(pos) + next
d = {'id': base64.encodestring(id).rstrip(),
'time': TimeStamp(tid).timeTime(), 'time': TimeStamp(tid).timeTime(),
'user_name': u, 'user_name': u,
'description': d} 'description': d}
...@@ -1144,57 +1114,28 @@ class FileStorage(BaseStorage.BaseStorage, ...@@ -1144,57 +1114,28 @@ class FileStorage(BaseStorage.BaseStorage,
def _txn_undo(self, transaction_id): def _txn_undo(self, transaction_id):
# Find the right transaction to undo and call _txn_undo_write(). # Find the right transaction to undo and call _txn_undo_write().
transaction_id = base64.decodestring(transaction_id + '\n') tid = base64.decodestring(transaction_id + '\n')
tid = transaction_id[:8] assert len(tid) == 8
tpos = U64(transaction_id[8:16]) tpos = self._txn_find(tid)
if not self._check_txn_pos(tpos, tid):
# If the pos and tid don't match, we must use the oid to
# find the transaction record. Find the file position for
# the current revision of this object, and search back for
# the beginning of its transaction record
oid = transaction_id[16:]
if oid == '' or not self._index.has_key(oid):
# XXX Is this the right error message?
raise UndoError('Undoing a non-object affecting transaction')
pos = self._index[oid]
while 1:
self._file.seek(pos)
h = self._file.read(DATA_HDR_LEN)
doid, serial, prev, tpos, vlen, plen = \
unpack('>8s8s8s8sH8s', h)
tpos = U64(tpos)
self._file.seek(tpos)
# Read transaction id to see if we've got a match
thistid = self._file.read(8)
if thistid == tid:
break # Yeee ha!
# Keep looking
pos = U64(prev)
if not pos:
# We never found the right transaction
raise UndoError('Invalid undo transaction id')
tindex = self._txn_undo_write(tpos, tid) tindex = self._txn_undo_write(tpos, tid)
self._tindex.update(tindex) self._tindex.update(tindex)
return tindex.keys() return tindex.keys()
def _check_txn_pos(self, pos, tid): def _txn_find(self, tid):
"Return true if pos is location of the transaction record for tid." pos = self._pos
# XXX Why 39? Only because undoLog() uses it as a boundary.
while pos > 39:
self._file.seek(pos - 8)
pos = pos - U64(self._file.read(8)) - 8
self._file.seek(pos) self._file.seek(pos)
this_tid = self._file.read(8) h = self._file.read(TRANS_HDR_LEN)
if this_tid != tid: _tid = h[:8]
return 0 if _tid == tid:
# be extra cautious: Check the record length makes sense, to return pos
# guard against a random file location that happens to have status = h[16] # get the c in 8s8sc
# the right 8-byte pattern. if status == 'p' or _tid < self._packt:
stlen = self._file.read(8) break
tlen = U64(stlen) raise UndoError("Invalid transaction id")
self._file.seek(tlen, 1)
redundant_stlen = self._file.read(8)
if len(redundant_stlen) != 8:
return 0
if redundant_stlen != stlen:
return 0
return 1
def _txn_undo_write(self, tpos, tid): def _txn_undo_write(self, tpos, tid):
# a helper function to write the data records for transactional undo # a helper function to write the data records for transactional undo
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment