Commit 443d47b9 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 4979b6cd
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2020 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""Program treedelta-genallstructs generates subset of all possible tree changes
between two trees that represent two specified key->value dicts.
It is used as a helper for ΔBTree tests.
Usage: treedelta-genallstructs <zurl> <n> <kv1> <kv2>
It generates ZODB commits with <tree1> -> <tree2> changes for subset of all
possible tree topologies tree1 and tree2 that can represent kv1 and kv2
correspondingly:
For every kv the following tree topologies are used: 1) the native one (the one
that ZODB would usually create natively via regular usage), and 2) n random ones.
Then every pair of tree1 and tree2 topologies is considered and a
corresponding commit is made, with the tree undergoing the kv1->kv2 change
together with the tree1->tree2 topology transition in the database.
For every commit the following is printed to stdout:
tid <tid> # <tree1> -> <tree2>
tree <oid>
δ
<LF>
XXX
"""
from __future__ import print_function, absolute_import
import sys
from golang import func, defer, panic
from golang import time
from ZODB import DB
from ZODB.Connection import Connection
import transaction
import random
from wendelin.wcfs.internal import xbtree, xbtree_test
from wendelin.bigfile.file_zodb import ZBlk
from zodbtools.util import storageFromURL, ashex
from persistent import CHANGED
from persistent.mapping import PersistentMapping
# XXX hack: set LOBTree.LOBTree -> XLOTree so that nodes are split often
# (XLOTree is LOBTree with small .max_*_size). Do it this way so that generated
# database looks as if regular LOBTree was used. We use the hack because
# we cannot tune LOBTree directly.
#
# The hack has two parts, and their order matters:
#   1. XLOTree is relabelled (__module__/__name__) to present itself as
#      BTrees.LOBTree.LOBTree;
#   2. the LOBTree attribute of BTrees.LOBTree module is rebound to XLOTree.
XLOTree = xbtree_test.XLOTree
XLOTree.__module__ = 'BTrees.LOBTree'
XLOTree.__name__ = 'LOBTree'
from BTrees import LOBTree
LOBTree.LOBTree = XLOTree
# treedeltaGenAllStructs generates subset of all possible tree changes in
# between kv1 and kv2 and commits them into ZODB storage zstor.
# See top-level documentation for details.
#
# kv1txt and kv2txt are kv1 and kv2 in text form as understood by kvDecode.
# n is how many random topologies are used, for each of kv1 and kv2, in
# addition to the native one.
# seed, if not None, seeds the random generator; by default the seed is taken
# from current time. The seed is printed to stdout.
@func
def treedeltaGenAllStructs(zstor, kv1txt, kv2txt, n, seed=None):
    db = DB(zstor)
    defer(db.close)
    zconn = db.open()
    defer(zconn.close)
    root = zconn.root()

    # seed the random generator; the seed is printed so that a run can be repeated
    if seed is None:
        seed = int(time.now())
    print("# seed=%d" % seed)
    random.seed(seed)

    # root['treedelta/values'] = {} v -> ZBlk(v)
    valv = b'abcdefghi'
    valdict = PersistentMapping()
    root['treedelta/values'] = valdict
    for val in valv:
        blk = ZBlk()
        blk.setblkdata(val)
        valdict[val] = blk
    commit('treedelta/values -> %r' % valv)

    # vdecode(vtxt) -> vobj  decodes value text into value object, e.g. 'a' -> ZBlk(a)
    def vdecode(vtxt): # -> vobj
        return valdict[vtxt]

    # vencode(vobj) -> vtxt  encodes value object into value text, e.g. ZBlk(a) -> 'a'
    # value objects are matched by identity.
    def vencode(vobj): # -> vtxt
        for (txt, obj) in valdict.items():
            if obj is vobj:
                return txt
        raise KeyError("%r not found in value registry" % (vobj,))

    kv1 = kvDecode(kv1txt, vdecode)
    kv2 = kvDecode(kv2txt, vdecode)

    # δ kv1 <-> kv2
    diff12 = diff(kv1, kv2)
    diff21 = diff(kv2, kv1)

    # all tree topologies that can represent kv1 and kv2
    maxdepth = 2    # XXX -> 3?
    maxsplit = 1    # XXX -> 2?
    t1AllStructs = list(xbtree.AllStructs(kv1.keys(), maxdepth, maxsplit))
    t2AllStructs = list(xbtree.AllStructs(kv2.keys(), maxdepth, maxsplit))

    # create the tree
    ztree = XLOTree()
    root['treedelta/tree'] = ztree
    commit('treedelta/tree')
    # XXX print something?

    # emit patches ztree with delta, adjusts tree structure and emits corresponding commit.
    # treeTopo=None means "leave the structure as it came out after patching".
    def emit(delta, verify, treeTopo):
        ttxt_prev = treetxt(ztree)
        patch(ztree, delta, verify)
        if treeTopo is not None:
            xbtree.Restructure(ztree, treeTopo)
        ttxt = treetxt(ztree)

        tid = commit('%s -> %s' % (ttxt_prev, ttxt))
        print('txn %s  # %s -> %s' % (ashex(tid), ttxt_prev, ttxt))
        for (k, v) in delta:
            # deleted keys are printed with 'ø' in place of the value
            vtxt = vencode(v) if v is not DEL else 'ø'
            # XXX print not only +, but also - (e.g. -1:a +1:b) ?
            print('\t%d:%s' % (k, vtxt))

    # emit initial kv1 and kv2 states prepared as ZODB would do natively
    emit(diff({}, kv1), verify=kv1, treeTopo=None)
    t1struct0 = xbtree.StructureOf(ztree)
    emit(diff(kv1, kv2), verify=kv2, treeTopo=None)
    t2struct0 = xbtree.StructureOf(ztree)

    # all tree1 and tree2 topologies jumps in between we are going to emit:
    # native + n random ones.
    t1structv = [t1struct0] + random.sample(t1AllStructs, min(n, len(t1AllStructs)))
    t2structv = [t2struct0] + random.sample(t2AllStructs, min(n, len(t2AllStructs)))

    # emit tree1->tree2 and tree1<-tree2 transitions for all combinations of tree1 and tree2.
    # tree1₀->tree2₀ was already done.
    t12travel = list(bitravel2Way(t1structv, t2structv))
    assert t12travel[0] is t1struct0
    assert t12travel[1] is t2struct0
    for i, tstruct in enumerate(t12travel[2:]):
        # the travel alternates sides: even steps land on tree1, odd ones on tree2
        if i % 2 == 0:
            delta, verify, structv = diff21, kv1, t1structv
        else:
            delta, verify, structv = diff12, kv2, t2structv
        assert tstruct in structv
        emit(delta, verify, tstruct)
# bitravel2Way generates travel path through all A<->B edges such
# that every edge a->b and a<-b is traveled, and exactly once.
#
# The travel starts from A[0] and also finishes there.
# Both A and B must be non-empty.
def bitravel2Way(A, B): # -> i[] of node
    assert len(A) > 0
    assert len(B) > 0
    origin = A[0]
    yield origin            # A₀
    for b in B:
        yield b             # A₀ -> Bj
        for a in A[1:]:
            yield a         # Ai <- Bj
            yield b         # Ai -> Bj
        yield origin        # A₀ <- Bj
# test_bitravel2Way verifies bitravel2Way on a small 3x2 example.
def test_bitravel2Way():
    A = ['a', 'b', 'c']
    B = [1, 2]
    want = ['a',1, 'b',1, 'c',1, 'a',2, 'b',2, 'c',2, 'a']
    got = list(bitravel2Way(A, B))
    assert got == want, (got, want)

# the self-check is run right at module load
test_bitravel2Way()
# kvEncode encodes key->value mapping into text.
# e.g. {1:'a', 2:'b'} -> '1:a,2:b'
#
# Items are emitted in sorted key order; every value is converted to text
# with vencode.
def kvEncode(kvDict, vencode): # -> kvText
    itemv = ['%d:%s' % (k, vencode(kvDict[k])) for k in sorted(kvDict.keys())]
    return ','.join(itemv)
# kvDecode decodes key->value mapping from text.
# e.g. '1:a,2:b' -> {1:'a', 2:'b'}
#
# Keys are parsed as integers; every value text is converted to value with vdecode.
# Empty text decodes to empty mapping.
# ValueError is raised if a key is present more than once.
def kvDecode(kvText, vdecode): # -> kvDict
    kvDict = {}
    if kvText == "":
        return kvDict

    for entry in kvText.split(','):
        ktxt, vtxt = entry.split(':')
        key = int(ktxt)
        val = vdecode(vtxt)
        if key in kvDict:
            raise ValueError("key %s present multiple times" % key)
        kvDict[key] = val
    return kvDict
# diff computes difference in between mappings d1 and d2.
#
# The result lists, in sorted key order, (k,v) for every key whose value has
# to change to turn d1 into d2. Values are compared by identity.
# DEL in place of v means that the key has to be deleted.
DEL = 'ø'
def diff(d1, d2): # -> [] of (k,v) to change; DEL means del[k]
    allkeys = set(d1.keys()) | set(d2.keys())
    delta = []
    for k in sorted(allkeys):
        old = d1.get(k, DEL)
        new = d2.get(k, DEL)
        if old is new:
            continue    # unchanged
        delta.append((k, new))
    return delta
# patch changes mapping d according to diff.
# diff = [] of (k,v) to change; DEL means del[k]
#
# If verify is not None, d is checked after patching to have exactly the same
# keys as verify, and, for every key, the very same (by identity) value.
# A mismatch results in panic.
def patch(d, diff, verify):
    for (k, v) in diff:
        if v is not DEL:
            d[k] = v
        else:
            del d[k]

    if verify is None:
        return

    keys = set(d.keys())
    keyok = set(verify.keys())
    if keys != keyok:
        panic("patch: verify: different keys: %s" % keys.symmetric_difference(keyok))
    for k in keys:
        got, want = d[k], verify[k]
        if got is not want:
            panic("patch: verify: [%d] different: got %r;  want %r" % (k, got, want))
# commit commits current transaction with description.
#
# It returns ID of the committed transaction.
def commit(description): # -> tid
    txn = transaction.get()
    txn.description = description

    # XXX hack to retrieve committed transaction ID via ._p_serial of object changed in this transaction
    # the object has to be picked before the commit; its ._p_serial is read after.
    resourcev = txn._resources
    assert len(resourcev) == 1
    zconn = resourcev[0]
    assert isinstance(zconn, Connection)
    registered = zconn._registered_objects
    assert len(registered) > 0
    obj = registered[0]
    assert obj._p_state == CHANGED

    txn.commit()
    return obj._p_serial
# treetxt returns text representation of a tree.
#
# vencode, if given, is forwarded to xbtree.TopoEncode as its vencode
# argument so that values can be included into the text.
# By default only the tree structure is encoded.
#
# The previous code passed `vencode=...` - an unfinished placeholder, which is
# a syntax error on Python2 and passes non-callable Ellipsis on Python3.
# NOTE(review): assumes xbtree.TopoEncode can be called without vencode - confirm.
def treetxt(ztree, vencode=None): # -> txt
    tstruct = xbtree.StructureOf(ztree)
    if vencode is None:
        return xbtree.TopoEncode(tstruct)   # FIXME include values
    return xbtree.TopoEncode(tstruct, vencode=vencode)
# main is the program entry point.
#
# It parses `<zurl> <n> <kv1> <kv2>` from command line, opens the storage and
# runs treedeltaGenAllStructs on it. On wrong number of arguments usage is
# printed to stderr and the program exits with status 1.
@func
def main():
    argv = sys.argv
    if len(argv) != 5:
        print("Usage: %s <zurl> <n> <kv1> <kv2>" % argv[0], file=sys.stderr)
        sys.exit(1)

    zurl, ntxt, kv1, kv2 = argv[1:]
    n = int(ntxt)

    zstor = storageFromURL(zurl)
    defer(zstor.close)      # storage is closed when main returns

    treedeltaGenAllStructs(zstor, kv1, kv2, n)
# run main only when the file is executed as a program
if __name__ == '__main__':
    main()
......@@ -18,19 +18,276 @@
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""Program treegen generates tree states ..."""
"""Program treedelta-genallstructs generates subset of all possible tree changes
in between two trees that represent two specified key->value dicts.
# XXX -> treecommit ?
It is used as helper for ΔBTree tests.
# XXX input of known tricky cases from ΔBTree test driver
# tree1 (with values)
# tree2
# ...
# | treegen
Usage: treedelta-genallstructs <zurl> <n> <kv1> <kv2>
It generates ZODB commits with <tree1> -> <tree2> changes for subset of all
possible tree topologies tree1 and tree2 that can represent kv1 and kv2
correspondingly:
For every kv the following tree topologies are used: 1) native (the one that
ZODB would usually create natively via regular usage), and 2) n random ones.
Then every pair of topology change in between tree1 and tree2 is considered and
corresponding commit is made with the tree undergoing kv1->kv2 change with
tree1->tree2 transition in the database.
For every commit the following is printed to stdout:
tid <tid> # <tree1> -> <tree2>
tree <oid>
δ
<LF>
XXX
"""
from __future__ import print_function, absolute_import
import sys
from golang import func, defer, panic
from golang import time
from ZODB import DB
from ZODB.Connection import Connection
import transaction
import random
from wendelin.wcfs.internal import xbtree, xbtree_test
from wendelin.bigfile.file_zodb import ZBlk
from zodbtools.util import storageFromURL, ashex
from persistent import CHANGED
from persistent.mapping import PersistentMapping
# XXX hack: set LOBTree.LOBTree -> XLOTree so that nodes are split often
# (XLOTree is LOBTree with small .max_*_size). Do it this way so that generated
# database looks as if regular LOBTree was used. We use the hack because
# we cannot tune LOBTree directly.
#
# The hack has two parts, and their order matters:
#   1. XLOTree is relabelled (__module__/__name__) to present itself as
#      BTrees.LOBTree.LOBTree;
#   2. the LOBTree attribute of BTrees.LOBTree module is rebound to XLOTree.
XLOTree = xbtree_test.XLOTree
XLOTree.__module__ = 'BTrees.LOBTree'
XLOTree.__name__ = 'LOBTree'
from BTrees import LOBTree
LOBTree.LOBTree = XLOTree
# treedeltaGenAllStructs generates subset of all possible tree changes in
# between kv1 and kv2 and commits them into ZODB storage zstor.
# See top-level documentation for details.
#
# kv1txt and kv2txt are kv1 and kv2 in text form as understood by kvDecode.
# n is how many random topologies are used, for each of kv1 and kv2, in
# addition to the native one.
# seed, if not None, seeds the random generator; by default the seed is taken
# from current time. The seed is printed to stdout.
@func
def treedeltaGenAllStructs(zstor, kv1txt, kv2txt, n, seed=None):
    db = DB(zstor)
    defer(db.close)
    zconn = db.open()
    defer(zconn.close)
    root = zconn.root()

    # seed the random generator; the seed is printed so that a run can be repeated
    if seed is None:
        seed = int(time.now())
    print("# seed=%d" % seed)
    random.seed(seed)

    # root['treedelta/values'] = {} v -> ZBlk(v)
    valv = b'abcdefghi'
    valdict = PersistentMapping()
    root['treedelta/values'] = valdict
    for val in valv:
        blk = ZBlk()
        blk.setblkdata(val)
        valdict[val] = blk
    commit('treedelta/values -> %r' % valv)

    # vdecode(vtxt) -> vobj  decodes value text into value object, e.g. 'a' -> ZBlk(a)
    def vdecode(vtxt): # -> vobj
        return valdict[vtxt]

    # vencode(vobj) -> vtxt  encodes value object into value text, e.g. ZBlk(a) -> 'a'
    # value objects are matched by identity.
    def vencode(vobj): # -> vtxt
        for (txt, obj) in valdict.items():
            if obj is vobj:
                return txt
        raise KeyError("%r not found in value registry" % (vobj,))

    kv1 = kvDecode(kv1txt, vdecode)
    kv2 = kvDecode(kv2txt, vdecode)

    # δ kv1 <-> kv2
    diff12 = diff(kv1, kv2)
    diff21 = diff(kv2, kv1)

    # all tree topologies that can represent kv1 and kv2
    maxdepth = 2    # XXX -> 3?
    maxsplit = 1    # XXX -> 2?
    t1AllStructs = list(xbtree.AllStructs(kv1.keys(), maxdepth, maxsplit))
    t2AllStructs = list(xbtree.AllStructs(kv2.keys(), maxdepth, maxsplit))

    # create the tree
    ztree = XLOTree()
    root['treedelta/tree'] = ztree
    commit('treedelta/tree')
    # XXX print something?

    # emit patches ztree with delta, adjusts tree structure and emits corresponding commit.
    # treeTopo=None means "leave the structure as it came out after patching".
    def emit(delta, verify, treeTopo):
        ttxt_prev = treetxt(ztree)
        patch(ztree, delta, verify)
        if treeTopo is not None:
            xbtree.Restructure(ztree, treeTopo)
        ttxt = treetxt(ztree)

        tid = commit('%s -> %s' % (ttxt_prev, ttxt))
        print('txn %s  # %s -> %s' % (ashex(tid), ttxt_prev, ttxt))
        for (k, v) in delta:
            # deleted keys are printed with 'ø' in place of the value
            vtxt = vencode(v) if v is not DEL else 'ø'
            # XXX print not only +, but also - (e.g. -1:a +1:b) ?
            print('\t%d:%s' % (k, vtxt))

    # emit initial kv1 and kv2 states prepared as ZODB would do natively
    emit(diff({}, kv1), verify=kv1, treeTopo=None)
    t1struct0 = xbtree.StructureOf(ztree)
    emit(diff(kv1, kv2), verify=kv2, treeTopo=None)
    t2struct0 = xbtree.StructureOf(ztree)

    # all tree1 and tree2 topologies jumps in between we are going to emit:
    # native + n random ones.
    t1structv = [t1struct0] + random.sample(t1AllStructs, min(n, len(t1AllStructs)))
    t2structv = [t2struct0] + random.sample(t2AllStructs, min(n, len(t2AllStructs)))

    # emit tree1->tree2 and tree1<-tree2 transitions for all combinations of tree1 and tree2.
    # tree1₀->tree2₀ was already done.
    t12travel = list(bitravel2Way(t1structv, t2structv))
    assert t12travel[0] is t1struct0
    assert t12travel[1] is t2struct0
    for i, tstruct in enumerate(t12travel[2:]):
        # the travel alternates sides: even steps land on tree1, odd ones on tree2
        if i % 2 == 0:
            delta, verify, structv = diff21, kv1, t1structv
        else:
            delta, verify, structv = diff12, kv2, t2structv
        assert tstruct in structv
        emit(delta, verify, tstruct)
# bitravel2Way generates travel path through all A<->B edges such
# that all edges a->b and a<-b are traveled and exactly once.
#
# a zodb tree goes through tree states
# every state is committed as separate transaction & printed in the same
# format as treedelta-genallstructs.py does
# The travel starts from A[0].
def bitravel2Way(A, B): # -> i[] of node
    # both sides must be non-empty; the path starts and finishes at A[0]
    assert len(A) > 0
    assert len(B) > 0
    origin = A[0]
    yield origin            # A₀
    for b in B:
        yield b             # A₀ -> Bj
        for a in A[1:]:
            yield a         # Ai <- Bj
            yield b         # Ai -> Bj
        yield origin        # A₀ <- Bj
# test_bitravel2Way verifies bitravel2Way on a small 3x2 example.
def test_bitravel2Way():
    A = ['a', 'b', 'c']
    B = [1, 2]
    want = ['a',1, 'b',1, 'c',1, 'a',2, 'b',2, 'c',2, 'a']
    got = list(bitravel2Way(A, B))
    assert got == want, (got, want)

# the self-check is run right at module load
test_bitravel2Way()
# kvEncode encodes key->value mapping into text.
# e.g. {1:'a', 2:'b'} -> '1:a,2:b'
#
# Items are emitted in sorted key order; every value is converted to text
# with vencode.
def kvEncode(kvDict, vencode): # -> kvText
    itemv = ['%d:%s' % (k, vencode(kvDict[k])) for k in sorted(kvDict.keys())]
    return ','.join(itemv)
# kvDecode decodes key->value mapping from text.
# e.g. '1:a,2:b' -> {1:'a', 2:'b'}
#
# Keys are parsed as integers; every value text is converted to value with vdecode.
# Empty text decodes to empty mapping.
# ValueError is raised if a key is present more than once.
def kvDecode(kvText, vdecode): # -> kvDict
    kvDict = {}
    if kvText == "":
        return kvDict

    for entry in kvText.split(','):
        ktxt, vtxt = entry.split(':')
        key = int(ktxt)
        val = vdecode(vtxt)
        if key in kvDict:
            raise ValueError("key %s present multiple times" % key)
        kvDict[key] = val
    return kvDict
# diff computes difference in between mappings d1 and d2.
#
# The result lists, in sorted key order, (k,v) for every key whose value has
# to change to turn d1 into d2. Values are compared by identity.
# DEL in place of v means that the key has to be deleted.
DEL = 'ø'
def diff(d1, d2): # -> [] of (k,v) to change; DEL means del[k]
    allkeys = set(d1.keys()) | set(d2.keys())
    delta = []
    for k in sorted(allkeys):
        old = d1.get(k, DEL)
        new = d2.get(k, DEL)
        if old is new:
            continue    # unchanged
        delta.append((k, new))
    return delta
# patch changes mapping d according to diff.
# diff = [] of (k,v) to change; DEL means del[k]
#
# If verify is not None, d is checked after patching to have exactly the same
# keys as verify, and, for every key, the very same (by identity) value.
# A mismatch results in panic.
def patch(d, diff, verify):
    for (k, v) in diff:
        if v is not DEL:
            d[k] = v
        else:
            del d[k]

    if verify is None:
        return

    keys = set(d.keys())
    keyok = set(verify.keys())
    if keys != keyok:
        panic("patch: verify: different keys: %s" % keys.symmetric_difference(keyok))
    for k in keys:
        got, want = d[k], verify[k]
        if got is not want:
            panic("patch: verify: [%d] different: got %r;  want %r" % (k, got, want))
# commit commits current transaction with description.
#
# It returns ID of the committed transaction.
def commit(description): # -> tid
    txn = transaction.get()
    txn.description = description

    # XXX hack to retrieve committed transaction ID via ._p_serial of object changed in this transaction
    # the object has to be picked before the commit; its ._p_serial is read after.
    resourcev = txn._resources
    assert len(resourcev) == 1
    zconn = resourcev[0]
    assert isinstance(zconn, Connection)
    registered = zconn._registered_objects
    assert len(registered) > 0
    obj = registered[0]
    assert obj._p_state == CHANGED

    txn.commit()
    return obj._p_serial
# treetxt returns text representation of a tree.
#
# vencode, if given, is forwarded to xbtree.TopoEncode as its vencode
# argument so that values can be included into the text.
# By default only the tree structure is encoded.
#
# The previous code passed `vencode=...` - an unfinished placeholder, which is
# a syntax error on Python2 and passes non-callable Ellipsis on Python3.
# NOTE(review): assumes xbtree.TopoEncode can be called without vencode - confirm.
def treetxt(ztree, vencode=None): # -> txt
    tstruct = xbtree.StructureOf(ztree)
    if vencode is None:
        return xbtree.TopoEncode(tstruct)   # FIXME include values
    return xbtree.TopoEncode(tstruct, vencode=vencode)
# main is the program entry point.
#
# It parses `<zurl> <n> <kv1> <kv2>` from command line, opens the storage and
# runs treedeltaGenAllStructs on it. On wrong number of arguments usage is
# printed to stderr and the program exits with status 1.
@func
def main():
    argv = sys.argv
    if len(argv) != 5:
        print("Usage: %s <zurl> <n> <kv1> <kv2>" % argv[0], file=sys.stderr)
        sys.exit(1)

    zurl, ntxt, kv1, kv2 = argv[1:]
    n = int(ntxt)

    zstor = storageFromURL(zurl)
    defer(zstor.close)      # storage is closed when main returns

    treedeltaGenAllStructs(zstor, kv1, kv2, n)
# XXX move all into treegen? ex. `treegen allstructs n kv1 kv2`
# `treegen trees tree1 tree2 ... or stdin` ?
# run main only when the file is executed as a program
if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2020 Nexedi SA and Contributors.
# Kirill Smelkov <kirr@nexedi.com>
#
# This program is free software: you can Use, Study, Modify and Redistribute
# it under the terms of the GNU General Public License version 3, or (at your
# option) any later version, as published by the Free Software Foundation.
#
# You can also Link and Combine this program with other software covered by
# the terms of any of the Free Software licenses or any of the Open Source
# Initiative approved licenses and Convey the resulting work. Corresponding
# source of such a combination shall include the source code for all other
# software used.
#
# This program is distributed WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See COPYING file for full licensing terms.
# See https://www.nexedi.com/licensing for rationale and options.
"""Program treegen generates tree states ..."""
# XXX -> treecommit ?
# XXX input of known tricky cases from ΔBTree test driver
# tree1 (with values)
# tree2
# ...
# | treegen
#
# a zodb tree goes through tree states
# every state is committed as separate transaction & printed in the same
# format as treedelta-genallstructs.py does
# XXX move all into treegen? ex. `treegen allstructs n kv1 kv2`
# `treegen trees tree1 tree2 ... or stdin` ?
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment