Commit 838f450c authored by Julien Muchembled

qa: review testImporter

- Stop using NEO source code as sample data.
- For ZODB5, add a test that does not merge several DB.
parent f4c2fc6a
#
# Copyright (C) 2014-2017 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os, stat, time
from persistent import Persistent
from BTrees.OOBTree import OOBTree
class Inode(OOBTree):
    """Persistent BTree node modelling a filesystem inode.

    Children are stored as items keyed by name.  The ``os.pardir`` key
    holds the parent inode; an inode created without ``up`` is its own
    parent, which is how the top of a tree is recognised.
    """

    # File content or symlink target; directories keep the None default.
    data = None

    def __init__(self, up=None, mode=stat.S_IFDIR):
        """Create an inode of type ``mode`` whose parent is ``up``."""
        self[os.pardir] = self if up is None else up
        self.mode = mode
        self.mtime = time.time()

    def __getstate__(self):
        # Serialize both the instance attributes and the BTree content.
        return Persistent.__getstate__(self), OOBTree.__getstate__(self)

    def __setstate__(self, state):
        Persistent.__setstate__(self, state[0])
        OOBTree.__setstate__(self, state[1])

    def edit(self, data=None, mtime=None):
        """Update content and modification time, only writing on change.

        ``data`` must be None for a directory and non-None for a regular
        file or a symlink (checked with assertions).  A false ``mtime``
        (None or 0) is replaced by the current time.
        """
        fmt = stat.S_IFMT(self.mode)
        if data is None:
            assert fmt == stat.S_IFDIR, oct(fmt)
        else:
            assert fmt == stat.S_IFREG or fmt == stat.S_IFLNK, oct(fmt)
            # Avoid assigning an equal value, so that the object is not
            # needlessly marked as changed.
            if self.data != data:
                self.data = data
        if self.mtime != mtime:
            self.mtime = mtime or time.time()

    def root(self):
        """Return the topmost inode reachable through ``os.pardir``."""
        node = self
        while 1:
            try:
                parent = node[os.pardir]
            except KeyError:
                return node
            # The top inode is its own parent (see __init__): stop here
            # instead of following the self-reference forever.
            if parent is node:
                return node
            node = parent

    def traverse(self, path, followlinks=True):
        """Resolve ``path`` relative to this inode and return the target.

        ``path`` is either a non-empty string, split on ``os.sep``, or an
        iterable of components.  An empty component restarts the lookup
        from the root, ``os.curdir`` is skipped, and symlinks are resolved
        when ``followlinks`` is true.  A missing name raises KeyError.
        """
        path = iter(path.split(os.sep) if isinstance(path, basestring) and path
                    else path)
        for d in path:
            if not d:
                return self.root().traverse(path, followlinks)
            if d != os.curdir:
                d = self[d]
                if followlinks and stat.S_ISLNK(d.mode):
                    # The link target is resolved from the directory that
                    # contains the link.
                    d = self.traverse(d.data, True)
                # The remaining components are consumed by the child.
                return d.traverse(path, followlinks)
        return self

    def inodeFromFs(self, path):
        """Create or refresh the child named after ``path`` from the disk.

        Returns a true value if the child was created or modified.
        """
        s = os.lstat(path)
        mode = s.st_mode
        name = os.path.basename(path)
        try:
            i = self[name]
        except KeyError:
            i = self[name] = self.__class__(self, mode)
            changed = True
        else:
            assert stat.S_IFMT(i.mode) == stat.S_IFMT(mode)
            changed = False
        if stat.S_ISREG(mode):
            # Close the file deterministically instead of leaking it.
            with open(path) as f:
                data = f.read()
        elif stat.S_ISLNK(mode):
            data = os.readlink(path)
        else:
            data = None
        i.edit(data, s.st_mtime)
        return changed or i._p_changed

    def treeFromFs(self, path, yield_interval=None, filter=None):
        """Import the directory tree at ``path``, yielding periodically.

        ``self`` is yielded each time ``yield_interval`` inodes have been
        created or modified, and once more at the end if changes remain,
        so that the caller can act between batches.  ``filter``, if
        given, is called with the path relative to ``path``; entries for
        which it returns a false value are skipped, and skipped
        directories are not descended into.
        """
        prefix_len = len(path) + len(os.sep)
        n = 0
        for dirpath, dirnames, filenames in os.walk(path):
            inodeFromFs = self.traverse(dirpath[prefix_len:]).inodeFromFs
            for names in dirnames, filenames:
                skipped = []
                for j, name in enumerate(names):
                    p = os.path.join(dirpath, name)
                    if filter and not filter(p[prefix_len:]):
                        skipped.append(j)
                    elif inodeFromFs(p):
                        n += 1
                        if n == yield_interval:
                            n = 0
                            yield self
                # Remove from the highest index down so that the remaining
                # indexes stay valid; pruning dirnames in place also keeps
                # os.walk out of the skipped directories.
                while skipped:
                    del names[skipped.pop()]
        if n:
            yield self

    def walk(self):
        """Iterate over the tree like os.walk, from this inode down.

        Yields ``(dirpath, dirnames, nondirnames)`` tuples, where
        ``dirpath`` is relative to this inode (``os.curdir`` for itself).
        """
        s = [(None, self)]
        while s:
            top, node = s.pop()
            dirs = []
            nondirs = []
            for name, inode in node.iteritems():
                if name != os.pardir:
                    (dirs if stat.S_ISDIR(inode.mode) else nondirs).append(name)
            yield top or os.curdir, dirs, nondirs
            for name in dirs:
                s.append((os.path.join(top, name) if top else name, node[name]))
#
# Copyright (C) 2018 Nexedi SA
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import hashlib, random
from collections import deque
from itertools import islice
from persistent import Persistent
from BTrees.IOBTree import IOBTree
from .stat_zodb import _DummyData
def generateTree(random=random):
    """Return a shuffled list of random tree entries.

    Each entry is a tuple of integers: zero or more non-negative keys
    naming inner nodes, then a negative key naming a leaf, then a
    non-negative size.  The same leaf path may occur several times with
    different sizes.  ``random`` must provide ``gauss``, ``randrange``
    and ``shuffle``; pass a seeded generator for a reproducible result.
    """
    fanout = 5

    def random_size():
        return max(int(random.gauss(40, 30)), 0)

    entries = []
    pending = deque([()])
    # Breadth-first generation: the random draws are made in exactly this
    # order, which matters for reproducibility with a seeded generator.
    while pending:
        prefix = pending.popleft()
        leaf_count = random.randrange(fanout)
        for key in xrange(-leaf_count, 0):
            entries.append(prefix + (key, random_size()))
        # The number of children shrinks with depth and becomes
        # non-positive below the second level, which ends the growth.
        child_count = fanout * (1 - len(prefix)) + random.randrange(fanout)
        for key in xrange(child_count):
            pending.append(prefix + (key,))

    # Repeatedly re-emit about half of the previous batch with a new size,
    # until a batch happens to be empty.
    batch = entries
    while batch:
        resized = []
        for entry in batch:
            if random.randrange(2):
                resized.append(entry[:-1] + (random_size(),))
        entries.extend(resized)
        batch = resized

    random.shuffle(entries)
    return entries
class Leaf(Persistent):
    """Persistent object stored at the negative keys of the tree."""
# Container type used for the inner (non-leaf) nodes of the tree.
Node = IOBTree
def importTree(root, tree, yield_interval=None, filter=None):
    """Materialize the entries of ``tree`` under ``root``, in batches.

    Each entry is a tuple of keys followed by a size.  Non-negative keys
    map to ``Node`` containers, created on demand; a negative key maps to
    a ``Leaf`` whose ``data`` is regenerated from the entry.  ``filter``,
    if given, is called with every key prefix of an entry, and the rest
    of the entry is ignored as soon as it returns a false value.

    ``root`` is yielded each time ``yield_interval`` modifications have
    been made, and once more at the end if some remain, so that the
    caller can act between batches.
    """
    pending = 0
    for path in tree:
        current = root
        for depth, key in enumerate(path[:-1], 1):
            if filter and not filter(path[:depth]):
                break
            try:
                child = current[key]
            except KeyError:
                child = Leaf() if key < 0 else Node()
                current[key] = child
            else:
                if not key < 0:
                    # Existing container: descend without counting it
                    # as a modification.
                    current = child
                    continue
            current = child
            if key < 0:
                # Seeding with the whole entry makes the content
                # a function of the entry alone.
                current.data = bytes(_DummyData(random.Random(path), path[-1]))
            pending += 1
            if pending == yield_interval:
                pending = 0
                yield root
    if pending:
        yield root
class hashTree(object):
    """Hash a tree of nodes incrementally, one node per step.

    An object with a ``data`` attribute is hashed as a leaf (path, length
    and digest of the data).  Any other object is hashed as a container
    (path and keys) and its items are visited next.  Calling the instance
    advances the walk; once the walk is complete, attributes such as
    ``hexdigest`` are delegated to the resulting hash object.
    """

    _hash = None
    _new = hashlib.md5

    def __init__(self, node):
        stack = [((), node)]

        def visit():
            digest = self._new()
            feed = digest.update
            while stack:
                path, current = stack.pop()
                try:
                    feed('%s %s %s\n' % (path, len(current.data),
                        self._new(current.data).hexdigest()))
                    yield
                except AttributeError:
                    # No 'data' attribute: this is a container.
                    feed('%s %s\n' % (path, tuple(current.keys())))
                    yield
                    # Push in reverse so that items are popped in order.
                    stack.extend([(path + (key, ), child)
                        for key, child in reversed(current.items())])
            # The walk is over: drop the generator and publish the hash.
            del self._walk
            self._hash = digest

        self._walk = visit()

    def __getattr__(self, attr):
        # Only reached for names not found on the instance or its class.
        return getattr(self._hash, attr)

    def __call__(self, n=None):
        """Advance the walk and report how far it went.

        Without argument, visit all remaining nodes and return how many
        were visited.  Otherwise, visit exactly ``n`` nodes and return
        None.
        """
        if n is not None:
            next(islice(self._walk, n - 1, None))
            return
        visited = 0
        for _ in self._walk:
            visited += 1
        return visited
...@@ -19,11 +19,13 @@ PROD1 = lambda random=random: DummyZODB(6.04237779991, 1.55811487853, ...@@ -19,11 +19,13 @@ PROD1 = lambda random=random: DummyZODB(6.04237779991, 1.55811487853,
1.04108991045, 0.906703192546, 1.04108991045, 0.906703192546,
0.810080409164, random) 0.810080409164, random)
def DummyData(random=random): def _DummyData(random, size):
# returns data that gzip at about 28.5 % # returns data that gzip at about 28.5 %
return bytearray(int(random.gauss(0, .8)) % 256 for x in xrange(size))
def DummyData(random=random):
# make sure sample is bigger than dictionary of compressor # make sure sample is bigger than dictionary of compressor
data = bytearray(int(random.gauss(0, .8)) % 256 for x in xrange(100000)) return StringIO(_DummyData(random, 100000))
return StringIO(data)
class DummyZODB(object): class DummyZODB(object):
......
...@@ -16,15 +16,14 @@ ...@@ -16,15 +16,14 @@
from cPickle import Pickler, Unpickler from cPickle import Pickler, Unpickler
from cStringIO import StringIO from cStringIO import StringIO
from itertools import islice, izip_longest from itertools import izip_longest
import os, shutil, unittest import os, random, shutil, unittest
import neo, transaction, ZODB import transaction, ZODB
from neo.client.exception import NEOPrimaryMasterLost from neo.client.exception import NEOPrimaryMasterLost
from neo.lib import logging from neo.lib import logging
from neo.lib.util import u64 from neo.lib.util import u64
from neo.storage.database.importer import Repickler from neo.storage.database.importer import Repickler
from ..fs2zodb import Inode from .. import expectedFailure, getTempDirectory, random_tree
from .. import expectedFailure, getTempDirectory
from . import NEOCluster, NEOThreadedTest from . import NEOCluster, NEOThreadedTest
from ZODB import serialize from ZODB import serialize
from ZODB.FileStorage import FileStorage from ZODB.FileStorage import FileStorage
...@@ -129,61 +128,56 @@ class ImporterTests(NEOThreadedTest): ...@@ -129,61 +128,56 @@ class ImporterTests(NEOThreadedTest):
self.assertIs(Obj, load()) self.assertIs(Obj, load())
self.assertDictEqual(state, load()) self.assertDictEqual(state, load())
def test(self): def _importFromFileStorage(self, multi=(),
# XXX: Using NEO source files as test data was a bad idea because root_filter=None, sub_filter=None):
# the test breaks easily in case of massive changes in the code, import_hash = '1d4ff03730fe6bcbf235e3739fbe5f5b'
# or if there are many untracked files. txn_size = 10
importer = [] tree = random_tree.generateTree(random.Random(0))
i = len(tree) // 3
assert i > txn_size
before_tree = tree[:i]
after_tree = tree[i:]
fs_dir = os.path.join(getTempDirectory(), self.id()) fs_dir = os.path.join(getTempDirectory(), self.id())
shutil.rmtree(fs_dir, 1) # for --loop shutil.rmtree(fs_dir, 1) # for --loop
os.mkdir(fs_dir) os.mkdir(fs_dir)
src_root, = neo.__path__
fs_list = "root", "client", "master", "tests"
def not_pyc(name):
return not name.endswith(".pyc")
# We use 'hash' to skip roughly half of files.
# They'll be added after the migration has started.
def root_filter(name):
if not_pyc(name):
i = name.find(os.sep)
return (i < 0 or name[:i] not in fs_list) and (
'.' not in name or hash(name) & 1)
def sub_filter(name):
return lambda n: not_pyc(n) and (
hash(n) & 1 if '.' in n else
os.sep in n or n in (name, "scripts"))
conn_list = []
iter_list = [] iter_list = []
db_list = []
# Setup several FileStorage databases. # Setup several FileStorage databases.
for i, name in enumerate(fs_list): for i, db in enumerate(('root',) + multi):
fs_path = os.path.join(fs_dir, name + ".fs") fs_path = os.path.join(fs_dir, '%s.fs' % db)
c = ZODB.DB(FileStorage(fs_path)).open() c = ZODB.DB(FileStorage(fs_path)).open()
r = c.root()["neo"] = Inode() r = c.root()['tree'] = random_tree.Node()
transaction.commit() transaction.commit()
conn_list.append(c) iter_list.append(random_tree.importTree(r, before_tree, txn_size,
iter_list.append(r.treeFromFs(src_root, 10, sub_filter(db) if i else root_filter))
sub_filter(name) if i else root_filter)) db_list.append((db, r, {
importer.append((name, {
"storage": "<filestorage>\npath %s\n</filestorage>" % fs_path "storage": "<filestorage>\npath %s\n</filestorage>" % fs_path
})) }))
# Populate FileStorage databases. # Populate FileStorage databases.
for iter_list in izip_longest(*iter_list): for i, iter_list in enumerate(izip_longest(*iter_list)):
for i in iter_list: for r in iter_list:
if i: if r:
transaction.commit() transaction.commit()
del iter_list
# Get oids of mount points and close. # Get oids of mount points and close.
for (name, cfg), c in zip(importer, conn_list): importer = []
r = c.root()["neo"] for db, r, cfg in db_list:
if name == "root": if db == 'root':
for name in fs_list[1:]: if multi:
cfg[name] = str(u64(r[name]._p_oid)) for x in multi:
cfg['_%s' % x] = str(u64(r[x]._p_oid))
else: else:
cfg["oid"] = str(u64(r[name]._p_oid)) h = random_tree.hashTree(r)
c.db().close() h()
self.assertEqual(import_hash, h.hexdigest())
else:
cfg["oid"] = str(u64(r[db]._p_oid))
db = '_%s' % db
r._p_jar.db().close()
importer.append((db, cfg))
del db_list, iter_list
#del importer[0][1][importer.pop()[0]] #del importer[0][1][importer.pop()[0]]
# Start NEO cluster with transparent import of a multi-base ZODB. # Start NEO cluster with transparent import.
with NEOCluster(compress=False, importer=importer) as cluster: with NEOCluster(importer=importer) as cluster:
# Suspend import for a while, so that import # Suspend import for a while, so that import
# is finished in the middle of the below 'for' loop. # is finished in the middle of the below 'for' loop.
# Use a slightly different main loop for storage so that it # Use a slightly different main loop for storage so that it
...@@ -202,7 +196,7 @@ class ImporterTests(NEOThreadedTest): ...@@ -202,7 +196,7 @@ class ImporterTests(NEOThreadedTest):
dm.doOperation = doOperation dm.doOperation = doOperation
cluster.start() cluster.start()
t, c = cluster.getTransaction() t, c = cluster.getTransaction()
r = c.root()["neo"] r = c.root()['tree']
# Test retrieving of an object from ZODB when next serial is in NEO. # Test retrieving of an object from ZODB when next serial is in NEO.
r._p_changed = 1 r._p_changed = 1
t.commit() t.commit()
...@@ -213,31 +207,42 @@ class ImporterTests(NEOThreadedTest): ...@@ -213,31 +207,42 @@ class ImporterTests(NEOThreadedTest):
## ##
self.assertRaisesRegexp(NotImplementedError, " getObjectHistory$", self.assertRaisesRegexp(NotImplementedError, " getObjectHistory$",
c.db().history, r._p_oid) c.db().history, r._p_oid)
i = r.walk() h = random_tree.hashTree(r)
next(islice(i, 4, None)) h(30)
logging.info("start migration") logging.info("start migration")
dm.doOperation(cluster.storage) dm.doOperation(cluster.storage)
# Adjust if needed. Must remain > 0. # Adjust if needed. Must remain > 0.
assert 14 == sum(1 for i in i) self.assertEqual(22, h())
self.assertEqual(import_hash, h.hexdigest())
# New writes after the switch to NEO.
last_import = -1 last_import = -1
for i, r in enumerate(r.treeFromFs(src_root, 6, not_pyc)): for i, r in enumerate(random_tree.importTree(
r, after_tree, txn_size)):
t.commit() t.commit()
if cluster.storage.dm._import: if cluster.storage.dm._import:
last_import = i last_import = i
self.tic() self.tic()
# Same as above. We want last_import smaller enough compared to i # Same as above. We want last_import smaller enough compared to i
assert i / 3 < last_import < i - 2, (last_import, i) assert i < last_import * 3 < 2 * i, (last_import, i)
self.assertFalse(cluster.storage.dm._import) self.assertFalse(cluster.storage.dm._import)
i = len(src_root) + 1 storage._cache.clear()
self.assertEqual(sorted(r.walk()), sorted( h = random_tree.hashTree(r)
(x[i:] or '.', sorted(y), sorted(filter(not_pyc, z))) self.assertEqual(93, h())
for x, y, z in os.walk(src_root))) self.assertEqual('6bf0f0cb2d6c1aae9e52c412ef0e25b6', h.hexdigest())
t.commit()
def test1(self):
self._importFromFileStorage()
def testMerge(self):
multi = 1, 2, 3
self._importFromFileStorage(multi,
(lambda path: path[0] not in multi or len(path) == 1),
(lambda db: lambda path: path[0] in (db, 4)))
if getattr(serialize, '_protocol', 1) > 1: if getattr(serialize, '_protocol', 1) > 1:
# XXX: With ZODB5, we should at least keep a working test that does not # XXX: With ZODB5, we should at least keep a working test that does not
# merge several DB. # merge several DB.
test = expectedFailure(NEOPrimaryMasterLost)(test) testMerge = expectedFailure(NEOPrimaryMasterLost)(testMerge)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment