Commit 651037cd authored by Christian Theune's avatar Christian Theune

- Added migration script

- Fixed bug in bushy layout: oid recognition pattern would not handle hex
  representations correctly
- Fixed bug in lawn layout: empty strings (the base directory) would be
  recognized as the oid 0.
parent 1dbc74e0
...@@ -11,7 +11,8 @@ New Features ...@@ -11,7 +11,8 @@ New Features
- Changed layout strategy for the blob directory to a bushy approach (8 levels - Changed layout strategy for the blob directory to a bushy approach (8 levels
deep, at most ~256 entries per directory level, one directory for each deep, at most ~256 entries per directory level, one directory for each
blob). Old directories are automatically detected and will be handled with blob). Old directories are automatically detected and will be handled with
the old strategy. the old strategy. A migration script (`migrateblobs.py`) is provided to
convert the different layouts.
- Versions are no-longer supported. - Versions are no-longer supported.
......
...@@ -451,26 +451,24 @@ class FilesystemHelper: ...@@ -451,26 +451,24 @@ class FilesystemHelper:
def auto_layout_select(path): def auto_layout_select(path):
# A heuristic to look at a path and determine which directory layout to # A heuristic to look at a path and determine which directory layout to
# use. Basically we try to figure out if the directory is either already # use.
# used and contains an explicit marker, is unused or used without a
# marker.
layout_marker = os.path.join(path, LAYOUT_MARKER) layout_marker = os.path.join(path, LAYOUT_MARKER)
if not os.path.exists(path): if not os.path.exists(path):
log('Blob directory %s does not exist. ' log('Blob directory %s does not exist. '
'Selected `bushy` layout. ' % path) 'Selected `bushy` layout. ' % path)
layout = 'bushy' layout = 'bushy'
elif len(os.listdir(path)) == 0: elif len(os.listdir(path)) == 0:
log('Blob directory %s is unused and has no layout marker set.' log('Blob directory `%s` is unused and has no layout marker set. '
'Selected `bushy` layout. ' % path) 'Selected `bushy` layout. ' % path)
layout = 'bushy' layout = 'bushy'
elif LAYOUT_MARKER not in os.listdir(path): elif LAYOUT_MARKER not in os.listdir(path):
log('Blob directory %s is used but has no layout marker set.' log('Blob directory `%s` is used but has no layout marker set. '
'Selected `lawn` layout. ' % path) 'Selected `lawn` layout. ' % path)
layout = 'lawn' layout = 'lawn'
else: else:
layout = open(layout_marker, 'rb').read() layout = open(layout_marker, 'rb').read()
layout = layout.strip() layout = layout.strip()
log('Blob directory %s has layout marker set.' log('Blob directory `%s` has layout marker set. '
'Selected `%s` layout. ' % (path, layout)) 'Selected `%s` layout. ' % (path, layout))
return layout return layout
...@@ -483,7 +481,7 @@ class BushyLayout(object): ...@@ -483,7 +481,7 @@ class BushyLayout(object):
""" """
blob_path_pattern = r'^' + (r'0x[0-9]{1,2}/*'*8) + r'$' blob_path_pattern = r'^' + (r'0x[0-9a-f]{1,2}/*'*8) + r'$'
blob_path_pattern = re.compile(blob_path_pattern) blob_path_pattern = re.compile(blob_path_pattern)
def oid_to_path(self, oid): def oid_to_path(self, oid):
...@@ -496,8 +494,7 @@ class BushyLayout(object): ...@@ -496,8 +494,7 @@ class BushyLayout(object):
def path_to_oid(self, path): def path_to_oid(self, path):
if self.blob_path_pattern.match(path) is None: if self.blob_path_pattern.match(path) is None:
raise ValueError("Not a valid OID path: %s" % path) raise ValueError("Not a valid OID path: `%s`" % path)
# The path always has a leading slash that we need to ignore.
path = path.split('/') path = path.split('/')
# The path contains the OID in little endian form but the OID itself # The path contains the OID in little endian form but the OID itself
# is big endian. # is big endian.
...@@ -522,9 +519,13 @@ class LawnLayout(object): ...@@ -522,9 +519,13 @@ class LawnLayout(object):
def path_to_oid(self, path): def path_to_oid(self, path):
try: try:
if path == '':
# This is a special case where repr_to_oid converts '' to the
# OID z64.
raise TypeError()
return utils.repr_to_oid(path) return utils.repr_to_oid(path)
except TypeError: except TypeError:
raise ValueError('Not a valid OID path: %s' % path) raise ValueError('Not a valid OID path: `%s`' % path)
LAYOUTS['lawn'] = LawnLayout() LAYOUTS['lawn'] = LawnLayout()
......
##############################################################################
#
# Copyright (c) 2008 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""A script to migrate a blob directory into a different layout.
"""
import logging
import optparse
import os
from ZODB.blob import FilesystemHelper, rename_or_copy_blob
from ZODB.utils import cp, oid_repr
def link_or_copy(f1, f2):
    """Make the file `f1` available under the path `f2`.

    A hard link is attempted first because it needs no extra disk space.
    If linking is not possible, the file is copied with `shutil.copy`
    instead.

    f1 -- path of the existing source file
    f2 -- path at which the file shall appear
    """
    # Imported locally: the module's import block does not provide `shutil`,
    # so the fallback below used to fail with a NameError.
    import shutil

    try:
        os.link(f1, f2)
    except (AttributeError, OSError):
        # OSError: the link could not be created (for example `f2` already
        # exists or the paths are on different file systems).
        # AttributeError: `os.link` is not available on this platform.
        shutil.copy(f1, f2)
def migrate(source, dest, layout):
    """Migrate all blob data from `source` into `dest`.

    source -- path of the existing blob directory; it is opened without an
              explicit layout argument
    dest   -- path of the blob directory to populate
    layout -- name of the layout to use for `dest`

    Both directories are passed through `FilesystemHelper.create()` before
    any data is moved. Every file found in a source OID directory is
    hard-linked or copied into the matching destination OID directory, and a
    progress line is printed per OID.
    """
    source_fsh = FilesystemHelper(source)
    source_fsh.create()
    dest_fsh = FilesystemHelper(dest, layout)
    dest_fsh.create()

    banner = "Migrating blob data from `%s` (%s) to `%s` (%s)" % (
        source, source_fsh.layout_name, dest, dest_fsh.layout_name)
    print(banner)

    for oid, blob_dir in source_fsh.listOIDs():
        target_dir = dest_fsh.getPathForOID(oid, create=True)
        names = os.listdir(blob_dir)
        for name in names:
            link_or_copy(os.path.join(blob_dir, name),
                         os.path.join(target_dir, name))
        # One summary line per OID, after all of its files were transferred.
        print("\tOID: %s - %s files " % (oid_repr(oid), len(names)))
def main(source=None, dest=None, layout="bushy", argv=None):
    """Command-line entry point of the blob migration script.

    source, dest -- accepted for backward compatibility; their values are
                    always replaced by the two positional command-line
                    arguments
    layout       -- default value for the ``--layout`` option
    argv         -- list of command-line arguments to parse; `None` lets
                    optparse fall back to ``sys.argv[1:]``

    Exits through `parser.error` unless exactly two positional arguments
    (source and destination directory) are given.
    """
    # The layout is selected with -l/--layout, so only two positional
    # arguments are advertised.
    usage = "usage: %prog [options] <source> <dest>"
    description = ("Create the new directory <dest> and migrate all blob "
                   "data <source> to <dest> while using the new <layout> for "
                   "<dest>")

    parser = optparse.OptionParser(usage=usage, description=description)
    parser.add_option("-l", "--layout",
                      default=layout, type='choice',
                      choices=['bushy', 'lawn'],
                      help="Define the layout to use for the new directory "
                      "(bushy or lawn). Default: %default")
    options, args = parser.parse_args(argv)

    if len(args) != 2:
        parser.error("source and destination must be given")

    # Send all log records to a stream handler on the root logger; level 0
    # means no level-based filtering on the root logger.
    root_logger = logging.getLogger()
    root_logger.addHandler(logging.StreamHandler())
    root_logger.setLevel(0)

    source, dest = args
    migrate(source, dest, options.layout)
# Run the migration with the command-line arguments when this file is
# executed as a script rather than imported.
if __name__ == '__main__':
    main()
...@@ -31,12 +31,14 @@ entries per directory level: ...@@ -31,12 +31,14 @@ entries per directory level:
>>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00') >>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\x01' '\x00\x00\x00\x00\x00\x00\x00\x01'
>>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\xff'
Paths that do not represent an OID will cause a ValueError: Paths that do not represent an OID will cause a ValueError:
>>> bushy.path_to_oid('tmp') >>> bushy.path_to_oid('tmp')
Traceback (most recent call last): Traceback (most recent call last):
ValueError: Not a valid OID path: tmp ValueError: Not a valid OID path: `tmp`
The `lawn` layout The `lawn` layout
...@@ -61,7 +63,10 @@ Paths that do not represent an OID will cause a ValueError: ...@@ -61,7 +63,10 @@ Paths that do not represent an OID will cause a ValueError:
>>> lawn.path_to_oid('tmp') >>> lawn.path_to_oid('tmp')
Traceback (most recent call last): Traceback (most recent call last):
ValueError: Not a valid OID path: tmp ValueError: Not a valid OID path: `tmp`
>>> lawn.path_to_oid('')
Traceback (most recent call last):
ValueError: Not a valid OID path: ``
Auto-detecting the layout of a directory Auto-detecting the layout of a directory
...@@ -162,3 +167,117 @@ ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, bu ...@@ -162,3 +167,117 @@ ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, bu
>>> shutil.rmtree(d) >>> shutil.rmtree(d)
Migrating between directory layouts
===================================
A script called `migrateblobs.py` is distributed with the ZODB for offline
migration capabilities between different directory layouts. It can migrate any
blob directory layout to any other layout. It leaves the original blob
directory untouched (except for possibly creating a temporary directory and
the storage layout marker).
The migration is accessible as a library function:
>>> from ZODB.scripts.migrateblobs import migrate
Create a `lawn` directory structure and migrate it to the new `bushy` one:
>>> from ZODB.blob import FilesystemHelper
>>> d = tempfile.mkdtemp()
>>> old = os.path.join(d, 'old')
>>> old_fsh = FilesystemHelper(old, 'lawn')
>>> old_fsh.create()
>>> blob1 = old_fsh.getPathForOID(7039, create=True)
>>> blob2 = old_fsh.getPathForOID(10, create=True)
>>> blob3 = old_fsh.getPathForOID(7034, create=True)
>>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
>>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
>>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
>>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
>>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
>>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
Committed blobs have their permissions set to 000
The migration function is called with the old and the new path and the layout
that shall be used for the new directory:
>>> bushy = os.path.join(d, 'bushy')
>>> migrate(old, bushy, 'bushy') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
OID: 0x1b7f - 2 files
OID: 0x0a - 2 files
OID: 0x1b7a - 2 files
The new directory now contains the same files in different directories, but
with the same sizes and permissions:
>>> import string
>>> def stat(path):
... s = os.stat(path)
... print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
>>> def ls(path):
... for p, dirs, files in os.walk(path):
... stat(p)
... for file in files:
... stat(os.path.join(p, file))
>>> ls(bushy)
040700 4096 /.../bushy
0100644 5 /.../bushy/.layout
040700 4096 /.../bushy/0x7a
040700 4096 /.../bushy/0x7a/0x1b
040700 4096 /.../bushy/0x7a/0x1b/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
0100644 5 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
0100644 4 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
040700 4096 /.../bushy/tmp
040700 4096 /.../bushy/0x0a
040700 4096 /.../bushy/0x0a/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
040700 4096 /.../bushy/0x7f
040700 4096 /.../bushy/0x7f/0x1b
040700 4096 /.../bushy/0x7f/0x1b/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
We can also migrate the bushy layout back to the lawn layout:
>>> lawn = os.path.join(d, 'lawn')
>>> migrate(bushy, lawn, 'lawn')
Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
OID: 0x1b7a - 2 files
OID: 0x0a - 2 files
OID: 0x1b7f - 2 files
>>> ls(lawn)
040700 4096 /.../lawn
0100644 4 /.../lawn/.layout
040700 4096 /.../lawn/0x1b7f
0100644 3 /.../lawn/0x1b7f/foo
0100644 3 /.../lawn/0x1b7f/foo2
040700 4096 /.../lawn/tmp
040700 4096 /.../lawn/0x0a
0100644 3 /.../lawn/0x0a/foo4
0100644 3 /.../lawn/0x0a/foo3
040700 4096 /.../lawn/0x1b7a
0100644 5 /.../lawn/0x1b7a/foo6
0100644 4 /.../lawn/0x1b7a/foo5
>>> shutil.rmtree(d)
...@@ -502,7 +502,13 @@ def test_suite(): ...@@ -502,7 +502,13 @@ def test_suite():
suite.addTest(doctest.DocFileSuite( suite.addTest(doctest.DocFileSuite(
"blob_basic.txt", "blob_connection.txt", "blob_transaction.txt", "blob_basic.txt", "blob_connection.txt", "blob_transaction.txt",
"blob_packing.txt", "blob_importexport.txt", "blob_consume.txt", "blob_packing.txt", "blob_importexport.txt", "blob_consume.txt",
"blob_tempdir.txt", "blob_layout.txt", "blob_tempdir.txt",
setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown,
))
suite.addTest(doctest.DocFileSuite(
"blob_layout.txt",
optionflags=doctest.ELLIPSIS|doctest.NORMALIZE_WHITESPACE|doctest.REPORT_NDIFF,
setUp=ZODB.tests.util.setUp, setUp=ZODB.tests.util.setUp,
tearDown=ZODB.tests.util.tearDown, tearDown=ZODB.tests.util.tearDown,
)) ))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment