Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
ZODB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Nicolas Wavrant
ZODB
Commits
3037f42a
Commit
3037f42a
authored
Jun 21, 2008
by
Christian Theune
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Back out my changes that only belong onto the branch right now.
parent
44900f9c
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
72 additions
and
551 deletions
+72
-551
src/CHANGES.txt
src/CHANGES.txt
+0
-6
src/ZEO/ClientStorage.py
src/ZEO/ClientStorage.py
+16
-3
src/ZEO/tests/testZEO.py
src/ZEO/tests/testZEO.py
+11
-8
src/ZODB/blob.py
src/ZODB/blob.py
+39
-166
src/ZODB/scripts/migrateblobs.py
src/ZODB/scripts/migrateblobs.py
+0
-74
src/ZODB/tests/blob_layout.txt
src/ZODB/tests/blob_layout.txt
+0
-283
src/ZODB/tests/blob_tempdir.txt
src/ZODB/tests/blob_tempdir.txt
+1
-1
src/ZODB/tests/blob_transaction.txt
src/ZODB/tests/blob_transaction.txt
+3
-3
src/ZODB/tests/testblob.py
src/ZODB/tests/testblob.py
+2
-7
No files found.
src/CHANGES.txt
View file @
3037f42a
...
...
@@ -8,12 +8,6 @@ Change History
New Features
------------
- Changed layout strategy for the blob directory to a bushy approach (8 levels
deep, at most ~256 entries per directory level, one directory for each
blob). Old directories are automatically detected and will be handled with
the old strategy. A migration script (`migrateblobs.py`) is provided to
convert the different layouts.
- Versions are no longer supported.
- ZEO cache files can be larger than 4G. Note that older ZEO cache
...
...
src/ZEO/ClientStorage.py
View file @
3037f42a
...
...
@@ -855,7 +855,9 @@ class ClientStorage(object):
def
_storeBlob_shared
(
self
,
oid
,
serial
,
data
,
filename
,
txn
):
# First, move the blob into the blob directory
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
dir
=
self
.
fshelper
.
getPathForOID
(
oid
)
if
not
os
.
path
.
exists
(
dir
):
os
.
mkdir
(
dir
)
fd
,
target
=
self
.
fshelper
.
blob_mkstemp
(
oid
,
serial
)
os
.
close
(
fd
)
...
...
@@ -922,7 +924,14 @@ class ClientStorage(object):
raise
POSException
.
POSKeyError
(
"No blob file"
,
oid
,
serial
)
# First, we'll create the directory for this oid, if it doesn't exist.
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
)
if
not
os
.
path
.
exists
(
targetpath
):
try
:
os
.
makedirs
(
targetpath
,
0700
)
except
OSError
:
# We might have lost a race. If so, the directory
# must exist now
assert
os
.
path
.
exists
(
targetpath
)
# OK, it's not here and we (or someone) needs to get it. We
# want to avoid getting it multiple times. We want to avoid
...
...
@@ -1109,15 +1118,19 @@ class ClientStorage(object):
assert
s
==
tid
,
(
s
,
tid
)
self
.
_cache
.
store
(
oid
,
s
,
None
,
data
)
if
self
.
fshelper
is
not
None
:
blobs
=
self
.
_tbuf
.
blobs
while
blobs
:
oid
,
blobfilename
=
blobs
.
pop
()
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
)
if
not
os
.
path
.
exists
(
targetpath
):
os
.
makedirs
(
targetpath
,
0700
)
rename_or_copy_blob
(
blobfilename
,
self
.
fshelper
.
getBlobFilename
(
oid
,
tid
),
)
self
.
_tbuf
.
clear
()
def
undo
(
self
,
trans_id
,
txn
):
...
...
src/ZEO/tests/testZEO.py
View file @
3037f42a
...
...
@@ -515,7 +515,8 @@ class CommonBlobTests:
self
.
_storage
.
tpc_abort
(
t
)
raise
self
.
assert_
(
not
os
.
path
.
exists
(
tfname
))
filename
=
self
.
_storage
.
fshelper
.
getBlobFilename
(
oid
,
revid
)
filename
=
os
.
path
.
join
(
self
.
blobdir
,
oid_repr
(
oid
),
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
self
.
assert_
(
os
.
path
.
exists
(
filename
))
self
.
assertEqual
(
somedata
,
open
(
filename
).
read
())
...
...
@@ -629,15 +630,17 @@ class BlobAdaptedFileStorageTests(GenericTests, CommonBlobTests):
d1
=
f
.
read
(
8096
)
d2
=
somedata
.
read
(
8096
)
self
.
assertEqual
(
d1
,
d2
)
# The file should be in the cache ...
filename
=
self
.
_storage
.
fshelper
.
getBlobFilename
(
oid
,
revid
)
# The file should have been copied to the server:
filename
=
os
.
path
.
join
(
self
.
blobdir
,
oid_repr
(
oid
),
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
check_data
(
filename
)
#
... and on the server
server_filename
=
filename
.
replace
(
self
.
blob_cache_dir
,
self
.
blobdir
)
self
.
assert_
(
server_filename
.
startswith
(
self
.
blobdir
)
)
check_data
(
server_
filename
)
#
It should also be in the cache:
filename
=
os
.
path
.
join
(
self
.
blob_cache_dir
,
oid_repr
(
oid
),
tid_repr
(
revid
)
+
BLOB_SUFFIX
)
check_data
(
filename
)
# If we remove it from the cache and call loadBlob, it should
# come back. We can do this in many threads. We'll instrument
...
...
src/ZODB/blob.py
View file @
3037f42a
...
...
@@ -15,10 +15,8 @@
"""
import
base64
import
binascii
import
logging
import
os
import
re
import
shutil
import
stat
import
sys
...
...
@@ -45,9 +43,6 @@ logger = logging.getLogger('ZODB.blob')
BLOB_SUFFIX
=
".blob"
SAVEPOINT_SUFFIX
=
".spb"
LAYOUT_MARKER
=
'.layout'
LAYOUTS
=
{}
valid_modes
=
'r'
,
'w'
,
'r+'
,
'a'
# Threading issues:
...
...
@@ -297,42 +292,21 @@ class FilesystemHelper:
# with blobs and storages needn't indirect through this if they
# want to perform blob storage differently.
def
__init__
(
self
,
base_dir
,
layout_name
=
'automatic'
):
self
.
base_dir
=
os
.
path
.
normpath
(
base_dir
)
+
'/'
def
__init__
(
self
,
base_dir
):
self
.
base_dir
=
base_dir
self
.
temp_dir
=
os
.
path
.
join
(
base_dir
,
'tmp'
)
if
layout_name
==
'automatic'
:
layout_name
=
auto_layout_select
(
base_dir
)
if
layout_name
==
'lawn'
:
log
(
'The `lawn` blob directory layout is deprecated due to '
'scalability issues on some file systems, please consider '
'migrating to the `bushy` layout.'
,
level
=
logging
.
WARN
)
self
.
layout_name
=
layout_name
self
.
layout
=
LAYOUTS
[
layout_name
]
def
create
(
self
):
if
not
os
.
path
.
exists
(
self
.
base_dir
):
os
.
makedirs
(
self
.
base_dir
,
0700
)
log
(
"Blob directory '%s' does not exist. "
"Created new directory."
%
self
.
base_dir
)
log
(
"Blob cache directory '%s' does not exist. "
"Created new directory."
%
self
.
base_dir
,
level
=
logging
.
INFO
)
if
not
os
.
path
.
exists
(
self
.
temp_dir
):
os
.
makedirs
(
self
.
temp_dir
,
0700
)
log
(
"Blob temporary directory '%s' does not exist. "
"Created new directory."
%
self
.
temp_dir
)
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
self
.
base_dir
,
LAYOUT_MARKER
)):
layout_marker
=
open
(
os
.
path
.
join
(
self
.
base_dir
,
LAYOUT_MARKER
),
'wb'
)
layout_marker
.
write
(
self
.
layout_name
)
else
:
layout_marker
=
open
(
os
.
path
.
join
(
self
.
base_dir
,
LAYOUT_MARKER
),
'rb'
)
layout
=
layout_marker
.
read
().
strip
()
if
layout
!=
self
.
layout_name
:
raise
ValueError
(
"Directory layout `%s` selected for blob directory %s, but "
"marker found for layout `%s`"
%
(
self
.
layout_name
,
self
.
base_dir
,
layout
))
"Created new directory."
%
self
.
temp_dir
,
level
=
logging
.
INFO
)
def
isSecure
(
self
,
path
):
"""Ensure that (POSIX) path mode bits are 0700."""
...
...
@@ -343,40 +317,12 @@ class FilesystemHelper:
log
(
'Blob dir %s has insecure mode setting'
%
self
.
base_dir
,
level
=
logging
.
WARNING
)
def
getPathForOID
(
self
,
oid
,
create
=
False
):
def
getPathForOID
(
self
,
oid
):
"""Given an OID, return the path on the filesystem where
the blob data relating to that OID is stored.
If the create flag is given, the path is also created if it didn't
exist already.
"""
# OIDs are numbers and sometimes passed around as integers. For our
# computations we rely on the 64-bit packed string representation.
if
isinstance
(
oid
,
int
):
oid
=
utils
.
p64
(
oid
)
path
=
self
.
layout
.
oid_to_path
(
oid
)
path
=
os
.
path
.
join
(
self
.
base_dir
,
path
)
if
create
and
not
os
.
path
.
exists
(
path
):
try
:
os
.
makedirs
(
path
,
0700
)
except
OSError
:
# We might have lost a race. If so, the directory
# must exist now
assert
os
.
path
.
exists
(
targetpath
)
return
path
def
getOIDForPath
(
self
,
path
):
"""Given a path, return an OID, if the path is a valid path for an
OID. The inverse function to `getPathForOID`.
Raises ValueError if the path is not valid for an OID.
"""
path
=
path
[
len
(
self
.
base_dir
):]
return
self
.
layout
.
path_to_oid
(
path
)
return
os
.
path
.
join
(
self
.
base_dir
,
utils
.
oid_repr
(
oid
))
def
getBlobFilename
(
self
,
oid
,
tid
):
"""Given an oid and a tid, return the full filename of the
...
...
@@ -384,10 +330,6 @@ class FilesystemHelper:
"""
oid_path
=
self
.
getPathForOID
(
oid
)
# TIDs are numbers and sometimes passed around as integers. For our
# computations we rely on the 64-bit packed string representation
if
isinstance
(
tid
,
int
):
tid
=
utils
.
p64
(
tid
)
filename
=
"%s%s"
%
(
utils
.
tid_repr
(
tid
),
BLOB_SUFFIX
)
return
os
.
path
.
join
(
oid_path
,
filename
)
...
...
@@ -417,9 +359,10 @@ class FilesystemHelper:
if
not
filename
.
endswith
(
BLOB_SUFFIX
):
return
None
,
None
path
,
filename
=
os
.
path
.
split
(
filename
)
oid
=
self
.
getOIDForPath
(
path
)
oid
=
os
.
path
.
split
(
path
)[
1
]
serial
=
filename
[:
-
len
(
BLOB_SUFFIX
)]
oid
=
utils
.
repr_to_oid
(
oid
)
serial
=
utils
.
repr_to_oid
(
serial
)
return
oid
,
serial
...
...
@@ -429,105 +372,24 @@ class FilesystemHelper:
"""
oids
=
[]
for
oid
,
oidpath
in
self
.
listOIDs
():
for
filename
in
os
.
listdir
(
oidpath
):
blob_path
=
os
.
path
.
join
(
oidpath
,
filename
)
base_dir
=
self
.
base_dir
for
oidpath
in
os
.
listdir
(
base_dir
):
for
filename
in
os
.
listdir
(
os
.
path
.
join
(
base_dir
,
oidpath
)):
blob_path
=
os
.
path
.
join
(
base_dir
,
oidpath
,
filename
)
oid
,
serial
=
self
.
splitBlobFilename
(
blob_path
)
if
search_serial
==
serial
:
oids
.
append
(
oid
)
return
oids
def
listOIDs
(
self
):
"""
Iterates over all paths under the base directory that contain blob
files.
"""
Lists all OIDs and their paths.
"""
for
path
,
dirs
,
files
in
os
.
walk
(
self
.
base_dir
):
try
:
oid
=
self
.
getOIDForPath
(
path
)
except
ValueError
:
for
candidate
in
os
.
listdir
(
self
.
base_dir
):
if
candidate
==
'tmp'
:
continue
yield
oid
,
path
def auto_layout_select(path):
    """Return the name of the directory layout to use for `path`.

    Heuristic:

    - `path` does not exist, or exists but has no entries: 'bushy'.
    - `path` has entries but none is named LAYOUT_MARKER: 'lawn'.
    - otherwise: the whitespace-stripped content of the marker file.

    Every decision is reported through `log`.
    """
    layout_marker = os.path.join(path, LAYOUT_MARKER)
    if not os.path.exists(path):
        log('Blob directory %s does not exist. '
            'Selected `bushy` layout. ' % path)
        return 'bushy'

    # List the directory once so that both checks below look at the same
    # snapshot of its content.
    entries = os.listdir(path)
    if len(entries) == 0:
        log('Blob directory `%s` is unused and has no layout marker set. '
            'Selected `bushy` layout. ' % path)
        layout = 'bushy'
    elif LAYOUT_MARKER not in entries:
        log('Blob directory `%s` is used but has no layout marker set. '
            'Selected `lawn` layout. ' % path)
        layout = 'lawn'
    else:
        # Close the marker file explicitly instead of relying on garbage
        # collection to release the handle.
        marker_file = open(layout_marker, 'rb')
        try:
            layout = marker_file.read().strip()
        finally:
            marker_file.close()
        log('Blob directory `%s` has layout marker set. '
            'Selected `%s` layout. ' % (path, layout))
    return layout
class BushyLayout(object):
    """A bushy directory layout for blob directories.

    Creates an 8-level directory structure (one level per byte) in
    little-endian order from the OID of an object.

    """

    # Eight '0x..' segments, anchored at both ends.
    # NOTE(review): the pattern tolerates one-digit segments and missing '/'
    # separators; path_to_oid splits on '/' and unhexlifies every segment,
    # so such input may fail there with something other than ValueError --
    # confirm.
    blob_path_pattern = re.compile(
        r'^' + (r'0x[0-9a-f]{1,2}/*' * 8) + r'$')

    def oid_to_path(self, oid):
        """Return the relative directory path for `oid`.

        One '0x..' segment is produced per character of ``str(oid)``, last
        character (least significant byte) first, joined with '/'.
        """
        segments = ['0x%s' % binascii.hexlify(char)
                    for char in reversed(str(oid))]
        return '/'.join(segments)

    def path_to_oid(self, path):
        """Return the OID encoded by the relative directory `path`.

        Raises ValueError if `path` does not match `blob_path_pattern`.
        """
        if self.blob_path_pattern.match(path) is None:
            raise ValueError("Not a valid OID path: `%s`" % path)
        segments = path.split('/')
        # The path stores the least significant byte first while the OID is
        # big endian, so walk the segments backwards. Each segment is
        # '0x' + hex digits; strip the prefix and decode the digits.
        return ''.join([binascii.unhexlify(segment[2:])
                        for segment in reversed(segments)])
LAYOUTS
[
'bushy'
]
=
BushyLayout
()
class LawnLayout(object):
    """A shallow directory layout for blob directories.

    Creates a single level of directories (one for each oid).

    """

    def oid_to_path(self, oid):
        """Return the directory name for `oid` (``utils.oid_repr(oid)``)."""
        return utils.oid_repr(oid)

    def path_to_oid(self, path):
        """Return the OID for the directory name `path`.

        Raises ValueError for the empty string and whenever
        ``utils.repr_to_oid`` raises TypeError.
        """
        # The empty string is rejected up front: repr_to_oid would convert
        # '' to the OID z64.
        if path == '':
            raise ValueError('Not a valid OID path: `%s`' % path)
        try:
            return utils.repr_to_oid(path)
        except TypeError:
            raise ValueError('Not a valid OID path: `%s`' % path)
LAYOUTS
[
'lawn'
]
=
LawnLayout
()
oid
=
utils
.
repr_to_oid
(
candidate
)
yield
oid
,
self
.
getPathForOID
(
oid
)
class
BlobStorage
(
SpecificationDecoratorBase
):
...
...
@@ -539,13 +401,13 @@ class BlobStorage(SpecificationDecoratorBase):
# us to have instance attributes explicitly on the proxy.
__slots__
=
(
'fshelper'
,
'dirty_oids'
,
'_BlobStorage__supportsUndo'
)
def
__new__
(
self
,
base_directory
,
storage
,
layout
=
'automatic'
):
def
__new__
(
self
,
base_directory
,
storage
):
return
SpecificationDecoratorBase
.
__new__
(
self
,
storage
)
def
__init__
(
self
,
base_directory
,
storage
,
layout
=
'automatic'
):
def
__init__
(
self
,
base_directory
,
storage
):
# XXX Log warning if storage is ClientStorage
SpecificationDecoratorBase
.
__init__
(
self
,
storage
)
self
.
fshelper
=
FilesystemHelper
(
base_directory
,
layout
)
self
.
fshelper
=
FilesystemHelper
(
base_directory
)
self
.
fshelper
.
create
()
self
.
fshelper
.
checkSecure
()
self
.
dirty_oids
=
[]
...
...
@@ -576,7 +438,10 @@ class BlobStorage(SpecificationDecoratorBase):
self
.
_lock_acquire
()
try
:
self
.
fshelper
.
getPathForOID
(
oid
,
create
=
True
)
targetpath
=
self
.
fshelper
.
getPathForOID
(
oid
)
if
not
os
.
path
.
exists
(
targetpath
):
os
.
makedirs
(
targetpath
,
0700
)
targetname
=
self
.
fshelper
.
getBlobFilename
(
oid
,
serial
)
rename_or_copy_blob
(
blobfilename
,
targetname
)
...
...
@@ -622,12 +487,14 @@ class BlobStorage(SpecificationDecoratorBase):
# if they are still needed by attempting to load the revision
# of that object from the database. This is maybe the slowest
# possible way to do this, but it's safe.
base_dir
=
self
.
fshelper
.
base_dir
for
oid
,
oid_path
in
self
.
fshelper
.
listOIDs
():
files
=
os
.
listdir
(
oid_path
)
for
filename
in
files
:
filepath
=
os
.
path
.
join
(
oid_path
,
filename
)
whatever
,
serial
=
self
.
fshelper
.
splitBlobFilename
(
filepath
)
try
:
fn
=
self
.
fshelper
.
getBlobFilename
(
oid
,
serial
)
self
.
loadSerial
(
oid
,
serial
)
except
POSKeyError
:
remove_committed
(
filepath
)
...
...
@@ -637,6 +504,7 @@ class BlobStorage(SpecificationDecoratorBase):
@
non_overridable
def
_packNonUndoing
(
self
,
packtime
,
referencesf
):
base_dir
=
self
.
fshelper
.
base_dir
for
oid
,
oid_path
in
self
.
fshelper
.
listOIDs
():
exists
=
True
try
:
...
...
@@ -684,11 +552,15 @@ class BlobStorage(SpecificationDecoratorBase):
"""Return the size of the database in bytes."""
orig_size
=
getProxiedObject
(
self
).
getSize
()
blob_size
=
0
for
oid
,
path
in
self
.
fshelper
.
listOIDs
():
for
serial
in
os
.
listdir
(
path
):
base_dir
=
self
.
fshelper
.
base_dir
for
oid
in
os
.
listdir
(
base_dir
):
sub_dir
=
os
.
path
.
join
(
base_dir
,
oid
)
if
not
os
.
path
.
isdir
(
sub_dir
):
continue
for
serial
in
os
.
listdir
(
sub_dir
):
if
not
serial
.
endswith
(
BLOB_SUFFIX
):
continue
file_path
=
os
.
path
.
join
(
path
,
serial
)
file_path
=
os
.
path
.
join
(
base_dir
,
oid
,
serial
)
blob_size
+=
os
.
stat
(
file_path
).
st_size
return
orig_size
+
blob_size
...
...
@@ -711,6 +583,7 @@ class BlobStorage(SpecificationDecoratorBase):
# we get all the blob oids on the filesystem related to the
# transaction we want to undo.
for
oid
in
self
.
fshelper
.
getOIDsForSerial
(
serial_id
):
# we want to find the serial id of the previous revision
# of this blob object.
load_result
=
self
.
loadBefore
(
oid
,
serial_id
)
...
...
src/ZODB/scripts/migrateblobs.py
deleted
100644 → 0
View file @
44900f9c
##############################################################################
#
# Copyright (c) 2008 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
"""A script to migrate a blob directory into a different layout.
"""
import
logging
import
optparse
import
os
from
ZODB.blob
import
FilesystemHelper
,
rename_or_copy_blob
from
ZODB.utils
import
cp
,
oid_repr
def link_or_copy(f1, f2):
    """Make `f2` carry the same data as `f1`.

    A hard link is attempted first. If ``os.link`` raises OSError, the file
    is copied with ``shutil.copy`` instead.
    """
    # Imported locally because this script's import block does not provide
    # `shutil`; without it the fallback below fails with a NameError.
    import shutil
    try:
        os.link(f1, f2)
    except OSError:
        shutil.copy(f1, f2)
def migrate(source, dest, layout):
    """Populate the blob directory `dest` from `source` using `layout`.

    A FilesystemHelper is set up (and `create()` is called) for both
    directories. For every (oid, directory) pair reported by the source
    helper, each entry of that directory is linked or copied into the
    directory the destination helper assigns to the same OID. Progress is
    printed to standard output.
    """
    old_helper = FilesystemHelper(source)
    old_helper.create()
    new_helper = FilesystemHelper(dest, layout)
    new_helper.create()

    print("Migrating blob data from `%s` (%s) to `%s` (%s)" % (
        source, old_helper.layout_name, dest, new_helper.layout_name))

    for oid, oid_dir in old_helper.listOIDs():
        target_dir = new_helper.getPathForOID(oid, create=True)
        names = os.listdir(oid_dir)
        for name in names:
            link_or_copy(os.path.join(oid_dir, name),
                         os.path.join(target_dir, name))
        print("\tOID: %s - %s files " % (oid_repr(oid), len(names)))
def main(source=None, dest=None, layout="bushy"):
    """Command line entry point: parse the arguments and run `migrate`.

    Exactly two positional command line arguments (source and destination)
    are required; otherwise the parser reports an error. `layout` is only
    the default for the ``--layout`` option. The `source` and `dest`
    parameters are not consulted: both are taken from the command line.
    """
    parser = optparse.OptionParser(
        usage="usage: %prog [options] <source> <dest> <layout>",
        description=(
            "Create the new directory <dest> and migrate all blob "
            "data <source> to <dest> while using the new <layout> for "
            "<dest>"))
    parser.add_option(
        "-l", "--layout",
        default=layout,
        type='choice',
        choices=['bushy', 'lawn'],
        help="Define the layout to use for the new directory "
             "(bushy or lawn). Default: %default")
    options, args = parser.parse_args()

    if len(args) != 2:
        parser.error("source and destination must be given")

    # Send all log records of the root logger to a stream handler;
    # level 0 disables filtering by level.
    root_logger = logging.getLogger()
    root_logger.addHandler(logging.StreamHandler())
    root_logger.setLevel(0)

    source, dest = args
    migrate(source, dest, options.layout)
if
__name__
==
'__main__'
:
main
()
src/ZODB/tests/blob_layout.txt
deleted
100644 → 0
View file @
44900f9c
======================
Blob directory layouts
======================
The internal structure of the blob directories is governed by so called
`layouts`. The current default layout is called `bushy`.
The original blob implementation used a layout that we now call `lawn` and
which is still available for backwards compatibility.
Layouts implement two methods: one for computing a relative path for an
OID and one for turning a relative path back into an OID.
Our terminology is roughly the same as used in `DirectoryStorage`.
The `bushy` layout
==================
The bushy layout splits the OID into its 8 bytes, reverses their order and
creates one directory level for each byte, named by the hexlified
representation of the byte value. This results in 8 levels of directories, the
leaf directories being used for the revisions of the blobs and at most 256
entries per directory level:
>>> from ZODB.blob import BushyLayout
>>> bushy = BushyLayout()
>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
'0x00/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
>>> bushy.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
'0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00'
>>> bushy.path_to_oid('0x01/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\x01'
>>> bushy.path_to_oid('0xff/0x00/0x00/0x00/0x00/0x00/0x00/0x00')
'\x00\x00\x00\x00\x00\x00\x00\xff'
Paths that do not represent an OID will cause a ValueError:
>>> bushy.path_to_oid('tmp')
Traceback (most recent call last):
ValueError: Not a valid OID path: `tmp`
The `lawn` layout
=================
The lawn layout creates one directory for each blob, named by the hex
representation of the blob's OID. This has some limitations on various file systems
like performance penalties or the inability to store more than a given number
of blobs at the same time (e.g. 32k on ext3).
>>> from ZODB.blob import LawnLayout
>>> lawn = LawnLayout()
>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x00')
'0x00'
>>> lawn.oid_to_path('\x00\x00\x00\x00\x00\x00\x00\x01')
'0x01'
>>> lawn.path_to_oid('0x01')
'\x00\x00\x00\x00\x00\x00\x00\x01'
Paths that do not represent an OID will cause a ValueError:
>>> lawn.path_to_oid('tmp')
Traceback (most recent call last):
ValueError: Not a valid OID path: `tmp`
>>> lawn.path_to_oid('')
Traceback (most recent call last):
ValueError: Not a valid OID path: ``
Auto-detecting the layout of a directory
========================================
To allow easier migration, we provide an auto-detection feature that analyses a
blob directory and decides which strategy to use. In general it prefers to
choose the `bushy` layout, except if it determines that the directory has
already been used to create a lawn structure.
>>> from ZODB.blob import auto_layout_select
1. Non-existing directories will trigger a bushy layout:
>>> import tempfile
>>> import shutil
>>> d = tempfile.mkdtemp()
>>> shutil.rmtree(d)
>>> auto_layout_select(d)
'bushy'
2. Empty directories will trigger a bushy layout too:
>>> d = tempfile.mkdtemp()
>>> auto_layout_select(d)
'bushy'
3. If the directory contains a marker for the strategy it will be used:
>>> from ZODB.blob import LAYOUT_MARKER
>>> import os.path
>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('bushy')
>>> auto_layout_select(d)
'bushy'
>>> open(os.path.join(d, LAYOUT_MARKER), 'wb').write('lawn')
>>> auto_layout_select(d)
'lawn'
>>> shutil.rmtree(d)
4. If the directory does not contain a marker but other files, we assume that
it was created with an earlier version of the blob implementation and uses our
`lawn` layout:
>>> d = tempfile.mkdtemp()
>>> open(os.path.join(d, '0x0101'), 'wb').write('foo')
>>> auto_layout_select(d)
'lawn'
>>> shutil.rmtree(d)
Directory layout markers
========================
When the file system helper (FSH) is asked to create the directory structure,
it will leave a marker with the chosen layout if no marker exists yet:
>>> from ZODB.blob import FilesystemHelper
>>> d = tempfile.mkdtemp()
>>> blobs = os.path.join(d, 'blobs')
>>> fsh = FilesystemHelper(blobs)
>>> fsh.layout_name
'bushy'
>>> fsh.create()
>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
'bushy'
If the FSH finds a marker, then it verifies whether its content matches the
strategy that was chosen. It will raise an exception if we try to work with a
directory that has a different marker than the chosen strategy:
>>> fsh = FilesystemHelper(blobs, 'lawn')
>>> fsh.layout_name
'lawn'
>>> fsh.create() # doctest: +ELLIPSIS
Traceback (most recent call last):
ValueError: Directory layout `lawn` selected for blob directory /.../blobs/, but marker found for layout `bushy`
>>> shutil.rmtree(blobs)
This function interacts with the automatic detection in such a way that an
unmarked directory is marked the first time its layout is auto-detected, and
the marker is used from then on:
>>> import ZODB.FileStorage
>>> from ZODB.blob import BlobStorage
>>> datafs = os.path.join(d, 'data.fs')
>>> base_storage = ZODB.FileStorage.FileStorage(datafs)
>>> os.mkdir(blobs)
>>> open(os.path.join(blobs, 'foo'), 'wb').write('foo')
>>> blob_storage = BlobStorage(blobs, base_storage)
>>> blob_storage.fshelper.layout_name
'lawn'
>>> open(os.path.join(blobs, LAYOUT_MARKER), 'rb').read()
'lawn'
>>> blob_storage = BlobStorage(blobs, base_storage, layout='bushy') # doctest: +ELLIPSIS
Traceback (most recent call last):
ValueError: Directory layout `bushy` selected for blob directory /.../blobs/, but marker found for layout `lawn`
>>> shutil.rmtree(d)
Migrating between directory layouts
===================================
A script called `migrateblobs.py` is distributed with the ZODB for offline
migration capabilities between different directory layouts. It can migrate any
blob directory layout to any other layout. It leaves the original blob
directory untouched (except for possibly creating a temporary directory and
the storage layout marker).
The migration is accessible as a library function:
>>> from ZODB.scripts.migrateblobs import migrate
Create a `lawn` directory structure and migrate it to the new `bushy` one:
>>> from ZODB.blob import FilesystemHelper
>>> d = tempfile.mkdtemp()
>>> old = os.path.join(d, 'old')
>>> old_fsh = FilesystemHelper(old, 'lawn')
>>> old_fsh.create()
>>> blob1 = old_fsh.getPathForOID(7039, create=True)
>>> blob2 = old_fsh.getPathForOID(10, create=True)
>>> blob3 = old_fsh.getPathForOID(7034, create=True)
>>> open(os.path.join(blob1, 'foo'), 'wb').write('foo')
>>> open(os.path.join(blob1, 'foo2'), 'wb').write('bar')
>>> open(os.path.join(blob2, 'foo3'), 'wb').write('baz')
>>> open(os.path.join(blob2, 'foo4'), 'wb').write('qux')
>>> open(os.path.join(blob3, 'foo5'), 'wb').write('quux')
>>> open(os.path.join(blob3, 'foo6'), 'wb').write('corge')
Committed blobs have their permissions set to 000
The migration function is called with the old and the new path and the layout
that shall be used for the new directory:
>>> bushy = os.path.join(d, 'bushy')
>>> migrate(old, bushy, 'bushy') # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
Migrating blob data from `/.../old` (lawn) to `/.../bushy` (bushy)
OID: 0x1b7f - 2 files
OID: 0x0a - 2 files
OID: 0x1b7a - 2 files
The new directory now contains the same files in different directories, but
with the same sizes and permissions:
>>> import string
>>> def stat(path):
... s = os.stat(path)
... print "%s\t%s\t%s" % (string.rjust(oct(s.st_mode), 10), s.st_size, path)
>>> def ls(path):
... for p, dirs, files in os.walk(path):
... stat(p)
... for file in files:
... stat(os.path.join(p, file))
>>> ls(bushy)
040700 4096 /.../bushy
0100644 5 /.../bushy/.layout
040700 4096 /.../bushy/0x7a
040700 4096 /.../bushy/0x7a/0x1b
040700 4096 /.../bushy/0x7a/0x1b/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
0100644 5 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo6
0100644 4 /.../bushy/0x7a/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo5
040700 4096 /.../bushy/tmp
040700 4096 /.../bushy/0x0a
040700 4096 /.../bushy/0x0a/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00
0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo4
0100644 3 /.../bushy/0x0a/0x00/0x00/0x00/0x00/0x00/0x00/0x00/foo3
040700 4096 /.../bushy/0x7f
040700 4096 /.../bushy/0x7f/0x1b
040700 4096 /.../bushy/0x7f/0x1b/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00
040700 4096 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00
0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo
0100644 3 /.../bushy/0x7f/0x1b/0x00/0x00/0x00/0x00/0x00/0x00/foo2
We can also migrate the bushy layout back to the lawn layout:
>>> lawn = os.path.join(d, 'lawn')
>>> migrate(bushy, lawn, 'lawn')
Migrating blob data from `/.../bushy` (bushy) to `/.../lawn` (lawn)
OID: 0x1b7a - 2 files
OID: 0x0a - 2 files
OID: 0x1b7f - 2 files
>>> ls(lawn)
040700 4096 /.../lawn
0100644 4 /.../lawn/.layout
040700 4096 /.../lawn/0x1b7f
0100644 3 /.../lawn/0x1b7f/foo
0100644 3 /.../lawn/0x1b7f/foo2
040700 4096 /.../lawn/tmp
040700 4096 /.../lawn/0x0a
0100644 3 /.../lawn/0x0a/foo4
0100644 3 /.../lawn/0x0a/foo3
040700 4096 /.../lawn/0x1b7a
0100644 5 /.../lawn/0x1b7a/foo6
0100644 4 /.../lawn/0x1b7a/foo5
>>> shutil.rmtree(d)
src/ZODB/tests/blob_tempdir.txt
View file @
3037f42a
...
...
@@ -32,7 +32,7 @@ First, we need a datatabase with blob support::
>>> from ZODB.DB import DB
>>> from tempfile import mkdtemp
>>> import os.path
>>> base_storage = MappingStorage(
'test'
)
>>> base_storage = MappingStorage(
"test"
)
>>> blob_dir = mkdtemp()
>>> blob_storage = BlobStorage(blob_dir, base_storage)
>>> database = DB(blob_storage)
...
...
src/ZODB/tests/blob_transaction.txt
View file @
3037f42a
...
...
@@ -322,9 +322,9 @@ clean up dirty files:
>>> base_storage = DummyBaseStorage()
>>> blob_dir2 = mkdtemp()
>>> blob_storage2 = BlobStorage(blob_dir2, base_storage)
>>> committed_blob_dir =
blob_storage2.fshelper.getPathForOID(0
)
>>>
os.makedirs(committed_blob_dir
)
>>>
committed_blob_file = blob_storage2.fshelper.getBlobFilename(0, 0
)
>>> committed_blob_dir =
os.path.join(blob_dir2, '0'
)
>>>
committed_blob_file = os.path.join(committed_blob_dir, '0.blob'
)
>>>
os.mkdir(committed_blob_dir
)
>>> open(os.path.join(committed_blob_file), 'w').write('foo')
>>> os.path.exists(committed_blob_file)
True
...
...
src/ZODB/tests/testblob.py
View file @
3037f42a
...
...
@@ -105,6 +105,7 @@ class BlobUndoTests(unittest.TestCase):
self
.
here
=
os
.
getcwd
()
os
.
chdir
(
self
.
test_dir
)
self
.
storagefile
=
'Data.fs'
os
.
mkdir
(
'blobs'
)
self
.
blob_dir
=
'blobs'
def
tearDown
(
self
):
...
...
@@ -482,7 +483,7 @@ def loadblob_tmpstore():
We can access the blob correctly:
>>> tmpstore.loadBlob(blob_oid, tid) # doctest: +ELLIPSIS
'.../0x01/0x
00/0x00/0x00/0x00/0x00/0x00/0x00/0x
...blob'
'.../0x01/0x...blob'
Clean up:
...
...
@@ -506,12 +507,6 @@ def test_suite():
setUp
=
ZODB
.
tests
.
util
.
setUp
,
tearDown
=
ZODB
.
tests
.
util
.
tearDown
,
))
suite
.
addTest
(
doctest
.
DocFileSuite
(
"blob_layout.txt"
,
optionflags
=
doctest
.
ELLIPSIS
|
doctest
.
NORMALIZE_WHITESPACE
|
doctest
.
REPORT_NDIFF
,
setUp
=
ZODB
.
tests
.
util
.
setUp
,
tearDown
=
ZODB
.
tests
.
util
.
tearDown
,
))
suite
.
addTest
(
doctest
.
DocTestSuite
(
setUp
=
ZODB
.
tests
.
util
.
setUp
,
tearDown
=
ZODB
.
tests
.
util
.
tearDown
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment