Commit 32e5d538 authored by Fred Drake's avatar Fred Drake

merge 64-bit "I" BTree support from the ZODB 3.7 branch

parent ff678cc9
What's new in ZODB3 3.7.0?
==========================
Release date: ???
BTrees
------
- Support for 64-bit integer keys and values has been provided as a
compile-time option.
What's new in ZODB3 3.6.2? What's new in ZODB3 3.6.2?
========================== ==========================
Release date: 15-July-2006 Release date: 15-July-2006
......
...@@ -28,7 +28,11 @@ ZoneAlarm. Many particularly slow tests are skipped unless you pass ...@@ -28,7 +28,11 @@ ZoneAlarm. Many particularly slow tests are skipped unless you pass
Compatibility Compatibility
------------- -------------
ZODB 3.7 requires Python 2.4.2 or later. ZODB 3.7 requires Python 2.4.2 or later. The BTree code may be
compiled with support for 64-bit keys and values for the "I" flavors;
older versions of the BTrees package will not be able to load
persistent BTrees that use 64-bit data (an exception will be raised on
load).
The Zope 2.8 release, and Zope3 releases, should be compatible with this The Zope 2.8 release, and Zope3 releases, should be compatible with this
version of ZODB. Note that Zope 2.7 and higher includes ZEO, so this package version of ZODB. Note that Zope 2.7 and higher includes ZEO, so this package
...@@ -78,6 +82,11 @@ script:: ...@@ -78,6 +82,11 @@ script::
% python setup.py build % python setup.py build
The 64-bit support for the BTrees package may be enabled by using this
build command instead::
% python setup.py build_ext -DZODB_64BIT_INTS build
To test the build, run the test script:: To test the build, run the test script::
% python test.py % python test.py
......
...@@ -66,6 +66,45 @@ static void PyVar_Assign(PyObject **v, PyObject *e) { Py_XDECREF(*v); *v=e;} ...@@ -66,6 +66,45 @@ static void PyVar_Assign(PyObject **v, PyObject *e) { Py_XDECREF(*v); *v=e;}
#define ASSERT(C, S, R) if (! (C)) { \ #define ASSERT(C, S, R) if (! (C)) { \
PyErr_SetString(PyExc_AssertionError, (S)); return (R); } PyErr_SetString(PyExc_AssertionError, (S)); return (R); }
#ifdef NEED_LONG_LONG_SUPPORT
/* Helper code used to support long long instead of int. */
#ifndef PY_LONG_LONG
#error "PY_LONG_LONG required but not defined"
#endif
static int
longlong_check(PyObject *ob)
{
if (PyInt_Check(ob))
return 1;
if (PyLong_Check(ob)) {
/* check magnitude */
PY_LONG_LONG val = PyLong_AsLongLong(ob);
if (val == -1 && PyErr_Occurred())
return 0;
return 1;
}
return 0;
}
static PyObject *
longlong_as_object(PY_LONG_LONG val)
{
static PY_LONG_LONG maxint = 0;
if (maxint == 0)
maxint = PyInt_GetMax();
if ((val > maxint) || (val < (-maxint-1)))
return PyLong_FromLongLong(val);
return PyInt_FromLong((long)val);
}
#endif
/* Various kinds of BTree and Bucket structs are instances of /* Various kinds of BTree and Bucket structs are instances of
* "sized containers", and have a common initial layout: * "sized containers", and have a common initial layout:
* The stuff needed for all Python objects, or all Persistent objects. * The stuff needed for all Python objects, or all Persistent objects.
...@@ -488,4 +527,11 @@ INITMODULE (void) ...@@ -488,4 +527,11 @@ INITMODULE (void)
if (PyDict_SetItemString(d, MOD_NAME_PREFIX "TreeIterator", if (PyDict_SetItemString(d, MOD_NAME_PREFIX "TreeIterator",
(PyObject *)&BTreeIter_Type) < 0) (PyObject *)&BTreeIter_Type) < 0)
return; return;
#if defined(ZODB_64BIT_INTS) && defined(NEED_LONG_LONG_SUPPORT)
if (PyDict_SetItemString(d, "using64bits", Py_True) < 0)
return;
#else
if (PyDict_SetItemString(d, "using64bits", Py_False) < 0)
return;
#endif
} }
...@@ -543,7 +543,7 @@ multiunion_m(PyObject *ignored, PyObject *args) ...@@ -543,7 +543,7 @@ multiunion_m(PyObject *ignored, PyObject *args)
*/ */
if (result->len > 0) { if (result->len > 0) {
size_t newlen; /* number of elements in final result set */ size_t newlen; /* number of elements in final result set */
newlen = sort_int4_nodups(result->keys, (size_t)result->len); newlen = sort_int_nodups(result->keys, (size_t)result->len);
result->len = (int)newlen; result->len = (int)newlen;
} }
return (PyObject *)result; return (PyObject *)result;
......
#define KEYMACROS_H "$Id$\n" #define KEYMACROS_H "$Id$\n"
#ifdef ZODB_64BIT_INTS
/* PY_LONG_LONG as key */
#define NEED_LONG_LONG_SUPPORT
#define KEY_TYPE PY_LONG_LONG
#define KEY_CHECK longlong_check
#define COPY_KEY_TO_OBJECT(O, K) O=longlong_as_object(K)
#define COPY_KEY_FROM_ARG(TARGET, ARG, STATUS) \
if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else \
if (longlong_check(ARG)) TARGET=PyLong_AsLongLong(ARG); else \
if (PyLong_Check(ARG)) { \
PyErr_SetString(PyExc_ValueError, "long integer out of range"); \
(STATUS)=0; (TARGET)=0; } \
else { \
PyErr_SetString(PyExc_TypeError, "expected integer key"); \
(STATUS)=0; (TARGET)=0; }
#else
/* C int as key */
#define KEY_TYPE int #define KEY_TYPE int
#undef KEY_TYPE_IS_PYOBJECT
#define KEY_CHECK PyInt_Check #define KEY_CHECK PyInt_Check
#define TEST_KEY_SET_OR(V, K, T) if ( ( (V) = (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0)) ) , 0 )
#define DECREF_KEY(KEY)
#define INCREF_KEY(k)
#define COPY_KEY(KEY, E) (KEY=(E))
#define COPY_KEY_TO_OBJECT(O, K) O=PyInt_FromLong(K) #define COPY_KEY_TO_OBJECT(O, K) O=PyInt_FromLong(K)
#define COPY_KEY_FROM_ARG(TARGET, ARG, STATUS) \ #define COPY_KEY_FROM_ARG(TARGET, ARG, STATUS) \
if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else { \ if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else { \
PyErr_SetString(PyExc_TypeError, "expected integer key"); \ PyErr_SetString(PyExc_TypeError, "expected integer key"); \
(STATUS)=0; (TARGET)=0; } (STATUS)=0; (TARGET)=0; }
#endif
#undef KEY_TYPE_IS_PYOBJECT
#define TEST_KEY_SET_OR(V, K, T) if ( ( (V) = (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0)) ) , 0 )
#define DECREF_KEY(KEY)
#define INCREF_KEY(k)
#define COPY_KEY(KEY, E) (KEY=(E))
#define MULTI_INT_UNION 1 #define MULTI_INT_UNION 1
#define VALUEMACROS_H "$Id$\n" #define VALUEMACROS_H "$Id$\n"
#ifdef ZODB_64BIT_INTS
#define NEED_LONG_LONG_SUPPORT
#define VALUE_TYPE PY_LONG_LONG
#define VALUE_PARSE "L"
#define COPY_VALUE_TO_OBJECT(O, K) O=longlong_as_object(K)
#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
if (PyInt_Check(ARG)) TARGET=PyInt_AS_LONG(ARG); else \
if (longlong_check(ARG)) TARGET=PyLong_AsLongLong(ARG); else \
if (PyLong_Check(ARG)) { \
PyErr_SetString(PyExc_ValueError, "long integer out of range"); \
(STATUS)=0; (TARGET)=0; } \
else { \
PyErr_SetString(PyExc_TypeError, "expected integer value"); \
(STATUS)=0; (TARGET)=0; }
#else
#define VALUE_TYPE int #define VALUE_TYPE int
#define VALUE_PARSE "i"
#define COPY_VALUE_TO_OBJECT(O, K) O=PyInt_FromLong(K)
#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
if (PyInt_Check(ARG)) TARGET=PyInt_AsLong(ARG); else { \
PyErr_SetString(PyExc_TypeError, "expected integer value"); \
(STATUS)=0; (TARGET)=0; }
#endif
#undef VALUE_TYPE_IS_PYOBJECT #undef VALUE_TYPE_IS_PYOBJECT
#define TEST_VALUE(K, T) (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0)) #define TEST_VALUE(K, T) (((K) < (T)) ? -1 : (((K) > (T)) ? 1: 0))
#define VALUE_SAME(VALUE, TARGET) ( (VALUE) == (TARGET) ) #define VALUE_SAME(VALUE, TARGET) ( (VALUE) == (TARGET) )
#define DECLARE_VALUE(NAME) VALUE_TYPE NAME #define DECLARE_VALUE(NAME) VALUE_TYPE NAME
#define VALUE_PARSE "i"
#define DECREF_VALUE(k) #define DECREF_VALUE(k)
#define INCREF_VALUE(k) #define INCREF_VALUE(k)
#define COPY_VALUE(V, E) (V=(E)) #define COPY_VALUE(V, E) (V=(E))
#define COPY_VALUE_TO_OBJECT(O, K) O=PyInt_FromLong(K)
#define COPY_VALUE_FROM_ARG(TARGET, ARG, STATUS) \
if (PyInt_Check(ARG)) TARGET=PyInt_AsLong(ARG); else { \
PyErr_SetString(PyExc_TypeError, "expected integer value"); \
(STATUS)=0; (TARGET)=0; }
#define NORMALIZE_VALUE(V, MIN) ((MIN) > 0) ? ((V)/=(MIN)) : 0 #define NORMALIZE_VALUE(V, MIN) ((MIN) > 0) ? ((V)/=(MIN)) : 0
#define MERGE_DEFAULT 1 #define MERGE_DEFAULT 1
......
...@@ -15,22 +15,23 @@ ...@@ -15,22 +15,23 @@
/* Revision information: $Id$ */ /* Revision information: $Id$ */
/* The only routine here intended to be used outside the file is /* The only routine here intended to be used outside the file is
size_t sort_int4_nodups(int *p, size_t n) size_t sort_int_nodups(int *p, size_t n)
Sort the array of n ints pointed at by p, in place, and also remove Sort the array of n ints pointed at by p, in place, and also remove
duplicates. Return the number of unique elements remaining, which occupy duplicates. Return the number of unique elements remaining, which occupy
a contiguous and monotonically increasing slice of the array starting at p. a contiguous and monotonically increasing slice of the array starting at p.
Example: If the input array is [3, 1, 2, 3, 1, 5, 2], sort_int4_nodups Example: If the input array is [3, 1, 2, 3, 1, 5, 2], sort_int_nodups
returns 4, and the first 4 elements of the array are changed to returns 4, and the first 4 elements of the array are changed to
[1, 2, 3, 5]. The content of the remaining array positions is not defined. [1, 2, 3, 5]. The content of the remaining array positions is not defined.
Notes: Notes:
+ This is specific to 4-byte signed ints, with endianness natural to the + This is specific to n-byte signed ints, with endianness natural to the
platform. platform. `n` is determined based on ZODB_64BIT_INTS.
+ 4*n bytes of available heap memory are required for best speed. + 4*n bytes of available heap memory are required for best speed
(8*n when ZODB_64BIT_INTS is defined).
*/ */
#include <stdlib.h> #include <stdlib.h>
...@@ -45,7 +46,7 @@ ...@@ -45,7 +46,7 @@
the radix sort has to know everything about the type's internal the radix sort has to know everything about the type's internal
representation. representation.
*/ */
typedef int element_type; typedef KEY_TYPE element_type;
/* The radixsort is faster than the quicksort for large arrays, but radixsort /* The radixsort is faster than the quicksort for large arrays, but radixsort
has high fixed overhead, making it a poor choice for small arrays. The has high fixed overhead, making it a poor choice for small arrays. The
...@@ -72,25 +73,33 @@ typedef int element_type; ...@@ -72,25 +73,33 @@ typedef int element_type;
swaps are done internally, the final result may come back in 'in' or 'work'; swaps are done internally, the final result may come back in 'in' or 'work';
and that pointer is returned. and that pointer is returned.
radixsort_int4 is specific to signed 4-byte ints, with natural machine radixsort_int is specific to signed n-byte ints, with natural machine
endianness. endianness. `n` is determined based on ZODB_64BIT_INTS.
*/ */
static element_type* static element_type*
radixsort_int4(element_type *in, element_type *work, size_t n) radixsort_int(element_type *in, element_type *work, size_t n)
{ {
/* count[i][j] is the number of input elements that have byte value j /* count[i][j] is the number of input elements that have byte value j
in byte position i, where byte position 0 is the LSB. Note that in byte position i, where byte position 0 is the LSB. Note that
holding i fixed, the sum of count[i][j] over all j in range(256) holding i fixed, the sum of count[i][j] over all j in range(256)
is n. is n.
*/ */
size_t count[4][256]; #ifdef ZODB_64BIT_INTS
size_t count[8][256];
#else
size_t count[4][256];
#endif
size_t i; size_t i;
int offset, offsetinc; int offset, offsetinc;
/* Which byte position are we working on now? 0=LSB, 1, 2, ... */ /* Which byte position are we working on now? 0=LSB, 1, 2, ... */
int bytenum; int bytenum;
assert(sizeof(element_type) == 4); #ifdef ZODB_64BIT_INTS
assert(sizeof(element_type) == 8);
#else
assert(sizeof(element_type) == 4);
#endif
assert(in); assert(in);
assert(work); assert(work);
...@@ -102,6 +111,12 @@ radixsort_int4(element_type *in, element_type *work, size_t n) ...@@ -102,6 +111,12 @@ radixsort_int4(element_type *in, element_type *work, size_t n)
++count[1][(x >> 8) & 0xff]; ++count[1][(x >> 8) & 0xff];
++count[2][(x >> 16) & 0xff]; ++count[2][(x >> 16) & 0xff];
++count[3][(x >> 24) & 0xff]; ++count[3][(x >> 24) & 0xff];
#ifdef ZODB_64BIT_INTS
++count[4][(x >> 32) & 0xff];
++count[5][(x >> 40) & 0xff];
++count[6][(x >> 48) & 0xff];
++count[7][(x >> 56) & 0xff];
#endif
} }
/* For p an element_type* cast to char*, offset is how much farther we /* For p an element_type* cast to char*, offset is how much farther we
...@@ -111,7 +126,7 @@ radixsort_int4(element_type *in, element_type *work, size_t n) ...@@ -111,7 +126,7 @@ radixsort_int4(element_type *in, element_type *work, size_t n)
from p+offset to get to the element's more-significant bytes. from p+offset to get to the element's more-significant bytes.
*/ */
{ {
int one = 1; element_type one = 1;
if (*(char*)&one) { if (*(char*)&one) {
/* Little endian. */ /* Little endian. */
offset = 0; offset = 0;
...@@ -498,12 +513,12 @@ quicksort(element_type *plo, size_t n) ...@@ -498,12 +513,12 @@ quicksort(element_type *plo, size_t n)
/* Sort p and remove duplicates, as fast as we can. */ /* Sort p and remove duplicates, as fast as we can. */
static size_t static size_t
sort_int4_nodups(int *p, size_t n) sort_int_nodups(KEY_TYPE *p, size_t n)
{ {
size_t nunique; size_t nunique;
element_type *work; element_type *work;
assert(sizeof(int) == sizeof(element_type)); assert(sizeof(KEY_TYPE) == sizeof(element_type));
assert(p); assert(p);
/* Use quicksort if the array is small, OR if malloc can't find /* Use quicksort if the array is small, OR if malloc can't find
...@@ -514,7 +529,7 @@ sort_int4_nodups(int *p, size_t n) ...@@ -514,7 +529,7 @@ sort_int4_nodups(int *p, size_t n)
work = (element_type *)malloc(n * sizeof(element_type)); work = (element_type *)malloc(n * sizeof(element_type));
if (work) { if (work) {
element_type *out = radixsort_int4(p, work, n); element_type *out = radixsort_int(p, work, n);
nunique = uniq(p, out, n); nunique = uniq(p, out, n);
free(work); free(work);
} }
......
...@@ -20,6 +20,7 @@ from BTrees.IIBTree import IIBTree, IIBucket, IISet, IITreeSet ...@@ -20,6 +20,7 @@ from BTrees.IIBTree import IIBTree, IIBucket, IISet, IITreeSet
from BTrees.IFBTree import IFBTree, IFBucket, IFSet, IFTreeSet from BTrees.IFBTree import IFBTree, IFBucket, IFSet, IFTreeSet
from BTrees.OIBTree import OIBTree, OIBucket, OISet, OITreeSet from BTrees.OIBTree import OIBTree, OIBucket, OISet, OITreeSet
from BTrees.IIBTree import using64bits
from BTrees.check import check from BTrees.check import check
import transaction import transaction
...@@ -1145,6 +1146,113 @@ class BTreeTests(MappingBase): ...@@ -1145,6 +1146,113 @@ class BTreeTests(MappingBase):
"changed size") "changed size")
break break
LARGEST_32_BITS = 2147483647
SMALLEST_32_BITS = -LARGEST_32_BITS - 1
SMALLEST_POSITIVE_33_BITS = LARGEST_32_BITS + 1
LARGEST_NEGATIVE_33_BITS = SMALLEST_32_BITS - 1
LARGEST_64_BITS = 0x7fffffffffffffff
SMALLEST_64_BITS = -LARGEST_64_BITS - 1
SMALLEST_POSITIVE_65_BITS = LARGEST_64_BITS + 1
LARGEST_NEGATIVE_65_BITS = SMALLEST_64_BITS - 1
class TestLongIntSupport:
def getTwoValues(self):
"""Return two distinct values; these must compare as un-equal.
These values must be usable as values.
"""
return object(), object()
def getTwoKeys(self):
"""Return two distinct values, these must compare as un-equal.
These values must be usable as keys.
"""
return 0, 1
def _set_value(self, key, value):
self.t[key] = value
class TestLongIntKeys(TestLongIntSupport):
def testLongIntKeysWork(self):
o1, o2 = self.getTwoValues()
assert o1 != o2
# Test some small key values first:
self.t[0L] = o1
self.assertEqual(self.t[0], o1)
self.t[0] = o2
self.assertEqual(self.t[0L], o2)
self.assertEqual(list(self.t.keys()), [0])
# Test some large key values too:
k1 = SMALLEST_POSITIVE_33_BITS
k2 = LARGEST_64_BITS
k3 = SMALLEST_64_BITS
self.t[k1] = o1
self.t[k2] = o2
self.t[k3] = o1
self.assertEqual(self.t[k1], o1)
self.assertEqual(self.t[k2], o2)
self.assertEqual(self.t[k3], o1)
self.assertEqual(list(self.t.keys()), [k3, 0, k1, k2])
def testLongIntKeysOutOfRange(self):
o1, o2 = self.getTwoValues()
self.assertRaises(
ValueError,
self._set_value, SMALLEST_POSITIVE_65_BITS, o1)
self.assertRaises(
ValueError,
self._set_value, LARGEST_NEGATIVE_65_BITS, o1)
class TestLongIntValues(TestLongIntSupport):
def testLongIntValuesWork(self):
keys = list(self.getTwoKeys())
keys.sort()
k1, k2 = keys
assert k1 != k2
# This is the smallest positive integer that requires 33 bits:
v1 = SMALLEST_POSITIVE_33_BITS
v2 = v1 + 1
self.t[k1] = v1
self.t[k2] = v2
self.assertEqual(self.t[k1], v1)
self.assertEqual(self.t[k2], v2)
self.assertEqual(list(self.t.values()), [v1, v2])
def testLongIntValuesOutOfRange(self):
k1, k2 = self.getTwoKeys()
self.assertRaises(
ValueError,
self._set_value, k1, SMALLEST_POSITIVE_65_BITS)
self.assertRaises(
ValueError,
self._set_value, k1, LARGEST_NEGATIVE_65_BITS)
if not using64bits:
# We're not using 64-bit ints in this build, so we don't expect
# the long-integer tests to pass.
class TestLongIntKeys:
pass
class TestLongIntValues:
pass
# tests of various type errors # tests of various type errors
class TypeTest(TestCase): class TypeTest(TestCase):
...@@ -1499,18 +1607,24 @@ class OOSetTest(ExtendedSetTests): ...@@ -1499,18 +1607,24 @@ class OOSetTest(ExtendedSetTests):
def setUp(self): def setUp(self):
self.t = OOSet() self.t = OOSet()
class IIBTreeTest(BTreeTests): class IIBTreeTest(BTreeTests, TestLongIntKeys, TestLongIntValues):
def setUp(self): def setUp(self):
self.t = IIBTree() self.t = IIBTree()
class IFBTreeTest(BTreeTests): def getTwoValues(self):
return 1, 2
class IFBTreeTest(BTreeTests, TestLongIntKeys):
def setUp(self): def setUp(self):
self.t = IFBTree() self.t = IFBTree()
class IOBTreeTest(BTreeTests): def getTwoValues(self):
return 0.5, 1.5
class IOBTreeTest(BTreeTests, TestLongIntKeys):
def setUp(self): def setUp(self):
self.t = IOBTree() self.t = IOBTree()
class OIBTreeTest(BTreeTests): class OIBTreeTest(BTreeTests, TestLongIntValues):
def setUp(self): def setUp(self):
self.t = OIBTree() self.t = OIBTree()
def getTwoKeys(self):
return object(), object()
class OOBTreeTest(BTreeTests): class OOBTreeTest(BTreeTests):
def setUp(self): def setUp(self):
self.t = OOBTree() self.t = OOBTree()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment