Commit 9e2d8d99 authored by Tim Peters's avatar Tim Peters

Introduced a new BTREE_SEARCH macro for searching interior BTree nodes,

to squash delicate code duplication and for speed.

+ This is optimized in several subtle ways over the current method.  This
  is documented in Maintainer.txt, along with a correctness proof.

+ I'll replace all the BTree searches with this soon.  For now I just
  changed _BTree_get().  I *suspect* this also fixes a subtle bug
  introduced by the recent "don't ignore comparison error" patch:  if a
  comparison did trigger an exception, the function just returned without
  doing the PER_ALLOW_DEACTIVATION + PER_ACCESSED dance.  _BTree_get()
  does that dance again now.
parent d4c60b08
...@@ -147,6 +147,32 @@ staticforward PyExtensionClass BTreeType; ...@@ -147,6 +147,32 @@ staticforward PyExtensionClass BTreeType;
#define BTREE(O) ((BTree*)(O)) #define BTREE(O) ((BTree*)(O))
/* Use BTREE_SEARCH to find which child pointer to follow.
* RESULT An int lvalue to hold the index i such that SELF->data[i].child
* is the correct node to search next.
* SELF A pointer to a BTree node.
* KEY The key you're looking for, of type KEY_TYPE.
* ONERROR What to do if key comparison raises an exception; for example,
* perhaps 'return NULL'.
*
* See Maintainer.txt for discussion: this is optimized in subtle ways.
* It's recommended that you call this at the start of a routine, waiting
* to check for self->len == 0 after.
*/
#define BTREE_SEARCH(RESULT, SELF, KEY, ONERROR) { \
int _lo = 0; \
int _hi = (SELF)->len; \
int _i, _cmp; \
for (_i = _hi >> 1; _i > _lo; _i = (_lo + _hi) >> 1) { \
TEST_KEY_SET_OR(_cmp, (SELF)->data[_i].key, (KEY)) \
ONERROR; \
if (_cmp < 0) _lo = _i; \
else if (_cmp > 0) _hi = _i; \
else /* equal */ break; \
} \
(RESULT) = _i; \
}
typedef struct SetIteration_s typedef struct SetIteration_s
{ {
PyObject *set; PyObject *set;
...@@ -323,7 +349,7 @@ static char BTree_module_documentation[] = ...@@ -323,7 +349,7 @@ static char BTree_module_documentation[] =
"\n" "\n"
MASTER_ID MASTER_ID
BTREEITEMSTEMPLATE_C BTREEITEMSTEMPLATE_C
"$Id: BTreeModuleTemplate.c,v 1.26 2002/05/31 17:56:59 tim_one Exp $\n" "$Id: BTreeModuleTemplate.c,v 1.27 2002/05/31 20:01:16 tim_one Exp $\n"
BTREETEMPLATE_C BTREETEMPLATE_C
BUCKETTEMPLATE_C BUCKETTEMPLATE_C
KEYMACROS_H KEYMACROS_H
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
****************************************************************************/ ****************************************************************************/
#define BTREETEMPLATE_C "$Id: BTreeTemplate.c,v 1.32 2002/05/31 17:56:59 tim_one Exp $\n" #define BTREETEMPLATE_C "$Id: BTreeTemplate.c,v 1.33 2002/05/31 20:01:16 tim_one Exp $\n"
/* /*
** _BTree_get ** _BTree_get
...@@ -21,8 +21,8 @@ ...@@ -21,8 +21,8 @@
static PyObject * static PyObject *
_BTree_get(BTree *self, PyObject *keyarg, int has_key) _BTree_get(BTree *self, PyObject *keyarg, int has_key)
{ {
int min, max, i, cmp, copied=1; int min, copied=1;
PyObject *r; PyObject *r = NULL;
KEY_TYPE key; KEY_TYPE key;
COPY_KEY_FROM_ARG(key, keyarg, copied); COPY_KEY_FROM_ARG(key, keyarg, copied);
...@@ -30,38 +30,25 @@ _BTree_get(BTree *self, PyObject *keyarg, int has_key) ...@@ -30,38 +30,25 @@ _BTree_get(BTree *self, PyObject *keyarg, int has_key)
PER_USE_OR_RETURN(self, NULL); PER_USE_OR_RETURN(self, NULL);
BTREE_SEARCH(min, self, key, goto Error);
if (self->len) if (self->len)
{ {
for (min=0, max=self->len, i=max/2; max-min > 1; i=(min+max)/2)
{
TEST_KEY_SET_OR(cmp, self->data[i].key, key) return NULL;
if (cmp < 0) min=i;
else if (cmp == 0)
{
min=i;
break;
}
else max=i;
}
if (SameType_Check(self, self->data[min].child)) if (SameType_Check(self, self->data[min].child))
r=_BTree_get( BTREE(self->data[min].child), keyarg, r = _BTree_get( BTREE(self->data[min].child), keyarg,
has_key ? has_key + 1: 0); has_key ? has_key + 1: 0);
else else
r=_bucket_get(BUCKET(self->data[min].child), keyarg, r = _bucket_get(BUCKET(self->data[min].child), keyarg,
has_key ? has_key + 1: 0); has_key ? has_key + 1: 0);
} }
else else
{ /* No data */ { /* No data */
UNLESS (has_key) UNLESS (has_key)
{
PyErr_SetObject(PyExc_KeyError, keyarg); PyErr_SetObject(PyExc_KeyError, keyarg);
r=NULL;
}
else else
r=PyInt_FromLong(0); r = PyInt_FromLong(0);
} }
Error:
PER_ALLOW_DEACTIVATION(self); PER_ALLOW_DEACTIVATION(self);
PER_ACCESSED(self); PER_ACCESSED(self);
return r; return r;
......
...@@ -171,3 +171,92 @@ More or less random bits of helpful info. ...@@ -171,3 +171,92 @@ More or less random bits of helpful info.
mostly to "even out" pickle sizes in storage. That's why, e.g., mostly to "even out" pickle sizes in storage. That's why, e.g.,
an IIBTree has larger values than an OOBTree: pickles store ints an IIBTree has larger values than an OOBTree: pickles store ints
more efficiently than they can store arbitrary Python objects. more efficiently than they can store arbitrary Python objects.
The BTREE_SEARCH Macro
======================
For notational ease, consider a fixed BTree node x, and let
K(i) mean x->data.key[i]
C(i) mean all the keys reachable from x->data.child[i]
For each i in 0 to x->len-1 inclusive,
K(i) <= C(i) < K(i+1)
is a BTree node invariant, where we pretend that K(0) holds a key
smaller than any possible key, and K(x->len) holds a key larger
than any possible key. (Note that K(x->len) doesn't actually exist,
and K(0) is never used although space for it exists in non-empty
BTree nodes.)
When searching for a key k, then, the child pointer we want to follow
is the one at index i such that K(i) <= k < K(i+1). There can be
only one such i, since the keys are strictly increasing. And there is
at *least* one such i provided the tree isn't empty. For the moment,
assume the tree isn't empty (we'll get back to that later).
The macro's chief loop invariant is
K(lo) < k < K(hi)
This holds trivially at the start, since lo is set to 0 ahd hi to
x->len, and we pretend K(0) is minus infinity and K(len) is plus
infinity. Inside the loop, if K(i) < k we set lo to i, and if
K(i) > k we set hi to i. These obviously preserve the invariant.
If K(i) == k, the loop breaks and sets the result to i, and since
K(i) == k in that case i is obviously the correct result.
What if the key isn't present? lo and hi move toward each other,
narrowing the range, until eventually lo+1 == hi. At that point,
i = (lo+hi)/2 = (lo+lo+1)/2 = lo + 1/2 = lo, so that:
1. The loop's "i > lo" test is false, so the loop ends then.
and
2. The invariant still holds, so K(i) < k < K(i+1), and i is again
the correct answer.
Can we get out of the loop too early? No: if hi = lo + d for some d
greater than 1, then i = (lo+lo+d)/2 = lo + d/2, and d/2 is at least 1
since d is at least 2: i is strictly greater than lo then, and the
loop continues.
Can lo==hi? Yes, but only if the node is empty. Then i, lo and hi
all start out as 0, and the loop exits immediately. If the loop
isn't empty, then lo and hi start out with different values. Whenever
lo and hi have different values, lo <= (lo + hi)/2 < hi, so i and lo
are strictly smaller than hi, so setting either lo or hi to i leaves
the new lo strictly smaller than the new hi.
Can the loop fail to terminate? No: by the above, when lo < hi-1,
lo < i=(lo+hi)/2 < hi, so setting either lo or hi to i leaves the
new lo and hi strictly closer to each other than were the old lo and
hi.
Optimization points:
+ Division by 2 is done via shift rather via "/2". These are
signed ints, and almost all C compilers treat signed int division
as truncating, and shifting is not the same as truncation for
signed int division. The compiler has no way to know these values
aren't negative, so has to generate longer-winded code for "/2".
But we know these values aren't negative, and exploit it.
+ The order of _cmp comparisons matters. We're in an interior
BTree node, and are looking at only a tiny fraction of all the
keys that exist. So finding the key exactly in this node is
unlikely, and checking _cmp == 0 is a waste of time to the same
extent. It doesn't matter whether we check for _cmp < 0 or
_cmp > 0 first, so long as we do both before worrying about
equality.
+ At the start of a routine, it's better to run this macro even
if x->len is 0 (check for that afterwards). We just called a
function and so probably drained the pipeline. If the first thing
we do then is read up self->len and check it against 0, we just
sit there waiting for the data to get read up, and then another
immediate test-and-branch, and for a very unlikely case (BTree
nodes are rarely empty). It's better to get into the loop right
away so the normal case makes progress ASAP.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment