Commit 891bd3bb authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Go to the O(logN)-integer-compare implementation, which is about 5% faster...

Go to the O(logN)-integer-compare implementation, which is about 5% faster than the O(1) implementation.  I cannot explain it.  The old stuff is now in {{{omt-with-o1-cursors/}}}.  Addresses #855, #856.

git-svn-id: file:///svn/tokudb@4329 c7de825b-a66e-492c-adef-691d508d4ae1
parent d0f15ed7
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include <errno.h>
#include <sys/types.h>
typedef void *OMTVALUE;
#include "omt.h"
#include "omt-internal.h"
#include "../newbrt/memory.h"
#include "../newbrt/toku_assert.h"
#include "../include/db.h"
#include "../newbrt/brttypes.h"
/* Allocate an empty OMT and store it in *omtp.
 * The node pool and the scratch index array are each sized for
 * 2*max(num_starting_nodes,2) entries; the tree starts empty
 * (root==NODE_NULL, free_idx==0).
 * Returns 0 on success.  On allocation failure returns the errno left by the
 * failing allocation, frees whatever was already allocated, and leaves
 * *omtp untouched. */
static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
    if (num_starting_nodes < 2) num_starting_nodes = 2;
    OMT MALLOC(result);
    if (result==NULL) return errno;
    result->root=NODE_NULL;
    result->node_capacity = num_starting_nodes*2;
    MALLOC_N(result->node_capacity, result->nodes);
    if (result->nodes==NULL) {
        /* Capture errno before freeing: the free calls may overwrite it. */
        int saved_errno = errno;
        toku_free(result);
        return saved_errno;
    }
    result->tmparray_size = num_starting_nodes*2;
    MALLOC_N(result->tmparray_size, result->tmparray);
    if (result->tmparray==NULL) {
        /* Same reason as above: remember the failure code first. */
        int saved_errno = errno;
        toku_free(result->nodes);
        toku_free(result);
        return saved_errno;
    }
    result->free_idx = 0;
    *omtp = result;
    return 0;
}
/* Public constructor: create an empty OMT with the smallest starting
 * capacity and store it in *omtp.  Returns whatever omt_create_internal
 * returns. */
int toku_omt_create (OMT *omtp) {
    const u_int32_t minimum_starting_nodes = 2;
    return omt_create_internal(omtp, minimum_starting_nodes);
}
/* Release the node pool, the scratch array and the OMT header itself, and
 * null out the caller's handle.  The stored OMTVALUEs are not freed. */
void toku_omt_destroy(OMT *omtp) {
    OMT doomed = *omtp;
    *omtp = NULL;
    toku_free(doomed->nodes);
    toku_free(doomed->tmparray);
    toku_free(doomed);
}
/* Weight (number of values) of the subtree rooted at idx; an absent subtree
 * (NODE_NULL) weighs 0. */
static inline u_int32_t nweight(OMT omt, node_idx idx) {
    return (idx!=NODE_NULL) ? omt->nodes[idx].weight : 0;
}
/* Number of values stored in V, i.e. the weight of its root. */
u_int32_t toku_omt_size(OMT V) {
    node_idx root_idx = V->root;
    return nweight(V, root_idx);
}
/* Hand out the next unused slot of the node pool.  The pool is a bump
 * allocator: the caller must already have ensured there is room (asserted). */
static inline node_idx omt_node_malloc(OMT omt) {
    assert(omt->free_idx < omt->node_capacity);
    node_idx fresh = omt->free_idx;
    omt->free_idx++;
    return fresh;
}
/* Counterpart of omt_node_malloc.  It only sanity-checks the index: the slot
 * is NOT returned to the pool, so free_idx never decreases here.  Space is
 * reclaimed only when free_idx is reset elsewhere (see
 * maybe_resize_and_rebuild and toku_omt_clear). */
static inline void omt_node_free(OMT omt, node_idx idx) {
assert(idx < omt->node_capacity);
}
/* In-order dump of the subtree rooted at tree_idx into array[0..weight-1].
 * The left subtree lands first, then this node's value, then the right
 * subtree.  An absent subtree writes nothing. */
static inline void fill_array_with_subtree_values(OMT omt, OMTVALUE *array, node_idx tree_idx) {
    if (tree_idx!=NODE_NULL) {
        OMT_NODE subtree = &omt->nodes[tree_idx];
        u_int32_t left_count = nweight(omt, subtree->left);
        fill_array_with_subtree_values(omt, array, subtree->left);
        array[left_count] = subtree->value;
        fill_array_with_subtree_values(omt, array+left_count+1, subtree->right);
    }
}
// Example: numvalues=4, halfway=2, left side is values of size 2
// right side is values+3 of size 1
// numvalues=3, halfway=1, left side is values of size 1
// right side is values+2 of size 1
// numvalues=2, halfway=1, left side is values of size 1
// right side is values+2 of size 0
// numvalues=1, halfway=0, left side is values of size 0
// right side is values+1 of size 0.
/* Build a balanced subtree holding values[0..numvalues-1] out of freshly
 * allocated nodes and store its root index in *n_idxp (NODE_NULL when
 * numvalues==0).  The middle element becomes the root; the two halves are
 * built recursively.  The pool must have room for numvalues more nodes. */
static inline void create_from_sorted_array_internal(OMT omt, node_idx *n_idxp,
                                                     OMTVALUE *values, u_int32_t numvalues) {
    if (numvalues==0) {
        *n_idxp = NODE_NULL;
        return;
    }
    u_int32_t mid = numvalues/2;
    u_int32_t right_count = numvalues-(mid+1);
    node_idx root_idx = omt_node_malloc(omt);
    OMT_NODE root = &omt->nodes[root_idx];
    root->weight = numvalues;
    root->value = values[mid];
    create_from_sorted_array_internal(omt, &root->left, values, mid);
    create_from_sorted_array_internal(omt, &root->right, values+mid+1, right_count);
    *n_idxp = root_idx;
}
/* Create an OMT pre-populated with values[0..numvalues-1] (in that order) and
 * store it in *omtp.  Returns 0 on success, or the creation error, in which
 * case *omtp is not written. */
int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues) {
    OMT fresh = NULL;
    int r = omt_create_internal(&fresh, numvalues);
    if (r!=0) return r;
    create_from_sorted_array_internal(fresh, &fresh->root, values, numvalues);
    *omtp = fresh;
    return 0;
}
/* Tells maybe_resize_and_rebuild what to do when it replaces the node array:
 *   MAYBE_REBUILD  copy the values out first and rebuild the tree in the new array;
 *   JUST_RESIZE    install the new array and leave the tree empty (root==NODE_NULL). */
enum build_choice { MAYBE_REBUILD, JUST_RESIZE };
/* Make sure omt has room for n values, growing or shrinking its two arrays
 * when needed.
 *
 * The scratch array (tmparray) is replaced when it is smaller than n or at
 * least twice the target size.  The node array is replaced when it is too
 * small, at least twice the target size, or its bump allocator is exhausted
 * while the tree is about to grow.  The target size is 4 for n<=2, else 2*n.
 *
 * When the node array is replaced:
 *   choice==MAYBE_REBUILD  the current values are rebuilt into the new array;
 *   choice==JUST_RESIZE    the tree is left empty (root==NODE_NULL).
 * In both cases free_idx restarts at 0.
 *
 * All allocations happen before any state is changed, so on a nonzero return
 * omt is unmodified.  Returns 0 on success, else the errno of the failing
 * allocation. */
static inline int maybe_resize_and_rebuild(OMT omt, u_int32_t n, enum build_choice choice) {
    node_idx *new_tmparray = NULL;
    OMT_NODE new_nodes = NULL;
    OMTVALUE *tmp_values = NULL;
    int r = ENOSYS;
    u_int32_t new_size = n<=2 ? 4 : 2*n;
    u_int32_t num_nodes = nweight(omt, omt->root);

    if (omt->tmparray_size<n ||
        (omt->tmparray_size/2 >= new_size)) {
        /* Malloc and free instead of realloc (saves the memcpy). */
        MALLOC_N(new_size, new_tmparray);
        if (new_tmparray==NULL) { r = errno; goto cleanup; }
    }
    /* Rebuild/realloc the nodes array iff any of the following:
     *  The array is smaller than the number of elements we want.
     *  We are increasing the number of elements and there is no free space.
     *  The array is too large. */
    if ((omt->node_capacity/2 >= new_size) ||
        (omt->free_idx>=omt->node_capacity && num_nodes<n) ||
        (omt->node_capacity<n)) {
        /* Only stage the values when there are any: a zero-length allocation
         * may legitimately return NULL and must not be mistaken for failure. */
        if (choice==MAYBE_REBUILD && num_nodes>0) {
            MALLOC_N(num_nodes, tmp_values);
            if (tmp_values==NULL) { r = errno; goto cleanup;}
        }
        MALLOC_N(new_size, new_nodes);
        if (new_nodes==NULL) { r = errno; goto cleanup; }
    }
    /* Nothing can fail now.  Atomically update both sizes. */
    if (new_tmparray) {
        toku_free(omt->tmparray);
        omt->tmparray = new_tmparray;
        omt->tmparray_size = new_size;
    }
    if (new_nodes) {
        /* Save the values (in order) before the old node array goes away. */
        if (tmp_values) {
            fill_array_with_subtree_values(omt, tmp_values, omt->root);
        }
        toku_free(omt->nodes);
        omt->nodes = new_nodes;
        omt->node_capacity = new_size;
        omt->free_idx = 0; /* Allocating from mempool starts over. */
        omt->root = NODE_NULL;
        /* Rebuild the tree in the new array, packed from slot 0. */
        if (tmp_values) {
            create_from_sorted_array_internal(omt, &omt->root, tmp_values, num_nodes);
        }
    }
    r = 0;
cleanup:
    if (r!=0) {
        if (new_tmparray) toku_free(new_tmparray);
        if (new_nodes) toku_free(new_nodes);
    }
    if (tmp_values) toku_free(tmp_values);
    return r;
}
/* In-order dump of the node indexes (not the values) of the subtree rooted at
 * tree_idx into array[0..weight-1]. */
static inline void fill_array_with_subtree_idxs(OMT omt, node_idx *array, node_idx tree_idx) {
    if (tree_idx!=NODE_NULL) {
        OMT_NODE subtree = &omt->nodes[tree_idx];
        u_int32_t left_count = nweight(omt, subtree->left);
        fill_array_with_subtree_idxs(omt, array, subtree->left);
        array[left_count] = tree_idx;
        fill_array_with_subtree_idxs(omt, array+left_count+1, subtree->right);
    }
}
/* Relink the already-existing nodes listed (in order) in idxs[0..numvalues-1]
 * into a balanced subtree and store its root in *n_idxp.  No nodes are
 * allocated and no values are moved; only weight/left/right are rewritten.
 * Used for rebalancing. */
static inline void rebuild_subtree_from_idxs(OMT omt, node_idx *n_idxp, node_idx *idxs,
                                             u_int32_t numvalues) {
    if (numvalues==0) {
        *n_idxp = NODE_NULL;
        return;
    }
    u_int32_t mid = numvalues/2;
    u_int32_t right_count = numvalues-(mid+1);
    node_idx root_idx = idxs[mid];
    OMT_NODE root = &omt->nodes[root_idx];
    root->weight = numvalues;
    /* root->value already holds the right value. */
    rebuild_subtree_from_idxs(omt, &root->left, idxs, mid);
    rebuild_subtree_from_idxs(omt, &root->right, idxs+mid+1, right_count);
    *n_idxp = root_idx;
}
/* Rebalance the subtree hanging off the link *n_idxp: list its nodes in order
 * in the scratch array, then relink them into a balanced shape. */
static inline void rebalance(OMT omt, node_idx *n_idxp) {
    node_idx subtree_root = *n_idxp;
    u_int32_t subtree_size = omt->nodes[subtree_root].weight;
    fill_array_with_subtree_idxs(omt, omt->tmparray, subtree_root);
    rebuild_subtree_from_idxs(omt, n_idxp, omt->tmparray, subtree_size);
}
/* Would node n_idx be out of balance once its left/right subtree weights are
 * adjusted by leftmod/rightmod?  A node is out of balance when one side
 * (counting the node itself) weighs less than half, rounded up, of the other
 * side (also counting the node itself).  An absent node is always balanced. */
static inline BOOL will_need_rebalance(OMT omt, node_idx n_idx, int leftmod, int rightmod) {
    if (n_idx==NODE_NULL) return FALSE;
    OMT_NODE node = &omt->nodes[n_idx];
    u_int32_t weight_left  = nweight(omt, node->left)  + leftmod;
    u_int32_t weight_right = nweight(omt, node->right) + rightmod;
    /* One of the 1's is for the node itself; the other rounds the halving up. */
    int left_too_light  = (1+weight_left  < (1+1+weight_right)/2);
    int right_too_light = (1+weight_right < (1+1+weight_left)/2);
    return (left_too_light || right_too_light);
}
/* Insert value at position index of the subtree hanging off link *n_idxp.
 * Every node on the way down gets its weight bumped.  The first (topmost)
 * link whose node would become unbalanced is recorded in *rebalance_idx,
 * unless one was already recorded.  The caller must have reserved a free
 * node slot. */
static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_int32_t index, node_idx **rebalance_idx) {
    if (*n_idxp==NODE_NULL) {
        /* Reached an empty link: hang a new leaf here. */
        assert(index==0);
        node_idx leaf_idx = omt_node_malloc(omt);
        OMT_NODE leaf = &omt->nodes[leaf_idx];
        leaf->value = value;
        leaf->left = NODE_NULL;
        leaf->right = NODE_NULL;
        leaf->weight = 1;
        *n_idxp = leaf_idx;
        return;
    }

    node_idx here_idx = *n_idxp;
    OMT_NODE here = &omt->nodes[here_idx];
    u_int32_t left_weight = nweight(omt, here->left);
    int go_left = (index <= left_weight);

    here->weight++;
    if (*rebalance_idx==NULL &&
        will_need_rebalance(omt, here_idx, go_left ? 1 : 0, go_left ? 0 : 1)) {
        *rebalance_idx = n_idxp;
    }
    if (go_left) {
        insert_internal(omt, &here->left, value, index, rebalance_idx);
    } else {
        /* Skip the left subtree and this node when indexing into the right. */
        insert_internal(omt, &here->right, value, index-left_weight-1, rebalance_idx);
    }
}
/* Insert value so that it ends up at position index, shifting later values
 * up by one.  Returns ERANGE when index is past the end, the resize error if
 * room could not be made, else 0. */
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) {
    u_int32_t current_size = nweight(omt, omt->root);
    if (index > current_size) return ERANGE;

    int r = maybe_resize_and_rebuild(omt, 1+current_size, MAYBE_REBUILD);
    if (r!=0) return r;

    node_idx *unbalanced_link = NULL;
    insert_internal(omt, &omt->root, value, index, &unbalanced_link);
    if (unbalanced_link!=NULL) rebalance(omt, unbalanced_link);
    return 0;
}
/* Overwrite the value at position index of the subtree rooted at n_idx.
 * Walks down from the root; index must be within the subtree (asserted via
 * the non-null check at every step). */
static inline void set_at_internal(OMT omt, node_idx n_idx, OMTVALUE v, u_int32_t index) {
    for (;;) {
        assert(n_idx!=NODE_NULL);
        OMT_NODE cur = &omt->nodes[n_idx];
        u_int32_t left_weight = nweight(omt, cur->left);
        if (index==left_weight) {
            cur->value = v;
            return;
        }
        if (index<left_weight) {
            n_idx = cur->left;
        } else {
            /* Skip the left subtree and this node. */
            index -= left_weight+1;
            n_idx = cur->right;
        }
    }
}
/* Replace the value at position index.  Returns ERANGE when index is not a
 * valid position, else 0. */
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index) {
    u_int32_t size = nweight(omt, omt->root);
    if (index < size) {
        set_at_internal(omt, omt->root, value, index);
        return 0;
    }
    return ERANGE;
}
/* Remove the element at position index from the subtree hanging off link
 * *n_idxp and store the removed value in *vp.
 *
 * Weights along the path are decremented.  The first (topmost) link whose
 * node would become unbalanced is recorded in *rebalance_idx, unless one was
 * already recorded.
 *
 * A node with at most one child is unlinked directly.  A node with two
 * children keeps its slot: its successor (position 0 of the right subtree)
 * is removed instead and the successor's value is moved into the node. */
static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, OMTVALUE *vp, node_idx **rebalance_idx) {
    assert(*n_idxp!=NODE_NULL);
    OMT_NODE n = omt->nodes+*n_idxp;
    if (index < nweight(omt, n->left)) {
        /* Target is in the left subtree. */
        n->weight--;
        if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, -1, 0)) {
            *rebalance_idx = n_idxp;
        }
        delete_internal(omt, &n->left, index, vp, rebalance_idx);
    } else if (index == nweight(omt, n->left)) {
        /* Target is this node. */
        if (n->left==NODE_NULL) {
            u_int32_t idx = *n_idxp;
            *n_idxp = n->right;
            *vp = n->value;
            omt_node_free(omt, idx);
        } else if (n->right==NODE_NULL) {
            u_int32_t idx = *n_idxp;
            *n_idxp = n->left;
            *vp = n->value;
            omt_node_free(omt, idx);
        } else {
            OMTVALUE zv;
            // delete the successor of index, get the value, and store it here.
            if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) {
                *rebalance_idx = n_idxp;
            }
            delete_internal(omt, &n->right, 0, &zv, rebalance_idx);
            /* Report the value being removed before it is overwritten;
             * otherwise *vp would be left unset on this path. */
            *vp = n->value;
            n->value = zv;
            n->weight--;
        }
    } else {
        /* Target is in the right subtree. */
        n->weight--;
        if (*rebalance_idx==NULL && will_need_rebalance(omt, *n_idxp, 0, -1)) {
            *rebalance_idx = n_idxp;
        }
        delete_internal(omt, &n->right, index-nweight(omt, n->left)-1, vp, rebalance_idx);
    }
}
/* Remove the value at position index, shifting later values down by one.
 * The removed value is discarded.  Returns ERANGE when index is not a valid
 * position, the resize error if resizing failed, else 0. */
int toku_omt_delete_at(OMT omt, u_int32_t index) {
    u_int32_t current_size = nweight(omt, omt->root);
    if (index >= current_size) return ERANGE;

    int r = maybe_resize_and_rebuild(omt, -1+current_size, MAYBE_REBUILD);
    if (r!=0) return r;

    OMTVALUE removed;
    node_idx *unbalanced_link = NULL;
    delete_internal(omt, &omt->root, index, &removed, &unbalanced_link);
    if (unbalanced_link!=NULL) rebalance(omt, unbalanced_link);
    return 0;
}
/* Append idx to the cursor's root-to-node path, doubling the path array when
 * it is about to fill up.  Returns 0 on success or errno if growing the
 * array failed (the path is then unchanged). */
static int omtcursor_stack_push(OMTCURSOR c, node_idx idx) {
    int needs_growth = (c->max_pathlen-1 <= c->pathlen);
    if (needs_growth) {
        u_int32_t doubled = c->max_pathlen*2;
        node_idx *grown = toku_realloc(c->path, doubled*sizeof(*c->path));
        if (grown==NULL) return errno;
        c->max_pathlen = doubled;
        c->path = grown;
    }
    c->path[c->pathlen] = idx;
    c->pathlen++;
    return 0;
}
/* Last node index on the cursor's path.  The path must be non-empty; this is
 * not checked here. */
static node_idx omtcursor_stack_peek(OMTCURSOR c) {
    const node_idx *stack = c->path;
    return stack[c->pathlen-1];
}
/* Remove and return the last node index on the cursor's path (asserts the
 * path is non-empty). */
static node_idx omtcursor_stack_pop(OMTCURSOR c) {
    assert(c->pathlen);
    node_idx top = omtcursor_stack_peek(c);
    c->pathlen -= 1;
    return top;
}
/* Bind cursor c to omt and start it with an empty path. */
static void omtcursor_associate(OMTCURSOR c, OMT omt) {
    c->pathlen = 0;
    c->omt = omt;
}
/* Store in *v the value at position i of the subtree rooted at idx.  When a
 * cursor is supplied, every node visited on the way down is pushed onto its
 * path.  i must be a valid position in the subtree.  Returns 0, or the push
 * error (in which case *v is not written). */
static inline int fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
    for (;;) {
        /* Record the node we are standing on in the cursor path. */
        if (c!=NULL) {
            int r = omtcursor_stack_push(c, idx);
            if (r!=0) return r;
        }
        OMT_NODE node = &V->nodes[idx];
        u_int32_t left_weight = nweight(V, node->left);
        if (i==left_weight) {
            *v = node->value;
            return 0;
        }
        if (i<left_weight) {
            idx = node->left;
        } else {
            /* Skip the left subtree and this node. */
            i -= left_weight+1;
            idx = node->right;
        }
    }
}
/* Store in *v the value at position i.  If c is non-NULL it is re-associated
 * with V and positioned on that value; if building the cursor path fails the
 * cursor is invalidated.  Returns ERANGE for an out-of-range i (cursor
 * untouched), the path error, or 0. */
int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
    if (i >= nweight(V, V->root)) return ERANGE;

    int have_cursor = (c!=NULL);
    if (have_cursor) omtcursor_associate(c, V);

    int r = fetch_internal(V, V->root, i, v, c);
    if (have_cursor && r!=0) toku_omt_cursor_invalidate(c);
    return r;
}
/* In-order walk of the subtree rooted at n_idx, whose first element has
 * absolute position idx, calling f(value, position, v) for every position in
 * [left,right).  Subtrees that lie entirely outside the range are skipped.
 * Stops at, and returns, the first nonzero result of f; returns 0 otherwise. */
static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right,
                                   node_idx n_idx, u_int32_t idx,
                                   int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
    if (n_idx==NODE_NULL) return 0;

    OMT_NODE node = &omt->nodes[n_idx];
    u_int32_t idx_root = idx+nweight(omt, node->left);
    int r;

    if (left < idx_root) {
        r = iterate_internal(omt, left, right, node->left, idx, f, v);
        if (r) return r;
    }
    if (left<=idx_root && idx_root<right) {
        r = f(node->value, idx_root, v);
        if (r) return r;
    }
    if (idx_root+1 < right) {
        return iterate_internal(omt, left, right, node->right, idx_root+1, f, v);
    }
    return 0;
}
/* Call f on every value of omt, in order; equivalent to iterating over the
 * full range [0, size). */
int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
    u_int32_t size = nweight(omt, omt->root);
    return iterate_internal(omt, 0, size, omt->root, 0, f, v);
}
/* Call f on every value of omt whose position lies in [left,right), in order. */
int toku_omt_iterate_on_range(OMT omt, u_int32_t left, u_int32_t right, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
    node_idx root_idx = omt->root;
    return iterate_internal(omt, left, right, root_idx, 0, f, v);
}
/* Insert value at the position chosen by the search function h.
 * If some stored value already makes h return 0, nothing is inserted and
 * DB_KEYEXIST is returned (with *index set to that value's position).
 * Otherwise the value goes where toku_omt_find_zero reported the miss and
 * *index receives that position.  index may be NULL. */
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index) {
    u_int32_t where;
    int r = toku_omt_find_zero(omt, h, v, NULL, &where, NULL);
    if (r==0) {
        if (index!=NULL) *index = where;
        return DB_KEYEXIST;
    }
    if (r!=DB_NOTFOUND) return r;

    r = toku_omt_insert_at(omt, value, where);
    if (r!=0) return r;
    if (index!=NULL) *index = where;
    return 0;
}
/* Search the subtree rooted at n_idx for the leftmost value with h(value,extra)==0.
 * Returns 0 and sets *value / *index (position relative to this subtree) on a hit.
 * Returns DB_NOTFOUND on a miss, with *index set to the position, relative to
 * this subtree, where the descent ended; *value is not written in that case.
 * If c is non-NULL, every visited node is pushed on its path, and the path is
 * cut back so that it ends at the node matching the reported position.
 * A failed push is returned as-is; note that callers higher in the recursion
 * still adjust *index on that path although it was never initialised. */
static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
int r;
if (n_idx==NODE_NULL) {
// Empty subtree: the descent ends at position 0 of it.
*index=0;
return DB_NOTFOUND;
}
// Add the current index to the cursor path
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
OMT_NODE n = omt->nodes+n_idx;
int hv = h(n->value, extra);
if (hv<0) {
// This node is too small: continue to the right and shift the
// resulting position past the left subtree and this node.
r = find_internal_zero(omt, n->right, h, extra, value, index, c);
*index += nweight(omt, n->left)+1;
return r;
} else if (hv>0) {
// This node is too large: continue to the left.
r = find_internal_zero(omt, n->left, h, extra, value, index, c);
if (c!=NULL && r==DB_NOTFOUND && *index==nweight(omt, n->left)) {
// The reported position is this node's own position.
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
return r;
} else {
// This node matches, but an earlier match may exist on the left.
r = find_internal_zero(omt, n->left, h, extra, value, index, c);
if (r==DB_NOTFOUND) {
// No earlier match: this node is the answer.
*index = nweight(omt, n->left);
*value = n->value;
if (c!=NULL) {
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
r = 0;
}
return r;
}
}
/* Look for the leftmost value with h(value,extra)==0.
 * Returns 0 on a hit, DB_NOTFOUND on a miss, or a cursor-path error.
 * On a hit or a miss *index (if non-NULL) receives the position found; on a
 * hit *value (if non-NULL) receives the value.
 * If c is non-NULL it is re-associated with V; it is invalidated on a
 * cursor-path error or when the reported position is one past the end. */
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
    u_int32_t found_idx;
    OMTVALUE found_val;

    if (c!=NULL) omtcursor_associate(c, V);
    int r = find_internal_zero(V, V->root, h, extra, &found_val, &found_idx, c);
    int hit_or_miss = (r==0 || r==DB_NOTFOUND);

    if (c!=NULL) {
        /* found_idx is only inspected when the search itself completed. */
        if (!hit_or_miss || found_idx==nweight(V, V->root)) {
            toku_omt_cursor_invalidate(c);
        }
    }
    if (c==NULL || hit_or_miss) {
        if (index!=NULL) *index = found_idx;
        if (r==0 && value!=NULL) *value = found_val;
    }
    return r;
}
// Find the largest i such that h(V_i,extra)<0 within the subtree at n_idx.
// On success returns 0 with *index (relative to this subtree) and *value set;
// returns DB_NOTFOUND if no value qualifies, or a cursor-path error.
// With a cursor, visited nodes are pushed and the path is cut back to end at
// the answer.
static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
    if (n_idx==NODE_NULL) return DB_NOTFOUND;

    // Add the current index to the cursor path
    if (c!=NULL) {
        int push_r = omtcursor_stack_push(c, n_idx);
        if (push_r) return push_r;
    }

    OMT_NODE node = &omt->nodes[n_idx];
    if (!(h(node->value, extra) < 0)) {
        // This node does not qualify; only the left subtree can.
        return find_internal_minus(omt, node->left, h, extra, value, index, c);
    }

    // This node qualifies; a larger qualifying one may sit to the right.
    int r = find_internal_minus(omt, node->right, h, extra, value, index, c);
    if (r==0) {
        (*index) += nweight(omt, node->left)+1;
    } else if (r==DB_NOTFOUND) {
        *index = nweight(omt, node->left);
        *value = node->value;
        if (c!=NULL) {
            //Truncate the saved cursor path at n_idx.
            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
        }
        r = 0;
    }
    return r;
}
// Find the smallest i such that h(V_i,extra)>0 within the subtree at n_idx.
// On success returns 0 with *index (relative to this subtree) and *value set;
// returns DB_NOTFOUND if no value qualifies, or a cursor-path error.
// With a cursor, visited nodes are pushed and the path is cut back to end at
// the answer.
static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
    if (n_idx==NODE_NULL) return DB_NOTFOUND;

    // Add the current index to the cursor path
    if (c!=NULL) {
        int push_r = omtcursor_stack_push(c, n_idx);
        if (push_r) return push_r;
    }

    OMT_NODE node = &omt->nodes[n_idx];
    int r;
    if (!(h(node->value, extra) > 0)) {
        // This node does not qualify; only the right subtree can.
        r = find_internal_plus(omt, node->right, h, extra, value, index, c);
        if (r==0) (*index) += nweight(omt, node->left)+1;
        return r;
    }

    // This node qualifies; a smaller qualifying one may sit to the left.
    r = find_internal_plus(omt, node->left, h, extra, value, index, c);
    if (r==DB_NOTFOUND) {
        *index = nweight(omt, node->left);
        *value = node->value;
        if (c!=NULL) {
            //Truncate the saved cursor path at n_idx.
            while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
        }
        r = 0;
    }
    return r;
}
/* Directional search.  direction<0 finds the largest position whose value
 * makes h negative; direction>0 finds the smallest position whose value makes
 * h positive.  direction==0 is not supported and aborts.
 * On success returns 0 and fills *index / *value when they are non-NULL.
 * If c is non-NULL it is re-associated with V, and invalidated on any
 * nonzero result. */
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
    if (direction==0) {
        abort();
    }

    u_int32_t found_idx;
    OMTVALUE found_val;
    int r;

    if (c!=NULL) omtcursor_associate(c, V);
    r = (direction<0)
        ? find_internal_minus(V, V->root, h, extra, &found_val, &found_idx, c)
        : find_internal_plus( V, V->root, h, extra, &found_val, &found_idx, c);

    if (r!=0) {
        if (c!=NULL) toku_omt_cursor_invalidate(c);
        return r;
    }
    if (index!=NULL) *index = found_idx;
    if (value!=NULL) *value = found_val;
    return r;
}
/* Split omt at position index: omt keeps values [0,index) and a newly
 * created OMT, stored in *newomtp, receives values [index,size).
 *
 * Returns 0 on success, ERANGE when index>size, or an allocation error.
 * On a nonzero return omt is unchanged and *newomtp is not written.
 *
 * All values are first copied into a temporary array; both trees are then
 * rebuilt from it.  omt's node pool is restarted from slot 0 before the
 * rebuild, because maybe_resize_and_rebuild only resets the pool when it
 * actually replaces the node array, and slots consumed by earlier inserts
 * are never handed back by deletes. */
int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) {
    int r = ENOSYS;
    OMT newomt = NULL;
    OMTVALUE *tmp_values = NULL;
    u_int32_t total = nweight(omt, omt->root);
    if (index>total) { r = ERANGE; goto cleanup; }
    u_int32_t newsize = total-index;
    if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
    /* Skip the staging buffer for an empty OMT: a zero-length allocation may
     * return NULL and must not be mistaken for failure. */
    if (total>0) {
        MALLOC_N(total, tmp_values);
        if (tmp_values==NULL) { r = errno; goto cleanup; }
        fill_array_with_subtree_values(omt, tmp_values, omt->root);
    }
    // Modify omt's array at the last possible moment, since after this nothing can fail.
    if ((r = maybe_resize_and_rebuild(omt, index, JUST_RESIZE))) goto cleanup;
    /* Every value lives in tmp_values now, so the whole pool can be reused. */
    omt->free_idx = 0;
    omt->root = NODE_NULL;
    if (total>0) {
        create_from_sorted_array_internal(omt, &omt->root, tmp_values, index);
        create_from_sorted_array_internal(newomt, &newomt->root, tmp_values+index, newsize);
    }
    *newomtp = newomt;
    r = 0;
cleanup:
    if (r!=0) {
        if (newomt) toku_omt_destroy(&newomt);
    }
    if (tmp_values) toku_free(tmp_values);
    return r;
}
/* Concatenate leftomt and rightomt into a newly created OMT stored in
 * *newomtp: all values of leftomt, in order, followed by all values of
 * rightomt.
 *
 * On success (0) BOTH inputs are destroyed; the caller's own handles are not
 * nulled (they are passed by value) and must not be used afterwards.
 * On failure the inputs are left intact and *newomtp is not written. */
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
    int r = ENOSYS;
    OMT newomt = NULL;
    OMTVALUE *tmp_values = NULL;
    u_int32_t leftsize = toku_omt_size(leftomt);
    u_int32_t newsize = leftsize+toku_omt_size(rightomt);
    if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
    /* With nothing to copy the new OMT is already complete; skipping the
     * staging buffer avoids treating a NULL from a zero-length allocation
     * as a failure. */
    if (newsize>0) {
        MALLOC_N(newsize, tmp_values);
        if (tmp_values==NULL) { r = errno; goto cleanup; }
        fill_array_with_subtree_values(leftomt, tmp_values, leftomt->root);
        fill_array_with_subtree_values(rightomt, tmp_values+leftsize, rightomt->root);
        create_from_sorted_array_internal(newomt, &newomt->root, tmp_values, newsize);
    }
    toku_omt_destroy(&leftomt);
    toku_omt_destroy(&rightomt);
    *newomtp = newomt;
    r = 0;
cleanup:
    if (r!=0) {
        if (newomt) toku_omt_destroy(&newomt);
    }
    if (tmp_values) toku_free(tmp_values);
    return r;
}
/* Drop every value: the tree becomes empty and the node pool restarts from
 * slot 0.  No memory is released and the capacities are unchanged. */
void toku_omt_clear(OMT omt) {
    omt->root = NODE_NULL;
    omt->free_idx = 0;
}
/* Allocate a cursor that is not associated with any OMT and has an empty
 * path of capacity TOKU_OMTCURSOR_INITIAL_SIZE, and store it in *p.
 * Returns 0 on success, or the errno of the failing allocation, in which
 * case *p is not written. */
int toku_omt_cursor_create(OMTCURSOR *p) {
    OMTCURSOR MALLOC(result);
    if (result==NULL) return errno;
    result->max_pathlen = TOKU_OMTCURSOR_INITIAL_SIZE;
    result->pathlen = 0;
    MALLOC_N(result->max_pathlen, result->path);
    if (result->path==NULL) {
        /* Capture errno before freeing: the free call may overwrite it. */
        int saved_errno = errno;
        toku_free(result);
        return saved_errno;
    }
    result->omt = NULL;
    *p = result;
    return 0;
}
/* Free the cursor's path array and the cursor itself, and null out the
 * caller's handle. */
void toku_omt_cursor_destroy(OMTCURSOR *p) {
    OMTCURSOR doomed = *p;
    *p = NULL;
    toku_free(doomed->path);
    toku_free(doomed);
}
/* A cursor is valid when it has a non-empty path and is bound to an OMT.
 * Returns 1 if valid, 0 otherwise. */
int toku_omt_cursor_is_valid(OMTCURSOR c) {
    if (!(c->pathlen>0)) return 0;
    return c->omt!=NULL;
}
/* Detach the cursor from its OMT and empty its path.  The path array itself
 * is kept for reuse. */
void toku_omt_cursor_invalidate(OMTCURSOR c) {
    c->omt = NULL;
    c->pathlen = 0;
}
/* Store in *v the value of the node at the end of the cursor's path.  The
 * cursor must be valid; this is not checked here. */
static void omtcursor_current_internal(OMTCURSOR c, OMTVALUE *v) {
    node_idx here = omtcursor_stack_peek(c);
    OMT_NODE node = &c->omt->nodes[here];
    *v = node->value;
}
/* Store in *v the value the cursor points at.  Returns EINVAL (and leaves *v
 * alone) when the cursor is not valid, else 0. */
int toku_omt_cursor_current(OMTCURSOR c, OMTVALUE *v) {
    if (toku_omt_cursor_is_valid(c)) {
        omtcursor_current_internal(c, v);
        return 0;
    }
    return EINVAL;
}
/* Advance the cursor to its in-order successor.
 * If the current node has a right child, the successor is the leftmost node
 * of that right subtree.  Otherwise pop the path until a node is left that
 * was reached through a left link; its parent is the successor.
 * Returns 0 on success.  Returns EINVAL if the cursor was invalid, ran off
 * the end, or its path could not be extended; in the last two cases the
 * cursor is invalidated. */
static int omtcursor_next_internal(OMTCURSOR c) {
    if (!toku_omt_cursor_is_valid(c)) return EINVAL;

    node_idx descend = c->omt->nodes[omtcursor_stack_peek(c)].right;
    if (descend!=NODE_NULL) {
        /* Enter the right subtree, then keep going left. */
        do {
            if (omtcursor_stack_push(c, descend)) {
                toku_omt_cursor_invalidate(c);
                return EINVAL;
            }
            descend = c->omt->nodes[descend].left;
        } while (descend!=NODE_NULL);
        return 0;
    }

    /* Pop the stack till we remove a left child. */
    while (c->pathlen>=2) {
        node_idx child_idx = omtcursor_stack_pop(c);
        node_idx parent_idx = omtcursor_stack_peek(c);
        if (c->omt->nodes[parent_idx].left==child_idx) return 0;
    }
    toku_omt_cursor_invalidate(c);
    return EINVAL;
}
/* Step the cursor forward and store the new current value in *v.  Returns
 * EINVAL (leaving *v alone) if the step failed, else 0. */
int toku_omt_cursor_next(OMTCURSOR c, OMTVALUE *v) {
    int step_failed = omtcursor_next_internal(c);
    if (step_failed) return EINVAL;
    omtcursor_current_internal(c, v);
    return 0;
}
/* Move the cursor to its in-order predecessor.
 * If the current node has a left child, the predecessor is the rightmost
 * node of that left subtree.  Otherwise pop the path until a node is left
 * that was reached through a right link; its parent is the predecessor.
 * Returns 0 on success.  Returns EINVAL if the cursor was invalid, ran off
 * the front, or its path could not be extended; in the last two cases the
 * cursor is invalidated. */
static int omtcursor_prev_internal(OMTCURSOR c) {
    if (!toku_omt_cursor_is_valid(c)) return EINVAL;

    node_idx descend = c->omt->nodes[omtcursor_stack_peek(c)].left;
    if (descend!=NODE_NULL) {
        /* Enter the left subtree, then keep going right. */
        do {
            if (omtcursor_stack_push(c, descend)) {
                toku_omt_cursor_invalidate(c);
                return EINVAL;
            }
            descend = c->omt->nodes[descend].right;
        } while (descend!=NODE_NULL);
        return 0;
    }

    /* Pop the stack till we remove a right child. */
    while (c->pathlen>=2) {
        node_idx child_idx = omtcursor_stack_pop(c);
        node_idx parent_idx = omtcursor_stack_peek(c);
        if (c->omt->nodes[parent_idx].right==child_idx) return 0;
    }
    toku_omt_cursor_invalidate(c);
    return EINVAL;
}
/* Step the cursor backward and store the new current value in *v.  Returns
 * EINVAL (leaving *v alone) if the step failed, else 0. */
int toku_omt_cursor_prev(OMTCURSOR c, OMTVALUE *v) {
    int step_failed = omtcursor_prev_internal(c);
    if (step_failed) return EINVAL;
    omtcursor_current_internal(c, v);
    return 0;
}
/* Bytes held by the OMT: the header plus the full capacity of the node array
 * and of the scratch array (not just the parts in use). */
size_t toku_omt_memory_size (OMT omt) {
    size_t header_bytes  = sizeof(*omt);
    size_t node_bytes    = omt->node_capacity*sizeof(omt->nodes[0]);
    size_t scratch_bytes = omt->tmparray_size*sizeof(omt->tmparray[0]);
    return header_bytes+node_bytes + scratch_bytes;
}
#if !defined(OMT_H)
#define OMT_H
#ident "Copyright (c) 2008 Tokutek Inc. All rights reserved."
// Order Maintenance Tree (OMT)
//
// Maintains a collection of totally ordered values, where each value has an integer weight.
// The OMT is a mutable datatype.
//
// The Abstraction:
//
// An OMT is a vector of values, $V$, where $|V|$ is the length of the vector.
// The vector is numbered from $0$ to $|V|-1$.
// Each value has a weight. The weight of the $i$th element is denoted $w(V_i)$.
//
// We can create a new OMT, which is the empty vector.
//
// We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
// $|V'|=1+|V|$ and
//
// V'_j = V_j if $j<i$
// x if $j=i$
// V_{j-1} if $j>i$.
//
// We can specify $i$ using a kind of function instead of as an integer.
// Let $b$ be a function mapping from values to nonzero integers, such that
// the signum of $b$ is monotonically increasing.
// We can specify $i$ as the minimum integer such that $b(V_i)>0$.
//
// We look up a value using its index, or using a Heaviside function.
// For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
// When looking up values, we can look up
// $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.)
// (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
// $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.)
// $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.)
//
// When looking up a value using a Heaviside function, we get the value and its index.
//
// We can also split an OMT into two OMTs, splitting the weight of the values evenly.
// Find a value $j$ such that the values to the left of $j$ have about the same total weight as the values to the right of $j$.
// The resulting two OMTs contain the values to the left of $j$ and the values to the right of $j$ respectively.
// All of the values from the original OMT go into one of the new OMTs.
// If the weights of the values don't split exactly evenly, then the implementation has the freedom to choose whether
// the new left OMT or the new right OMT is larger.
//
// Performance:
// Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
// The memory required is O(|V|).
//
//**********************************************************************
//* OMT Cursors
//**********************************************************************
// OMTs also support cursors.
// An OMTCURSOR is a mutable object that, at any moment in time, is
// either associated with a single OMT or is not associated with any
// OMT. Many different OMTCURSORs can be associated with a single OMT.
// We say that an OMTCURSOR is *valid* if it is currently
// associated with an OMT and has an abstract offset assigned to it.
// An OMTCURSOR that is not valid is said to be invalid.
// Abstractly, an OMTCURSOR simply contains an integer offset of a
// particular OMTVALUE. We call this abstract integer the *offset*.
// Note, however, that the implementation may use a more
// complex representation in order to obtain higher performance.
// (Note: A first implementation might use the integer.)
// Given a valid OMTCURSOR, one can
// * obtain the OMTVALUE at which the integer points in O(1) time,
// * increment or decrement the abstract integer (usually quickly.)
// The requirement is that if the cursor is initialized to a
// randomly chosen valid integer, then the integer can be
// incremented in O(1) expected time.
// The OMTCURSOR may become invalidated under several conditions:
// * Incrementing or decrementing the abstract integer out of its
// valid range invalidates the OMTCURSOR.
// * If the OMT is modified, it may invalidate the cursor.
// * The user of the OMTCURSOR may explicitly invalidate the cursor.
// * The OMT is destroyed (in which case the OMTCURSOR is
// invalidated, but not destroyed.)
// Implementation Hint: One way to implement the OMTCURSOR is with an
// integer. The problem is that obtaining the value at which the integer
// points takes O(\log n) time, which is not fast enough to meet the
// specification. However, this implementation is probably much
// faster than our current implementation because it is O(\log n)
// integer comparisons instead of O(\log n) key comparisons. This
// simple implementation may be the right thing for a first cut.
//
// To actually achieve the performance requirements, here's a better
// implementation: The OMTCURSOR contains a path from root to leaf.
// Fetching the current value is O(1) time since the leaf is
// immediately accessible. Modifying the path to find the next or
// previous item has O(1) expected time at a randomly chosen valid
// point
//
// The path can be implemented as an array. It probably makes sense
// for the array to be dynamically resized as needed. Since the
// array's size is O(log n), it is not necessary to ever shrink the
// array. Also, from the perspective of testing, it's probably best
// if the array is initialized to a short length (e.g., length 4) so
// that the doubling code is actually exercised.
// One way to implement invalidation is for each OMT to maintain a
// doubly linked list of OMTCURSORs. When destroying an OMT or
// changing the OMT's shape, one can simply step through the list
// invalidating all the OMTCURSORs.
// The list of OMTCURSORs should use the list.h abstraction. If it's
// not clear how to use it, Rich can explain it.
// The programming API:
typedef struct omt *OMT;
typedef struct omtcursor *OMTCURSOR;
int toku_omt_create (OMT *omtp);
// Effect: Create an empty OMT. Stores it in *omtp.
// Requires: omtp != NULL
// Returns:
// 0 success
// ENOMEM out of memory (and doesn't modify *omtp)
// Performance: constant time.
int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues);
// Effect: Create a OMT containing values. The number of values is in numvalues.
// Stores the new OMT in *omtp.
// Requires: omtp != NULL
// Requires: values != NULL
// Requires: values is sorted
// Returns:
// 0 success
// ENOMEM out of memory (and doesn't modify *omtp)
// Performance: time=O(numvalues)
// Rationale: Normally to insert N values takes O(N lg N) amortized time.
// If the N values are known in advance, are sorted, and
// the structure is empty, we can batch insert them much faster.
void toku_omt_destroy(OMT *omtp);
// Effect: Destroy an OMT, freeing all its memory.
// Does not free the OMTVALUEs stored in the OMT.
// Those values may be freed before or after calling toku_omt_destroy.
// Also sets *omtp=NULL.
// Requires: omtp != NULL
// Requires: *omtp != NULL
// Rationale: The usage is to do something like
// toku_omt_destroy(&s->omt);
// and now s->omt will have a NULL pointer instead of a dangling freed pointer.
// Rationale: Returns no values since free() cannot fail.
// Rationale: Does not free the OMTVALUEs to reduce complexity.
// Performance: time=O(toku_omt_size(*omtp))
u_int32_t toku_omt_size(OMT V);
// Effect: return |V|.
// Requires: V != NULL
// Performance: time=O(1)
int toku_omt_iterate_on_range(OMT omt, u_int32_t left, u_int32_t right, int (*f)(OMTVALUE, u_int32_t, void*), void*v);
// Effect: Iterate over the values of the omt, from left to right, calling f on each value.
// The second argument passed to f is the index of the value.
// The third argument passed to f is v.
// The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive).
// We will iterate only over [left,right)
//
// Requires: omt != NULL
// left <= right
// Requires: f != NULL
// Returns:
// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate_on_range.
// If f always returns zero, then toku_omt_iterate_on_range returns 0.
// Requires: Don't modify omt while running. (E.g., f may not insert or delete values from omt.)
// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt.
// Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v);
// Effect: Iterate over the values of the omt, from left to right, calling f on each value.
// The second argument passed to f is the index of the value.
// The third argument passed to f is v.
// The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive).
// Requires: omt != NULL
// Requires: f != NULL
// Returns:
// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate.
// If f always returns zero, then toku_omt_iterate returns 0.
// Requires: Don't modify omt while running. (E.g., f may not insert or delete values from omt.)
// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt.
// Rationale: Although the functional iterator requires defining another function (as opposed to C++ style iterator), it is much easier to read.
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Increases indexes of all items at slot >= index by 1.
// Insert value into the position at index.
//
// Returns:
// 0 success
// ERANGE if index>toku_omt_size(omt)
// ENOMEM
// On error, omt is unchanged.
// Performance: time=O(\log N) amortized time.
// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Replaces the item at index with value.
// Returns:
// 0 success
// ERANGE if index>=toku_omt_size(omt)
// On error, omt is unchanged.
// Performance: time=O(\log N)
// Rationale: The BRT needs to be able to replace a value with another copy of the same value (allocated in a different location)
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index);
// Effect: Insert value into the OMT.
// If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
// Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
// If no such i exists, then let i be |V|
// Then this has the same effect as
// toku_omt_insert_at(omt, value, i);
// If index!=NULL then i is stored in *index
// Requires: The signum of h must be monotonically increasing.
// Returns:
// 0 success
// DB_KEYEXIST the key is present (h was equal to zero for some value)
// ENOMEM
// On nonzero return, omt is unchanged.
// On nonzero non-DB_KEYEXIST return, *index is unchanged.
// Performance: time=O(\log N) amortized.
// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
int toku_omt_delete_at(OMT omt, u_int32_t index);
// Effect: Delete the item in slot index.
// Decreases indexes of all items at slot >= index by 1.
// Returns
// 0 success
// ERANGE if index>=toku_omt_size(omt)
// On error, omt is unchanged.
// Rationale: To delete an item, first find its index using toku_omt_find, then delete it.
// Performance: time=O(\log N) amortized.
int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c);
// Effect: Set *v=V_i
// If c != NULL then set c's abstract offset to i.
// Requires: v != NULL
// Returns
// 0 success
// ERANGE if i>=toku_omt_size(V)
// ENOMEM if c!=NULL and we run out of memory
// On nonzero return, *v is unchanged, and c (if nonnull) is either
// invalidated or unchanged.
// Performance: time=O(\log N)
// Notes: It is possible that c was previously valid and was
// associated with a different OMT. If c is changed by this
// function, the function must remove c's association with the old
// OMT, and associate it with the new OMT.
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
// Effect: Find the smallest i such that h(V_i, extra)>=0
// If c != NULL and there is such an i then set c's abstract offset to i.
// If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
// If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND.
// If there is no such i then set *index=toku_omt_size(V), invalidate the cursor (if not NULL), and return DB_NOTFOUND.
// Requires: index!=NULL
// Returns
// 0 success
// ENOMEM if c!=NULL and we run out of memory
// Performance: time=O(\log N) (calls to h)
// Notes: It is possible that c was previously valid and was
// associated with a different OMT. If c is changed by this
// function, the function must remove c's association with the old
// OMT, and associate it with the new OMT.
// Future directions: the current implementation can be improved, in some cases, by supporting tail recursion.
// This would require an additional parameter that represents the current value of the index where the function is recursing,
// so that it becomes similar to the way fetch works.
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
// Effect:
// If direction >0 then find the smallest i such that h(V_i,extra)>0.
// If direction <0 then find the largest i such that h(V_i,extra)<0.
// (Direction may not be equal to zero.)
// If value!=NULL then store V_i in *value
// If index!=NULL then store i in *index.
// If c != NULL and there is such an i then set c's abstract offset to i.
// Requires: The signum of h is monotonically increasing.
// Performance: time=O(\log N) (calls to h)
// Returns
// 0 success
// DB_NOTFOUND no such value is found.
// ENOMEM if c!= NULL and we run out of memory
// On nonzero return, *value and *index are unchanged, and c (if nonnull) is either
// invalidated or unchanged.
// Notes: It is possible that c was previously valid and was
// associated with a different OMT. If c is changed by this
// function, the function must remove c's association with the old
// OMT, and associate it with the new OMT.
// Rationale:
// Here's how to use the find function to find various things
// Cases for find:
// find first value: ( h(v)=+1, direction=+1 )
// find last value ( h(v)=-1, direction=-1 )
// find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 )
// find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 )
// find X or successor to X ( same as find first X. )
//
// Rationale: To help understand heaviside functions and behavior of find:
// There are 7 kinds of heaviside functions.
// The signum of the h must be monotonically increasing.
// Given a function of the following form, A is the element
// returned for direction>0, B is the element returned
// for direction<0, C is the element returned for
// direction==0 (see find_zero) (with a return of 0), and D is the element
// returned for direction==0 (see find_zero) with a return of DB_NOTFOUND.
// If any of A, B, or C are not found, then asking for the
// associated direction will return DB_NOTFOUND.
// See find_zero for more information.
//
// Let the following represent the signum of the heaviside function.
//
// -...-
// A
// D
//
// +...+
// B
// D
//
// 0...0
// C
//
// -...-0...0
// AC
//
// 0...0+...+
// C B
//
// -...-+...+
// AB
// D
//
// -...-0...0+...+
// AC B
int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
// Effect: Create a new OMT, storing it in *newomt.
// The values to the right of index (starting at index) are moved to *newomt.
// Requires: omt != NULL
// Requires: newomt != NULL
// Returns
// 0 success,
// ERANGE if index > toku_omt_size(omt)
// ENOMEM
// On nonzero return, omt and *newomt are unmodified.
// Performance: time=O(n)
// Rationale: We don't need a split-evenly operation. We need to split items so that their total sizes
// are even, and other similar splitting criteria. It's easy to split evenly by calling toku_omt_size(), and dividing by two.
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
// Effect: Appends leftomt and rightomt to produce a new omt.
// Sets *newomt to the new omt.
// On success, leftomt and rightomt are destroyed.
// Returns 0 on success
// ENOMEM on out of memory.
// On error, nothing is modified.
// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case.
void toku_omt_clear(OMT omt);
// Effect: Set the tree to be empty.
// Note: Will not resize the array, since void precludes allowing a malloc.
// Performance: time=O(1)
int toku_omt_cursor_create (OMTCURSOR *p);
// Effect: Create an OMTCURSOR. Stores it in *p. The OMTCURSOR is
// initially invalid.
// Requires: p != NULL
// Returns:
// 0 success
// ENOMEM out of memory (and doesn't modify *p)
// Performance: constant time.
void toku_omt_cursor_destroy (OMTCURSOR *p);
// Effect: Invalidates *p (if it is valid) and frees any memory
// associated with *p.
// Also sets *p=NULL.
// Requires: *p != NULL
// Rationale: The usage is to do something like
// toku_omt_cursor_destroy(&c);
// and now c will have a NULL pointer instead of a dangling freed pointer.
// Rationale: Returns no values since free() cannot fail.
// Performance: time=O(1) x #calls to free
int toku_omt_cursor_is_valid (OMTCURSOR c);
// Effect: returns 0 iff c is invalid.
// Performance: time=O(1)
int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v);
// Effect: Increment c's abstract offset, and store the corresponding value in v.
// Requires: v != NULL
// Returns
// 0 success
// EINVAL if the offset goes out of range or c is invalid.
// On nonzero return, *v is unchanged and c is invalidated.
// Performance: time=O(log N) worst case, expected time=O(1) for a randomly
// chosen initial position.
int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v);
// Effect: Store in v the value pointed by c's abstract offset
// Requires: v != NULL
// Returns
// 0 success
// EINVAL if c is invalid
// On non-zero return, *v is unchanged
// Performance: O(1) time
int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
// Effect: Decrement c's abstract offset, and store the corresponding value in v.
// Requires: v != NULL
// Returns
// 0 success
// EINVAL if the offset goes out of range or c is invalid.
// On nonzero return, *v is unchanged and c is invalidated.
// Performance: time=O(log N) worst case, expected time=O(1) for a randomly
// chosen initial position.
void toku_omt_cursor_invalidate (OMTCURSOR c);
// Effect: Invalidate c. (This does not mean that c is destroyed or
// that its memory is freed.)
// Usage Hint: The OMTCURSOR is designed to be used inside the
// BRTcursor. A BRTcursor includes a pointer to an OMTCURSOR, which
// is created when the BRTcursor is created.
//
// The brt cursor implements its search by first finding a leaf node,
// containing an OMT. The BRT then passes its OMTCURSOR into the lookup
// method (i.e., one of toku_omt_fetch, toku_omt_find_zero,
// toku_omt_find). The lookup method, if successful, sets the
// OMTCURSOR to refer to that element.
//
// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
// operation can be implemented using next or prev on the OMTCURSOR.
//
// If the OMTCURSOR becomes invalidated, then the BRT must search
// again from the root of the tree. The only error that an OMTCURSOR
// next operation can raise is that it is invalid.
//
// If an element is inserted into the BRT, it may cause an OMTCURSOR
// to become invalid. This is especially true if the element will end
// up in the OMT associated with the cursor. A simple implementation
// is to invalidate all OMTCURSORS any time anything is inserted into
// the BRT. Since the BRT already contains a list of BRT cursors
// associated with it, it is straightforward to go through that list
// and invalidate all the cursors.
//
// When the BRT closes a cursor, it destroys the OMTCURSOR.
size_t toku_omt_memory_size (OMT omt);
// Effect: Return the size (in bytes) of the omt, as it resides in main memory. Don't include any of the OMTVALUES.
#endif /* #ifndef OMT_H */
...@@ -2,15 +2,44 @@ ...@@ -2,15 +2,44 @@
#include <errno.h> #include <errno.h>
#include <sys/types.h> #include <sys/types.h>
#include <stdint.h>
typedef void *OMTVALUE; typedef void *OMTVALUE;
#include "omt.h" #include "omt.h"
#include "omt-internal.h"
#include "../newbrt/memory.h" #include "../newbrt/memory.h"
#include "../newbrt/toku_assert.h" #include "../newbrt/toku_assert.h"
#include "../include/db.h" #include "../include/db.h"
#include "../newbrt/brttypes.h" #include "../newbrt/brttypes.h"
// Nodes are referred to by their index into the omt's nodes array rather than by pointer.
typedef u_int32_t node_idx;
// Sentinel index meaning "no node" (an empty tree or an absent subtree).
static const node_idx NODE_NULL = UINT32_MAX;
// One node of the weight-annotated binary tree.  Children are named by index
// into the owning omt's nodes array; NODE_NULL denotes an absent child.
typedef struct omt_node *OMT_NODE;
struct omt_node {
u_int32_t weight; /* Size of subtree rooted at this node (including this one). */
node_idx left; /* Index of left subtree. */
node_idx right; /* Index of right subtree. */
OMTVALUE value; /* The value stored in the node. */
};
// The OMT itself: a tree whose nodes live in one array and are linked by index,
// plus the list of cursors currently associated with it.
struct omt {
node_idx root; // Index of the root node; NODE_NULL when the tree is empty.
u_int32_t node_capacity; // Number of entries allocated in nodes.
OMT_NODE nodes; // Array holding the tree's nodes.
node_idx free_idx; // Index of the next never-used entry in nodes.
u_int32_t tmparray_size; // Number of entries allocated in tmparray.
node_idx* tmparray; // Scratch array of node indices (presumably used while rebuilding -- TODO confirm).
OMTCURSOR associated; // One element of the circular list of OMTCURSORs associated with this omt; NULL if there are none.
};
// A cursor is valid exactly when omt is non-NULL; a valid cursor is linked into
// that omt's circular list of cursors so the omt can invalidate it.
struct omt_cursor {
OMT omt; // The omt this cursor is associated with. NULL if the cursor is invalid.
int index; // Offset of the current element within omt (this implementation stores the plain integer).
OMTCURSOR next,prev; // Neighbors on the circular doubly linked list of all OMTCURSORs associated with omt; NULL when invalid.
};
static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) { static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
if (num_starting_nodes < 2) num_starting_nodes = 2; if (num_starting_nodes < 2) num_starting_nodes = 2;
...@@ -31,6 +60,7 @@ static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) { ...@@ -31,6 +60,7 @@ static int omt_create_internal(OMT *omtp, u_int32_t num_starting_nodes) {
return errno; return errno;
} }
result->free_idx = 0; result->free_idx = 0;
result->associated = NULL;
*omtp = result; *omtp = result;
return 0; return 0;
} }
...@@ -39,8 +69,66 @@ int toku_omt_create (OMT *omtp) { ...@@ -39,8 +69,66 @@ int toku_omt_create (OMT *omtp) {
return omt_create_internal(omtp, 2); return omt_create_internal(omtp, 2);
} }
// Allocate a new cursor and store it in *omtcp.  The cursor starts out invalid:
// it belongs to no omt and is on no cursor list.
// Returns 0 on success, or errno if the allocation fails (in which case *omtcp is untouched).
int toku_omt_cursor_create (OMTCURSOR *omtcp) {
    OMTCURSOR MALLOC(cursor);
    if (cursor==NULL) return errno;
    cursor->omt  = NULL;
    cursor->prev = NULL;
    cursor->next = NULL;
    *omtcp = cursor;
    return 0;
}
// Detach c from the omt it is associated with, leaving c invalid.
// A NULL cursor or an already-invalid cursor is left alone.  The cursor's memory is not freed.
void toku_omt_cursor_invalidate (OMTCURSOR c) {
    if (c==NULL) return;
    if (c->omt==NULL) return;
    if (c->next != c) {
        // Other cursors remain on the list: splice c out from between its neighbors.
        OMTCURSOR after  = c->next;
        OMTCURSOR before = c->prev;
        // Keep the omt's entry point into the list off the cursor being removed.
        if (c->omt->associated == c) c->omt->associated = after;
        after->prev  = before;
        before->next = after;
    } else {
        // c is the only cursor on the list, so the list becomes empty.
        c->omt->associated = NULL;
    }
    c->prev = NULL;
    c->next = NULL;
    c->omt  = NULL;
}
// Invalidate the cursor *p, free its memory, and set *p to NULL so the caller
// is not left holding a dangling pointer.
void toku_omt_cursor_destroy (OMTCURSOR *p) {
    OMTCURSOR doomed = *p;
    toku_omt_cursor_invalidate(doomed); // unlink from its omt's cursor list before freeing
    toku_free(doomed);
    *p = NULL;
}
// Invalidate every cursor associated with omt.
// Repeatedly invalidates whichever cursor omt->associated names until that list is empty.
static void invalidate_cursors (OMT omt) {
    for (OMTCURSOR head = omt->associated; head != NULL; head = omt->associated) {
        toku_omt_cursor_invalidate(head);
    }
}
// Make c a cursor of omt.  If c is already associated with omt nothing happens;
// otherwise c is first detached from whatever omt it was on and then linked
// into omt's circular list of cursors, just before omt->associated.
static void associate (OMT omt, OMTCURSOR c)
{
    if (c->omt==omt) return;
    toku_omt_cursor_invalidate(c);
    OMTCURSOR head = omt->associated;
    if (head!=NULL) {
        // Insert c between the current last element and the head.
        OMTCURSOR tail = head->prev;
        c->prev = tail;
        c->next = head;
        tail->next = c;
        head->prev = c;
    } else {
        // First cursor on this omt: a one-element circular list.
        c->prev = c;
        c->next = c;
        omt->associated = c;
    }
    c->omt = omt;
}
void toku_omt_destroy(OMT *omtp) { void toku_omt_destroy(OMT *omtp) {
OMT omt=*omtp; OMT omt=*omtp;
invalidate_cursors(omt);
toku_free(omt->nodes); toku_free(omt->nodes);
toku_free(omt->tmparray); toku_free(omt->tmparray);
toku_free(omt); toku_free(omt);
...@@ -242,6 +330,7 @@ static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_ ...@@ -242,6 +330,7 @@ static inline void insert_internal(OMT omt, node_idx *n_idxp, OMTVALUE value, u_
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) { int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index) {
int r; int r;
invalidate_cursors(omt);
if (index>nweight(omt, omt->root)) return ERANGE; if (index>nweight(omt, omt->root)) return ERANGE;
if ((r=maybe_resize_and_rebuild(omt, 1+nweight(omt, omt->root), MAYBE_REBUILD))) return r; if ((r=maybe_resize_and_rebuild(omt, 1+nweight(omt, omt->root), MAYBE_REBUILD))) return r;
node_idx* rebalance_idx = NULL; node_idx* rebalance_idx = NULL;
...@@ -310,6 +399,7 @@ static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, O ...@@ -310,6 +399,7 @@ static inline void delete_internal(OMT omt, node_idx *n_idxp, u_int32_t index, O
int toku_omt_delete_at(OMT omt, u_int32_t index) { int toku_omt_delete_at(OMT omt, u_int32_t index) {
OMTVALUE v; OMTVALUE v;
int r; int r;
invalidate_cursors(omt);
if (index>=nweight(omt, omt->root)) return ERANGE; if (index>=nweight(omt, omt->root)) return ERANGE;
if ((r=maybe_resize_and_rebuild(omt, -1+nweight(omt, omt->root), MAYBE_REBUILD))) return r; if ((r=maybe_resize_and_rebuild(omt, -1+nweight(omt, omt->root), MAYBE_REBUILD))) return r;
node_idx* rebalance_idx = NULL; node_idx* rebalance_idx = NULL;
...@@ -318,60 +408,25 @@ int toku_omt_delete_at(OMT omt, u_int32_t index) { ...@@ -318,60 +408,25 @@ int toku_omt_delete_at(OMT omt, u_int32_t index) {
return 0; return 0;
} }
static int omtcursor_stack_push(OMTCURSOR c, node_idx idx) { static inline void fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v) {
if (c->max_pathlen-1<=c->pathlen) {
//Increase max_pathlen
u_int32_t new_max = c->max_pathlen*2;
node_idx *tmp_path = toku_realloc(c->path, new_max*sizeof(*c->path));
if (tmp_path==NULL) return errno;
c->path = tmp_path;
c->max_pathlen = new_max;
}
c->path[c->pathlen++] = idx;
return 0;
}
static node_idx omtcursor_stack_peek(OMTCURSOR c) {
return c->path[c->pathlen-1];
}
static node_idx omtcursor_stack_pop(OMTCURSOR c) {
assert(c->pathlen);
node_idx value = omtcursor_stack_peek(c);;
c->pathlen--;
return value;
}
static void omtcursor_associate(OMTCURSOR c, OMT omt) {
c->omt = omt;
c->pathlen = 0;
}
static inline int fetch_internal(OMT V, node_idx idx, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
int r;
// Add the current index to the cursor path
if (c!=NULL && (r=omtcursor_stack_push(c, idx))) return r;
/* Find the node corresponding to index idx */
OMT_NODE n = V->nodes+idx; OMT_NODE n = V->nodes+idx;
/* Visit recursively the appropriate sub-tree */
if (i < nweight(V, n->left)) { if (i < nweight(V, n->left)) {
return fetch_internal(V, n->left, i, v, c); fetch_internal(V, n->left, i, v);
} else if (i == nweight(V, n->left)) { } else if (i == nweight(V, n->left)) {
*v = n->value; *v = n->value;
} else { } else {
return fetch_internal(V, n->right, i-nweight(V, n->left)-1, v, c); fetch_internal(V, n->right, i-nweight(V, n->left)-1, v);
} }
return 0;
} }
int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) { int toku_omt_fetch(OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c) {
if (i>=nweight(V, V->root)) return ERANGE; if (i>=nweight(V, V->root)) return ERANGE;
if (c!=NULL) omtcursor_associate(c, V); fetch_internal(V, V->root, i, v);
int r = fetch_internal(V, V->root, i, v, c); if (c) {
if (c!=NULL && r!=0) toku_omt_cursor_invalidate(c); associate(V,c);
return r; c->index = i;
}
return 0;
} }
static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right, static inline int iterate_internal(OMT omt, u_int32_t left, u_int32_t right,
...@@ -399,6 +454,8 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, ...@@ -399,6 +454,8 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v,
int r; int r;
u_int32_t idx; u_int32_t idx;
invalidate_cursors(omt);
r = toku_omt_find_zero(omt, h, v, NULL, &idx, NULL); r = toku_omt_find_zero(omt, h, v, NULL, &idx, NULL);
if (r==0) { if (r==0) {
if (index) *index = idx; if (index) *index = idx;
...@@ -412,36 +469,26 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, ...@@ -412,36 +469,26 @@ int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v,
return 0; return 0;
} }
static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
int r; // requires: index!=NULL
{
if (n_idx==NODE_NULL) { if (n_idx==NODE_NULL) {
*index=0; *index = 0;
return DB_NOTFOUND; return DB_NOTFOUND;
} }
// Add the current index to the cursor path
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
OMT_NODE n = omt->nodes+n_idx; OMT_NODE n = omt->nodes+n_idx;
int hv = h(n->value, extra); int hv = h(n->value, extra);
if (hv<0) { if (hv<0) {
r = find_internal_zero(omt, n->right, h, extra, value, index, c); int r = find_internal_zero(omt, n->right, h, extra, value, index);
*index += nweight(omt, n->left)+1; *index += nweight(omt, n->left)+1;
return r; return r;
} else if (hv>0) { } else if (hv>0) {
r = find_internal_zero(omt, n->left, h, extra, value, index, c); return find_internal_zero(omt, n->left, h, extra, value, index);
if (c!=NULL && r==DB_NOTFOUND && *index==nweight(omt, n->left)) {
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
return r;
} else { } else {
r = find_internal_zero(omt, n->left, h, extra, value, index, c); int r = find_internal_zero(omt, n->left, h, extra, value, index);
if (r==DB_NOTFOUND) { if (r==DB_NOTFOUND) {
*index = nweight(omt, n->left); *index = nweight(omt, n->left);
*value = n->value; if (value!=NULL) *value = n->value;
if (c!=NULL) {
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
r = 0; r = 0;
} }
return r; return r;
...@@ -449,101 +496,84 @@ static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, ...@@ -449,101 +496,84 @@ static inline int find_internal_zero(OMT omt, node_idx n_idx, int (*h)(OMTVALUE,
} }
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
if (c!=NULL) omtcursor_associate(c, V); u_int32_t tmp_index;
u_int32_t idx_tmp; if (index==0) index=&tmp_index;
OMTVALUE val_tmp; int r = find_internal_zero(V, V->root, h, extra, value, index);
int r = find_internal_zero(V, V->root, h, extra, &val_tmp, &idx_tmp, c); if (c && r==0) {
if (c!=NULL && ( (r!=0 && r!=DB_NOTFOUND) || associate(V,c);
idx_tmp==nweight(V, V->root))) { c->index = *index;
toku_omt_cursor_invalidate(c); } else {
} toku_omt_cursor_invalidate(c);
if (c==NULL || r==0 || r==DB_NOTFOUND) {
if (index!=NULL) *index = idx_tmp;
if (value!=NULL && r==0) *value = val_tmp;
} }
return r; return r;
} }
// If direction <0 then find the largest i such that h(V_i,extra)<0. // If direction <0 then find the largest i such that h(V_i,extra)<0.
static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { static inline int find_internal_minus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
int r; // requires: index!=NULL
{
if (n_idx==NODE_NULL) return DB_NOTFOUND; if (n_idx==NODE_NULL) return DB_NOTFOUND;
// Add the current index to the cursor path
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
OMT_NODE n = omt->nodes+n_idx; OMT_NODE n = omt->nodes+n_idx;
int hv = h(n->value, extra); int hv = h(n->value, extra);
if (hv<0) { if (hv<0) {
r = find_internal_minus(omt, n->right, h, extra, value, index, c); int r = find_internal_minus(omt, n->right, h, extra, value, index);
if (r==0) (*index) += nweight(omt, n->left)+1; if (r==0) *index += nweight(omt, n->left)+1;
else if (r==DB_NOTFOUND) { else if (r==DB_NOTFOUND) {
*index = nweight(omt, n->left); *index = nweight(omt, n->left);
*value = n->value; if (value!=NULL) *value = n->value;
if (c!=NULL) {
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
r = 0; r = 0;
} }
return r; return r;
} else { } else {
return find_internal_minus(omt, n->left, h, extra, value, index, c); return find_internal_minus(omt, n->left, h, extra, value, index);
} }
} }
// If direction >0 then find the smallest i such that h(V_i,extra)>0. // If direction >0 then find the smallest i such that h(V_i,extra)>0.
static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { static inline int find_internal_plus(OMT omt, node_idx n_idx, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index)
int r; // requires: index!=NULL
{
if (n_idx==NODE_NULL) return DB_NOTFOUND; if (n_idx==NODE_NULL) return DB_NOTFOUND;
// Add the current index to the cursor path
if (c!=NULL && (r=omtcursor_stack_push(c, n_idx))) return r;
OMT_NODE n = omt->nodes+n_idx; OMT_NODE n = omt->nodes+n_idx;
int hv = h(n->value, extra); int hv = h(n->value, extra);
if (hv>0) { if (hv>0) {
r = find_internal_plus(omt, n->left, h, extra, value, index, c); int r = find_internal_plus(omt, n->left, h, extra, value, index);
if (r==DB_NOTFOUND) { if (r==DB_NOTFOUND) {
*index = nweight(omt, n->left); *index = nweight(omt, n->left);
*value = n->value; if (value!=NULL) *value = n->value;
if (c!=NULL) {
//Truncate the saved cursor path at n_idx.
while (omtcursor_stack_peek(c)!=n_idx) omtcursor_stack_pop(c);
}
r = 0; r = 0;
} }
return r; return r;
} else { } else {
r = find_internal_plus(omt, n->right, h, extra, value, index, c); int r = find_internal_plus(omt, n->right, h, extra, value, index);
if (r==0) (*index) += nweight(omt, n->left)+1; if (r==0) *index += nweight(omt, n->left)+1;
return r; return r;
} }
} }
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) { int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c) {
u_int32_t tmp_index;
int r;
if (index==0) index=&tmp_index;
if (direction==0) { if (direction==0) {
abort(); abort();
} else if (direction<0) {
r = find_internal_minus(V, V->root, h, extra, value, index);
} else {
r = find_internal_plus( V, V->root, h, extra, value, index);
} }
else { if (c) {
int r; associate(V,c);
u_int32_t idx_tmp; c->index=*index;
OMTVALUE val_tmp;
if (c!=NULL) omtcursor_associate(c, V);
if (direction<0) {
r = find_internal_minus(V, V->root, h, extra, &val_tmp, &idx_tmp, c);
} else {
r = find_internal_plus( V, V->root, h, extra, &val_tmp, &idx_tmp, c);
}
if (c!=NULL && r!=0) toku_omt_cursor_invalidate(c);
if (r==0) {
if (index!=NULL) *index = idx_tmp;
if (value!=NULL) *value = val_tmp;
}
return r;
} }
return r;
} }
int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) { int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) {
int r = ENOSYS; int r = ENOSYS;
OMT newomt = NULL; OMT newomt = NULL;
OMTVALUE *tmp_values = NULL; OMTVALUE *tmp_values = NULL;
invalidate_cursors(omt);
if (index>nweight(omt, omt->root)) { r = ERANGE; goto cleanup; } if (index>nweight(omt, omt->root)) { r = ERANGE; goto cleanup; }
u_int32_t newsize = nweight(omt, omt->root)-index; u_int32_t newsize = nweight(omt, omt->root)-index;
if ((r = omt_create_internal(&newomt, newsize))) goto cleanup; if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
...@@ -568,6 +598,8 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) { ...@@ -568,6 +598,8 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
int r = ENOSYS; int r = ENOSYS;
OMT newomt = NULL; OMT newomt = NULL;
OMTVALUE *tmp_values = NULL; OMTVALUE *tmp_values = NULL;
invalidate_cursors(leftomt);
invalidate_cursors(rightomt);
u_int32_t newsize = toku_omt_size(leftomt)+toku_omt_size(rightomt); u_int32_t newsize = toku_omt_size(leftomt)+toku_omt_size(rightomt);
if ((r = omt_create_internal(&newomt, newsize))) goto cleanup; if ((r = omt_create_internal(&newomt, newsize))) goto cleanup;
MALLOC_N(newsize, tmp_values); MALLOC_N(newsize, tmp_values);
...@@ -589,118 +621,38 @@ cleanup: ...@@ -589,118 +621,38 @@ cleanup:
} }
void toku_omt_clear(OMT omt) { void toku_omt_clear(OMT omt) {
invalidate_cursors(omt);
omt->free_idx = 0; omt->free_idx = 0;
omt->root = NODE_NULL; omt->root = NODE_NULL;
} }
int toku_omt_cursor_create(OMTCURSOR *p) { unsigned long toku_omt_memory_size (OMT omt) {
OMTCURSOR MALLOC(result); return sizeof(*omt)+omt->node_capacity*sizeof(omt->nodes[0]) + omt->tmparray_size*sizeof(omt->tmparray[0]);
if (result==NULL) return errno;
result->max_pathlen = TOKU_OMTCURSOR_INITIAL_SIZE;
result->pathlen = 0;
MALLOC_N(result->max_pathlen, result->path);
if (result->path==NULL) {
toku_free(result);
return errno;
}
result->omt = NULL;
*p = result;
return 0;
}
void toku_omt_cursor_destroy(OMTCURSOR *p) {
OMTCURSOR c=*p;
toku_free(c->path);
toku_free(c);
*p = NULL;
}
int toku_omt_cursor_is_valid(OMTCURSOR c) {
return c->pathlen>0 && c->omt!=NULL;
}
void toku_omt_cursor_invalidate(OMTCURSOR c) {
c->pathlen = 0;
c->omt=NULL;
}
static void omtcursor_current_internal(OMTCURSOR c, OMTVALUE *v) {
*v = c->omt->nodes[omtcursor_stack_peek(c)].value;
}
int toku_omt_cursor_current(OMTCURSOR c, OMTVALUE *v) {
if (!toku_omt_cursor_is_valid(c)) return EINVAL;
omtcursor_current_internal(c, v);
return 0;
}
static int omtcursor_next_internal(OMTCURSOR c) {
if (!toku_omt_cursor_is_valid(c)) return EINVAL;
OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c);
if (current->right!=NODE_NULL) {
//Enter into subtree
if (omtcursor_stack_push(c, current->right)) goto invalidate;
current = c->omt->nodes+current->right;
while (current->left!=NODE_NULL) {
if (omtcursor_stack_push(c, current->left)) goto invalidate;
current = c->omt->nodes+current->left;
}
return 0;
}
else {
//Pop the stack till we remove a left child.
while (c->pathlen>=2) {
node_idx child_idx = omtcursor_stack_pop(c);
node_idx parent_idx = omtcursor_stack_peek(c);
if (c->omt->nodes[parent_idx].left==child_idx) return 0;
}
goto invalidate;
}
invalidate:
toku_omt_cursor_invalidate(c);
return EINVAL;
} }
int toku_omt_cursor_next(OMTCURSOR c, OMTVALUE *v) { int toku_omt_cursor_is_valid (OMTCURSOR c) {
if (omtcursor_next_internal(c)) return EINVAL; return c->omt!=NULL;
omtcursor_current_internal(c, v);
return 0;
} }
static int omtcursor_prev_internal(OMTCURSOR c) { int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v) {
if (!toku_omt_cursor_is_valid(c)) return EINVAL; if (c->omt == NULL) return EINVAL;
OMT_NODE current = c->omt->nodes+omtcursor_stack_peek(c); c->index++;
if (current->left!=NODE_NULL) { int r = toku_omt_fetch(c->omt, c->index, v, 0);
//Enter into subtree if (r!=0) toku_omt_cursor_invalidate(c);
if (omtcursor_stack_push(c, current->left)) goto invalidate; return r;
current = c->omt->nodes+current->left;
while (current->right!=NODE_NULL) {
if (omtcursor_stack_push(c, current->right)) goto invalidate;
current = c->omt->nodes+current->right;
}
return 0;
}
else {
//Pop the stack till we remove a right child.
while (c->pathlen>=2) {
node_idx child_idx = omtcursor_stack_pop(c);
node_idx parent_idx = omtcursor_stack_peek(c);
if (c->omt->nodes[parent_idx].right==child_idx) return 0;
}
goto invalidate;
}
invalidate:
toku_omt_cursor_invalidate(c);
return EINVAL;
} }
int toku_omt_cursor_prev(OMTCURSOR c, OMTVALUE *v) { int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v) {
if (omtcursor_prev_internal(c)) return EINVAL; if (c->omt == NULL) return EINVAL;
omtcursor_current_internal(c, v); c->index--;
return 0; int r = toku_omt_fetch(c->omt, c->index, v, 0);
if (r!=0) toku_omt_cursor_invalidate(c);
return r;
} }
size_t toku_omt_memory_size (OMT omt) { int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v) {
return sizeof(*omt)+omt->node_capacity*sizeof(omt->nodes[0]) + omt->tmparray_size*sizeof(omt->tmparray[0]); if (c->omt == NULL) return EINVAL;
int r = toku_omt_fetch(c->omt, c->index, v, 0);
if (r!=0) toku_omt_cursor_invalidate(c);
return r;
} }
...@@ -49,26 +49,20 @@ ...@@ -49,26 +49,20 @@
// Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function. // Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
// The memory required is O(|V|). // The memory required is O(|V|).
// //
//**********************************************************************
//* OMT Cursors
//**********************************************************************
// OMTs also support cursors. An OMTCURSOR is a mutable // OMTs also support cursors. An OMTCURSOR is a mutable
// An OMTCURSOR is a mutable object that, at any moment in time, is // An OMTCURSOR is a mutable object that, at any moment in time, is
// either associated with a single OMT or is not associated with any // either associated with a single OMT or is not associated with any
// OMT. Many different OMTCURSORs can be associated with a single OMT. // OMT. Many different OMTCURSORs can be associated with a single OMT.
//
// We say that an OMTCURSOR is *valid* if it is currently // We say that an OMTCURSOR is *invalid* if it is not currently
// associated with an OMT and has an abstract offset assigned to it. // associated with an OMT.
// An OMTCURSOR that is not valid is said to be invalid. //
// Abstractly, an OMTCURSOR simply contains an integer offset of a // Abstractly, an OMTCURSOR simply contains an integer offset of a
// particular OMTVALUE. We call this abstract integer the *offset*. // particular OMTVALUE. We call this abstract integer the *offset*.
// Note, however, that the implementation may use a more // Note, however, that the implementation may use a more
// complex representation in order to obtain higher performance. // complex representation in order to obtain higher performance.
// (Note: A first implementation might use the integer.) // (Note: A first implementation might use the integer.)
//
// Given a valid OMTCURSOR, one // Given a valid OMTCURSOR, one
// * obtain the OMTVALUE at which the integer points in O(1) time, // * obtain the OMTVALUE at which the integer points in O(1) time,
// * increment or decrement the abstract integer (usually quickly.) // * increment or decrement the abstract integer (usually quickly.)
...@@ -84,8 +78,11 @@ ...@@ -84,8 +78,11 @@
// * The OMT is destroyed (in which case the OMTCURSOR is // * The OMT is destroyed (in which case the OMTCURSOR is
// invalidated, but not destroyed.) // invalidated, but not destroyed.)
// Implementation Hint: One way to implement the OMTCURSOR is with an
// integer. The problem is that obtaining the value at which the integer // Implementation Hints
//
// One way to implement the OMTCURSOR is with an integer. The problem
// is that obtaining the value at which the integer
// points takes O(\log n) time, which is not fast enough to meet the // points takes O(\log n) time, which is not fast enough to meet the
// specification. However, this implementation is probably much // specification. However, this implementation is probably much
// faster than our current implementation because it is O(\log n) // faster than our current implementation because it is O(\log n)
...@@ -105,20 +102,51 @@ ...@@ -105,20 +102,51 @@
// array. Also, from the perspective of testing, it's probably best // array. Also, from the perspective of testing, it's probably best
// if the array is initialized to a short length (e.g., length 4) so // if the array is initialized to a short length (e.g., length 4) so
// that the doubling code is actually exercised. // that the doubling code is actually exercised.
//
// One way to implement invalidation is for each OMT to maintain a // One way to implement invalidation is for each OMT to maintain a
// doubly linked list of OMTCURSORs. When destroying an OMT or // doubly linked list of OMTCURSORs. When destroying an OMT or
// changing the OMT's shape, one can simply step through the list // changing the OMT's shape, one can simply step through the list
// invalidating all the OMTCURSORs. // invalidating all the OMTCURSORs.
//
// The list of OMTCURSORs should use the list.h abstraction. If it's // The list of OMTCURSORs should use the list.h abstraction. If it's
// not clear how to use it, Rich can explain it. // not clear how to use it, Rich can explain it.
// Usage Hint: The OMTCURSOR is designed to be used inside the
// BRTcursor. A BRTcursor includes a pointer to an OMTCURSOR, which
// is created when the BRTcursor is created.
//
// The brt cursor implements its search by first finding a leaf node,
// containing an OMT. The BRT then passes its OMTCURSOR into the lookup
// method (i.e., one of toku_omt_fetch, toku_omt_find_zero,
// toku_omt_find). The lookup method, if successful, sets the
// OMTCURSOR to refer to that element.
//
// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
// operation can be implemented using next or prev on the OMTCURSOR.
//
// If the OMTCURSOR becomes invalidated, then the BRT must search
// again from the root of the tree. The only error that an OMTCURSOR
// next operation can raise is that it is invalid.
//
// If an element is inserted into the BRT, it may cause an OMTCURSOR
// to become invalid. This is especially true if the element will end
// up in the OMT associated with the cursor. A simple implementation
// is to invalidate all OMTCURSORS any time anything is inserted into
// the BRT. Since the BRT already contains a list of BRT cursors
// associated with it, it is straightforward to go through that list
// and invalidate all the cursors.
//
// When the BRT closes a cursor, it destroys the OMTCURSOR.
// The programming API: // The programming API:
//typedef struct value *OMTVALUE; // A slight improvement over using void*.
typedef struct omt *OMT; typedef struct omt *OMT;
typedef struct omt_cursor *OMTCURSOR;
typedef struct omtcursor *OMTCURSOR;
int toku_omt_create (OMT *omtp); int toku_omt_create (OMT *omtp);
...@@ -242,108 +270,88 @@ int toku_omt_delete_at(OMT omt, u_int32_t index); ...@@ -242,108 +270,88 @@ int toku_omt_delete_at(OMT omt, u_int32_t index);
// Rationale: To delete an item, first find its index using toku_omt_find, then delete it. // Rationale: To delete an item, first find its index using toku_omt_find, then delete it.
// Performance: time=O(\log N) amortized. // Performance: time=O(\log N) amortized.
int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c); int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v, OMTCURSOR c);
// Effect: Set *v=V_i // Effect: Set *v=V_i
// If c != NULL then set c's abstract offset to i. // If c!=NULL then set c's abstract offset to i.
// Requires: v != NULL // Requires: v != NULL
// Returns // Returns
// 0 success // 0 success
// ERANGE if index>=toku_omt_size(omt) // ERANGE if index>=toku_omt_size(omt)
// ENOMEM if c!=NULL and we run out of memory
// On nonzero return, *v is unchanged, and c (if nonnull) is either // On nonzero return, *v is unchanged, and c (if nonnull) is either
// invalidated or unchanged. // invalidated or unchanged.
// Performance: time=O(\log N) // Performance: time=O(\log N)
// Notes: It is possible that c was previously valid and was // Implementation Notes: It is possible that c was previously valid and was
// associated with a different OMT. If c is changed by this // associated with a different OMT. If c is changed by this
// function, the function must remove c's association with the old // function, the function must remove c's association with the old
// OMT, and associate it with the new OMT. // OMT, and associate it with the new OMT.
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c); int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
// Effect: Find the smallest i such that h(V_i, extra)>=0 // Effect: Find the smallest i such that h(V_i, extra)>=0
// If c != NULL and there is such an i then set c's abstract offset to i.
// If there is such an i and h(V_i,extra)==0 then set *index=i and return 0. // If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
// If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND. // If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND.
// If there is no such i then set *index=toku_omt_size(V), invalidate the cursor (if not NULL), and return DB_NOTFOUND. // If there is no such i then set *index=toku_omt_size(V) and return DB_NOTFOUND.
// Requires: index!=NULL // Requires: index!=NULL
// Returns
// 0 success
// ENOMEM if c!=NULL and we run out of memory
// Performance: time=O(\log N) (calls to h)
// Notes: It is possible that c was previously valid and was
// associated with a different OMT. If c is changed by this
// function, the function must remove c's association with the old
// OMT, and associate it with the new OMT.
// Future directions: the current implementation can be improved, in some cases, by supporting tail recursion.
// This would require an additional parameter that represents the current value of the index where the function is recursing,
// so that it becomes similar to the way fetch works.
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c); int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index, OMTCURSOR c);
// Effect: // Effect:
// If direction >0 then find the smallest i such that h(V_i,extra)>0. // If direction >0 then find the smallest i such that h(V_i,extra)>0.
// If direction <0 then find the largest i such that h(V_i,extra)<0. // If direction <0 then find the largest i such that h(V_i,extra)<0.
// (Direction may not be equal to zero.) // (Direction may not be equal to zero.)
// If value!=NULL then store V_i in *value // If value!=NULL then store V_i in *value
// If index!=NULL then store i in *index. // If index!=NULL then store i in *index.
// If c != NULL and there is such an i then set c's abstract offset to i. // Requires: The signum of h is monotonically increasing.
// Requires: The signum of h is monotonically increasing. // Returns
// Performance: time=O(\log N) (calls to h) // 0 success
// Returns // DB_NOTFOUND no such value is found.
// 0 success // On nonzero return, *value and *index are unchanged.
// DB_NOTFOUND no such value is found. // Performance: time=O(\log N)
// ENOMEM if c!= NULL and we run out of memory // Rationale:
// On nonzero return, *value and *index are unchanged, and c (if nonnull) is either // Here's how to use the find function to find various things
// invalidated or unchanged. // Cases for find:
// Notes: It is possible that c was previously valid and was // find first value: ( h(v)=+1, direction=+1 )
// associated with a different OMT. If c is changed by this // find last value ( h(v)=-1, direction=-1 )
// function, the function must remove c's association with the old // find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 )
// OMT, and associate it with the new OMT. // find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 )
// Rationale: // find X or successor to X ( same as find first X. )
// Here's how to use the find function to find various things //
// Cases for find: // Rationale: To help understand heaviside functions and behavior of find:
// find first value: ( h(v)=+1, direction=+1 ) // There are 7 kinds of heaviside functions.
// find last value ( h(v)=-1, direction=-1 ) // The signum of the h must be monotonically increasing.
// find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 ) // Given a function of the following form, A is the element
// find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 ) // returned for direction>0, B is the element returned
// find X or successor to X ( same as find first X. ) // for direction<0, C is the element returned for
// // direction==0 (see find_zero) (with a return of 0), and D is the element
// Rationale: To help understand heaviside functions and behavior of find: // There are 7 kinds of heaviside functions.
// There are 7 kinds of heaviside functions. // If any of A, B, or C are not found, then asking for the
// The signum of the h must be monotonically increasing. // associated direction will return DB_NOTFOUND.
// Given a function of the following form, A is the element // See find_zero for more information.
// returned for direction>0, B is the element returned //
// for direction<0, C is the element returned for // Let the following represent the signum of the heaviside function.
// direction==0 (see find_zero) (with a return of 0), and D is the element //
// returned for direction==0 (see find_zero) with a return of DB_NOTFOUND. // -...-
// If any of A, B, or C are not found, then asking for the // A
// associated direction will return DB_NOTFOUND. // D
// See find_zero for more information. //
// // +...+
// Let the following represent the signum of the heaviside function. // B
// // D
// -...- //
// A // 0...0
// D // C
// //
// +...+ // -...-0...0
// B // AC
// D //
// // 0...0+...+
// 0...0 // C B
// C //
// // -...-+...+
// -...-0...0 // AB
// AC // D
// //
// 0...0+...+ // -...-0...0+...+
// C B // AC B
//
// -...-+...+
// AB
// D
//
// -...-0...0+...+
// AC B
int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index); int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
// Effect: Create a new OMT, storing it in *newomt. // Effect: Create a new OMT, storing it in *newomt.
...@@ -370,9 +378,12 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt); ...@@ -370,9 +378,12 @@ int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
void toku_omt_clear(OMT omt); void toku_omt_clear(OMT omt);
// Effect: Set the tree to be empty. // Effect: Set the tree to be empty.
// Note: Will not resize the array, since void precludes allowing a malloc. // Note: Will not reallocate or resize any memory, since returning void precludes calling malloc.
// Performance: time=O(1) // Performance: time=O(1)
unsigned long toku_omt_memory_size (OMT omt);
// Effect: Return the size (in bytes) of the omt, as it resides in main memory. Don't include any of the OMTVALUES.
int toku_omt_cursor_create (OMTCURSOR *p); int toku_omt_cursor_create (OMTCURSOR *p);
// Effect: Create an OMTCURSOR. Stores it in *p. The OMTCURSOR is // Effect: Create an OMTCURSOR. Stores it in *p. The OMTCURSOR is
// initially invalid. // initially invalid.
...@@ -386,19 +397,17 @@ void toku_omt_cursor_destroy (OMTCURSOR *p); ...@@ -386,19 +397,17 @@ void toku_omt_cursor_destroy (OMTCURSOR *p);
// Effect: Invalidates *p (if it is valid) and frees any memory // Effect: Invalidates *p (if it is valid) and frees any memory
// associated with *p. // associated with *p.
// Also sets *p=NULL. // Also sets *p=NULL.
// Requires: *p != NULL
// Rationale: The usage is to do something like // Rationale: The usage is to do something like
// toku_omt_cursor_destroy(&c); // toku_omt_cursor_destroy(&c);
// and now c will have a NULL pointer instead of a dangling freed pointer. // and now c will have a NULL pointer instead of a dangling freed pointer.
// Rationale: Returns no values since free() cannot fail. // Rationale: Returns no values since free() cannot fail.
// Performance: time=O(1) x #calls to free
int toku_omt_cursor_is_valid (OMTCURSOR c); int toku_omt_cursor_is_valid (OMTCURSOR c);
// Effect: returns 0 iff c is invalid. // Effect: returns 0 iff c is invalid.
// Performance: time=O(1) // Performance: time=O(1)
int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v); int toku_omt_cursor_next (OMTCURSOR c, OMTVALUE *v);
// Effect: Increment c's abstract offset, and store the corresponding value in v. // Effect: Increment c's offset, and find and store the value in v.
// Requires: v != NULL // Requires: v != NULL
// Returns // Returns
// 0 success // 0 success
...@@ -417,7 +426,7 @@ int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v); ...@@ -417,7 +426,7 @@ int toku_omt_cursor_current (OMTCURSOR c, OMTVALUE *v);
// Performance: O(1) time // Performance: O(1) time
int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v); int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
// Effect: Decrement c's abstract offset, and store the corresponding value in v. // Effect: Decrement c's offset, and find and store the value in v.
// Requires: v != NULL // Requires: v != NULL
// Returns // Returns
// 0 success // 0 success
...@@ -426,40 +435,11 @@ int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v); ...@@ -426,40 +435,11 @@ int toku_omt_cursor_prev (OMTCURSOR c, OMTVALUE *v);
// Performance: time=O(log N) worst case, expected time=O(1) for a randomly // Performance: time=O(log N) worst case, expected time=O(1) for a randomly
// chosen initial position. // chosen initial position.
void toku_omt_cursor_invalidate (OMTCURSOR c); void toku_omt_cursor_invalidate (OMTCURSOR c);
// Effect: Invalidate c. (This does not mean that c is destroyed or // Effect: Invalidate c. (This does not mean that c is destroyed or
// that its memory is freed.) // that its memory is freed.)
// Usage Hint: The OMTCURSOR is designed to be used inside the
// BRTcursor. A BRTcursor includes a pointer to an OMTCURSOR, which
// is created when the BRTcursor is created.
//
// The brt cursor implements its search by first finding a leaf node,
// containing an OMT. The BRT then passes its OMTCURSOR into the lookup
// method (i.e., one of toku_omt_fetch, toku_omt_find_zero,
// toku_omt_find). The lookup method, if successful, sets the
// OMTCURSOR to refer to that element.
//
// As long as the OMTCURSOR remains valid, a BRTCURSOR next or prev
// operation can be implemented using next or prev on the OMTCURSOR.
//
// If the OMTCURSOR becomes invalidated, then the BRT must search
// again from the root of the tree. The only error that an OMTCURSOR
// next operation can raise is that it is invalid.
//
// If an element is inserted into the BRT, it may cause an OMTCURSOR
// to become invalid. This is especially true if the element will end
// up in the OMT associated with the cursor. A simple implementation
// is to invalidate all OMTCURSORS any time anything is inserted into
// the BRT. Since the BRT already contains a list of BRT cursors
// associated with it, it is straightforward to go through that list
// and invalidate all the cursors.
//
// When the BRT closes a cursor, it destroys the OMTCURSOR.
size_t toku_omt_memory_size (OMT omt);
// Effect: Return the size (in bytes) of the omt, as it resides in main memory. Don't include any of the OMTVALUES.
#endif /* #ifndef OMT_H */ #endif /* #ifndef OMT_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment