Commit c10712de authored by Rich Prohaska

Change to implement delete by key.  A BRT_DELETE command is injected into the
tree.  At every interior node it replaces any INSERT or DELETE command with the
same key; at a leaf node it is translated into a PMA delete operation.

The database file format was changed for the contents of interior node
buffers to include a 1 byte type field.  There are currently 2 types:
BRT_INSERT and BRT_DELETE.




git-svn-id: file:///svn/tokudb@278 c7de825b-a66e-492c-adef-691d508d4ae1
parent de47452c
...@@ -11,9 +11,10 @@ typedef long long diskoff; /* Offset in a disk. -1 is the NULL pointer. */ ...@@ -11,9 +11,10 @@ typedef long long diskoff; /* Offset in a disk. -1 is the NULL pointer. */
#endif #endif
enum { TREE_FANOUT = BRT_FANOUT }; //, NODESIZE=1<<20 }; enum { TREE_FANOUT = BRT_FANOUT }; //, NODESIZE=1<<20 };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { BRT_CMD_OVERHEAD = 1 };
struct nodeheader_in_file { struct nodeheader_in_file {
int n_in_buffer; int n_in_buffer;
}; };
enum { BUFFER_HEADER_SIZE = (4 // height// enum { BUFFER_HEADER_SIZE = (4 // height//
+ 4 // n_children + 4 // n_children
...@@ -140,3 +141,23 @@ void brt_update_cursors_new_root(BRT t, BRTNODE newroot, BRTNODE left, BRTNODE r ...@@ -140,3 +141,23 @@ void brt_update_cursors_new_root(BRT t, BRTNODE newroot, BRTNODE left, BRTNODE r
void brt_update_cursors_leaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right); void brt_update_cursors_leaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
void brt_update_cursors_nonleaf_expand(BRT t, BRTNODE oldnode, int childnum, BRTNODE left, BRTNODE right); void brt_update_cursors_nonleaf_expand(BRT t, BRTNODE oldnode, int childnum, BRTNODE left, BRTNODE right);
void brt_update_cursors_nonleaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right); void brt_update_cursors_nonleaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
/*
 * Kind of command carried by a BRT_CMD.  The numeric values are spelled
 * out explicitly rather than left to the compiler.
 */
enum brt_cmd_type {
    BRT_NONE   = 0,
    BRT_INSERT = 1,
    BRT_DELETE = 2
};
/*
 * A command to be applied to the tree: a type tag plus a union holding
 * the arguments for that type.  Insert and delete commands both carry
 * their arguments in u.id.
 */
typedef struct brt_cmd {
    enum brt_cmd_type type;
    union {
        /* arguments shared by insert and delete */
        struct brt_cmd_insert_delete {
            DBT *key;
            DBT *val;
            DB *db;
        } id;
    } u;
} BRT_CMD;
...@@ -27,10 +27,10 @@ void test_serialize(void) { ...@@ -27,10 +27,10 @@ void test_serialize(void) {
sn.u.n.children[1] = sn.nodesize*35; sn.u.n.children[1] = sn.nodesize*35;
r = toku_hashtable_create(&sn.u.n.htables[0]); assert(r==0); r = toku_hashtable_create(&sn.u.n.htables[0]); assert(r==0);
r = toku_hashtable_create(&sn.u.n.htables[1]); assert(r==0); r = toku_hashtable_create(&sn.u.n.htables[1]); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5); assert(r==0); r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5, BRT_NONE); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5); assert(r==0); r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5, BRT_NONE); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5); assert(r==0); r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5, BRT_NONE); assert(r==0);
sn.u.n.n_bytes_in_hashtables = 3*(KEY_VALUE_OVERHEAD+2+5); sn.u.n.n_bytes_in_hashtables = 3*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
deserialize_brtnode_from(fd, nodesize*20, &dn, nodesize); deserialize_brtnode_from(fd, nodesize*20, &dn, nodesize);
...@@ -46,24 +46,26 @@ void test_serialize(void) { ...@@ -46,24 +46,26 @@ void test_serialize(void) {
assert(dn->u.n.children[0]==nodesize*30); assert(dn->u.n.children[0]==nodesize*30);
assert(dn->u.n.children[1]==nodesize*35); assert(dn->u.n.children[1]==nodesize*35);
{ {
bytevec data; ITEMLEN datalen; bytevec data; ITEMLEN datalen; int type;
int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen); int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen, &type);
assert(r==0); assert(r==0);
assert(strcmp(data,"aval")==0); assert(strcmp(data,"aval")==0);
assert(datalen==5); assert(datalen==5);
assert(type == BRT_NONE);
r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen); r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen, &type);
assert(r==0); assert(r==0);
assert(strcmp(data,"bval")==0); assert(strcmp(data,"bval")==0);
assert(datalen==5); assert(datalen==5);
assert(type == BRT_NONE);
r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen); r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen, &type);
assert(r==0); assert(r==0);
assert(strcmp(data,"xval")==0); assert(strcmp(data,"xval")==0);
assert(datalen==5); assert(datalen==5);
assert(type == BRT_NONE);
} }
brtnode_free(&dn); // brtnode_free(&dn);
toku_free(hello_string); toku_free(hello_string);
toku_hashtable_free(&sn.u.n.htables[0]); toku_hashtable_free(&sn.u.n.htables[0]);
......
...@@ -34,7 +34,8 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) { ...@@ -34,7 +34,8 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
HASHTABLE_ITERATE(node->u.n.htables[i], HASHTABLE_ITERATE(node->u.n.htables[i],
key __attribute__((__unused__)), keylen, key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen, data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen)); type __attribute__((__unused__)),
(hsize+=BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
} }
assert(hsize==node->u.n.n_bytes_in_hashtables); assert(hsize==node->u.n.n_bytes_in_hashtables);
assert(csize==node->u.n.totalchildkeylens); assert(csize==node->u.n.totalchildkeylens);
...@@ -44,12 +45,11 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) { ...@@ -44,12 +45,11 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
PMA_ITERATE(node->u.l.buffer, PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen, key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen, data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen)); (hsize+=KEY_VALUE_OVERHEAD+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer); assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* add n entries in buffer table. */ hsize+=4; /* add n entries in buffer table. */
return size+hsize; return size+hsize;
} }
} }
unsigned int serialize_brtnode_size (BRTNODE node) { unsigned int serialize_brtnode_size (BRTNODE node) {
...@@ -101,8 +101,8 @@ int serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node) { ...@@ -101,8 +101,8 @@ int serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node) {
for (i=0; i< n_hash_tables; i++) { for (i=0; i< n_hash_tables; i++) {
//printf("%s:%d p%d=%p n_entries=%d\n", __FILE__, __LINE__, i, node->mdicts[i], mdict_n_entries(node->mdicts[i])); //printf("%s:%d p%d=%p n_entries=%d\n", __FILE__, __LINE__, i, node->mdicts[i], mdict_n_entries(node->mdicts[i]));
wbuf_int(&w, toku_hashtable_n_entries(node->u.n.htables[i])); wbuf_int(&w, toku_hashtable_n_entries(node->u.n.htables[i]));
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
(wbuf_bytes(&w, key, keylen), (wbuf_char(&w, type), wbuf_bytes(&w, key, keylen),
wbuf_bytes(&w, data, datalen))); wbuf_bytes(&w, data, datalen)));
} }
} }
...@@ -220,17 +220,19 @@ int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesiz ...@@ -220,17 +220,19 @@ int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesiz
//printf("%d in hash\n", n_in_hash); //printf("%d in hash\n", n_in_hash);
for (i=0; i<n_in_this_hash; i++) { for (i=0; i<n_in_this_hash; i++) {
int diff; int diff;
int type;
bytevec key; ITEMLEN keylen; bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen; bytevec val; ITEMLEN vallen;
verify_counts(result); verify_counts(result);
type = rbuf_char(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */ rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(&rc, &val, &vallen); rbuf_bytes(&rc, &val, &vallen);
//printf("Found %s,%s\n", key, val); //printf("Found %s,%s\n", (char*)key, (char*)val);
{ {
int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen); /* Copies the data into the hash table. */ int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen, type); /* Copies the data into the hash table. */
if (r!=0) { goto died_12; } if (r!=0) { goto died_12; }
} }
diff = keylen + vallen + KEY_VALUE_OVERHEAD; diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
result->u.n.n_bytes_in_hashtables += diff; result->u.n.n_bytes_in_hashtables += diff;
result->u.n.n_bytes_in_hashtable[cnum] += diff; result->u.n.n_bytes_in_hashtable[cnum] += diff;
//printf("Inserted\n"); //printf("Inserted\n");
......
...@@ -1328,8 +1328,264 @@ void test_brt_cursor() { ...@@ -1328,8 +1328,264 @@ void test_brt_cursor() {
} }
} }
/*
 * Open a fresh tree whose node size is bsize, insert one pair made of a
 * zero-filled key of ksize bytes and a zero-filled value of vsize bytes,
 * then close the tree and its cachetable.  Every step is asserted to
 * succeed.
 */
void test_large_kv(int bsize, int ksize, int vsize) {
    char fname[] = "testbrt.brt";
    CACHETABLE ct;
    BRT t;
    DBT key, val;
    char *kbuf;
    char *vbuf;
    int r;

    printf("test_large_kv: %d %d %d\n", bsize, ksize, vsize);

    r = brt_create_cachetable(&ct, 0);
    assert(r == 0);

    /* start from a clean file */
    unlink(fname);
    r = open_brt(fname, 0, 1, &t, bsize, ct, default_compare_fun);
    assert(r == 0);

    kbuf = toku_malloc(ksize);
    assert(kbuf);
    memset(kbuf, 0, ksize);

    vbuf = toku_malloc(vsize);
    assert(vbuf);
    memset(vbuf, 0, vsize);

    fill_dbt(&key, kbuf, ksize);
    fill_dbt(&val, vbuf, vsize);

    r = brt_insert(t, &key, &val, 0);
    assert(r == 0);

    toku_free(kbuf);
    toku_free(vbuf);

    r = close_brt(t);
    assert(r == 0);
    r = cachetable_close(&ct);
    assert(r == 0);
}
/*
 * Exercise the key and value size limits: with a 1024 byte node size,
 * insert pairs whose key and value sizes double from 4 bytes upward,
 * stopping before bsize/2.
 * Author's note: the current implementation crashes, rather than
 * failing, when kvsize == bsize/2.
 */
void test_brt_limits() {
    int bsize = 1024;
    int kvsize;

    for (kvsize = 4; kvsize < bsize/2; kvsize *= 2) {
        test_large_kv(bsize, kvsize, kvsize);
        memory_check_all_free();
    }
}
/*
 * Deleting a key from a freshly created, empty tree must report an error
 * (any nonzero return).
 */
void test_brt_delete_empty() {
    char fname[] = "testbrt.brt";
    CACHETABLE ct;
    BRT t;
    DBT key;
    int absent = 1;
    int r;

    printf("test_brt_delete_empty\n");

    r = brt_create_cachetable(&ct, 0);
    assert(r == 0);

    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
    assert(r == 0);

    /* nothing was ever inserted, so this key cannot be there */
    fill_dbt(&key, &absent, sizeof absent);
    r = brt_delete(t, &key, 0);
    assert(r != 0);

    r = close_brt(t);
    assert(r == 0);
    r = cachetable_close(&ct);
    assert(r == 0);
}
/*
 * Insert keys 0..n-1 (key i maps to value n+i), delete every one of them,
 * then check that each lookup returns DB_NOTFOUND and that a cursor
 * positioned with DB_FIRST finds nothing.
 */
void test_brt_delete_present(int n) {
    char fname[] = "testbrt.brt";
    CACHETABLE ct;
    BRT t;
    BRT_CURSOR cursor;
    DBT key, val;
    int k, v;
    int i;
    int r;

    printf("test_brt_delete_present:%d\n", n);

    r = brt_create_cachetable(&ct, 0);
    assert(r == 0);

    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
    assert(r == 0);

    /* populate */
    for (i = 0; i < n; i++) {
        k = i;
        v = n + i;
        fill_dbt(&key, &k, sizeof k);
        fill_dbt(&val, &v, sizeof v);
        r = brt_insert(t, &key, &val, 0);
        assert(r == 0);
    }

    /* remove everything that was just inserted */
    for (i = 0; i < n; i++) {
        k = i;
        fill_dbt(&key, &k, sizeof k);
        r = brt_delete(t, &key, 0);
        assert(r == 0);
    }

    /* no key may be found any more */
    for (i = 0; i < n; i++) {
        k = i;
        fill_dbt(&key, &k, sizeof k);
        init_dbt(&val);
        val.flags = DB_DBT_MALLOC;
        r = brt_lookup(t, &key, &val, 0);
        assert(r == DB_NOTFOUND);
    }

    /* a cursor walk must come up empty as well */
    r = brt_cursor(t, &cursor);
    assert(r == 0);

    init_dbt(&key);
    key.flags = DB_DBT_MALLOC;
    init_dbt(&val);
    val.flags = DB_DBT_MALLOC;
    r = brt_c_get(cursor, &key, &val, DB_FIRST);
    assert(r != 0);

    r = brt_cursor_close(cursor);
    assert(r == 0);

    r = close_brt(t);
    assert(r == 0);
    r = cachetable_close(&ct);
    assert(r == 0);
}
/*
 * Insert keys 0..n-1 (key i maps to value n+i), delete all of them, and
 * then delete key n+1, which was never inserted.  The result of that last
 * delete is only printed; no particular return value is asserted.
 */
void test_brt_delete_not_present(int n) {
    char fname[] = "testbrt.brt";
    CACHETABLE ct;
    BRT t;
    DBT key, val;
    int k, v;
    int i;
    int r;

    printf("test_brt_delete_not_present:%d\n", n);

    r = brt_create_cachetable(&ct, 0);
    assert(r == 0);

    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
    assert(r == 0);

    /* populate */
    for (i = 0; i < n; i++) {
        k = i;
        v = n + i;
        fill_dbt(&key, &k, sizeof k);
        fill_dbt(&val, &v, sizeof v);
        r = brt_insert(t, &key, &val, 0);
        assert(r == 0);
    }

    /* empty the tree again */
    for (i = 0; i < n; i++) {
        k = i;
        fill_dbt(&key, &k, sizeof k);
        r = brt_delete(t, &key, 0);
        assert(r == 0);
    }

    /* delete a key that was never inserted; the outcome is reported, not checked */
    k = n+1;
    fill_dbt(&key, &k, sizeof k);
    r = brt_delete(t, &key, 0);
    printf("brt_delete k=%d %d\n", k, r);

    r = close_brt(t);
    assert(r == 0);
    r = cachetable_close(&ct);
    assert(r == 0);
}
/*
 * Insert keys 0..n-1 (key i maps to value ~i), delete all but the last
 * key, check that lookups of the deleted keys return DB_NOTFOUND, and
 * check that a cursor positioned with DB_FIRST returns the value stored
 * under the one surviving key, n-1.
 */
void test_brt_delete_cursor_first(int n) {
    char fname[] = "testbrt.brt";
    CACHETABLE ct;
    BRT t;
    BRT_CURSOR cursor;
    DBT key, val;
    int k, v;
    int vv;
    int i;
    int r;

    printf("test_brt_delete_cursor_first:%d\n", n);

    r = brt_create_cachetable(&ct, 0);
    assert(r == 0);

    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
    assert(r == 0);

    /* populate */
    for (i = 0; i < n; i++) {
        k = i;
        v = ~i;
        fill_dbt(&key, &k, sizeof k);
        fill_dbt(&val, &v, sizeof v);
        r = brt_insert(t, &key, &val, 0);
        assert(r == 0);
    }

    /* delete every key except n-1 */
    for (i = 0; i < n-1; i++) {
        k = i;
        fill_dbt(&key, &k, sizeof k);
        r = brt_delete(t, &key, 0);
        assert(r == 0);
    }

    /* the deleted keys must be gone */
    for (i = 0; i < n-1; i++) {
        k = i;
        fill_dbt(&key, &k, sizeof k);
        init_dbt(&val);
        val.flags = DB_DBT_MALLOC;
        r = brt_lookup(t, &key, &val, 0);
        assert(r == DB_NOTFOUND);
    }

    /* the first thing the cursor sees must be the surviving pair */
    r = brt_cursor(t, &cursor);
    assert(r == 0);

    init_dbt(&key);
    key.flags = DB_DBT_MALLOC;
    init_dbt(&val);
    val.flags = DB_DBT_MALLOC;
    r = brt_c_get(cursor, &key, &val, DB_FIRST);
    assert(r == 0);

    assert(val.size == sizeof vv);
    memcpy(&vv, val.data, val.size);
    assert(vv == ~(n-1));

    /* both DBTs were requested with DB_DBT_MALLOC; release their buffers */
    toku_free(key.data);
    toku_free(val.data);

    r = brt_cursor_close(cursor);
    assert(r == 0);

    r = close_brt(t);
    assert(r == 0);
    r = cachetable_close(&ct);
    assert(r == 0);
}
/*
 * Run the whole delete test suite: the empty-tree case first, then each
 * of the sized tests with 1, 100 and 500 keys.  memory_check_all_free()
 * is called after every individual test.
 */
void test_brt_delete() {
    static const int sizes[] = { 1, 100, 500 };
    enum { N_SIZES = sizeof sizes / sizeof sizes[0] };
    int i;

    test_brt_delete_empty();
    memory_check_all_free();

    for (i = 0; i < N_SIZES; i++) {
        test_brt_delete_present(sizes[i]);
        memory_check_all_free();
    }
    for (i = 0; i < N_SIZES; i++) {
        test_brt_delete_not_present(sizes[i]);
        memory_check_all_free();
    }
    for (i = 0; i < N_SIZES; i++) {
        test_brt_delete_cursor_first(sizes[i]);
        memory_check_all_free();
    }
}
static void brt_blackbox_test (void) { static void brt_blackbox_test (void) {
test_brt_cursor(); memory_check = 1;
test_wrongendian_compare(0, 2); memory_check_all_free(); test_wrongendian_compare(0, 2); memory_check_all_free();
test_wrongendian_compare(1, 2); memory_check_all_free(); test_wrongendian_compare(1, 2); memory_check_all_free();
test_wrongendian_compare(1, 257); memory_check_all_free(); test_wrongendian_compare(1, 257); memory_check_all_free();
...@@ -1377,7 +1633,11 @@ static void brt_blackbox_test (void) { ...@@ -1377,7 +1633,11 @@ static void brt_blackbox_test (void) {
// Once upon a time srandom(8) caused this test to fail. // Once upon a time srandom(8) caused this test to fail.
srandom(8); test4(2048, 1<<15, 1); srandom(8); test4(2048, 1<<15, 1);
memory_check = 1;
test_brt_limits();
test_brt_cursor();
test_brt_delete();
// test3(1<<19, 1<<20, 0); // test3(1<<19, 1<<20, 0);
// test4(1<<19, 1<<20, 0); // test4(1<<19, 1<<20, 0);
......
...@@ -277,9 +277,9 @@ static void insert_to_buffer_in_leaf (BRTNODE node, DBT *k, DBT *v, DB *db) { ...@@ -277,9 +277,9 @@ static void insert_to_buffer_in_leaf (BRTNODE node, DBT *k, DBT *v, DB *db) {
} }
#endif #endif
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v) { static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type) {
unsigned int n_bytes_added = KEY_VALUE_OVERHEAD + k->size + v->size; unsigned int n_bytes_added = BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + k->size + v->size;
int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size); int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
if (r!=0) return r; if (r!=0) return r;
node->u.n.n_bytes_in_hashtable[childnum] += n_bytes_added; node->u.n.n_bytes_in_hashtable[childnum] += n_bytes_added;
node->u.n.n_bytes_in_hashtables += n_bytes_added; node->u.n.n_bytes_in_hashtables += n_bytes_added;
...@@ -463,7 +463,7 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int ...@@ -463,7 +463,7 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0) if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0)
break; break;
} }
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD; child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
child_counts[cnum]++; child_counts[cnum]++;
})); }));
{ {
...@@ -504,25 +504,25 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int ...@@ -504,25 +504,25 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
} }
#endif #endif
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v, static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *split, DBT *split,
int debug, int debug);
DB *db);
/* key is not in the hashtable in node. Either put the key-value pair in the child, or put it in the node. */ /* key is not in the hashtable in node. Either put the key-value pair in the child, or put it in the node. */
static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child, static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
DBT *k, DBT *v, BRT_CMD *cmd,
int childnum_of_node, int childnum_of_node) {
DB *db) {
assert(node->height>0); /* Not a leaf. */ assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
int to_child=serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize; int to_child=serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize;
if (brt_debug_mode) { if (brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node); printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) { if (childnum_of_node+1<node->u.n.n_children) {
DBT k2; DBT k2;
printf(" nextsplitkey=%s\n", (char*)node->u.n.childkeys[childnum_of_node]); printf(" nextsplitkey=%s\n", (char*)node->u.n.childkeys[childnum_of_node]);
assert(t->compare_fun(db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0); assert(t->compare_fun(cmd->u.id.db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0);
} else { } else {
printf("\n"); printf("\n");
} }
...@@ -532,36 +532,35 @@ static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTN ...@@ -532,36 +532,35 @@ static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTN
DBT againk; DBT againk;
init_dbt(&againk); init_dbt(&againk);
//printf("%s:%d hello!\n", __FILE__, __LINE__); //printf("%s:%d hello!\n", __FILE__, __LINE__);
int r = brtnode_insert(t, child, k, v, int r = brtnode_put_cmd(t, child, cmd,
&again_split, &againa, &againb, &againk, &again_split, &againa, &againb, &againk,
0, 0);
db);
if (r!=0) return r; if (r!=0) return r;
assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */ assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
return r; return r;
} else { } else {
int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v); int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v, cmd->type);
return r; return r;
} }
} }
static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum, static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
DBT *k, DBT *v, BRT_CMD *cmd,
int *child_did_split, BRTNODE *childa, BRTNODE *childb, int *child_did_split, BRTNODE *childa, BRTNODE *childb,
DBT *childsplitk, DBT *childsplitk) {
DB *db) {
//if (debug) printf("%s:%d %*sinserting down\n", __FILE__, __LINE__, debug, ""); //if (debug) printf("%s:%d %*sinserting down\n", __FILE__, __LINE__, debug, "");
//printf("%s:%d hello!\n", __FILE__, __LINE__); //printf("%s:%d hello!\n", __FILE__, __LINE__);
assert(node->height>0); assert(node->height>0);
{ {
int r = brtnode_insert(t, child, k, v, int r = brtnode_put_cmd(t, child, cmd,
child_did_split, childa, childb, childsplitk, child_did_split, childa, childb, childsplitk,
0, 0);
db);
if (r!=0) return r; if (r!=0) return r;
} }
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
//if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split); //if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split);
{ {
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); // Must delete after doing the insert, to avoid operating on freed' key int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); // Must delete after doing the insert, to avoid operating on freed' key
...@@ -569,7 +568,7 @@ static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum, ...@@ -569,7 +568,7 @@ static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
if (r!=0) return r; if (r!=0) return r;
} }
{ {
int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD); int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD);
node->u.n.n_bytes_in_hashtables -= n_bytes_removed; node->u.n.n_bytes_in_hashtables -= n_bytes_removed;
node->u.n.n_bytes_in_hashtable[childnum] -= n_bytes_removed; node->u.n.n_bytes_in_hashtable[childnum] -= n_bytes_removed;
} }
...@@ -643,17 +642,20 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum, ...@@ -643,17 +642,20 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
node->u.n.n_bytes_in_hashtables -= old_count; /* By default, they are all removed. We might add them back in. */ node->u.n.n_bytes_in_hashtables -= old_count; /* By default, they are all removed. We might add them back in. */
/* Keep pushing to the children, but not if the children would require a pushdown */ /* Keep pushing to the children, but not if the children would require a pushdown */
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, ({ HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, type, ({
DBT skd, svd; DBT skd, svd;
fill_dbt_ap(&skd, skey, skeylen, app_private); fill_dbt_ap(&skd, skey, skeylen, app_private);
fill_dbt(&svd, sval, svallen); fill_dbt(&svd, sval, svallen);
BRT_CMD brtcmd;
brtcmd.type = type; brtcmd.u.id.key = &skd; brtcmd.u.id.val = &svd; brtcmd.u.id.db = db;
if (t->compare_fun(db, &skd, childsplitk)<=0) { if (t->compare_fun(db, &skd, childsplitk)<=0) {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &skd, &svd, childnum, db); r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &brtcmd, childnum);
} else { } else {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &skd, &svd, childnum+1, db); r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &brtcmd, childnum+1);
} }
if (r!=0) return r; if (r!=0) return r;
})); }));
toku_hashtable_free(&old_h); toku_hashtable_free(&old_h);
r=cachetable_unpin(t->cf, childa->thisnodename, 1); r=cachetable_unpin(t->cf, childa->thisnodename, 1);
...@@ -687,7 +689,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum, ...@@ -687,7 +689,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
return 0; return 0;
} }
static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum, static int push_some_brt_cmds_down (BRT t, BRTNODE node, int childnum,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *splitk, DBT *splitk,
int debug, int debug,
...@@ -706,7 +708,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum, ...@@ -706,7 +708,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
verify_counts(child); verify_counts(child);
//printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]); //printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]);
if (child->height>0 && child->u.n.n_children>0) assert(child->u.n.children[child->u.n.n_children-1]!=0); if (child->height>0 && child->u.n.n_children>0) assert(child->u.n.children[child->u.n.n_children-1]!=0);
if (debug) printf("%s:%d %*spush_some_kvpairs_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename); if (debug) printf("%s:%d %*spush_some_brt_cmds_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
/* I am exposing the internals of the hash table here, mostly because I am not thinking of a really /* I am exposing the internals of the hash table here, mostly because I am not thinking of a really
* good way to do it otherwise. I want to loop over the elements of the hash table, deleting some as I * good way to do it otherwise. I want to loop over the elements of the hash table, deleting some as I
* go. The HASHTABLE_ITERATE macro will break if I delete something from the hash table. */ * go. The HASHTABLE_ITERATE macro will break if I delete something from the hash table. */
...@@ -722,30 +724,39 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum, ...@@ -722,30 +724,39 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
long int randomnumber = random(); long int randomnumber = random();
//printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, node->u.n.n_bytes_in_hashtable[childnum]); //printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, node->u.n.n_bytes_in_hashtable[childnum]);
assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0); assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &randomnumber)) { int type;
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &type, &randomnumber)) {
int child_did_split=0; BRTNODE childa, childb; int child_did_split=0; BRTNODE childa, childb;
DBT hk,hv; DBT hk,hv;
DBT childsplitk; DBT childsplitk;
BRT_CMD brtcmd;
fill_dbt_ap(&hk, key, keylen, app_private);
fill_dbt(&hv, val, vallen);
brtcmd.type = type;
brtcmd.u.id.key = &hk;
brtcmd.u.id.val = &hv;
brtcmd.u.id.db = db;
//printf("%s:%d random_picked\n", __FILE__, __LINE__); //printf("%s:%d random_picked\n", __FILE__, __LINE__);
init_dbt(&childsplitk); init_dbt(&childsplitk);
childsplitk.app_private = splitk->app_private; childsplitk.app_private = splitk->app_private;
if (debug) printf("%s:%d %*spush down %s\n", __FILE__, __LINE__, debug, "", (char*)key); if (debug) printf("%s:%d %*spush down %s\n", __FILE__, __LINE__, debug, "", (char*)key);
r = push_a_kvpair_down (t, node, child, childnum, r = push_a_brt_cmd_down (t, node, child, childnum,
fill_dbt_ap(&hk, key, keylen, app_private), fill_dbt(&hv, val, vallen), &brtcmd,
&child_did_split, &childa, &childb, &child_did_split, &childa, &childb,
&childsplitk, &childsplitk);
db);
if (0){ if (0){
unsigned int sum=0; unsigned int sum=0;
HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, type __attribute__((__unused__)),
sum+=hkl+hdl+KEY_VALUE_OVERHEAD); sum+=hkl+hdl+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD);
printf("%s:%d sum=%d\n", __FILE__, __LINE__, sum); printf("%s:%d sum=%d\n", __FILE__, __LINE__, sum);
assert(sum==node->u.n.n_bytes_in_hashtable[childnum]); assert(sum==node->u.n.n_bytes_in_hashtable[childnum]);
} }
if (node->u.n.n_bytes_in_hashtable[childnum]>0) assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0); if (node->u.n.n_bytes_in_hashtable[childnum]>0) assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
//printf("%s:%d %d=push_a_kvpair_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]); //printf("%s:%d %d=push_a_brt_cmd_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
if (r!=0) return r; if (r!=0) return r;
if (child_did_split) { if (child_did_split) {
// If the child splits, we don't push down any further. // If the child splits, we don't push down any further.
...@@ -759,7 +770,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum, ...@@ -759,7 +770,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
} }
if (0) printf("%s:%d done random picking\n", __FILE__, __LINE__); if (0) printf("%s:%d done random picking\n", __FILE__, __LINE__);
} }
if (debug) printf("%s:%d %*sdone push_some_kvpairs_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild); if (debug) printf("%s:%d %*sdone push_some_brt_cmds_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
r=cachetable_unpin(t->cf, targetchild, 1); r=cachetable_unpin(t->cf, targetchild, 1);
if (r!=0) return r; if (r!=0) return r;
*did_split=0; *did_split=0;
...@@ -787,10 +798,10 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE ...@@ -787,10 +798,10 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
find_heaviest_child(node, &childnum); find_heaviest_child(node, &childnum);
if (0) printf("%s:%d %*spush some down from %lld into %lld (child %d)\n", __FILE__, __LINE__, debug, "", node->thisnodename, node->u.n.children[childnum], childnum); if (0) printf("%s:%d %*spush some down from %lld into %lld (child %d)\n", __FILE__, __LINE__, debug, "", node->thisnodename, node->u.n.children[childnum], childnum);
assert(node->u.n.children[childnum]!=0); assert(node->u.n.children[childnum]!=0);
int r = push_some_kvpairs_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db); int r = push_some_brt_cmds_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db);
if (r!=0) return r; if (r!=0) return r;
assert(*did_split==0 || *did_split==1); assert(*did_split==0 || *did_split==1);
if (debug) printf("%s:%d %*sdid push_some_kvpairs_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split); if (debug) printf("%s:%d %*sdid push_some_brt_cmds_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);
if (*did_split) { if (*did_split) {
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize); assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize); assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
...@@ -811,45 +822,69 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE ...@@ -811,45 +822,69 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
#define INSERT_ALL_AT_ONCE #define INSERT_ALL_AT_ONCE
static int brt_leaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, static int brt_leaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
int debug, int debug) {
DB *db) { if (cmd->type == BRT_INSERT) {
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
DB *db = cmd->u.id.db;
#ifdef INSERT_ALL_AT_ONCE #ifdef INSERT_ALL_AT_ONCE
int replaced_v_size; int replaced_v_size;
enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size); enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size);
assert(pma_status==BRT_OK); assert(pma_status==BRT_OK);
//printf("replaced_v_size=%d\n", replaced_v_size); //printf("replaced_v_size=%d\n", replaced_v_size);
if (replaced_v_size>=0) { if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size; node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else { } else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD; node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
} }
#else #else
DBT v2; DBT v2;
enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db); enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db);
if (pma_status==BRT_OK) { if (pma_status==BRT_OK) {
pma_status = pma_delete(node->u.l.buffer, k, db); pma_status = pma_delete(node->u.l.buffer, k, db);
assert(pma_status==BRT_OK); assert(pma_status==BRT_OK);
node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD; node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD;
} }
pma_status = pma_insert(node->u.l.buffer, k, v, db); pma_status = pma_insert(node->u.l.buffer, k, v, db);
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD; node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
#endif #endif
// If it doesn't fit, then split the leaf. // If it doesn't fit, then split the leaf.
if (serialize_brtnode_size(node) > node->nodesize) { if (serialize_brtnode_size(node) > node->nodesize) {
int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db); int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db);
if (r!=0) return r; if (r!=0) return r;
//printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey); //printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
split_count++; split_count++;
*did_split = 1; *did_split = 1;
verify_counts(*nodea); verify_counts(*nodeb); verify_counts(*nodea); verify_counts(*nodeb);
if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize); if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize); assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize); assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
} else { } else {
*did_split = 0; *did_split = 0;
}
return 0;
}
if (cmd->type == BRT_DELETE) {
int r;
DBT val;
/* TODO combine lookup and delete */
init_dbt(&val);
r = pma_lookup(node->u.l.buffer, cmd->u.id.key, &val, cmd->u.id.db);
if (r == 0) {
r = pma_delete(node->u.l.buffer, cmd->u.id.key, cmd->u.id.db);
assert(r == BRT_OK);
node->u.l.n_bytes_in_buffer -= cmd->u.id.key->size + val.size + KEY_VALUE_OVERHEAD;
}
*did_split = 0;
return r;
} }
/* unknown message */
assert(0);
return 0; return 0;
} }
...@@ -866,16 +901,20 @@ static unsigned int brtnode_which_child (BRTNODE node , DBT *k, BRT t, DB *db) { ...@@ -866,16 +901,20 @@ static unsigned int brtnode_which_child (BRTNODE node , DBT *k, BRT t, DB *db) {
} }
static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, static int brt_nonleaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *splitk, DBT *splitk,
int debug, int debug) {
DB *db) {
bytevec olddata; bytevec olddata;
ITEMLEN olddatalen; ITEMLEN olddatalen;
unsigned int childnum; unsigned int childnum;
int found; int found;
int type = cmd->type;
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
DB *db = cmd->u.id.db;
childnum = brtnode_which_child(node, k, t, db); childnum = brtnode_which_child(node, k, t, db);
/* non-buffering mode when cursors are open on this child */ /* non-buffering mode when cursors are open on this child */
...@@ -895,8 +934,9 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -895,8 +934,9 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
assert(r == 0); assert(r == 0);
child = child_v; child = child_v;
r = brtnode_insert(t, child, k, v, child_did_split = 0;
&child_did_split, &childa, &childb, &childsplitk, 0, db); r = brtnode_put_cmd(t, child, cmd,
&child_did_split, &childa, &childb, &childsplitk, 0);
assert(r == 0); assert(r == 0);
if (child_did_split) { if (child_did_split) {
if (0) printf("brt_nonleaf_insert child_split %p\n", child); if (0) printf("brt_nonleaf_insert child_split %p\n", child);
...@@ -913,7 +953,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -913,7 +953,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
return r; return r;
} }
found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen); found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen, &type);
if (0) { // It is faster to do this, except on yobiduck where things grind to a halt. if (0) { // It is faster to do this, except on yobiduck where things grind to a halt.
void *child_v; void *child_v;
...@@ -922,7 +962,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -922,7 +962,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
/* If the child is in memory, then go ahead and put it in the child. */ /* If the child is in memory, then go ahead and put it in the child. */
BRTNODE child = child_v; BRTNODE child = child_v;
if (found) { if (found) {
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD; int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
assert(r==0); assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff; node->u.n.n_bytes_in_hashtables -= diff;
...@@ -932,8 +972,8 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -932,8 +972,8 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
int child_did_split; int child_did_split;
BRTNODE childa, childb; BRTNODE childa, childb;
DBT childsplitk; DBT childsplitk;
int r = brtnode_insert(t, child, k, v, int r = brtnode_put_cmd(t, child, cmd,
&child_did_split, &childa, &childb, &childsplitk, 0, db); &child_did_split, &childa, &childb, &childsplitk, 0);
if (r!=0) return r; if (r!=0) return r;
if (child_did_split) { if (child_did_split) {
r=handle_split_of_child(t, node, childnum, r=handle_split_of_child(t, node, childnum,
...@@ -954,19 +994,18 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -954,19 +994,18 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
verify_counts(node); verify_counts(node);
if (found) { if (found) {
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD; int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
assert(r==0); assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff; node->u.n.n_bytes_in_hashtables -= diff;
node->u.n.n_bytes_in_hashtable[childnum] -= diff; node->u.n.n_bytes_in_hashtable[childnum] -= diff;
//printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff); //printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff);
} }
{ {
int diff = k->size + v->size + KEY_VALUE_OVERHEAD; int diff = k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size); int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
assert(r==0); assert(r==0);
node->u.n.n_bytes_in_hashtables += diff; node->u.n.n_bytes_in_hashtables += diff;
node->u.n.n_bytes_in_hashtable[childnum] += diff; node->u.n.n_bytes_in_hashtable[childnum] += diff;
} }
if (debug) printf("%s:%d %*sDoing maybe_push_down\n", __FILE__, __LINE__, debug, ""); if (debug) printf("%s:%d %*sDoing maybe_push_down\n", __FILE__, __LINE__, debug, "");
int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitk, debugp1(debug), k->app_private, db); int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitk, debugp1(debug), k->app_private, db);
...@@ -989,20 +1028,17 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v, ...@@ -989,20 +1028,17 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
} }
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v, static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
int debug, int debug) {
DB *db) {
if (node->height==0) { if (node->height==0) {
return brt_leaf_insert(t, node, k, v, return brt_leaf_insertm(t, node, cmd,
did_split, nodea, nodeb, splitk, did_split, nodea, nodeb, splitk,
debug, debug);
db);
} else { } else {
return brt_nonleaf_insert(t, node, k, v, return brt_nonleaf_insertm(t, node, cmd,
did_split, nodea, nodeb, splitk, did_split, nodea, nodeb, splitk,
debug, debug);
db);
} }
} }
...@@ -1218,10 +1254,11 @@ int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKE ...@@ -1218,10 +1254,11 @@ int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKE
return 0; return 0;
} }
int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) { int brt_root_put_cmd(BRT brt, BRT_CMD *cmd) {
void *node_v; void *node_v;
BRTNODE node; BRTNODE node;
CACHEKEY *rootp; CACHEKEY *rootp;
int result;
int r; int r;
int did_split; BRTNODE nodea=0, nodeb=0; int did_split; BRTNODE nodea=0, nodeb=0;
DBT splitk; DBT splitk;
...@@ -1239,10 +1276,10 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) { ...@@ -1239,10 +1276,10 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
} }
node=node_v; node=node_v;
if (debug) printf("%s:%d node inserting\n", __FILE__, __LINE__); if (debug) printf("%s:%d node inserting\n", __FILE__, __LINE__);
r = brtnode_insert(brt, node, k, v, did_split = 0;
result = brtnode_put_cmd(brt, node, cmd,
&did_split, &nodea, &nodeb, &splitk, &did_split, &nodea, &nodeb, &splitk,
debug, db); debug);
if (r!=0) return r;
if (debug) printf("%s:%d did_insert\n", __FILE__, __LINE__); if (debug) printf("%s:%d did_insert\n", __FILE__, __LINE__);
if (did_split) { if (did_split) {
//printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize); //printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize);
...@@ -1252,65 +1289,76 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) { ...@@ -1252,65 +1289,76 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
} }
if (did_split) { if (did_split) {
r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp); r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp);
if (r != 0) assert(r == 0);
return r;
} else { } else {
if (node->height>0) if (node->height>0)
assert(node->u.n.n_children<=TREE_FANOUT); assert(node->u.n.n_children<=TREE_FANOUT);
} }
cachetable_unpin(brt->cf, *rootp, 1); cachetable_unpin(brt->cf, *rootp, 1);
if ((r = unpin_brt_header(brt))!=0) return r; r = unpin_brt_header(brt);
assert(r == 0);
//assert(0==cachetable_assert_all_unpinned(brt->cachetable)); //assert(0==cachetable_assert_all_unpinned(brt->cachetable));
return 0; return result;
}
int brt_insert (BRT brt, DBT *key, DBT *val, DB* db) {
int r;
BRT_CMD brtcmd;
brtcmd.type = BRT_INSERT;
brtcmd.u.id.key = key;
brtcmd.u.id.val = val;
brtcmd.u.id.db = db;
r = brt_root_put_cmd(brt, &brtcmd);
return r;
} }
int brt_lookup_node (BRT brt, diskoff off, DBT *k, DBT *v, DB *db) { int brt_lookup_node (BRT brt, diskoff off, DBT *k, DBT *v, DB *db) {
int result;
void *node_v; void *node_v;
int r = cachetable_get_and_pin(brt->cf, off, &node_v, int r = cachetable_get_and_pin(brt->cf, off, &node_v,
brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize); brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
BRTNODE node; BRTNODE node;
int childnum; int childnum;
if (r!=0) {
int r2; if (r!=0)
died0: return r;
// printf("%s:%d r=%d\n", __FILE__, __LINE__, r);
r2 = cachetable_unpin(brt->cf, off, 0);
return r;
}
node=node_v; node=node_v;
// Leaves have a single mdict, where the data is found.
if (node->height==0) { if (node->height==0) {
r = pma_lookup(node->u.l.buffer, k, v, db); result = pma_lookup(node->u.l.buffer, k, v, db);
//printf("%s:%d looked up something, got answerlen=%d\n", __FILE__, __LINE__, answerlen); //printf("%s:%d looked up something, got answerlen=%d\n", __FILE__, __LINE__, answerlen);
if (r!=0) goto died0;
r = cachetable_unpin(brt->cf, off, 0); r = cachetable_unpin(brt->cf, off, 0);
return r; assert(r == 0);
return result;
} }
childnum = brtnode_which_child(node, k, brt, db); childnum = brtnode_which_child(node, k, brt, db);
// Leaves have a single mdict, where the data is found.
{ {
bytevec hanswer; bytevec hanswer;
ITEMLEN hanswerlen; ITEMLEN hanswerlen;
if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen)==0) { int type;
//printf("Found %d bytes\n", *vallen); if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen, &type)==0) {
ybt_set_value(v, hanswer, hanswerlen, &brt->sval); if (type == BRT_INSERT) {
//printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data); //printf("Found %d bytes\n", *vallen);
r = cachetable_unpin(brt->cf, off, 0); ybt_set_value(v, hanswer, hanswerlen, &brt->sval);
assert(r==0); //printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data);
return 0; result = 0;
} else if (type == BRT_DELETE) {
result = DB_NOTFOUND;
} else
assert(0);
r = cachetable_unpin(brt->cf, off, 0);
assert(r == 0);
return result;
} }
} }
if (node->height==0) {
r = cachetable_unpin(brt->cf, off, 0); result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
if (r==0) return DB_NOTFOUND; r = cachetable_unpin(brt->cf, off, 0);
else return r; assert(r == 0);
} return result;
{
int result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
r = cachetable_unpin(brt->cf, off, 0);
if (r!=0) return r;
return result;
}
} }
...@@ -1336,6 +1384,22 @@ int brt_lookup (BRT brt, DBT *k, DBT *v, DB *db) { ...@@ -1336,6 +1384,22 @@ int brt_lookup (BRT brt, DBT *k, DBT *v, DB *db) {
return 0; return 0;
} }
/* Delete by key.
 * Builds a BRT_DELETE command that carries the caller's key and db together
 * with a zero-length value, then hands the command to brt_root_put_cmd().
 * Returns whatever brt_root_put_cmd() returns. */
int brt_delete(BRT brt, DBT *key, DB *db) {
    DBT empty_val;
    BRT_CMD delete_cmd;

    /* A delete carries no payload: start from an initialized DBT and force
     * its size to zero. */
    init_dbt(&empty_val);
    empty_val.size = 0;

    delete_cmd.type = BRT_DELETE;
    delete_cmd.u.id.db = db;
    delete_cmd.u.id.val = &empty_val;
    delete_cmd.u.id.key = key;

    return brt_root_put_cmd(brt, &delete_cmd);
}
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse); int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse);
int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen) { int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen) {
...@@ -1356,9 +1420,9 @@ int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lole ...@@ -1356,9 +1420,9 @@ int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lole
int i; int i;
for (i=0; i< node->u.n.n_children-1; i++) { for (i=0; i< node->u.n.n_children-1; i++) {
printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, toku_hashtable_n_entries(node->u.n.htables[i])); printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, toku_hashtable_n_entries(node->u.n.htables[i]));
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
({ ({
printf("%*s %s %s\n", depth+2, "", (char*)key, (char*)data); printf("%*s %s %s %d\n", depth+2, "", (char*)key, (char*)data, type);
assert(strlen((char*)key)+1==keylen); assert(strlen((char*)key)+1==keylen);
assert(strlen((char*)data)+1==datalen); assert(strlen((char*)data)+1==datalen);
})); }));
...@@ -1468,7 +1532,9 @@ int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, byteve ...@@ -1468,7 +1532,9 @@ int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, byteve
} }
{ {
void verify_pair (bytevec key, unsigned int keylen, void verify_pair (bytevec key, unsigned int keylen,
bytevec data __attribute__((__unused__)), unsigned int datalen __attribute__((__unused__)), bytevec data __attribute__((__unused__)),
unsigned int datalen __attribute__((__unused__)),
int type __attribute__((__unused__)),
void *ignore __attribute__((__unused__))) { void *ignore __attribute__((__unused__))) {
if (thislorange) assert(keycompare(thislorange,thislolen,key,keylen)<0); if (thislorange) assert(keycompare(thislorange,thislolen,key,keylen)<0);
if (thishirange && keycompare(key,keylen,thishirange,thishilen)>0) { if (thishirange && keycompare(key,keylen,thishirange,thishilen)>0) {
...@@ -1525,11 +1591,6 @@ void brt_flush (BRT brt) { ...@@ -1525,11 +1591,6 @@ void brt_flush (BRT brt) {
} }
#endif #endif
int brtnode_flush_child (BRT brt, BRTNODE node, int cnum) {
brt=brt; node=node; cnum=cnum;
abort(); /* Algorithm: For each key in the cnum'th mdict, insert it to the childnode. It may cause a split. */
}
int brt_flush_debug = 0; int brt_flush_debug = 0;
/* /*
...@@ -1550,7 +1611,7 @@ void brt_flush_child(BRT t, BRTNODE node, int childnum, BRT_CURSOR cursor) { ...@@ -1550,7 +1611,7 @@ void brt_flush_child(BRT t, BRTNODE node, int childnum, BRT_CURSOR cursor) {
} }
init_dbt(&child_splitk); init_dbt(&child_splitk);
r = push_some_kvpairs_down(t, node, childnum, r = push_some_brt_cmds_down(t, node, childnum,
&child_did_split, &childa, &childb, &child_splitk, brt_flush_debug, 0, 0); &child_did_split, &childa, &childb, &child_splitk, brt_flush_debug, 0, 0);
assert(r == 0); assert(r == 0);
if (brt_flush_debug) { if (brt_flush_debug) {
......
...@@ -14,6 +14,7 @@ int open_brt (const char *fname, const char *dbname, int is_create, BRT *, int n ...@@ -14,6 +14,7 @@ int open_brt (const char *fname, const char *dbname, int is_create, BRT *, int n
//int brt_open (BRT *, char *fname, char *dbname); //int brt_open (BRT *, char *fname, char *dbname);
int brt_insert (BRT brt, DBT *k, DBT *v, DB*db); int brt_insert (BRT brt, DBT *k, DBT *v, DB*db);
int brt_lookup (BRT brt, DBT *k, DBT *v, DB*db); int brt_lookup (BRT brt, DBT *k, DBT *v, DB*db);
int brt_delete (BRT brt, DBT *k, DB *db);
int close_brt (BRT); int close_brt (BRT);
int dump_brt (BRT brt); int dump_brt (BRT brt);
void brt_fsync (BRT); /* fsync, but don't clear the caches. */ void brt_fsync (BRT); /* fsync, but don't clear the caches. */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include "hashtable.h" #include "hashtable.h"
#include "memory.h" #include "memory.h"
#include "primes.h" #include "primes.h"
#include "../include/db.h" // #include "../include/ydb-constants.h"
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -41,7 +41,7 @@ static void hash_find_internal (HASHTABLE tab, unsigned int hash, const unsigned ...@@ -41,7 +41,7 @@ static void hash_find_internal (HASHTABLE tab, unsigned int hash, const unsigned
*hashelt = 0; *hashelt = 0;
} }
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen) { int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen, int *type) {
HASHELT he, *prev_ptr; HASHELT he, *prev_ptr;
hash_find_internal(tab, hash_key (key, keylen), key, keylen, &he, &prev_ptr); hash_find_internal(tab, hash_key (key, keylen), key, keylen, &he, &prev_ptr);
if (he==0) { if (he==0) {
...@@ -49,6 +49,7 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I ...@@ -49,6 +49,7 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I
} else { } else {
*data = &he->keyval[he->keylen]; *data = &he->keyval[he->keylen];
*datalen = he->vallen; *datalen = he->vallen;
*type = he->type;
return 0; return 0;
} }
} }
...@@ -82,7 +83,7 @@ int toku_hash_rehash_everything (HASHTABLE tab, unsigned int primeindexdelta) { ...@@ -82,7 +83,7 @@ int toku_hash_rehash_everything (HASHTABLE tab, unsigned int primeindexdelta) {
return 0; return 0;
} }
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen) int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen, int type)
{ {
unsigned int hk = hash_key (key,keylen); unsigned int hk = hash_key (key,keylen);
unsigned int h = hk%tab->arraysize; unsigned int h = hk%tab->arraysize;
...@@ -97,6 +98,7 @@ int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void ...@@ -97,6 +98,7 @@ int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void
/* Otherwise the key is not already present, so we need to add it. */ /* Otherwise the key is not already present, so we need to add it. */
HASHELT he=toku_malloc(sizeof(*he)+keylen+vallen); HASHELT he=toku_malloc(sizeof(*he)+keylen+vallen);
assert(he); // ????? assert(he); // ?????
he->type = type;
he->keylen = keylen; he->keylen = keylen;
he->vallen = vallen; he->vallen = vallen;
memmove(&he->keyval[0], key, keylen); memmove(&he->keyval[0], key, keylen);
...@@ -134,7 +136,7 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) { ...@@ -134,7 +136,7 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) {
} }
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber) { int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber) {
unsigned int i; unsigned int i;
unsigned int usei = (*randomnumber)%h->arraysize; unsigned int usei = (*randomnumber)%h->arraysize;
for (i=0; i<h->arraysize; i++, usei++) { for (i=0; i<h->arraysize; i++, usei++) {
...@@ -145,6 +147,7 @@ int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytev ...@@ -145,6 +147,7 @@ int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytev
*keylen = he->keylen; *keylen = he->keylen;
*data = &he->keyval[he->keylen]; *data = &he->keyval[he->keylen];
*datalen = he->vallen; *datalen = he->vallen;
*type = he->type;
*randomnumber = usei; *randomnumber = usei;
return 0; return 0;
} }
...@@ -177,7 +180,7 @@ int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *dat ...@@ -177,7 +180,7 @@ int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *dat
} }
#endif #endif
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, void*args), void* args) { void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, int type, void*args), void* args) {
/* /*
int i; int i;
for (i=0; i<tab->arraysize; i++) { for (i=0; i<tab->arraysize; i++) {
...@@ -187,7 +190,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen ...@@ -187,7 +190,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen
} }
} }
*/ */
HASHTABLE_ITERATE(tab, key, keylen, val, vallen, f(key,keylen,val,vallen,args)); HASHTABLE_ITERATE(tab, key, keylen, val, vallen, type, f(key,keylen,val,vallen,type,args));
} }
int toku_hashtable_n_entries(HASHTABLE tab) { int toku_hashtable_n_entries(HASHTABLE tab) {
......
...@@ -12,10 +12,10 @@ int toku_hashtable_create (HASHTABLE*); ...@@ -12,10 +12,10 @@ int toku_hashtable_create (HASHTABLE*);
/* Return 0 if the key is found in the hashtable, -1 otherwise. */ /* Return 0 if the key is found in the hashtable, -1 otherwise. */
/* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */ /* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen); int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen, int *type);
/* Replace the key if it was already there. */ /* Replace the key if it was already there. */
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen); int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type);
/* It is OK to delete something that isn't there. */ /* It is OK to delete something that isn't there. */
int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen); int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen);
...@@ -24,15 +24,16 @@ int toku_hashtable_n_entries(HASHTABLE); ...@@ -24,15 +24,16 @@ int toku_hashtable_n_entries(HASHTABLE);
void toku_hashtable_clear(HASHTABLE); void toku_hashtable_clear(HASHTABLE);
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber); int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber);
//int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen); //int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
typedef struct hashelt *HASHELT; typedef struct hashelt *HASHELT;
struct hashelt { struct hashelt {
ITEMLEN keylen;
ITEMLEN vallen;
unsigned int hash; unsigned int hash;
HASHELT next; HASHELT next;
int type;
ITEMLEN keylen;
ITEMLEN vallen;
char keyval[]; /* the first KEYLEN bytes are the key. The next bytes are the value. */ char keyval[]; /* the first KEYLEN bytes are the key. The next bytes are the value. */
}; };
...@@ -44,9 +45,9 @@ struct hashtable { ...@@ -44,9 +45,9 @@ struct hashtable {
}; };
/* You cannot add or delete elements from the hashtable while iterating. */ /* You cannot add or delete elements from the hashtable while iterating. */
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*); void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void*);
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar. // If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \ #define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,typevar,body) ({ \
unsigned int hi_counter; \ unsigned int hi_counter; \
for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \ for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \
HASHELT hi_he; \ HASHELT hi_he; \
...@@ -55,6 +56,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen, ...@@ -55,6 +56,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,
ITEMLEN keylenvar = hi_he->keylen; \ ITEMLEN keylenvar = hi_he->keylen; \
const char *datavar = &hi_he->keyval[hi_he->keylen]; \ const char *datavar = &hi_he->keyval[hi_he->keylen]; \
ITEMLEN datalenvar = hi_he->vallen; \ ITEMLEN datalenvar = hi_he->vallen; \
int typevar = hi_he->type; \
body; \ body; \
}}}) }}})
......
...@@ -28,7 +28,7 @@ void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, ...@@ -28,7 +28,7 @@ void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
fprintf(stderr, "%s isn't there\n", kv); abort(); fprintf(stderr, "%s isn't there\n", kv); abort();
} }
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, int type,
int N, int *data, char *saw) { int N, int *data, char *saw) {
char *kv = (char*)kv_v; char *kv = (char*)kv_v;
char *dv = (char*)dv_v; char *dv = (char*)dv_v;
...@@ -38,6 +38,7 @@ void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, ...@@ -38,6 +38,7 @@ void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
assert(strcmp(kv+1, dv+1)==0); assert(strcmp(kv+1, dv+1)==0);
assert(strlen(kv)+1==kl); assert(strlen(kv)+1==kl);
assert(strlen(dv)+1==dl); assert(strlen(dv)+1==dl);
assert(type == 0);
num = atoi(kv+1); num = atoi(kv+1);
for (k=0; k<N; k++) { for (k=0; k<N; k++) {
if (data[k]==num) { if (data[k]==num) {
...@@ -54,8 +55,8 @@ void verify_htable (HASHTABLE htable, int N, int *data, char *saw) { ...@@ -54,8 +55,8 @@ void verify_htable (HASHTABLE htable, int N, int *data, char *saw) {
for (j=0; j<N; j++) { for (j=0; j<N; j++) {
saw[j]=0; saw[j]=0;
} }
HASHTABLE_ITERATE(htable, kv, kl, dv, dl, HASHTABLE_ITERATE(htable, kv, kl, dv, dl, type,
verify_htable_instance (kv, kl, dv, dl, verify_htable_instance (kv, kl, dv, dl, type,
N, data, saw)); N, data, saw));
for (j=0; j<N; j++) { for (j=0; j<N; j++) {
assert(saw[j]); assert(saw[j]);
...@@ -99,7 +100,7 @@ void test0 (void) { ...@@ -99,7 +100,7 @@ void test0 (void) {
} }
snprintf(kv, 99, "k%d", ra); snprintf(kv, 99, "k%d", ra);
snprintf(dv, 99, "d%d", ra); snprintf(dv, 99, "d%d", ra);
toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1); toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1, 0);
data[data_n++]=ra; data[data_n++]=ra;
} }
} else { } else {
...@@ -122,13 +123,14 @@ void test1(void) { ...@@ -122,13 +123,14 @@ void test1(void) {
for (j=0; j<4; j++) { for (j=0; j<4; j++) {
snprintf(keys[j], 100, "k%ld", (long)(random())); snprintf(keys[j], 100, "k%ld", (long)(random()));
snprintf(vals[j], 100, "v%d", j); snprintf(vals[j], 100, "v%d", j);
toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1); toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1, 0);
} }
for (j=0; j<4; j++) { for (j=0; j<4; j++) {
bytevec key, val; bytevec key, val;
ITEMLEN keylen, vallen; ITEMLEN keylen, vallen;
int type;
long int randnum=random(); long int randnum=random();
r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &randnum); r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &type, &randnum);
assert(r==0); assert(r==0);
r = toku_hash_delete(table, key, keylen); r = toku_hash_delete(table, key, keylen);
assert(r==0); assert(r==0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment