Commit c10712de authored by Rich Prohaska's avatar Rich Prohaska

Change to implement delete by key. A BRT_DELETE command is injected into the

tree.  It replaces any INSERT or DELETE command with the same key at all
interior nodes.  It is translated into a PMA delete operation at a leaf
node.  

The database file format was changed for the contents of interior node
buffers to include a 1 byte type field.  There are currently 2 types:
BRT_INSERT and BRT_DELETE.




git-svn-id: file:///svn/tokudb@278 c7de825b-a66e-492c-adef-691d508d4ae1
parent de47452c
......@@ -11,9 +11,10 @@ typedef long long diskoff; /* Offset in a disk. -1 is the NULL pointer. */
#endif
enum { TREE_FANOUT = BRT_FANOUT }; //, NODESIZE=1<<20 };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { BRT_CMD_OVERHEAD = 1 };
struct nodeheader_in_file {
int n_in_buffer;
};
enum { BUFFER_HEADER_SIZE = (4 // height//
+ 4 // n_children
......@@ -140,3 +141,23 @@ void brt_update_cursors_new_root(BRT t, BRTNODE newroot, BRTNODE left, BRTNODE r
void brt_update_cursors_leaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
void brt_update_cursors_nonleaf_expand(BRT t, BRTNODE oldnode, int childnum, BRTNODE left, BRTNODE right);
void brt_update_cursors_nonleaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
enum brt_cmd_type {
BRT_NONE = 0,
BRT_INSERT = 1,
BRT_DELETE = 2,
};
struct brt_cmd {
enum brt_cmd_type type;
union {
/* insert or delete */
struct brt_cmd_insert_delete {
DBT *key;
DBT *val;
DB *db;
} id;
} u;
};
typedef struct brt_cmd BRT_CMD;
......@@ -27,10 +27,10 @@ void test_serialize(void) {
sn.u.n.children[1] = sn.nodesize*35;
r = toku_hashtable_create(&sn.u.n.htables[0]); assert(r==0);
r = toku_hashtable_create(&sn.u.n.htables[1]); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5); assert(r==0);
sn.u.n.n_bytes_in_hashtables = 3*(KEY_VALUE_OVERHEAD+2+5);
r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5, BRT_NONE); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5, BRT_NONE); assert(r==0);
r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5, BRT_NONE); assert(r==0);
sn.u.n.n_bytes_in_hashtables = 3*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
deserialize_brtnode_from(fd, nodesize*20, &dn, nodesize);
......@@ -46,24 +46,26 @@ void test_serialize(void) {
assert(dn->u.n.children[0]==nodesize*30);
assert(dn->u.n.children[1]==nodesize*35);
{
bytevec data; ITEMLEN datalen;
int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen);
bytevec data; ITEMLEN datalen; int type;
int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen, &type);
assert(r==0);
assert(strcmp(data,"aval")==0);
assert(datalen==5);
assert(type == BRT_NONE);
r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen);
r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen, &type);
assert(r==0);
assert(strcmp(data,"bval")==0);
assert(datalen==5);
assert(type == BRT_NONE);
r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen);
r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen, &type);
assert(r==0);
assert(strcmp(data,"xval")==0);
assert(datalen==5);
assert(type == BRT_NONE);
}
brtnode_free(&dn);
// brtnode_free(&dn);
toku_free(hello_string);
toku_hashtable_free(&sn.u.n.htables[0]);
......
......@@ -34,7 +34,8 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
HASHTABLE_ITERATE(node->u.n.htables[i],
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen));
type __attribute__((__unused__)),
(hsize+=BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
}
assert(hsize==node->u.n.n_bytes_in_hashtables);
assert(csize==node->u.n.totalchildkeylens);
......@@ -44,12 +45,11 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen));
(hsize+=KEY_VALUE_OVERHEAD+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* add n entries in buffer table. */
return size+hsize;
}
}
unsigned int serialize_brtnode_size (BRTNODE node) {
......@@ -101,8 +101,8 @@ int serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node) {
for (i=0; i< n_hash_tables; i++) {
//printf("%s:%d p%d=%p n_entries=%d\n", __FILE__, __LINE__, i, node->mdicts[i], mdict_n_entries(node->mdicts[i]));
wbuf_int(&w, toku_hashtable_n_entries(node->u.n.htables[i]));
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
(wbuf_bytes(&w, key, keylen),
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
(wbuf_char(&w, type), wbuf_bytes(&w, key, keylen),
wbuf_bytes(&w, data, datalen)));
}
}
......@@ -220,17 +220,19 @@ int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesiz
//printf("%d in hash\n", n_in_hash);
for (i=0; i<n_in_this_hash; i++) {
int diff;
int type;
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
verify_counts(result);
type = rbuf_char(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(&rc, &val, &vallen);
//printf("Found %s,%s\n", key, val);
//printf("Found %s,%s\n", (char*)key, (char*)val);
{
int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen); /* Copies the data into the hash table. */
int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen, type); /* Copies the data into the hash table. */
if (r!=0) { goto died_12; }
}
diff = keylen + vallen + KEY_VALUE_OVERHEAD;
diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
result->u.n.n_bytes_in_hashtables += diff;
result->u.n.n_bytes_in_hashtable[cnum] += diff;
//printf("Inserted\n");
......
......@@ -1328,8 +1328,264 @@ void test_brt_cursor() {
}
}
void test_large_kv(int bsize, int ksize, int vsize) {
BRT t;
int r;
CACHETABLE ct;
char fname[]="testbrt.brt";
printf("test_large_kv: %d %d %d\n", bsize, ksize, vsize);
r = brt_create_cachetable(&ct, 0);
assert(r==0);
unlink(fname);
r = open_brt(fname, 0, 1, &t, bsize, ct, default_compare_fun);
assert(r==0);
DBT key, val;
char *k, *v;
k = toku_malloc(ksize); assert(k); memset(k, 0, ksize);
v = toku_malloc(vsize); assert(v); memset(v, 0, vsize);
fill_dbt(&key, k, ksize);
fill_dbt(&val, v, vsize);
r = brt_insert(t, &key, &val, 0);
assert(r == 0);
toku_free(k);
toku_free(v);
r = close_brt(t); assert(r==0);
r = cachetable_close(&ct); assert(r==0);
}
/*
* test the key and value limits
* the current implementation crashes when kvsize == bsize/2 rather than fails
*/
void test_brt_limits() {
int bsize = 1024;
int kvsize = 4;
while (kvsize < bsize/2) {
test_large_kv(bsize, kvsize, kvsize); memory_check_all_free();
kvsize *= 2;
}
}
/*
* verify that a delete on an empty tree fails
*/
void test_brt_delete_empty() {
printf("test_brt_delete_empty\n");
BRT t;
int r;
CACHETABLE ct;
char fname[]="testbrt.brt";
r = brt_create_cachetable(&ct, 0);
assert(r==0);
unlink(fname);
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
assert(r==0);
DBT key;
int k = 1;
fill_dbt(&key, &k, sizeof k);
r = brt_delete(t, &key, 0);
assert(r != 0);
r = close_brt(t); assert(r==0);
r = cachetable_close(&ct); assert(r==0);
}
/*
* insert n keys, delete all n keys, verify that lookups for all the keys fail,
* verify that a cursor walk of the tree finds nothing
*/
void test_brt_delete_present(int n) {
printf("test_brt_delete_present:%d\n", n);
BRT t;
int r;
CACHETABLE ct;
char fname[]="testbrt.brt";
int i;
r = brt_create_cachetable(&ct, 0);
assert(r==0);
unlink(fname);
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
assert(r==0);
DBT key, val;
int k, v;
for (i=0; i<n; i++) {
k = i; v = n + i;
fill_dbt(&key, &k, sizeof k);
fill_dbt(&val, &v, sizeof v);
r = brt_insert(t, &key, &val, 0);
assert(r == 0);
}
for (i=0; i<n; i++) {
k = i;
fill_dbt(&key, &k, sizeof k);
r = brt_delete(t, &key, 0);
assert(r == 0);
}
/* lookups should all fail */
for (i=0; i<n; i++) {
k = i;
fill_dbt(&key, &k, sizeof k);
init_dbt(&val); val.flags = DB_DBT_MALLOC;
r = brt_lookup(t, &key, &val, 0);
assert(r == DB_NOTFOUND);
}
/* cursor should not find anything */
BRT_CURSOR cursor;
r = brt_cursor(t, &cursor);
assert(r == 0);
init_dbt(&key); key.flags = DB_DBT_MALLOC;
init_dbt(&val); val.flags = DB_DBT_MALLOC;
r = brt_c_get(cursor, &key, &val, DB_FIRST);
assert(r != 0);
r = brt_cursor_close(cursor);
assert(r == 0);
r = close_brt(t); assert(r==0);
r = cachetable_close(&ct); assert(r==0);
}
void test_brt_delete_not_present(int n) {
printf("test_brt_delete_not_present:%d\n", n);
BRT t;
int r;
CACHETABLE ct;
char fname[]="testbrt.brt";
int i;
r = brt_create_cachetable(&ct, 0);
assert(r==0);
unlink(fname);
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
assert(r==0);
DBT key, val;
int k, v;
for (i=0; i<n; i++) {
k = i; v = n + i;
fill_dbt(&key, &k, sizeof k);
fill_dbt(&val, &v, sizeof v);
r = brt_insert(t, &key, &val, 0);
assert(r == 0);
}
for (i=0; i<n; i++) {
k = i;
fill_dbt(&key, &k, sizeof k);
r = brt_delete(t, &key, 0);
assert(r == 0);
}
k = n+1;
fill_dbt(&key, &k, sizeof k);
r = brt_delete(t, &key, 0);
printf("brt_delete k=%d %d\n", k, r);
r = close_brt(t); assert(r==0);
r = cachetable_close(&ct); assert(r==0);
}
void test_brt_delete_cursor_first(int n) {
printf("test_brt_delete_cursor_first:%d\n", n);
BRT t;
int r;
CACHETABLE ct;
char fname[]="testbrt.brt";
int i;
r = brt_create_cachetable(&ct, 0);
assert(r==0);
unlink(fname);
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
assert(r==0);
DBT key, val;
int k, v;
for (i=0; i<n; i++) {
k = i; v = ~i;
fill_dbt(&key, &k, sizeof k);
fill_dbt(&val, &v, sizeof v);
r = brt_insert(t, &key, &val, 0);
assert(r == 0);
}
for (i=0; i<n-1; i++) {
k = i;
fill_dbt(&key, &k, sizeof k);
r = brt_delete(t, &key, 0);
assert(r == 0);
}
/* lookups should all fail */
for (i=0; i<n-1; i++) {
k = i;
fill_dbt(&key, &k, sizeof k);
init_dbt(&val); val.flags = DB_DBT_MALLOC;
r = brt_lookup(t, &key, &val, 0);
assert(r == DB_NOTFOUND);
}
/* cursor should find the last key */
BRT_CURSOR cursor;
r = brt_cursor(t, &cursor);
assert(r == 0);
init_dbt(&key); key.flags = DB_DBT_MALLOC;
init_dbt(&val); val.flags = DB_DBT_MALLOC;
r = brt_c_get(cursor, &key, &val, DB_FIRST);
assert(r == 0);
int vv;
assert(val.size == sizeof vv);
memcpy(&vv, val.data, val.size);
assert(vv == ~(n-1));
toku_free(key.data);
toku_free(val.data);
r = brt_cursor_close(cursor);
assert(r == 0);
r = close_brt(t); assert(r==0);
r = cachetable_close(&ct); assert(r==0);
}
void test_brt_delete() {
test_brt_delete_empty(); memory_check_all_free();
test_brt_delete_present(1); memory_check_all_free();
test_brt_delete_present(100); memory_check_all_free();
test_brt_delete_present(500); memory_check_all_free();
test_brt_delete_not_present(1); memory_check_all_free();
test_brt_delete_not_present(100); memory_check_all_free();
test_brt_delete_not_present(500); memory_check_all_free();
test_brt_delete_cursor_first(1); memory_check_all_free();
test_brt_delete_cursor_first(100); memory_check_all_free();
test_brt_delete_cursor_first(500); memory_check_all_free();
}
static void brt_blackbox_test (void) {
test_brt_cursor();
memory_check = 1;
test_wrongendian_compare(0, 2); memory_check_all_free();
test_wrongendian_compare(1, 2); memory_check_all_free();
test_wrongendian_compare(1, 257); memory_check_all_free();
......@@ -1377,7 +1633,11 @@ static void brt_blackbox_test (void) {
// Once upon a time srandom(8) caused this test to fail.
srandom(8); test4(2048, 1<<15, 1);
memory_check = 1;
test_brt_limits();
test_brt_cursor();
test_brt_delete();
// test3(1<<19, 1<<20, 0);
// test4(1<<19, 1<<20, 0);
......
......@@ -277,9 +277,9 @@ static void insert_to_buffer_in_leaf (BRTNODE node, DBT *k, DBT *v, DB *db) {
}
#endif
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v) {
unsigned int n_bytes_added = KEY_VALUE_OVERHEAD + k->size + v->size;
int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size);
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type) {
unsigned int n_bytes_added = BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + k->size + v->size;
int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
if (r!=0) return r;
node->u.n.n_bytes_in_hashtable[childnum] += n_bytes_added;
node->u.n.n_bytes_in_hashtables += n_bytes_added;
......@@ -463,7 +463,7 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0)
break;
}
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD;
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
child_counts[cnum]++;
}));
{
......@@ -504,25 +504,25 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
}
#endif
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *split,
int debug,
DB *db);
int debug);
/* key is not in the hashtable in node. Either put the key-value pair in the child, or put it in the node. */
static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
DBT *k, DBT *v,
int childnum_of_node,
DB *db) {
static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
BRT_CMD *cmd,
int childnum_of_node) {
assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
int to_child=serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize;
if (brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) {
DBT k2;
printf(" nextsplitkey=%s\n", (char*)node->u.n.childkeys[childnum_of_node]);
assert(t->compare_fun(db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0);
assert(t->compare_fun(cmd->u.id.db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0);
} else {
printf("\n");
}
......@@ -532,36 +532,35 @@ static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTN
DBT againk;
init_dbt(&againk);
//printf("%s:%d hello!\n", __FILE__, __LINE__);
int r = brtnode_insert(t, child, k, v,
int r = brtnode_put_cmd(t, child, cmd,
&again_split, &againa, &againb, &againk,
0,
db);
0);
if (r!=0) return r;
assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
return r;
} else {
int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v);
int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v, cmd->type);
return r;
}
}
static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
DBT *k, DBT *v,
static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
BRT_CMD *cmd,
int *child_did_split, BRTNODE *childa, BRTNODE *childb,
DBT *childsplitk,
DB *db) {
DBT *childsplitk) {
//if (debug) printf("%s:%d %*sinserting down\n", __FILE__, __LINE__, debug, "");
//printf("%s:%d hello!\n", __FILE__, __LINE__);
assert(node->height>0);
{
int r = brtnode_insert(t, child, k, v,
int r = brtnode_put_cmd(t, child, cmd,
child_did_split, childa, childb, childsplitk,
0,
db);
0);
if (r!=0) return r;
}
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
//if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split);
{
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); // Must delete after doing the insert, to avoid operating on freed' key
......@@ -569,7 +568,7 @@ static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
if (r!=0) return r;
}
{
int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD);
int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD);
node->u.n.n_bytes_in_hashtables -= n_bytes_removed;
node->u.n.n_bytes_in_hashtable[childnum] -= n_bytes_removed;
}
......@@ -643,17 +642,20 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
node->u.n.n_bytes_in_hashtables -= old_count; /* By default, they are all removed. We might add them back in. */
/* Keep pushing to the children, but not if the children would require a pushdown */
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, ({
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, type, ({
DBT skd, svd;
fill_dbt_ap(&skd, skey, skeylen, app_private);
fill_dbt(&svd, sval, svallen);
BRT_CMD brtcmd;
brtcmd.type = type; brtcmd.u.id.key = &skd; brtcmd.u.id.val = &svd; brtcmd.u.id.db = db;
if (t->compare_fun(db, &skd, childsplitk)<=0) {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &skd, &svd, childnum, db);
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &brtcmd, childnum);
} else {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &skd, &svd, childnum+1, db);
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &brtcmd, childnum+1);
}
if (r!=0) return r;
}));
toku_hashtable_free(&old_h);
r=cachetable_unpin(t->cf, childa->thisnodename, 1);
......@@ -687,7 +689,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
return 0;
}
static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
static int push_some_brt_cmds_down (BRT t, BRTNODE node, int childnum,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *splitk,
int debug,
......@@ -706,7 +708,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
verify_counts(child);
//printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]);
if (child->height>0 && child->u.n.n_children>0) assert(child->u.n.children[child->u.n.n_children-1]!=0);
if (debug) printf("%s:%d %*spush_some_kvpairs_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
if (debug) printf("%s:%d %*spush_some_brt_cmds_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
/* I am exposing the internals of the hash table here, mostly because I am not thinking of a really
* good way to do it otherwise. I want to loop over the elements of the hash table, deleting some as I
* go. The HASHTABLE_ITERATE macro will break if I delete something from the hash table. */
......@@ -722,30 +724,39 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
long int randomnumber = random();
//printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, node->u.n.n_bytes_in_hashtable[childnum]);
assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &randomnumber)) {
int type;
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &type, &randomnumber)) {
int child_did_split=0; BRTNODE childa, childb;
DBT hk,hv;
DBT childsplitk;
BRT_CMD brtcmd;
fill_dbt_ap(&hk, key, keylen, app_private);
fill_dbt(&hv, val, vallen);
brtcmd.type = type;
brtcmd.u.id.key = &hk;
brtcmd.u.id.val = &hv;
brtcmd.u.id.db = db;
//printf("%s:%d random_picked\n", __FILE__, __LINE__);
init_dbt(&childsplitk);
childsplitk.app_private = splitk->app_private;
if (debug) printf("%s:%d %*spush down %s\n", __FILE__, __LINE__, debug, "", (char*)key);
r = push_a_kvpair_down (t, node, child, childnum,
fill_dbt_ap(&hk, key, keylen, app_private), fill_dbt(&hv, val, vallen),
r = push_a_brt_cmd_down (t, node, child, childnum,
&brtcmd,
&child_did_split, &childa, &childb,
&childsplitk,
db);
&childsplitk);
if (0){
unsigned int sum=0;
HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl,
sum+=hkl+hdl+KEY_VALUE_OVERHEAD);
HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, type __attribute__((__unused__)),
sum+=hkl+hdl+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD);
printf("%s:%d sum=%d\n", __FILE__, __LINE__, sum);
assert(sum==node->u.n.n_bytes_in_hashtable[childnum]);
}
if (node->u.n.n_bytes_in_hashtable[childnum]>0) assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
//printf("%s:%d %d=push_a_kvpair_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
//printf("%s:%d %d=push_a_brt_cmd_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
if (r!=0) return r;
if (child_did_split) {
// If the child splits, we don't push down any further.
......@@ -759,7 +770,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
}
if (0) printf("%s:%d done random picking\n", __FILE__, __LINE__);
}
if (debug) printf("%s:%d %*sdone push_some_kvpairs_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
if (debug) printf("%s:%d %*sdone push_some_brt_cmds_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
r=cachetable_unpin(t->cf, targetchild, 1);
if (r!=0) return r;
*did_split=0;
......@@ -787,10 +798,10 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
find_heaviest_child(node, &childnum);
if (0) printf("%s:%d %*spush some down from %lld into %lld (child %d)\n", __FILE__, __LINE__, debug, "", node->thisnodename, node->u.n.children[childnum], childnum);
assert(node->u.n.children[childnum]!=0);
int r = push_some_kvpairs_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db);
int r = push_some_brt_cmds_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db);
if (r!=0) return r;
assert(*did_split==0 || *did_split==1);
if (debug) printf("%s:%d %*sdid push_some_kvpairs_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);
if (debug) printf("%s:%d %*sdid push_some_brt_cmds_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);
if (*did_split) {
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
......@@ -811,45 +822,69 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
#define INSERT_ALL_AT_ONCE
static int brt_leaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
static int brt_leaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
int debug,
DB *db) {
int debug) {
if (cmd->type == BRT_INSERT) {
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
DB *db = cmd->u.id.db;
#ifdef INSERT_ALL_AT_ONCE
int replaced_v_size;
enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size);
assert(pma_status==BRT_OK);
//printf("replaced_v_size=%d\n", replaced_v_size);
if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
}
int replaced_v_size;
enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size);
assert(pma_status==BRT_OK);
//printf("replaced_v_size=%d\n", replaced_v_size);
if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
}
#else
DBT v2;
enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db);
if (pma_status==BRT_OK) {
pma_status = pma_delete(node->u.l.buffer, k, db);
assert(pma_status==BRT_OK);
node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD;
}
pma_status = pma_insert(node->u.l.buffer, k, v, db);
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
DBT v2;
enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db);
if (pma_status==BRT_OK) {
pma_status = pma_delete(node->u.l.buffer, k, db);
assert(pma_status==BRT_OK);
node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD;
}
pma_status = pma_insert(node->u.l.buffer, k, v, db);
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
#endif
// If it doesn't fit, then split the leaf.
if (serialize_brtnode_size(node) > node->nodesize) {
int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db);
if (r!=0) return r;
//printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
split_count++;
*did_split = 1;
verify_counts(*nodea); verify_counts(*nodeb);
if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
} else {
*did_split = 0;
// If it doesn't fit, then split the leaf.
if (serialize_brtnode_size(node) > node->nodesize) {
int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db);
if (r!=0) return r;
//printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
split_count++;
*did_split = 1;
verify_counts(*nodea); verify_counts(*nodeb);
if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
} else {
*did_split = 0;
}
return 0;
}
if (cmd->type == BRT_DELETE) {
int r;
DBT val;
/* TODO combine lookup and delete */
init_dbt(&val);
r = pma_lookup(node->u.l.buffer, cmd->u.id.key, &val, cmd->u.id.db);
if (r == 0) {
r = pma_delete(node->u.l.buffer, cmd->u.id.key, cmd->u.id.db);
assert(r == BRT_OK);
node->u.l.n_bytes_in_buffer -= cmd->u.id.key->size + val.size + KEY_VALUE_OVERHEAD;
}
*did_split = 0;
return r;
}
/* unknown message */
assert(0);
return 0;
}
......@@ -866,16 +901,20 @@ static unsigned int brtnode_which_child (BRTNODE node , DBT *k, BRT t, DB *db) {
}
static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
static int brt_nonleaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
DBT *splitk,
int debug,
DB *db) {
int debug) {
bytevec olddata;
ITEMLEN olddatalen;
unsigned int childnum;
int found;
int type = cmd->type;
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
DB *db = cmd->u.id.db;
childnum = brtnode_which_child(node, k, t, db);
/* non-buffering mode when cursors are open on this child */
......@@ -895,8 +934,9 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
assert(r == 0);
child = child_v;
r = brtnode_insert(t, child, k, v,
&child_did_split, &childa, &childb, &childsplitk, 0, db);
child_did_split = 0;
r = brtnode_put_cmd(t, child, cmd,
&child_did_split, &childa, &childb, &childsplitk, 0);
assert(r == 0);
if (child_did_split) {
if (0) printf("brt_nonleaf_insert child_split %p\n", child);
......@@ -913,7 +953,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
return r;
}
found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen);
found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen, &type);
if (0) { // It is faster to do this, except on yobiduck where things grind to a halt.
void *child_v;
......@@ -922,7 +962,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
/* If the child is in memory, then go ahead and put it in the child. */
BRTNODE child = child_v;
if (found) {
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD;
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff;
......@@ -932,8 +972,8 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
int child_did_split;
BRTNODE childa, childb;
DBT childsplitk;
int r = brtnode_insert(t, child, k, v,
&child_did_split, &childa, &childb, &childsplitk, 0, db);
int r = brtnode_put_cmd(t, child, cmd,
&child_did_split, &childa, &childb, &childsplitk, 0);
if (r!=0) return r;
if (child_did_split) {
r=handle_split_of_child(t, node, childnum,
......@@ -954,19 +994,18 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
verify_counts(node);
if (found) {
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD;
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff;
node->u.n.n_bytes_in_hashtable[childnum] -= diff;
//printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff);
}
{
int diff = k->size + v->size + KEY_VALUE_OVERHEAD;
int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size);
int diff = k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
assert(r==0);
node->u.n.n_bytes_in_hashtables += diff;
node->u.n.n_bytes_in_hashtable[childnum] += diff;
}
if (debug) printf("%s:%d %*sDoing maybe_push_down\n", __FILE__, __LINE__, debug, "");
int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitk, debugp1(debug), k->app_private, db);
......@@ -989,20 +1028,17 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
}
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
int debug,
DB *db) {
int debug) {
if (node->height==0) {
return brt_leaf_insert(t, node, k, v,
return brt_leaf_insertm(t, node, cmd,
did_split, nodea, nodeb, splitk,
debug,
db);
debug);
} else {
return brt_nonleaf_insert(t, node, k, v,
return brt_nonleaf_insertm(t, node, cmd,
did_split, nodea, nodeb, splitk,
debug,
db);
debug);
}
}
......@@ -1218,10 +1254,11 @@ int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKE
return 0;
}
int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
int brt_root_put_cmd(BRT brt, BRT_CMD *cmd) {
void *node_v;
BRTNODE node;
CACHEKEY *rootp;
int result;
int r;
int did_split; BRTNODE nodea=0, nodeb=0;
DBT splitk;
......@@ -1239,10 +1276,10 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
}
node=node_v;
if (debug) printf("%s:%d node inserting\n", __FILE__, __LINE__);
r = brtnode_insert(brt, node, k, v,
did_split = 0;
result = brtnode_put_cmd(brt, node, cmd,
&did_split, &nodea, &nodeb, &splitk,
debug, db);
if (r!=0) return r;
debug);
if (debug) printf("%s:%d did_insert\n", __FILE__, __LINE__);
if (did_split) {
//printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize);
......@@ -1252,65 +1289,76 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
}
if (did_split) {
r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp);
if (r != 0)
return r;
assert(r == 0);
} else {
if (node->height>0)
assert(node->u.n.n_children<=TREE_FANOUT);
}
cachetable_unpin(brt->cf, *rootp, 1);
if ((r = unpin_brt_header(brt))!=0) return r;
r = unpin_brt_header(brt);
assert(r == 0);
//assert(0==cachetable_assert_all_unpinned(brt->cachetable));
return 0;
return result;
}
int brt_insert (BRT brt, DBT *key, DBT *val, DB* db) {
int r;
BRT_CMD brtcmd;
brtcmd.type = BRT_INSERT;
brtcmd.u.id.key = key;
brtcmd.u.id.val = val;
brtcmd.u.id.db = db;
r = brt_root_put_cmd(brt, &brtcmd);
return r;
}
int brt_lookup_node (BRT brt, diskoff off, DBT *k, DBT *v, DB *db) {
int result;
void *node_v;
int r = cachetable_get_and_pin(brt->cf, off, &node_v,
brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
BRTNODE node;
int childnum;
if (r!=0) {
int r2;
died0:
// printf("%s:%d r=%d\n", __FILE__, __LINE__, r);
r2 = cachetable_unpin(brt->cf, off, 0);
return r;
}
if (r!=0)
return r;
node=node_v;
// Leaves have a single mdict, where the data is found.
if (node->height==0) {
r = pma_lookup(node->u.l.buffer, k, v, db);
result = pma_lookup(node->u.l.buffer, k, v, db);
//printf("%s:%d looked up something, got answerlen=%d\n", __FILE__, __LINE__, answerlen);
if (r!=0) goto died0;
r = cachetable_unpin(brt->cf, off, 0);
return r;
assert(r == 0);
return result;
}
childnum = brtnode_which_child(node, k, brt, db);
// Leaves have a single mdict, where the data is found.
{
bytevec hanswer;
ITEMLEN hanswerlen;
if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen)==0) {
//printf("Found %d bytes\n", *vallen);
ybt_set_value(v, hanswer, hanswerlen, &brt->sval);
//printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data);
r = cachetable_unpin(brt->cf, off, 0);
assert(r==0);
return 0;
int type;
if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen, &type)==0) {
if (type == BRT_INSERT) {
//printf("Found %d bytes\n", *vallen);
ybt_set_value(v, hanswer, hanswerlen, &brt->sval);
//printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data);
result = 0;
} else if (type == BRT_DELETE) {
result = DB_NOTFOUND;
} else
assert(0);
r = cachetable_unpin(brt->cf, off, 0);
assert(r == 0);
return result;
}
}
if (node->height==0) {
r = cachetable_unpin(brt->cf, off, 0);
if (r==0) return DB_NOTFOUND;
else return r;
}
{
int result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
r = cachetable_unpin(brt->cf, off, 0);
if (r!=0) return r;
return result;
}
result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
r = cachetable_unpin(brt->cf, off, 0);
assert(r == 0);
return result;
}
......@@ -1336,6 +1384,22 @@ int brt_lookup (BRT brt, DBT *k, DBT *v, DB *db) {
return 0;
}
int brt_delete(BRT brt, DBT *key, DB *db) {
int r;
BRT_CMD brtcmd;
DBT val;
init_dbt(&val);
val.size = 0;
brtcmd.type = BRT_DELETE;
brtcmd.u.id.key = key;
brtcmd.u.id.val = &val;
brtcmd.u.id.db = db;
r = brt_root_put_cmd(brt, &brtcmd);
return r;
}
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse);
int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen) {
......@@ -1356,9 +1420,9 @@ int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lole
int i;
for (i=0; i< node->u.n.n_children-1; i++) {
printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, toku_hashtable_n_entries(node->u.n.htables[i]));
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
({
printf("%*s %s %s\n", depth+2, "", (char*)key, (char*)data);
printf("%*s %s %s %d\n", depth+2, "", (char*)key, (char*)data, type);
assert(strlen((char*)key)+1==keylen);
assert(strlen((char*)data)+1==datalen);
}));
......@@ -1468,7 +1532,9 @@ int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, byteve
}
{
void verify_pair (bytevec key, unsigned int keylen,
bytevec data __attribute__((__unused__)), unsigned int datalen __attribute__((__unused__)),
bytevec data __attribute__((__unused__)),
unsigned int datalen __attribute__((__unused__)),
int type __attribute__((__unused__)),
void *ignore __attribute__((__unused__))) {
if (thislorange) assert(keycompare(thislorange,thislolen,key,keylen)<0);
if (thishirange && keycompare(key,keylen,thishirange,thishilen)>0) {
......@@ -1525,11 +1591,6 @@ void brt_flush (BRT brt) {
}
#endif
int brtnode_flush_child (BRT brt, BRTNODE node, int cnum) {
brt=brt; node=node; cnum=cnum;
abort(); /* Algorithm: For each key in the cnum'th mdict, insert it to the childnode. It may cause a split. */
}
int brt_flush_debug = 0;
/*
......@@ -1550,7 +1611,7 @@ void brt_flush_child(BRT t, BRTNODE node, int childnum, BRT_CURSOR cursor) {
}
init_dbt(&child_splitk);
r = push_some_kvpairs_down(t, node, childnum,
r = push_some_brt_cmds_down(t, node, childnum,
&child_did_split, &childa, &childb, &child_splitk, brt_flush_debug, 0, 0);
assert(r == 0);
if (brt_flush_debug) {
......
......@@ -14,6 +14,7 @@ int open_brt (const char *fname, const char *dbname, int is_create, BRT *, int n
//int brt_open (BRT *, char *fname, char *dbname);
int brt_insert (BRT brt, DBT *k, DBT *v, DB*db);
int brt_lookup (BRT brt, DBT *k, DBT *v, DB*db);
int brt_delete (BRT brt, DBT *k, DB *db);
int close_brt (BRT);
int dump_brt (BRT brt);
void brt_fsync (BRT); /* fsync, but don't clear the caches. */
......
......@@ -2,7 +2,7 @@
#include "hashtable.h"
#include "memory.h"
#include "primes.h"
#include "../include/db.h"
// #include "../include/ydb-constants.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
......@@ -41,7 +41,7 @@ static void hash_find_internal (HASHTABLE tab, unsigned int hash, const unsigned
*hashelt = 0;
}
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen) {
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen, int *type) {
HASHELT he, *prev_ptr;
hash_find_internal(tab, hash_key (key, keylen), key, keylen, &he, &prev_ptr);
if (he==0) {
......@@ -49,6 +49,7 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I
} else {
*data = &he->keyval[he->keylen];
*datalen = he->vallen;
*type = he->type;
return 0;
}
}
......@@ -82,7 +83,7 @@ int toku_hash_rehash_everything (HASHTABLE tab, unsigned int primeindexdelta) {
return 0;
}
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen)
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen, int type)
{
unsigned int hk = hash_key (key,keylen);
unsigned int h = hk%tab->arraysize;
......@@ -97,6 +98,7 @@ int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void
/* Otherwise the key is not already present, so we need to add it. */
HASHELT he=toku_malloc(sizeof(*he)+keylen+vallen);
assert(he); // ?????
he->type = type;
he->keylen = keylen;
he->vallen = vallen;
memmove(&he->keyval[0], key, keylen);
......@@ -134,7 +136,7 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) {
}
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber) {
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber) {
unsigned int i;
unsigned int usei = (*randomnumber)%h->arraysize;
for (i=0; i<h->arraysize; i++, usei++) {
......@@ -145,6 +147,7 @@ int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytev
*keylen = he->keylen;
*data = &he->keyval[he->keylen];
*datalen = he->vallen;
*type = he->type;
*randomnumber = usei;
return 0;
}
......@@ -177,7 +180,7 @@ int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *dat
}
#endif
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, void*args), void* args) {
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, int type, void*args), void* args) {
/*
int i;
for (i=0; i<tab->arraysize; i++) {
......@@ -187,7 +190,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen
}
}
*/
HASHTABLE_ITERATE(tab, key, keylen, val, vallen, f(key,keylen,val,vallen,args));
HASHTABLE_ITERATE(tab, key, keylen, val, vallen, type, f(key,keylen,val,vallen,type,args));
}
int toku_hashtable_n_entries(HASHTABLE tab) {
......
......@@ -12,10 +12,10 @@ int toku_hashtable_create (HASHTABLE*);
/* Return 0 if the key is found in the hashtable, -1 otherwise. */
/* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen);
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen, int *type);
/* Replace the key if it was already there. */
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen);
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type);
/* It is OK to delete something that isn't there. */
int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen);
......@@ -24,15 +24,16 @@ int toku_hashtable_n_entries(HASHTABLE);
void toku_hashtable_clear(HASHTABLE);
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber);
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber);
//int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
typedef struct hashelt *HASHELT;
struct hashelt {
ITEMLEN keylen;
ITEMLEN vallen;
unsigned int hash;
HASHELT next;
int type;
ITEMLEN keylen;
ITEMLEN vallen;
char keyval[]; /* the first KEYLEN bytes are the key. The next bytes are the value. */
};
......@@ -44,9 +45,9 @@ struct hashtable {
};
/* You cannot add or delete elements from the hashtable while iterating. */
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*);
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void*);
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,typevar,body) ({ \
unsigned int hi_counter; \
for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \
HASHELT hi_he; \
......@@ -55,6 +56,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,
ITEMLEN keylenvar = hi_he->keylen; \
const char *datavar = &hi_he->keyval[hi_he->keylen]; \
ITEMLEN datalenvar = hi_he->vallen; \
int typevar = hi_he->type; \
body; \
}}})
......
......@@ -28,7 +28,7 @@ void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
fprintf(stderr, "%s isn't there\n", kv); abort();
}
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, int type,
int N, int *data, char *saw) {
char *kv = (char*)kv_v;
char *dv = (char*)dv_v;
......@@ -38,6 +38,7 @@ void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
assert(strcmp(kv+1, dv+1)==0);
assert(strlen(kv)+1==kl);
assert(strlen(dv)+1==dl);
assert(type == 0);
num = atoi(kv+1);
for (k=0; k<N; k++) {
if (data[k]==num) {
......@@ -54,8 +55,8 @@ void verify_htable (HASHTABLE htable, int N, int *data, char *saw) {
for (j=0; j<N; j++) {
saw[j]=0;
}
HASHTABLE_ITERATE(htable, kv, kl, dv, dl,
verify_htable_instance (kv, kl, dv, dl,
HASHTABLE_ITERATE(htable, kv, kl, dv, dl, type,
verify_htable_instance (kv, kl, dv, dl, type,
N, data, saw));
for (j=0; j<N; j++) {
assert(saw[j]);
......@@ -99,7 +100,7 @@ void test0 (void) {
}
snprintf(kv, 99, "k%d", ra);
snprintf(dv, 99, "d%d", ra);
toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1);
toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1, 0);
data[data_n++]=ra;
}
} else {
......@@ -122,13 +123,14 @@ void test1(void) {
for (j=0; j<4; j++) {
snprintf(keys[j], 100, "k%ld", (long)(random()));
snprintf(vals[j], 100, "v%d", j);
toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1);
toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1, 0);
}
for (j=0; j<4; j++) {
bytevec key, val;
ITEMLEN keylen, vallen;
int type;
long int randnum=random();
r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &randnum);
r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &type, &randnum);
assert(r==0);
r = toku_hash_delete(table, key, keylen);
assert(r==0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment