Commit accc92e4 authored by Bradley C. Kuszmaul

Add some information needed for recovery. Changed the size of the...

Add some information needed for recovery.  Changed the size of the serialization, and caused a new cursor bug to show up.  Addresses #27.


git-svn-id: file:///svn/tokudb@927 c7de825b-a66e-492c-adef-691d508d4ae1
parent 3ca8f1e0
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#endif #endif
enum { TREE_FANOUT = BRT_FANOUT }; enum { TREE_FANOUT = BRT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { PMA_ITEM_OVERHEAD = 4 };
enum { BRT_CMD_OVERHEAD = 1 }; enum { BRT_CMD_OVERHEAD = 1 };
enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 }; enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 };
...@@ -59,7 +60,7 @@ struct brtnode { ...@@ -59,7 +60,7 @@ struct brtnode {
} n; } n;
struct leaf { struct leaf {
PMA buffer; PMA buffer;
unsigned int n_bytes_in_buffer; unsigned int n_bytes_in_buffer; /* How many bytes to represent the PMA (including the per-key overheads, but not including the overheads for the node. */
} l; } l;
} u; } u;
}; };
......
...@@ -61,8 +61,9 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) { ...@@ -61,8 +61,9 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) {
PMA_ITERATE(node->u.l.buffer, PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen, key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen, data __attribute__((__unused__)), datalen,
(hsize+=KEY_VALUE_OVERHEAD+keylen+datalen)); (hsize+=PMA_ITEM_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer); assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* the PMA size */
hsize+=4; /* add n entries in buffer table. */ hsize+=4; /* add n entries in buffer table. */
return size+hsize; return size+hsize;
} }
...@@ -80,7 +81,8 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) { ...@@ -80,7 +81,8 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */ result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
result+=node->u.n.n_bytes_in_hashtables; result+=node->u.n.n_bytes_in_hashtables;
} else { } else {
result+=4; /* n_entries in buffer table. */ result+=(4 /* n_entries in buffer table. */
+4); /* the pma size */
result+=node->u.l.n_bytes_in_buffer; result+=node->u.l.n_bytes_in_buffer;
if (toku_memory_check) { if (toku_memory_check) {
unsigned int slowresult = toku_serialize_brtnode_size_slow(node); unsigned int slowresult = toku_serialize_brtnode_size_slow(node);
...@@ -166,9 +168,14 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node) ...@@ -166,9 +168,14 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
} else { } else {
//printf(" n_entries=%d\n", toku_pma_n_entries(node->u.l.buffer)); //printf(" n_entries=%d\n", toku_pma_n_entries(node->u.l.buffer));
wbuf_int(&w, toku_pma_n_entries(node->u.l.buffer)); wbuf_int(&w, toku_pma_n_entries(node->u.l.buffer));
PMA_ITERATE(node->u.l.buffer, key, keylen, data, datalen, wbuf_int(&w, toku_pma_index_limit(node->u.l.buffer));
(wbuf_bytes(&w, key, keylen), PMA_ITERATE_IDX(node->u.l.buffer, idx,
wbuf_bytes(&w, data, datalen))); key, keylen, data, datalen,
({
wbuf_int(&w, idx);
wbuf_bytes(&w, key, keylen);
wbuf_bytes(&w, data, datalen);
}));
} }
assert(w.ndone<=w.size); assert(w.ndone<=w.size);
#ifdef CRC_ATEND #ifdef CRC_ATEND
...@@ -377,16 +384,17 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl ...@@ -377,16 +384,17 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
#if BRT_USE_PMA_BULK_INSERT #if BRT_USE_PMA_BULK_INSERT
{ {
DBT keys[n_in_buf], vals[n_in_buf]; DBT keys[n_in_buf], vals[n_in_buf];
int index_limit __attribute__((__unused__))= rbuf_int(&rc);
for (i=0; i<n_in_buf; i++) { for (i=0; i<n_in_buf; i++) {
bytevec key; ITEMLEN keylen; bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen; bytevec val; ITEMLEN vallen;
toku_verify_counts(result); toku_verify_counts(result);
int idx __attribute__((__unused__)) = rbuf_int(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */ rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
toku_fill_dbt(&keys[i], key, keylen); toku_fill_dbt(&keys[i], key, keylen);
rbuf_bytes(&rc, &val, &vallen); rbuf_bytes(&rc, &val, &vallen);
toku_fill_dbt(&vals[i], val, vallen); toku_fill_dbt(&vals[i], val, vallen);
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD; result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
} }
if (n_in_buf > 0) { if (n_in_buf > 0) {
u_int32_t actual_sum = 0; u_int32_t actual_sum = 0;
...@@ -413,7 +421,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl ...@@ -413,7 +421,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
r = toku_pma_insert(result->u.l.buffer, toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen), 0); r = toku_pma_insert(result->u.l.buffer, toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen), 0);
if (r!=0) goto died_21; if (r!=0) goto died_21;
} }
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD; result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
} }
#endif #endif
} }
......
...@@ -565,7 +565,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT ...@@ -565,7 +565,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
assert(node->height>0); /* Not a leaf. */ assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key; DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val; DBT *v = cmd->u.id.val;
int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize; int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD <= child->nodesize;
if (toku_brt_debug_mode) { if (toku_brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node); printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) { if (childnum_of_node+1<node->u.n.n_children) {
...@@ -944,7 +944,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd, ...@@ -944,7 +944,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
if (replaced_v_size>=0) { if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size; node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else { } else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD; node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
} }
node->dirty = 1; node->dirty = 1;
...@@ -2912,7 +2912,7 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags __attribute__((__unused_ ...@@ -2912,7 +2912,7 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags __attribute__((__unused_
int kvsize; int kvsize;
r = toku_pma_cursor_delete_under(cursor->pmacurs, &kvsize); r = toku_pma_cursor_delete_under(cursor->pmacurs, &kvsize);
if (r == 0) { if (r == 0) {
node->u.l.n_bytes_in_buffer -= KEY_VALUE_OVERHEAD + kvsize; node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kvsize;
node->dirty = 1; node->dirty = 1;
} }
} else } else
......
...@@ -1083,7 +1083,7 @@ static int pma_delete_dup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *finge ...@@ -1083,7 +1083,7 @@ static int pma_delete_dup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *finge
struct kv_pair *kv = pma->pairs[righthere]; struct kv_pair *kv = pma->pairs[righthere];
if (kv_pair_valid(kv)) { if (kv_pair_valid(kv)) {
/* mark the pair as deleted */ /* mark the pair as deleted */
*deleted_size += KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv); *deleted_size += PMA_ITEM_OVERHEAD+ KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv)); *fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[righthere] = kv_pair_set_deleted(kv); pma->pairs[righthere] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, righthere) == 0) { if (__pma_count_cursor_refs(pma, righthere) == 0) {
...@@ -1109,7 +1109,7 @@ static int pma_delete_nodup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *fin ...@@ -1109,7 +1109,7 @@ static int pma_delete_nodup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *fin
if (0) printf("%s:%d l=%d r=%d\n", __FILE__, __LINE__, idx, DB_NOTFOUND); if (0) printf("%s:%d l=%d r=%d\n", __FILE__, __LINE__, idx, DB_NOTFOUND);
return DB_NOTFOUND; return DB_NOTFOUND;
} }
*deleted_size = KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv); *deleted_size = PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv)); *fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[idx] = kv_pair_set_deleted(kv); pma->pairs[idx] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, idx) == 0) if (__pma_count_cursor_refs(pma, idx) == 0)
...@@ -1430,14 +1430,14 @@ int toku_pma_split(TOKUTXN txn, FILENUM filenum, ...@@ -1430,14 +1430,14 @@ int toku_pma_split(TOKUTXN txn, FILENUM filenum,
/* debug check the kv length sum */ /* debug check the kv length sum */
sumlen = 0; sumlen = 0;
for (i=0; i<npairs; i++) for (i=0; i<npairs; i++)
sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD; sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
if (origpma_size) if (origpma_size)
assert(*(int *)origpma_size == sumlen); assert(*(int *)origpma_size == sumlen);
runlen = 0; runlen = 0;
for (i=0; i<npairs;) { for (i=0; i<npairs;) {
runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD; runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
i++; i++;
if (2*runlen >= sumlen) if (2*runlen >= sumlen)
break; break;
......
...@@ -122,7 +122,7 @@ int toku_pma_cursor_delete_under(PMA_CURSOR c, int *kvsize); ...@@ -122,7 +122,7 @@ int toku_pma_cursor_delete_under(PMA_CURSOR c, int *kvsize);
int toku_pma_random_pick(PMA, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen); int toku_pma_random_pick(PMA, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
int toku_pma_index_limit(PMA); int toku_pma_index_limit(PMA); // How many slots are in the PMA right now?
int toku_pmanode_valid(PMA,int); int toku_pmanode_valid(PMA,int);
bytevec toku_pmanode_key(PMA,int); bytevec toku_pmanode_key(PMA,int);
ITEMLEN toku_pmanode_keylen(PMA,int); ITEMLEN toku_pmanode_keylen(PMA,int);
...@@ -131,16 +131,18 @@ ITEMLEN toku_pmanode_vallen(PMA,int); ...@@ -131,16 +131,18 @@ ITEMLEN toku_pmanode_vallen(PMA,int);
void toku_pma_iterate (PMA, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*); void toku_pma_iterate (PMA, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*);
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \ #define PMA_ITERATE_IDX(table,idx,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int __i; \ int idx; \
for (__i=0; __i<toku_pma_index_limit(table); __i++) { \ for (idx=0; idx<toku_pma_index_limit(table); idx++) { \
if (toku_pmanode_valid(table,__i)) { \ if (toku_pmanode_valid(table,idx)) { \
bytevec keyvar = toku_pmanode_key(table,__i); \ bytevec keyvar = toku_pmanode_key(table,idx); \
ITEMLEN keylenvar = toku_pmanode_keylen(table,__i); \ ITEMLEN keylenvar = toku_pmanode_keylen(table,idx); \
bytevec datavar = toku_pmanode_val(table, __i); \ bytevec datavar = toku_pmanode_val(table, idx); \
ITEMLEN datalenvar = toku_pmanode_vallen(table, __i); \ ITEMLEN datalenvar = toku_pmanode_vallen(table, idx); \
body; \ body; \
} } }) } } })
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) PMA_ITERATE_IDX(table, __i, keyvar, keylenvar, datavar, datalenvar, body)
void toku_pma_verify_fingerprint (PMA pma, u_int32_t rand4fingerprint, u_int32_t fingerprint); void toku_pma_verify_fingerprint (PMA pma, u_int32_t rand4fingerprint, u_int32_t fingerprint);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment