Commit accc92e4 authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Add some information needed for recovery. Changed the size of the...

Add some information needed for recovery.  Changed the size of the serialization, and caused a new cursor bug to show up.  Addresses #27.


git-svn-id: file:///svn/tokudb@927 c7de825b-a66e-492c-adef-691d508d4ae1
parent 3ca8f1e0
......@@ -14,6 +14,7 @@
#endif
enum { TREE_FANOUT = BRT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { PMA_ITEM_OVERHEAD = 4 };
enum { BRT_CMD_OVERHEAD = 1 };
enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 };
......@@ -59,7 +60,7 @@ struct brtnode {
} n;
struct leaf {
PMA buffer;
unsigned int n_bytes_in_buffer;
unsigned int n_bytes_in_buffer; /* How many bytes to represent the PMA (including the per-key overheads, but not including the overheads for the node. */
} l;
} u;
};
......
......@@ -61,8 +61,9 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) {
PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=KEY_VALUE_OVERHEAD+keylen+datalen));
(hsize+=PMA_ITEM_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* the PMA size */
hsize+=4; /* add n entries in buffer table. */
return size+hsize;
}
......@@ -80,7 +81,8 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
result+=node->u.n.n_bytes_in_hashtables;
} else {
result+=4; /* n_entries in buffer table. */
result+=(4 /* n_entries in buffer table. */
+4); /* the pma size */
result+=node->u.l.n_bytes_in_buffer;
if (toku_memory_check) {
unsigned int slowresult = toku_serialize_brtnode_size_slow(node);
......@@ -166,9 +168,14 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
} else {
//printf(" n_entries=%d\n", toku_pma_n_entries(node->u.l.buffer));
wbuf_int(&w, toku_pma_n_entries(node->u.l.buffer));
PMA_ITERATE(node->u.l.buffer, key, keylen, data, datalen,
(wbuf_bytes(&w, key, keylen),
wbuf_bytes(&w, data, datalen)));
wbuf_int(&w, toku_pma_index_limit(node->u.l.buffer));
PMA_ITERATE_IDX(node->u.l.buffer, idx,
key, keylen, data, datalen,
({
wbuf_int(&w, idx);
wbuf_bytes(&w, key, keylen);
wbuf_bytes(&w, data, datalen);
}));
}
assert(w.ndone<=w.size);
#ifdef CRC_ATEND
......@@ -377,16 +384,17 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
#if BRT_USE_PMA_BULK_INSERT
{
DBT keys[n_in_buf], vals[n_in_buf];
int index_limit __attribute__((__unused__))= rbuf_int(&rc);
for (i=0; i<n_in_buf; i++) {
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
toku_verify_counts(result);
int idx __attribute__((__unused__)) = rbuf_int(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
toku_fill_dbt(&keys[i], key, keylen);
rbuf_bytes(&rc, &val, &vallen);
toku_fill_dbt(&vals[i], val, vallen);
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
if (n_in_buf > 0) {
u_int32_t actual_sum = 0;
......@@ -413,7 +421,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
r = toku_pma_insert(result->u.l.buffer, toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen), 0);
if (r!=0) goto died_21;
}
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
#endif
}
......
......@@ -565,7 +565,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize;
int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD <= child->nodesize;
if (toku_brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) {
......@@ -944,7 +944,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
node->dirty = 1;
......@@ -2912,7 +2912,7 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags __attribute__((__unused_
int kvsize;
r = toku_pma_cursor_delete_under(cursor->pmacurs, &kvsize);
if (r == 0) {
node->u.l.n_bytes_in_buffer -= KEY_VALUE_OVERHEAD + kvsize;
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kvsize;
node->dirty = 1;
}
} else
......
......@@ -1083,7 +1083,7 @@ static int pma_delete_dup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *finge
struct kv_pair *kv = pma->pairs[righthere];
if (kv_pair_valid(kv)) {
/* mark the pair as deleted */
*deleted_size += KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*deleted_size += PMA_ITEM_OVERHEAD+ KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[righthere] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, righthere) == 0) {
......@@ -1109,7 +1109,7 @@ static int pma_delete_nodup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *fin
if (0) printf("%s:%d l=%d r=%d\n", __FILE__, __LINE__, idx, DB_NOTFOUND);
return DB_NOTFOUND;
}
*deleted_size = KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*deleted_size = PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[idx] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, idx) == 0)
......@@ -1430,14 +1430,14 @@ int toku_pma_split(TOKUTXN txn, FILENUM filenum,
/* debug check the kv length sum */
sumlen = 0;
for (i=0; i<npairs; i++)
sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD;
sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
if (origpma_size)
assert(*(int *)origpma_size == sumlen);
runlen = 0;
for (i=0; i<npairs;) {
runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD;
runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
i++;
if (2*runlen >= sumlen)
break;
......
......@@ -122,7 +122,7 @@ int toku_pma_cursor_delete_under(PMA_CURSOR c, int *kvsize);
int toku_pma_random_pick(PMA, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
int toku_pma_index_limit(PMA);
int toku_pma_index_limit(PMA); // How many slots are in the PMA right now?
int toku_pmanode_valid(PMA,int);
bytevec toku_pmanode_key(PMA,int);
ITEMLEN toku_pmanode_keylen(PMA,int);
......@@ -131,16 +131,18 @@ ITEMLEN toku_pmanode_vallen(PMA,int);
void toku_pma_iterate (PMA, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*);
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int __i; \
for (__i=0; __i<toku_pma_index_limit(table); __i++) { \
if (toku_pmanode_valid(table,__i)) { \
bytevec keyvar = toku_pmanode_key(table,__i); \
ITEMLEN keylenvar = toku_pmanode_keylen(table,__i); \
bytevec datavar = toku_pmanode_val(table, __i); \
ITEMLEN datalenvar = toku_pmanode_vallen(table, __i); \
#define PMA_ITERATE_IDX(table,idx,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int idx; \
for (idx=0; idx<toku_pma_index_limit(table); idx++) { \
if (toku_pmanode_valid(table,idx)) { \
bytevec keyvar = toku_pmanode_key(table,idx); \
ITEMLEN keylenvar = toku_pmanode_keylen(table,idx); \
bytevec datavar = toku_pmanode_val(table, idx); \
ITEMLEN datalenvar = toku_pmanode_vallen(table, idx); \
body; \
} } })
} } })
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) PMA_ITERATE_IDX(table, __i, keyvar, keylenvar, datavar, datalenvar, body)
void toku_pma_verify_fingerprint (PMA pma, u_int32_t rand4fingerprint, u_int32_t fingerprint);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment