Commit e99c98c1 authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Removed the undo information from the logs (to rely on the checkpoint to save...

Removed the undo information from the logs (to rely on the checkpoint to save a good copy in the log).
Also, don't record the fingerprint changes, since they can be recalculated independently.  (This is a biggy.  The fingerprints updates were half the log changes.)
We'll have to do something about the fingerprints of internal nodes, so some if may come back in some form.
Speeds up {{{db-benchmark-test-tokudb -x}}} from 17477/s to 25696/s, and reduces the size of the {{{bench.tokudb}}} directory (containing logs and data) from 3.3GB to 1.7GB for a 0.33GB datafile.

Manipulations as follows:
 (cd tokudb;svn merge -r3676:3763 https://svn.tokutek.com/tokudb/tokudb.740 )
 svn delete tokudb.740

Fixes #775.


git-svn-id: file:///svn/tokudb@3764 c7de825b-a66e-492c-adef-691d508d4ae1
parent 74b72d7d
......@@ -102,8 +102,7 @@ void toku_verify_all_in_mempool(BRTNODE node) {
}
static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE child, BRT brt, TOKULOGGER logger) {
u_int32_t old_fingerprint = BNC_SUBTREE_FINGERPRINT(node,childnum_of_node);
static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE child, BRT UU(brt), TOKULOGGER UU(logger)) {
u_int64_t leafentry_estimate = 0;
u_int32_t sum = child->local_fingerprint;
if (child->height>0) {
......@@ -120,7 +119,6 @@ static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE
BNC_SUBTREE_FINGERPRINT(node,childnum_of_node)=sum;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(node,childnum_of_node)=leafentry_estimate;
node->dirty=1;
toku_log_changechildfingerprint(logger, &node->log_lsn, 0, toku_cachefile_filenum(brt->cf), node->thisnodename, childnum_of_node, old_fingerprint, sum);
}
// If you pass in data==0 then it only compares the key, not the data (even if is a DUPSORT database)
......@@ -482,7 +480,7 @@ static int log_and_save_brtenq(TOKULOGGER logger, BRT t, BRTNODE node, int child
u_int32_t new_fingerprint = old_fingerprint + fdiff;
//printf("%s:%d node=%lld fingerprint old=%08x new=%08x diff=%08x xid=%lld\n", __FILE__, __LINE__, (long long)node->thisnodename, old_fingerprint, new_fingerprint, fdiff, (long long)xid);
*fingerprint = new_fingerprint;
int r = toku_log_brtenq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum, xid, type, keybs, databs, old_fingerprint, new_fingerprint);
int r = toku_log_brtenq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum, xid, type, keybs, databs);
if (r!=0) return r;
return 0;
}
......@@ -537,13 +535,11 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node
int fr = toku_fifo_peek(from_htab, &key, &keylen, &data, &datalen, &type, &xid);
if (fr!=0) break;
int n_bytes_moved = keylen+datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
BYTESTRING keybs = { .len = keylen, .data = (char*)key };
BYTESTRING databs = { .len = datalen, .data = (char*)data };
u_int32_t old_from_fingerprint = node->local_fingerprint;
u_int32_t delta = toku_calccrc32_cmd(type, xid, key, keylen, data, datalen);
u_int32_t new_from_fingerprint = old_from_fingerprint - node->rand4fingerprint*delta;
if (r!=0) return r;
r = toku_log_brtdeq(logger, &node->log_lsn, 0, fnum, node->thisnodename, n_children_in_a, xid, type, keybs, databs, old_from_fingerprint, new_from_fingerprint);
r = toku_log_brtdeq(logger, &node->log_lsn, 0, fnum, node->thisnodename, n_children_in_a);
if (r!=0) return r;
r = log_and_save_brtenq(logger, t, B, targchild, xid, type, key, keylen, data, datalen, &B->local_fingerprint);
r = toku_fifo_enq(to_htab, key, keylen, data, datalen, type, xid);
......@@ -711,10 +707,7 @@ static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum
u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calccrc32_cmdstruct(cmd);
node->local_fingerprint = new_fingerprint;
{
BYTESTRING keybs = { .len=k->size, .data=(char*)k->data };
BYTESTRING databs = { .len=v->size, .data=(char*)v->data };
int r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum,
cmd->xid, cmd->type, keybs, databs, old_fingerprint, new_fingerprint);
int r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum);
assert(r==0);
}
{
......@@ -802,12 +795,9 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
// Remove all the cmds from the local fingerprint. Some may get added in again when we try to push to the child.
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, xid,
({
BYTESTRING keybs = { .len = skeylen, .data = (char*)skey };
BYTESTRING databs = { .len = svallen, .data = (char*)sval };
u_int32_t old_fingerprint = node->local_fingerprint;
u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calccrc32_cmd(type, xid, skey, skeylen, sval, svallen);
r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum,
xid, type, keybs, databs, old_fingerprint, new_fingerprint);
r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum);
node->local_fingerprint = new_fingerprint;
}));
......@@ -1336,7 +1326,7 @@ static int brt_leaf_apply_cmd_once (BRT t, BRTNODE node, BRT_CMD cmd, TOKULOGGER
if (le) {
// It's there, note that it's gone and remove it from the mempool
if ((r = toku_log_deleteleafentry(logger, &node->log_lsn, 0, filenum, node->thisnodename, idx, le))) return r;
if ((r = toku_log_deleteleafentry(logger, &node->log_lsn, 0, filenum, node->thisnodename, idx))) return r;
if ((r = toku_omt_delete_at(node->u.l.buffer, idx))) return r;
......
......@@ -103,12 +103,6 @@ const struct logtype logtypes[] = {
{"u_int8_t", "is_dup_sort", 0},
{"u_int32_t", "rand4fingerprint", "%08x"},
NULLFIELD}},
{"changechildfingerprint", 'f', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
{"u_int32_t", "childnum", 0},
{"u_int32_t", "oldfingerprint", "%08x"},
{"u_int32_t", "newfingerprint", "%08x"},
NULLFIELD}},
{"changeunnamedroot", 'u', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "oldroot", 0},
{"DISKOFF", "newroot", 0},
......@@ -156,15 +150,10 @@ const struct logtype logtypes[] = {
{"cfclose", 'o', FA{{"BYTESTRING", "fname", 0}, // cfclose is logged when a cachefile actually closes ("cfclose" means cache file close)
{"FILENUM", "filenum", 0},
NULLFIELD}},
// Note that brtdeq and brtenq don't name the new size or fingerprint. We can calculate them properly.
{"brtdeq", 'U', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
{"u_int32_t", "childnum", 0},
{"TXNID", "xid", 0},
{"u_int32_t", "typ", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
{"u_int32_t", "oldfingerprint", "%08x"},
{"u_int32_t", "newfingerprint", "%08x"},
NULLFIELD}},
{"brtenq", 'Q', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
......@@ -173,8 +162,6 @@ const struct logtype logtypes[] = {
{"u_int32_t", "typ", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
{"u_int32_t", "oldfingerprint", "%08x"},
{"u_int32_t", "newfingerprint", "%08x"},
NULLFIELD}},
// {"insertinleaf", 'I', FA{{"TXNID", "txnid", 0},
// {"FILENUM", "filenum", 0},
......@@ -196,10 +183,6 @@ const struct logtype logtypes[] = {
{"BYTESTRING", "data", 0},
NULLFIELD}},
{"deqrootentry", 'A', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"u_int32_t", "typ", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "data", 0},
NULLFIELD}},
{"insertleafentry", 'I', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
......@@ -209,7 +192,6 @@ const struct logtype logtypes[] = {
{"deleteleafentry", 'D', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0},
{"u_int32_t", "idx", 0},
{"LEAFENTRY", "oldleafentry", 0},
NULLFIELD}},
{"leafsplit", 's', FA{{"FILENUM", "filenum", 0}, // log the creation of a new node by splitting stuff out of an old node
{"DISKOFF", "old_diskoff", 0},
......
......@@ -223,7 +223,7 @@ static void recover_setup_node (FILENUM filenum, DISKOFF diskoff, CACHEFILE *cf,
*cf = pair->cf;
}
static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) {
static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum) {
struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair);
assert(r==0);
......@@ -237,18 +237,10 @@ static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILE
u_int32_t storedtype;
r = toku_fifo_peek(h->fifo, &storedkey, &storedkeylen, &storeddata, &storeddatalen, &storedtype, &storedxid);
assert(r==0);
assert(storedkeylen==key.len);
assert(storeddatalen==val.len);
assert(memcmp(storedkey, key.data, key.len)==0);
assert(memcmp(storeddata, val.data, val.len)==0);
assert(typ==storedtype);
assert(xid==storedxid);
r = toku_fifo_deq(h->fifo);
assert(r==0);
r = toku_cachetable_unpin(pair->cf, 0, 1, 0);
assert(r==0);
toku_free(key.data);
toku_free(val.data);
}
void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) {
......@@ -267,14 +259,13 @@ void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM fil
toku_free(val.data);
}
void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING data, u_int32_t oldfingerprint, u_int32_t newfingerprint) {
void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum) {
CACHEFILE cf;
BRTNODE node;
int r;
recover_setup_node(filenum, diskoff, &cf, &node);
assert(node->height>0);
//printf("deq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint);
assert(node->local_fingerprint==oldfingerprint);
bytevec actual_key, actual_data;
ITEMLEN actual_keylen, actual_datalen;
u_int32_t actual_type;
......@@ -282,35 +273,26 @@ void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t c
assert(childnum<(u_int32_t)node->u.n.n_children);
r = toku_fifo_peek(BNC_BUFFER(node, childnum), &actual_key, &actual_keylen, &actual_data, &actual_datalen, &actual_type, &actual_xid);
assert(r==0);
assert(actual_keylen==(ITEMLEN)key.len);
assert(memcmp(actual_key, key.data, actual_keylen)==0);
assert(actual_datalen=data.len);
assert(memcmp(actual_data, data.data, actual_datalen)==0);
assert(actual_type==typ);
assert(actual_xid==xid);
u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
node->local_fingerprint = newfingerprint;
u_int32_t sizediff = actual_keylen + actual_datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
node->local_fingerprint -= node->rand4fingerprint * toku_calccrc32_cmd(actual_type, actual_xid, actual_key, actual_keylen, actual_data, actual_datalen);
node->log_lsn = lsn;
node->u.n.n_bytes_in_buffers -= sizediff;
BNC_NBYTESINBUF(node, childnum) -= sizediff;
r = toku_fifo_deq(BNC_BUFFER(node, childnum)); // don't deq till were' done looking at the data.
r = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
assert(r==0);
toku_free(key.data);
toku_free(data.data);
}
void toku_recover_brtenq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING data, u_int32_t oldfingerprint, u_int32_t newfingerprint) {
void toku_recover_brtenq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING data) {
CACHEFILE cf;
BRTNODE node;
int r;
recover_setup_node(filenum, diskoff, &cf, &node);
assert(node->height>0);
//printf("enq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint);
assert(node->local_fingerprint==oldfingerprint);
r = toku_fifo_enq(BNC_BUFFER(node, childnum), key.data, key.len, data.data, data.len, typ, xid);
assert(r==0);
node->local_fingerprint = newfingerprint;
node->local_fingerprint += node->rand4fingerprint * toku_calccrc32_cmd(typ, xid, key.data, key.len, data.data, data.len);
node->log_lsn = lsn;
u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
r = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
......@@ -609,7 +591,7 @@ void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
toku_free_LEAFENTRY(newleafentry);
}
void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t idx, LEAFENTRY oldleafentry) {
void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t idx) {
struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair);
assert(r==0);
......@@ -623,6 +605,7 @@ void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
node->log_lsn = lsn;
{
OMTVALUE data;
LEAFENTRY oldleafentry=data;
r=toku_omt_fetch(node->u.l.buffer, idx, &data);
assert(r==0);
u_int32_t len = leafentry_memsize(oldleafentry);
......@@ -637,7 +620,6 @@ void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
}
r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
assert(r==0);
toku_free_LEAFENTRY(oldleafentry);
}
void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, DISKOFF UU(oldroot), DISKOFF newroot) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment