Commit 71b56ba1 authored by Bradley C. Kuszmaul's avatar Bradley C. Kuszmaul

Calculate an estimate of the number of {{{LEAFENTRY}}}s below each node.

Increment {{{layout_version}}} to 6.
brtdump now has a {{{--nodata}}} option (to print only the metadata, but not the data)
Fixes #766.

Merged to main using
{{{
cd tokudb/tokudb
svn merge -r3713:3725 https://svn.tokutek.com/tokudb/tokudb.766
cd ..
svn delete tokudb.766
}}}


git-svn-id: file:///svn/tokudb@3728 c7de825b-a66e-492c-adef-691d508d4ae1
parent efea4313
......@@ -41,6 +41,7 @@ enum { BUFFER_HEADER_SIZE = (4 // height//
struct brtnode_nonleaf_childinfo {
u_int32_t subtree_fingerprint;
u_int64_t leafentry_estimate; // estimate how many leafentries are below us.
DISKOFF diskoff;
FIFO buffer;
unsigned int n_bytes_in_buffer; /* How many bytes are in each buffer (including overheads for the disk-representation) */
......@@ -77,6 +78,7 @@ struct brtnode {
struct brtnode_nonleaf_childinfo *childinfos; /* One extra so we can grow */
#define BNC_SUBTREE_FINGERPRINT(node,i) ((node)->u.n.childinfos[i].subtree_fingerprint)
#define BNC_SUBTREE_LEAFENTRY_ESTIMATE(node,i) ((node)->u.n.childinfos[i].leafentry_estimate)
#define BNC_DISKOFF(node,i) ((node)->u.n.childinfos[i].diskoff)
#define BNC_BUFFER(node,i) ((node)->u.n.childinfos[i].buffer)
#define BNC_NBYTESINBUF(node,i) ((node)->u.n.childinfos[i].n_bytes_in_buffer)
......@@ -223,6 +225,7 @@ void *mempool_malloc_from_omt(OMT omt, struct mempool *mp, size_t size);
void toku_verify_all_in_mempool(BRTNODE node);
#define BRT_LAYOUT_VERSION 5
// Diff from 5 to 6: Added leafentry_estimate
#define BRT_LAYOUT_VERSION 6
#endif
......@@ -41,6 +41,8 @@ static void test_serialize(void) {
BNC_DISKOFF(&sn, 1) = sn.nodesize*35;
BNC_SUBTREE_FINGERPRINT(&sn, 0) = random();
BNC_SUBTREE_FINGERPRINT(&sn, 1) = random();
BNC_SUBTREE_LEAFENTRY_ESTIMATE(&sn, 0) = random() + (((long long)random())<<32);
BNC_SUBTREE_LEAFENTRY_ESTIMATE(&sn, 1) = random() + (((long long)random())<<32);
r = toku_fifo_create(&BNC_BUFFER(&sn,0)); assert(r==0);
r = toku_fifo_create(&BNC_BUFFER(&sn,1)); assert(r==0);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "a", 2, "aval", 5, BRT_NONE, (TXNID)0); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)0, "a", 2, "aval", 5);
......@@ -70,6 +72,7 @@ static void test_serialize(void) {
int i;
for (i=0; i<2; i++) {
assert(BNC_SUBTREE_FINGERPRINT(dn, i)==BNC_SUBTREE_FINGERPRINT(&sn, i));
assert(BNC_SUBTREE_LEAFENTRY_ESTIMATE(dn, i)==BNC_SUBTREE_LEAFENTRY_ESTIMATE(&sn, i));
}
assert(dn->local_fingerprint==sn.local_fingerprint);
}
......
......@@ -40,7 +40,7 @@ static unsigned int toku_serialize_brtnode_size_slow (BRTNODE node) {
for (i=0; i<node->u.n.n_children-1; i++) {
csize+=toku_brtnode_pivot_key_len(node, node->u.n.childkeys[i]);
}
size+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
size+=(8+4+4+8)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, the subtree fingerprint, and the leafentry_estimate. */
int n_buffers = node->u.n.n_children;
assert(0 <= n_buffers && n_buffers < TREE_FANOUT+1);
for (i=0; i< n_buffers; i++) {
......@@ -79,7 +79,7 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
result+=4*(node->u.n.n_children-1); /* key lengths*/
if (node->flags & TOKU_DB_DUPSORT) result += 4*(node->u.n.n_children-1); /* data lengths */
result+=node->u.n.totalchildkeylens; /* the lengths of the pivot keys, without their key lengths. */
result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
result+=(8+4+4+8)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, the subtree fingerprint, and the leafentry_estimate. */
result+=node->u.n.n_bytes_in_buffers;
} else {
result+=4; /* n_entries in buffer table. */
......@@ -140,6 +140,7 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) {
wbuf_int(&w, node->u.n.n_children);
for (i=0; i<node->u.n.n_children; i++) {
wbuf_uint(&w, BNC_SUBTREE_FINGERPRINT(node, i));
wbuf_ulonglong(&w, BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, i));
}
//printf("%s:%d w.ndone=%d\n", __FILE__, __LINE__, w.ndone);
for (i=0; i<node->u.n.n_children-1; i++) {
......@@ -298,6 +299,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) {
u_int32_t childfp = rbuf_int(&rc);
BNC_SUBTREE_FINGERPRINT(result, i)= childfp;
check_subtree_fingerprint += childfp;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(result, i)=rbuf_ulonglong(&rc);
}
for (i=0; i<result->u.n.n_children-1; i++) {
if (result->flags & TOKU_DB_DUPSORT) {
......
......@@ -30,6 +30,7 @@ int toku_testsetup_nonleaf (BRT brt, int height, DISKOFF *diskoff, int n_childre
int i;
for (i=0; i<n_children; i++) {
node->u.n.childinfos[i] = (struct brtnode_nonleaf_childinfo){ .subtree_fingerprint = subtree_fingerprints[i],
.leafentry_estimate = 0,
.diskoff = children[i],
.n_bytes_in_buffer = 0 };
r = toku_fifo_create(&BNC_BUFFER(node,i)); if (r!=0) return r;
......
......@@ -104,16 +104,21 @@ void toku_verify_all_in_mempool(BRTNODE node) {
static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE child, BRT brt, TOKULOGGER logger) {
u_int32_t old_fingerprint = BNC_SUBTREE_FINGERPRINT(node,childnum_of_node);
u_int64_t leafentry_estimate = 0;
u_int32_t sum = child->local_fingerprint;
if (child->height>0) {
int i;
for (i=0; i<child->u.n.n_children; i++) {
sum += BNC_SUBTREE_FINGERPRINT(child,i);
leafentry_estimate += BNC_SUBTREE_LEAFENTRY_ESTIMATE(child,i);
}
} else {
leafentry_estimate = toku_omt_size(child->u.l.buffer);
}
// Don't try to get fancy about not modifying the fingerprint if it didn't change.
// We only call this function if we have reason to believe that the child's fingerprint did change.
BNC_SUBTREE_FINGERPRINT(node,childnum_of_node)=sum;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(node,childnum_of_node)=leafentry_estimate;
node->dirty=1;
toku_log_changechildfingerprint(logger, &node->log_lsn, 0, toku_cachefile_filenum(brt->cf), node->thisnodename, childnum_of_node, old_fingerprint, sum);
}
......@@ -509,6 +514,7 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node
if (r!=0) return r;
BNC_NBYTESINBUF(B,i)=0;
BNC_SUBTREE_FINGERPRINT(B,i)=0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(B,i)=0;
}
for (i=n_children_in_a; i<old_n_children; i++) {
......@@ -575,6 +581,9 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node
BNC_SUBTREE_FINGERPRINT(B, targchild) = BNC_SUBTREE_FINGERPRINT(node, i);
BNC_SUBTREE_FINGERPRINT(node, i) = 0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(B, targchild) = BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, i);
BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, i) = 0;
assert(BNC_NBYTESINBUF(node, i) == 0);
}
......@@ -769,7 +778,8 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
REALLOC_N(node->u.n.n_children+2, node->u.n.childinfos);
REALLOC_N(node->u.n.n_children+1, node->u.n.childkeys);
// Slide the children over.
BNC_SUBTREE_FINGERPRINT(node, node->u.n.n_children+1)=0;
BNC_SUBTREE_FINGERPRINT (node, node->u.n.n_children+1)=0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, node->u.n.n_children+1)=0;
for (cnum=node->u.n.n_children; cnum>childnum+1; cnum--) {
node->u.n.childinfos[cnum] = node->u.n.childinfos[cnum-1];
}
......@@ -779,7 +789,8 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
assert(BNC_DISKOFF(node, childnum)==childa->thisnodename); // use the same child
BNC_DISKOFF(node, childnum+1) = childb->thisnodename;
// BNC_SUBTREE_FINGERPRINT(node, childnum)=0; // leave the subtreefingerprint alone for the child, so we can log the change
BNC_SUBTREE_FINGERPRINT(node, childnum+1)=0;
BNC_SUBTREE_FINGERPRINT (node, childnum+1)=0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, childnum+1)=0;
fixup_child_fingerprint(node, childnum, childa, t, logger);
fixup_child_fingerprint(node, childnum+1, childb, t, logger);
r=toku_fifo_create(&BNC_BUFFER(node,childnum+1)); assert(r==0);
......@@ -2177,6 +2188,8 @@ static int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk,
BNC_NBYTESINBUF(newroot, 1)=0;
BNC_SUBTREE_FINGERPRINT(newroot, 0)=0;
BNC_SUBTREE_FINGERPRINT(newroot, 1)=0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(newroot, 0)=0;
BNC_SUBTREE_LEAFENTRY_ESTIMATE(newroot, 1)=0;
//verify_local_fingerprint_nonleaf(nodea);
//verify_local_fingerprint_nonleaf(nodeb);
r=toku_log_newbrtnode(logger, (LSN*)0, 0, toku_cachefile_filenum(brt->cf), newroot_diskoff, new_height, new_nodesize, (brt->flags&TOKU_DB_DUPSORT)!=0, newroot->rand4fingerprint);
......
......@@ -7,6 +7,8 @@
#include "key.h"
#include "brt-internal.h"
static int dump_data = 1;
void print_item (bytevec val, ITEMLEN len) {
printf("\"");
ITEMLEN i;
......@@ -43,27 +45,29 @@ void dump_header (int f, struct brt_header **header) {
printf("Fifo:\n");
r = toku_deserialize_fifo_at(f, h->unused_memory, &h->fifo);
printf(" fifo has %d entries\n", toku_fifo_n_entries(h->fifo));
FIFO_ITERATE(h->fifo, key, keylen, data, datalen, type, xid,
({
printf(" ");
switch (type) {
case BRT_NONE: printf("NONE"); goto ok;
case BRT_INSERT: printf("INSERT"); goto ok;
case BRT_DELETE_ANY: printf("DELETE_ANY"); goto ok;
case BRT_DELETE_BOTH: printf("DELETE_BOTH"); goto ok;
case BRT_ABORT_ANY: printf("ABORT_ANY"); goto ok;
case BRT_ABORT_BOTH: printf("ABORT_BOTH"); goto ok;
case BRT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok;
case BRT_COMMIT_BOTH: printf("COMMIT_BOTH"); goto ok;
}
printf("huh?");
ok:
printf(" %lld ", (long long)xid);
print_item(key, keylen);
printf(" ");
print_item(data, datalen);
printf("\n");
}));
if (dump_data) {
FIFO_ITERATE(h->fifo, key, keylen, data, datalen, type, xid,
({
printf(" ");
switch (type) {
case BRT_NONE: printf("NONE"); goto ok;
case BRT_INSERT: printf("INSERT"); goto ok;
case BRT_DELETE_ANY: printf("DELETE_ANY"); goto ok;
case BRT_DELETE_BOTH: printf("DELETE_BOTH"); goto ok;
case BRT_ABORT_ANY: printf("ABORT_ANY"); goto ok;
case BRT_ABORT_BOTH: printf("ABORT_BOTH"); goto ok;
case BRT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok;
case BRT_COMMIT_BOTH: printf("COMMIT_BOTH"); goto ok;
}
printf("huh?");
ok:
printf(" %lld ", (long long)xid);
print_item(key, keylen);
printf(" ");
print_item(data, datalen);
printf("\n");
}));
}
}
void dump_node (int f, DISKOFF off) {
......@@ -93,6 +97,12 @@ void dump_node (int f, DISKOFF off) {
printf("%08x", BNC_SUBTREE_FINGERPRINT(n, i));
}
printf("}\n");
printf(" subleafentry_estimates={");
for (i=0; i<n->u.n.n_children; i++) {
if (i>0) printf(" ");
printf("%lld", (unsigned long long)(BNC_SUBTREE_LEAFENTRY_ESTIMATE(n, i)));
}
printf("}\n");
printf(" pivots:\n");
for (i=0; i<n->u.n.n_children-1; i++) {
struct kv_pair *piv = n->u.n.childkeys[i];
......@@ -105,30 +115,32 @@ void dump_node (int f, DISKOFF off) {
for (i=0; i<n->u.n.n_children; i++) {
printf(" child %d: %lld\n", i, BNC_DISKOFF(n, i));
printf(" buffer contains %d bytes (%d items)\n", BNC_NBYTESINBUF(n, i), toku_fifo_n_entries(BNC_BUFFER(n,i)));
FIFO_ITERATE(BNC_BUFFER(n,i), key, keylen, data, datalen, typ, xid,
({
printf(" TYPE=");
switch ((enum brt_cmd_type)typ) {
case BRT_NONE: printf("NONE"); goto ok;
case BRT_INSERT: printf("INSERT"); goto ok;
case BRT_DELETE_ANY: printf("DELETE_ANY"); goto ok;
case BRT_DELETE_BOTH: printf("DELETE_BOTH"); goto ok;
case BRT_ABORT_ANY: printf("ABORT_ANY"); goto ok;
case BRT_ABORT_BOTH: printf("ABORT_BOTH"); goto ok;
case BRT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok;
case BRT_COMMIT_BOTH: printf("COMMIT_BOTH"); goto ok;
}
printf("HUH?");
ok:
printf(" xid=%"PRId64" ", xid);
print_item(key, keylen);
if (datalen>0) {
printf(" ");
print_item(data, datalen);
}
printf("\n");
})
);
if (dump_data) {
FIFO_ITERATE(BNC_BUFFER(n,i), key, keylen, data, datalen, typ, xid,
({
printf(" TYPE=");
switch ((enum brt_cmd_type)typ) {
case BRT_NONE: printf("NONE"); goto ok;
case BRT_INSERT: printf("INSERT"); goto ok;
case BRT_DELETE_ANY: printf("DELETE_ANY"); goto ok;
case BRT_DELETE_BOTH: printf("DELETE_BOTH"); goto ok;
case BRT_ABORT_ANY: printf("ABORT_ANY"); goto ok;
case BRT_ABORT_BOTH: printf("ABORT_BOTH"); goto ok;
case BRT_COMMIT_ANY: printf("COMMIT_ANY"); goto ok;
case BRT_COMMIT_BOTH: printf("COMMIT_BOTH"); goto ok;
}
printf("HUH?");
ok:
printf(" xid=%"PRId64" ", xid);
print_item(key, keylen);
if (datalen>0) {
printf(" ");
print_item(data, datalen);
}
printf("\n");
})
);
}
}
} else {
printf(" n_bytes_in_buffer=%d\n", n->u.l.n_bytes_in_buffer);
......@@ -139,13 +151,24 @@ void dump_node (int f, DISKOFF off) {
printf("\n");
return 0;
}
toku_omt_iterate(n->u.l.buffer, print_le, 0);
if (dump_data) toku_omt_iterate(n->u.l.buffer, print_le, 0);
}
}
int main (int argc, const char *argv[]) {
assert(argc==2);
const char *n = argv[1];
const char *arg0 = argv[0];
argc--; argv++;
while (argc>1) {
if (strcmp(argv[0], "--nodata")==0) {
dump_data = 0;
} else {
printf("Usage: %s [--nodata] brtfilename\n", arg0);
exit(1);
}
argc--; argv++;
}
assert(argc==1);
const char *n = argv[0];
int f = open(n, O_RDONLY); assert(f>=0);
struct brt_header *h;
dump_header(f, &h);
......
......@@ -341,6 +341,7 @@ void toku_recover_addchild (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t
}
BNC_DISKOFF(node, childnum) = child;
BNC_SUBTREE_FINGERPRINT(node, childnum) = childfingerprint;
//BNC_SUBTREE_LEAFENTRY_ESTIMATE(node, childnum) = 0; // This isn't right, but recovery is broken right now anyway, so just leaf it unininitalized.
int r= toku_fifo_create(&BNC_BUFFER(node, childnum)); assert(r==0);
BNC_NBYTESINBUF(node, childnum) = 0;
node->u.n.n_children++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment