Commit 980e282b authored by Rich Prohaska, committed by Yoni Fogel

#3678 use clean or mvcc leaf entries when appropriate refs[t:3678]

git-svn-id: file:///svn/toku/tokudb@32718 c7de825b-a66e-492c-adef-691d508d4ae1
parent cdf087c5
......@@ -246,8 +246,6 @@ int toku_brt_loader_internal_init (/* out */ BRTLOADER *blp,
void toku_brtloader_internal_destroy (BRTLOADER bl, BOOL is_error);
enum { disksize_row_overhead = 9 }; // how much overhead for a row in the fractal tree (#3588, 9 = cmd + keylen + vallen?)
// For test purposes only. (In production, the rowset size is determined by negotiation with the cachetable for some memory. See #2613.)
uint64_t toku_brtloader_get_rowset_budget_for_testing (void);
......
......@@ -1097,6 +1097,22 @@ static DBT make_dbt (void *data, u_int32_t size) {
#define inc_error_count() error_count++
#endif
// Select the transaction id associated with leaf entries for dictionary which_db.
//   bl       - loader state; reads root_xids_that_created and load_root_xid
//   which_db - index into bl->root_xids_that_created
// Returns bl->load_root_xid when root_xids_that_created is set and its entry for
// which_db differs from load_root_xid; returns TXNID_NONE in every other case.
static TXNID leafentry_xid(BRTLOADER bl, int which_db) {
    // No per-dictionary creator xids recorded: nothing to compare against.
    if (!bl->root_xids_that_created) {
        return TXNID_NONE;
    }
    // The recorded creator xid for this dictionary matches the load's root xid.
    if (bl->root_xids_that_created[which_db] == bl->load_root_xid) {
        return TXNID_NONE;
    }
    return bl->load_root_xid;
}
// Compute the in-memory size of a leaf entry holding one key/value pair.
//   key_size, val_size - byte lengths of the key and the value
//   xid                - TXNID_NONE selects the clean leaf entry layout;
//                        any other value selects the MVCC committed layout
// Returns LE_CLEAN_MEMSIZE(key_size, val_size) or
// LE_MVCC_COMMITTED_MEMSIZE(key_size, val_size) accordingly.
size_t brtloader_leafentry_size(size_t key_size, size_t val_size, TXNID xid) {
    if (xid != TXNID_NONE) {
        return LE_MVCC_COMMITTED_MEMSIZE(key_size, val_size);
    }
    return LE_CLEAN_MEMSIZE(key_size, val_size);
}
CILK_BEGIN
static int process_primary_rows_internal (BRTLOADER bl, struct rowset *primary_rowset)
......@@ -1144,7 +1160,7 @@ static int process_primary_rows_internal (BRTLOADER bl, struct rowset *primary_r
}
}
bl->extracted_datasizes[i] += skey.size + sval.size + disksize_row_overhead;
bl->extracted_datasizes[i] += brtloader_leafentry_size(skey.size, sval.size, leafentry_xid(bl, i));
if (row_wont_fit(rows, skey.size + sval.size)) {
//printf("%s:%d rows.n_rows=%ld rows.n_bytes=%ld\n", __FILE__, __LINE__, rows->n_rows, rows->n_bytes);
......@@ -2027,11 +2043,12 @@ struct dbuf {
struct leaf_buf {
BLOCKNUM blocknum;
TXNID xid;
int nkeys, ndata, dsize;
uint64_t nkeys, ndata, dsize;
BRTNODE node;
XIDS xids;
uint64_t off;
};
// One translation record: two signed 64-bit fields.
// NOTE(review): presumably the file offset and byte length of a block — confirm against the code that fills it in.
struct translation {
    int64_t off;
    int64_t size;
};
......@@ -2166,14 +2183,21 @@ static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc)
lbuf->blocknum.b = lblocknum;
lbuf->xid = xid;
lbuf->nkeys = lbuf->ndata = lbuf->dsize = 0;
lbuf->off = 0;
BRTNODE XMALLOC(node);
lbuf->node = node;
lbuf->xids = xids_get_root_xids();
if (xid != TXNID_NONE) {
XIDS new_xids = NULL;
int r = xids_create_child(lbuf->xids, &new_xids, xid);
assert(r == 0 && new_xids);
xids_destroy(&lbuf->xids);
lbuf->xids = new_xids;
}
int height = 0;
int n_bn = 1;
toku_initialize_empty_brtnode(node, lbuf->blocknum, height, n_bn, BRT_LAYOUT_VERSION, target_nodesize, 0);
BRTNODE XMALLOC(node);
toku_initialize_empty_brtnode(node, lbuf->blocknum, 0 /*height*/, 1 /*basement nodes*/, BRT_LAYOUT_VERSION, target_nodesize, 0);
BP_STATE(node, 0) = PT_AVAIL;
lbuf->node = node;
return lbuf;
}
......@@ -2182,7 +2206,7 @@ CILK_BEGIN
static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, BRTLOADER bl);
static int write_nonleaves (BRTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor, uint32_t target_nodesize);
CILK_END
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen);
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size);
static int write_translation_table (struct dbout *out, long long *off_of_translation_p);
static int write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk, BLOCKNUM root_blocknum_on_disk, LSN load_lsn, TXNID root_xid, uint32_t target_nodesize);
......@@ -2290,10 +2314,8 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
int64_t lblock;
result = allocate_block(&out, &lblock);
invariant(result == 0); // can not fail since translations reserved above
TXNID le_xid = TXNID_NONE;
if (bl->root_xids_that_created && bl->load_root_xid != bl->root_xids_that_created[which_db]) {
le_xid = bl->load_root_xid;
}
TXNID le_xid = leafentry_xid(bl, which_db);
struct leaf_buf *lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize);
u_int64_t n_rows_remaining = bl->n_rows;
u_int64_t old_n_rows_remaining = bl->n_rows;
......@@ -2320,34 +2342,20 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
DBT key = make_dbt(output_rowset->data+output_rowset->rows[i].off, output_rowset->rows[i].klen);
DBT val = make_dbt(output_rowset->data+output_rowset->rows[i].off + output_rowset->rows[i].klen, output_rowset->rows[i].vlen);
used_estimate += key.size + val.size + disksize_row_overhead;
#if 0
// Spawn off a node if
// a) there is at least one row in it, and
// b) this item would make the nodesize too big, or
// c) the remaining amount won't fit in the current node and the current node's data is more than the remaining amount
int remaining_amount = total_disksize_estimate - used_estimate;
int used_here = lbuf->dbuf.off + 1000; // leave 1000 for various overheads.
int target_size = (target_nodesize*7L)/8; // use only 7/8 of the node.
int used_here_with_next_key = used_here + key.size + val.size + disksize_row_overhead;
if (lbuf->n_in_buf > 0 &&
((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->dbuf.off > remaining_amount))) {
#else
size_t this_leafentry_size = brtloader_leafentry_size(key.size, val.size, le_xid);
used_estimate += this_leafentry_size;
// Spawn off a node if
// a) there is at least one row in it, and
// b) this item would make the nodesize too big, or
// c) the remaining amount won't fit in the current node and the current node's data is more than the remaining amount
int remaining_amount = total_disksize_estimate - used_estimate;
int off = lbuf->dsize + lbuf->nkeys * disksize_row_overhead;
int used_here = off + 1000; // leave 1000 for various overheads.
int target_size = (target_nodesize*7L)/8; // use only 7/8 of the node.
int used_here_with_next_key = used_here + key.size + val.size + disksize_row_overhead;
uint64_t remaining_amount = total_disksize_estimate - used_estimate;
uint64_t used_here = lbuf->off + 1000; // leave 1000 for various overheads.
uint64_t target_size = (target_nodesize*7L)/8; // use only 7/8 of the node.
uint64_t used_here_with_next_key = used_here + this_leafentry_size;
if (lbuf->nkeys > 0 &&
((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && off > remaining_amount))) {
#endif
//if (used_here_with_next_key < target_size) {
// printf("%s:%d Runt avoidance: used_here=%d, remaining_amount=%d target_size=%d dbuf.off=%d\n", __FILE__, __LINE__, used_here, remaining_amount, target_size, lbuf->dbuf.off);
//}
((used_here_with_next_key >= target_size) || (used_here + remaining_amount >= target_size && lbuf->off > remaining_amount))) {
int progress_this_node = progress_allocation * (double)(old_n_rows_remaining - n_rows_remaining)/(double)old_n_rows_remaining;
progress_allocation -= progress_this_node;
......@@ -2377,7 +2385,7 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
lbuf = start_leaf(&out, descriptor, lblock, le_xid, target_nodesize);
}
add_pair_to_leafnode(lbuf, (unsigned char *) key.data, key.size, (unsigned char *) val.data, val.size);
add_pair_to_leafnode(lbuf, (unsigned char *) key.data, key.size, (unsigned char *) val.data, val.size, this_leafentry_size);
n_rows_remaining--;
update_maxkey(&maxkey, &key); // set the new maxkey to the current key
......@@ -2746,10 +2754,11 @@ int toku_brt_loader_get_error(BRTLOADER bl, int *error) {
return 0;
}
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen) {
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen, int this_leafentry_size) {
lbuf->nkeys++; // assume NODUP
lbuf->ndata++;
lbuf->dsize += keylen + vallen;
lbuf->off += this_leafentry_size;
// append this key val pair to the leafnode
// #3588 TODO just make a clean ule and append it to the omt
......@@ -2758,7 +2767,7 @@ static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int
uint32_t idx = toku_omt_size(BLB_BUFFER(leafnode, 0));
DBT thekey = { .data = key, .size = keylen };
DBT theval = { .data = val, .size = vallen };
BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, xids_get_root_xids(), .u.id = { &thekey, &theval } };
BRT_MSG_S cmd = { BRT_INSERT, ZERO_MSN, lbuf->xids, .u.id = { &thekey, &theval } };
uint64_t workdone=0;
brt_leaf_apply_cmd_once(BLB(leafnode,0), &BP_SUBTREE_EST(leafnode,0), &cmd, idx, NULL, NULL, &workdone);
}
......@@ -2797,6 +2806,7 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr
if (serialized_leaf)
toku_free(serialized_leaf);
toku_brtnode_free(&lbuf->node);
xids_destroy(&lbuf->xids);
toku_free(lbuf);
//printf("Nodewrite %d (%.1f%%):", progress_allocation, 100.0*progress_allocation/PROGRESS_MAX);
......@@ -2995,13 +3005,10 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu
totalchildkeylens += kv_pair_keylen(childkey);
}
node->totalchildkeylens = totalchildkeylens;
XMALLOC_N(n_children, node->bp);
assert(node->bp);
for (int i=0; i<n_children; i++) {
set_BNC(node, i, toku_create_empty_nl());
BP_BLOCKNUM(node,i)= make_blocknum(subtree_info[i].block);
BP_BLOCKNUM(node,i) = make_blocknum(subtree_info[i].block);
BP_SUBTREE_EST(node,i) = subtree_info[i].subtree_estimates;
BP_HAVE_FULLHASH(node,i) = FALSE;
BP_FULLHASH(node,i) = 0;
BP_STATE(node,i) = PT_AVAIL;
}
......
......@@ -44,6 +44,8 @@ void toku_brtloader_set_size_factor (uint32_t factor);
void brtloader_set_os_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*));
size_t brtloader_leafentry_size(size_t key_size, size_t val_size, TXNID xid);
C_END
#endif // BRTLOADER_H
......@@ -91,15 +91,15 @@ struct __attribute__ ((__packed__)) leafentry {
#pragma pack(pop)
#endif
#define LE_CLEAN_MEMSIZE(keylen, vallen) \
(sizeof(((LEAFENTRY)NULL)->type) /* num_uxrs */ \
#define LE_CLEAN_MEMSIZE(_keylen, _vallen) \
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
+sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \
+sizeof(((LEAFENTRY)NULL)->u.clean.vallen) /* vallen */ \
+keylen /* actual key */ \
+vallen) /* actual val */
+(_keylen) /* actual key */ \
+(_vallen)) /* actual val */
#define LE_MVCC_COMMITTED_HEADER_MEMSIZE \
(sizeof(((LEAFENTRY)NULL)->type) /* num_uxrs */ \
(sizeof(((LEAFENTRY)NULL)->type) /* type */ \
+sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_cxrs) /* committed */ \
+sizeof(((LEAFENTRY)NULL)->u.mvcc.num_pxrs) /* provisional */ \
......@@ -107,10 +107,10 @@ struct __attribute__ ((__packed__)) leafentry {
+sizeof(uint32_t) /* length+bit */ \
+sizeof(uint32_t)) /* length+bit */
#define LE_MVCC_COMMITTED_MEMSIZE(keylen, vallen) \
#define LE_MVCC_COMMITTED_MEMSIZE(_keylen, _vallen) \
(LE_MVCC_COMMITTED_HEADER_MEMSIZE \
+keylen /* actual key */ \
+vallen) /* actual val */
+(_keylen) /* actual key */ \
+(_vallen)) /* actual val */
typedef struct leafentry *LEAFENTRY;
......
......@@ -125,13 +125,26 @@ check_brtloader-test-extractor-errors-2: brtloader-test-extractor-errors$(BINSUF
check_brtloader-test-open$(BINSUF): EXTRA_ARGS=dir_$@
check_brtloader-test-writer$(BINSUF): $(patsubst %,check_brtloader-test-writer-%, 1 2)
check_brtloader-test-writer$(BINSUF): $(patsubst %,check_brtloader-test-writer-%, 1 1000 100000 1000000 1-42 1000-42 100000-42 1000000-42)
true $(SUMMARIZE_CMD)
check_brtloader-test-writer-1: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1 -s dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-1000: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1000 -s dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-100000: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 100000 -s dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-2: brtloader-test-writer$(BINSUF)
check_brtloader-test-writer-1000000: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1000000 -s dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-1-42: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1 -s -x 42 dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-1000-42: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1000 -s -x 42 dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-100000-42: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 100000 -s -x 42 dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-1000000-42: brtloader-test-writer$(BINSUF)
$(VGRIND) ./$< $(VERBVERBOSE) -r 1000000 -s -x 42 dir.$@ $(SUMMARIZE_CMD)
check_brtloader-test-writer-errors$(BINSUF): $(patsubst %,check_brtloader-test-writer-errors-%, 1 2 3 4)
true $(SUMMARIZE_CMD)
check_brtloader-test-writer-errors-1: brtloader-test-writer-errors$(BINSUF)
......
......@@ -65,7 +65,7 @@ static void write_dbfile (char *template, int n, char *output_name, BOOL expect_
DBT val = {.size=sizeof i,
.data=&i};
add_row(&aset, &key, &val);
size_est += key.size + val.size + disksize_row_overhead;
size_est += brtloader_leafentry_size(key.size, val.size, TXNID_NONE);
}
toku_brt_loader_set_n_rows(&bl, n);
......@@ -102,7 +102,7 @@ static void write_dbfile (char *template, int n, char *output_name, BOOL expect_
assert(row->klen==sizeof(int));
assert(row->vlen==sizeof(int));
assert((int)(num_found+i)==*(int*)(rs->data+row->off));
found_size_est += row->klen + row->vlen + disksize_row_overhead;
found_size_est += brtloader_leafentry_size(row->klen, row->vlen, TXNID_NONE);
}
num_found += rs->n_rows;
......
......@@ -25,7 +25,6 @@ static int qsort_compare_ints (const void *a, const void *b) {
if (avalue<bvalue) return -1;
if (avalue>bvalue) return +1;
return 0;
}
static int compare_ints (DB *dest_db, const DBT *akey, const DBT *bkey) {
......@@ -48,7 +47,6 @@ static void verify_dbfile(int n, const char *name) {
CACHETABLE ct;
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
TOKUTXN const null_txn = NULL;
BRT t = NULL;
r = toku_brt_create(&t); assert(r == 0);
......@@ -84,14 +82,19 @@ static void verify_dbfile(int n, const char *name) {
if (verbose) traceit("verify done");
}
static void test_write_dbfile (char *template, int n, char *output_name) {
static void test_write_dbfile (char *template, int n, char *output_name, TXNID xid) {
if (verbose) traceit("test start");
DB *dest_db = NULL;
struct brtloader_s bl = {
.temp_file_template = template,
.reserved_memory = 512*1024*1024,
.load_root_xid = xid,
};
if (xid) {
bl.root_xids_that_created = toku_xcalloc(1, sizeof (TXNID));
bl.root_xids_that_created[0] = 0;
}
int r = brtloader_init_file_infos(&bl.file_infos); CKERR(r);
r = brt_loader_lock_init(&bl); CKERR(r);
brt_loader_set_fractal_workers_count_from_c(&bl);
......@@ -104,12 +107,10 @@ static void test_write_dbfile (char *template, int n, char *output_name) {
uint64_t size_est = 0;
init_rowset(&aset, toku_brtloader_get_rowset_budget_for_testing());
for (int i=0; i<n; i++) {
DBT key = {.size=sizeof i,
.data=&i};
DBT val = {.size=sizeof i,
.data=&i};
DBT key = { .size = sizeof i, .data = &i};
DBT val = { .size = sizeof i, .data = &i};
add_row(&aset, &key, &val);
size_est += key.size + val.size + disksize_row_overhead;
size_est += brtloader_leafentry_size(key.size, val.size, xid);
}
toku_brt_loader_set_n_rows(&bl, n);
......@@ -145,7 +146,7 @@ static void test_write_dbfile (char *template, int n, char *output_name) {
assert(row->klen==sizeof(int));
assert(row->vlen==sizeof(int));
assert((int)(num_found+i)==*(int*)(rs->data+row->off));
found_size_est += row->klen + row->vlen + disksize_row_overhead;
found_size_est += brtloader_leafentry_size(row->klen, row->vlen, xid);
}
num_found += rs->n_rows;
......@@ -172,7 +173,7 @@ static void test_write_dbfile (char *template, int n, char *output_name) {
assert(r==0);
r = queue_destroy(q2);
assert(r==0);
assert_zero(r);
destroy_merge_fileset(&fs);
brtloader_fi_destroy(&bl.file_infos, FALSE);
......@@ -182,12 +183,15 @@ static void test_write_dbfile (char *template, int n, char *output_name) {
brt_loader_destroy_error_callback(&bl.error_callback);
brt_loader_lock_destroy(&bl);
toku_free(bl.root_xids_that_created);
}
static int nrows = 1;
static TXNID xid = 0;
static int usage(const char *progname) {
fprintf(stderr, "Usage:\n %s [-h] [-v] [-q] [-r %d] [-s] directory\n", progname, nrows);
fprintf(stderr, "Usage:\n %s [-h] [-v] [-q] [-r %d] [-x %lu] [-s] directory\n", progname, nrows, xid);
return 1;
}
......@@ -204,6 +208,9 @@ int test_main (int argc, const char *argv[]) {
} else if (strcmp(argv[0], "-r") == 0) {
argc--; argv++;
nrows = atoi(argv[0]);
} else if (strcmp(argv[0], "-x") == 0) {
argc--; argv++;
xid = atol(argv[0]);
} else if (strcmp(argv[0], "-s") == 0) {
toku_brtloader_set_size_factor(1);
} else if (argv[0][0] == '-' || argc != 1) {
......@@ -232,7 +239,7 @@ int test_main (int argc, const char *argv[]) {
int olen = snprintf(output_name, templen, "%s/test.tokudb", directory);
assert (olen>0 && olen<templen);
test_write_dbfile(template, nrows, output_name);
test_write_dbfile(template, nrows, output_name, xid);
#if 0
r = system(unlink_all);
......
......@@ -232,12 +232,10 @@ static void fill_rowset (struct rowset *rows,
uint64_t *size_est) {
init_rowset(rows, toku_brtloader_get_rowset_budget_for_testing());
for (int i=0; i<n; i++) {
DBT key = {.size=sizeof(keys[i]),
.data=&keys[i]};
DBT val = {.size=strlen(vals[i]),
.data=(void *)vals[i]};
DBT key = {.size=sizeof(keys[i]), .data=&keys[i]};
DBT val = {.size=strlen(vals[i]), .data=(void *)vals[i]};
add_row(rows, &key, &val);
*size_est += key.size + val.size + disksize_row_overhead;
*size_est += brtloader_leafentry_size(key.size, val.size, TXNID_NONE);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment