Commit dacd2abf authored by Zardosht Kasheff, committed by Yoni Fogel

[t:3724], [t:3717], [t:3727], merge fixes to main

git-svn-id: file:///svn/toku/tokudb@32829 c7de825b-a66e-492c-adef-691d508d4ae1
parent bacc1c7e
......@@ -136,10 +136,11 @@ struct brtnode_nonleaf_childinfo {
// State kept for one basement node (one partition) of a leaf node.
// The fields are normally reached through the BLB_* accessor macros.
struct brtnode_leaf_basement_node {
uint32_t optimized_for_upgrade; // version number to which this leaf has been optimized, zero if never optimized for upgrade
BOOL soft_copy_is_up_to_date; // the data in the OMT reflects the softcopy state.
OMT buffer; // the OMT whose size is tracked by n_bytes_in_buffer
unsigned int n_bytes_in_buffer; /* How many bytes to represent the OMT (including the per-key overheads, but not including the overheads for the node). */
unsigned int seqinsert; /* number of sequential inserts to this leaf */
MSN max_msn_applied; // max MSN applied to this basement node (MSN counterpart of max_dsn_applied)
DSN max_dsn_applied; // max deserialization sequence number applied
};
#define PT_INVALID 0
......@@ -204,8 +205,8 @@ struct __attribute__((__packed__)) brtnode_partition {
};
struct brtnode {
MSN max_msn_applied_to_node_in_memory; // max msn that has been applied to this node (for root node, this is max msn for the tree)
MSN max_msn_applied_to_node_on_disk; // same as above, but for data on disk, only meaningful if node is clean
MSN max_msn_applied_to_node_on_disk; // max_msn_applied that will be written to disk
DSN dsn; // deserialization sequence number
unsigned int nodesize;
unsigned int flags;
BLOCKNUM thisnodename; // Which block number is this node?
......@@ -303,7 +304,8 @@ static inline void set_BSB(BRTNODE node, int i, SUB_BLOCK sb) {
// leaf node macros
// Each macro expands to one field of the basement node returned by
// BLB(node,i), so it can be used both to read the field and as the
// target of an assignment (e.g. BLB_MAX_MSN_APPLIED(node,i) = msn;).
#define BLB_OPTIMIZEDFORUPGRADE(node,i) (BLB(node,i)->optimized_for_upgrade)
#define BLB_SOFTCOPYISUPTODATE(node,i) (BLB(node,i)->soft_copy_is_up_to_date)
#define BLB_MAX_MSN_APPLIED(node,i) (BLB(node,i)->max_msn_applied)
#define BLB_MAX_DSN_APPLIED(node,i) (BLB(node,i)->max_dsn_applied)
#define BLB_BUFFER(node,i) (BLB(node,i)->buffer)
#define BLB_NBYTESINBUF(node,i) (BLB(node,i)->n_bytes_in_buffer)
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
......@@ -394,6 +396,8 @@ struct brt {
int (*close_db)(DB*, u_int32_t);
u_int32_t close_flags;
DSN curr_dsn;
struct toku_list live_brt_link;
struct toku_list zombie_brt_link;
};
......@@ -521,6 +525,11 @@ void toku_create_new_brtnode (BRT t, BRTNODE *result, int height, int n_children
void toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num_children,
int layout_version, unsigned int nodesize, unsigned int flags);
int toku_pin_brtnode_if_clean(
BRT brt, BLOCKNUM blocknum, u_int32_t fullhash,
ANCESTORS ancestors, struct pivot_bounds const * const bounds,
BRTNODE *node_p
);
int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash,
UNLOCKERS unlockers,
ANCESTORS ancestors, struct pivot_bounds const * const pbounds,
......
......@@ -384,7 +384,7 @@ serialize_brtnode_info(
struct wbuf wb;
wbuf_init(&wb, sb->uncompressed_ptr, sb->uncompressed_size);
wbuf_MSN(&wb, node->max_msn_applied_to_node_in_memory);
wbuf_MSN(&wb, node->max_msn_applied_to_node_on_disk);
wbuf_nocrc_uint(&wb, node->nodesize);
wbuf_nocrc_uint(&wb, node->flags);
wbuf_nocrc_int (&wb, node->height);
......@@ -478,6 +478,7 @@ static void
rebalance_brtnode_leaf(BRTNODE node)
{
assert(node->height == 0);
assert(node->dirty);
// first create an array of OMTVALUE's that store all the data
u_int32_t num_le = 0;
for (int i = 0; i < node->n_children; i++) {
......@@ -525,6 +526,16 @@ rebalance_brtnode_leaf(BRTNODE node)
u_int32_t tmp_optimized_for_upgrade = BLB_OPTIMIZEDFORUPGRADE(node, node->n_children-1);
u_int32_t tmp_seqinsert = BLB_SEQINSERT(node, node->n_children-1);
MSN max_msn = MIN_MSN;
DSN min_dsn = MAX_DSN;
for (int i = 0; i < node->n_children; i++) {
DSN curr_dsn = BLB_MAX_DSN_APPLIED(node,i);
MSN curr_msn = BLB_MAX_MSN_APPLIED(node,i);
min_dsn = (curr_dsn < min_dsn) ? curr_dsn : min_dsn;
max_msn = (curr_msn.msn > max_msn.msn) ? curr_msn : max_msn;
}
// Now destroy the old stuff;
toku_destroy_brtnode_internals(node);
......@@ -582,7 +593,11 @@ rebalance_brtnode_leaf(BRTNODE node)
BP_STATE(node,i) = PT_AVAIL;
BP_TOUCH_CLOCK(node,i);
BLB_MAX_DSN_APPLIED(node,i) = min_dsn;
BLB_MAX_MSN_APPLIED(node,i) = max_msn;
}
node->max_msn_applied_to_node_on_disk = max_msn;
// now the subtree estimates
toku_brt_leaf_reset_calc_leaf_stats(node);
......@@ -723,7 +738,6 @@ toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct brt_h
//printf("%s:%d wrote %d bytes for %lld size=%lld\n", __FILE__, __LINE__, w.ndone, off, size);
toku_free(compressed_buf);
node->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_in_memory;
return 0;
}
......@@ -799,7 +813,8 @@ BASEMENTNODE toku_create_empty_bn(void) {
BASEMENTNODE toku_create_empty_bn_no_buffer(void) {
BASEMENTNODE XMALLOC(bn);
bn->soft_copy_is_up_to_date = TRUE;
bn->max_dsn_applied = 0;
bn->max_msn_applied.msn = 0;
bn->buffer = NULL;
bn->n_bytes_in_buffer = 0;
bn->seqinsert = 0;
......@@ -924,8 +939,9 @@ deserialize_brtnode_info(
struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
rbuf_init(&rb, sb->uncompressed_ptr, data_size);
node->dsn = INVALID_DSN;
node->max_msn_applied_to_node_on_disk = rbuf_msn(&rb);
node->max_msn_applied_to_node_in_memory = node->max_msn_applied_to_node_on_disk;
node->nodesize = rbuf_int(&rb);
node->flags = rbuf_int(&rb);
node->height = rbuf_int(&rb);
......@@ -988,6 +1004,8 @@ static void
setup_available_brtnode_partition(BRTNODE node, int i) {
if (node->height == 0) {
set_BLB(node, i, toku_create_empty_bn());
BLB_MAX_MSN_APPLIED(node,i) = node->max_msn_applied_to_node_on_disk;
BLB_MAX_DSN_APPLIED(node,i) = 0;
}
else {
set_BNC(node, i, toku_create_empty_nl());
......@@ -1054,7 +1072,7 @@ deserialize_brtnode_partition(
unsigned char ch = rbuf_char(&rb);
assert(ch == BRTNODE_PARTITION_OMT_LEAVES);
BLB_OPTIMIZEDFORUPGRADE(node, index) = rbuf_int(&rb);
BLB_SOFTCOPYISUPTODATE(node, index) = FALSE;
// no need to set max_dsn_applied here: creating the basement node already set it to the correct value
BLB_SEQINSERT(node, index) = 0;
u_int32_t num_entries = rbuf_int(&rb);
OMTVALUE *XMALLOC_N(num_entries, array);
......@@ -1246,6 +1264,9 @@ toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode
read_and_decompress_sub_block(&rb, &curr_sb);
// at this point, sb->uncompressed_ptr stores the serialized node partition
deserialize_brtnode_partition(&curr_sb, node, childnum);
if (node->height == 0) {
toku_brt_bn_reset_stats(node, childnum);
}
toku_free(curr_sb.uncompressed_ptr);
toku_free(raw_block);
}
......@@ -1269,6 +1290,9 @@ toku_deserialize_bp_from_compressed(BRTNODE node, int childnum) {
curr_sb->compressed_size
);
deserialize_brtnode_partition(curr_sb, node, childnum);
if (node->height == 0) {
toku_brt_bn_reset_stats(node, childnum);
}
toku_free(curr_sb->uncompressed_ptr);
toku_free(curr_sb->compressed_ptr);
toku_free(curr_sb);
......
......@@ -134,7 +134,7 @@ toku_verify_brtnode (BRT brt,
//printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
node = node_v;
toku_assert_entire_node_in_memory(node);
thismsn = node->max_msn_applied_to_node_in_memory;
thismsn = node->max_msn_applied_to_node_on_disk;
if (rootmsn.msn == ZERO_MSN.msn) {
assert(parentmsn.msn == ZERO_MSN.msn);
rootmsn = thismsn;
......
This diff is collapsed.
......@@ -292,6 +292,7 @@ BOOL toku_brt_is_empty_fast (BRT brt) __attribute__ ((warn_unused_result));
BOOL toku_brt_is_recovery_logging_suppressed (BRT) __attribute__ ((warn_unused_result));
void toku_brt_bn_reset_stats(BRTNODE node, int childnum);
void toku_brt_leaf_reset_calc_leaf_stats(BRTNODE node);
int toku_brt_strerror_r(int error, char *buf, size_t buflen);
......
......@@ -57,9 +57,14 @@ typedef struct __toku_lsn { u_int64_t lsn; } LSN;
* Make the MSN be a struct instead of an integer so that we get better type checking. */
typedef struct __toku_msn { u_int64_t msn; } MSN;
#define ZERO_MSN ((MSN){0}) // dummy used for message construction, to be filled in when msg is applied to tree
#define MIN_MSN ((MSN){(u_int64_t)1<<32}) // first 2**32 values reserved for messages created before Dr. No (for upgrade)
#define MIN_MSN ((MSN){(u_int64_t)1000*1000*1000}) // first 1B values reserved for messages created before Dr. No (for upgrade)
#define MAX_MSN ((MSN){UINT64_MAX})
// DSN: deserialization sequence number, held in a signed 64-bit integer so
// that a negative value can serve as the "invalid" sentinel.
typedef int64_t DSN; // DESERIALIZATION sequence number
// Sentinel that sorts below MIN_DSN. Parenthesized so the negative literal
// always expands as a single operand, whatever expression it is placed in.
#define INVALID_DSN (-1)
// Smallest valid DSN.
#define MIN_DSN 0
// Largest representable DSN; suitable as the starting value when computing
// a minimum over a set of DSNs.
#define MAX_DSN INT64_MAX
/* At the brt layer, a FILENUM uniquely identifies an open file.
* At the ydb layer, a DICTIONARY_ID uniquely identifies an open dictionary.
* With the introduction of the loader (ticket 2216), it is possible for the file that holds
......
......@@ -146,7 +146,6 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = 4*(1<<20);
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -154,6 +153,7 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = nrows;
sn.dirty = 1;
LEAFENTRY les[nrows];
{
......@@ -267,7 +267,6 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = 4*(1<<20);
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -275,6 +274,7 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 1;
sn.dirty = 1;
LEAFENTRY les[nrows];
{
......@@ -382,7 +382,6 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = 4*(1<<20);
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -390,6 +389,7 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 1;
sn.dirty = 1;
LEAFENTRY les[7];
{
......@@ -503,7 +503,6 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
int r;
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = nodesize;
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -511,6 +510,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 7;
sn.dirty = 1;
LEAFENTRY elts[3];
elts[0] = le_malloc("a", "aval");
elts[1] = le_malloc("b", "bval");
......@@ -628,7 +628,6 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
int r;
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = nodesize;
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -636,6 +635,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 4;
sn.dirty = 1;
MALLOC_N(sn.n_children, sn.bp);
MALLOC_N(sn.n_children-1, sn.childkeys);
sn.childkeys[0] = kv_pair_malloc("A", 2, 0, 0);
......@@ -737,7 +737,6 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
int r;
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
sn.nodesize = nodesize;
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -745,6 +744,7 @@ test_serialize_leaf(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 0;
sn.n_children = 2;
sn.dirty = 1;
LEAFENTRY elts[3];
elts[0] = le_malloc("a", "aval");
elts[1] = le_malloc("b", "bval");
......@@ -862,10 +862,9 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) {
// source_brt.fd=fd;
sn.max_msn_applied_to_node_on_disk.msn = 0;
sn.max_msn_applied_to_node_in_memory.msn = 0;
char *hello_string;
sn.max_msn_applied_to_node_on_disk.msn = TESTMSNDSKVAL;
sn.max_msn_applied_to_node_in_memory.msn = TESTMSNMEMVAL;
//sn.max_msn_applied_to_node_in_memory.msn = TESTMSNMEMVAL;
sn.nodesize = nodesize;
sn.flags = 0x11223344;
sn.thisnodename.b = 20;
......@@ -873,6 +872,7 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) {
sn.layout_version_original = BRT_LAYOUT_VERSION;
sn.height = 1;
sn.n_children = 2;
sn.dirty = 1;
hello_string = toku_strdup("hello");
MALLOC_N(2, sn.bp);
MALLOC_N(1, sn.childkeys);
......@@ -938,14 +938,14 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) {
r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE);
assert(r==0);
assert(sn.max_msn_applied_to_node_on_disk.msn == TESTMSNMEMVAL);
assert(sn.max_msn_applied_to_node_in_memory.msn == TESTMSNMEMVAL);
//assert(sn.max_msn_applied_to_node_on_disk.msn == TESTMSNMEMVAL);
//assert(sn.max_msn_applied_to_node_in_memory.msn == TESTMSNMEMVAL);
setup_dn(bft, fd, brt_h, &dn);
assert(dn->thisnodename.b==20);
assert(dn->max_msn_applied_to_node_on_disk.msn == TESTMSNMEMVAL);
assert(dn->max_msn_applied_to_node_in_memory.msn == TESTMSNMEMVAL);
//assert(dn->max_msn_applied_to_node_on_disk.msn == TESTMSNMEMVAL);
//assert(dn->max_msn_applied_to_node_in_memory.msn == TESTMSNMEMVAL);
assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->layout_version_original ==BRT_LAYOUT_VERSION);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment