Commit b0ccec78 authored by Yoni Fogel

Refs Tokutek/ft-index#46 Add dmt (dynamic OMT)

Use dmt to replace omt in bn_data class for storing leafentries.
Optimization for serial inserts and mempool
parent 5a61f344
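For context, a rough sketch of the idea behind the dmt (hypothetical names, not the committed dmt API): an omt keeps a balanced tree with one malloc'd pointer per leafentry, so N serial inserts cost N allocations plus rebalancing; a dmt backed by a mempool can detect in-order (serial) inserts and simply append, keeping keys and leafentries contiguous.

    // Rough sketch of the dmt idea (hypothetical, NOT the committed API).
    #include <cstdint>
    #include <vector>

    struct dmt_sketch {
        std::vector<uint8_t>  mempool;  // keys + leafentries stored contiguously
        std::vector<uint32_t> offsets;  // offset of each entry within mempool

        // Fast path for a serial insert: append at the end of the mempool.
        void append(const void *entry, uint32_t len) {
            const uint8_t *p = static_cast<const uint8_t *>(entry);
            offsets.push_back(static_cast<uint32_t>(mempool.size()));
            mempool.insert(mempool.end(), p, p + len);  // may move the buffer!
        }

        void *get(uint32_t idx) { return &mempool[offsets[idx]]; }
    };

Because an append can reallocate the backing buffer, callers must hold offsets rather than raw pointers across inserts; that is exactly the hazard the ft-ops.cc comments below call out.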
@@ -31,6 +31,7 @@ set(FT_SOURCES
   checkpoint
   compress
   dbufio
+  dmt-wrapper
   fifo
   ft
   ft-cachetable-wrappers
...
@@ -1178,6 +1178,8 @@ typedef enum {
     FT_PRO_NUM_STOP_LOCK_CHILD,
     FT_PRO_NUM_STOP_CHILD_INMEM,
     FT_PRO_NUM_DIDNT_WANT_PROMOTE,
+    FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE,    // how many basement nodes were deserialized with a fixed keysize
+    FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, // how many basement nodes were deserialized with a variable keysize
     FT_STATUS_NUM_ROWS
 } ft_status_entry;
...
@@ -363,6 +363,8 @@ status_init(void)
     STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
     STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
     STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
+    STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
+    STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
     ft_status.initialized = true;
 }
@@ -389,6 +391,14 @@ toku_ft_get_status(FT_STATUS s) {
     } \
 } while (0)

+void toku_note_deserialized_basement_node(bool fixed_key_size) {
+    if (fixed_key_size) {
+        STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1);
+    } else {
+        STATUS_INC(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, 1);
+    }
+}
+
 bool is_entire_node_in_memory(FTNODE node) {
     for (int i = 0; i < node->n_children; i++) {
         if(BP_STATE(node,i) != PT_AVAIL) {
@@ -595,6 +605,7 @@ ftnode_memory_size (FTNODE node)
     int n_children = node->n_children;
     retval += sizeof(*node);
     retval += (n_children)*(sizeof(node->bp[0]));
+    retval += (n_children > 0 ? n_children-1 : 0)*(sizeof(node->childkeys[0]));
     retval += node->totalchildkeylens;
     // now calculate the sizes of the partitions
@@ -1722,6 +1733,8 @@ toku_ft_bn_apply_cmd_once (
         &new_le,
         &numbytes_delta
         );
+    // at this point, we cannot trust cmd->u.id.key to be valid.
+    // The dmt may have realloced its mempool and freed the one containing key.
     newsize = new_le ? (leafentry_memsize(new_le) + key_storage_size) : 0;
     if (le && new_le) {
@@ -1986,6 +1999,7 @@ toku_ft_bn_apply_cmd (
             int deleted = 0;
             if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
                 toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, workdone, stats_to_update);
+                // at this point, we cannot trust cmd->u.id.key to be valid.
                 uint32_t new_omt_size = bn->data_buffer.omt_size();
                 if (new_omt_size != omt_size) {
                     paranoid_invariant(new_omt_size+1 == omt_size);
...
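The two new `cannot trust cmd->u.id.key` comments exist because the key may live inside the dmt's mempool, and applying the command can grow that mempool. A minimal generic illustration of the hazard (ordinary C++, not ft-index code):

    // Generic illustration of the use-after-realloc hazard (not ft-index code).
    #include <cstdint>
    #include <vector>

    int main() {
        std::vector<uint8_t> pool(16, 0);  // stands in for the dmt mempool
        uint8_t *key = &pool[0];           // raw pointer into the pool
        pool.resize(1 << 20);              // growth may reallocate the buffer,
        // so 'key' may now dangle; dereferencing it is undefined behavior.
        // The commit's fix: after any call that can realloc the mempool,
        // do not touch cmd->u.id.key again.
        (void)key;
        return 0;
    }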
@@ -351,6 +351,8 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen);

 extern bool garbage_collection_debug;

+void toku_note_deserialized_basement_node(bool fixed_key_size);
+
 // This is a poor place to put global options like these.
 void toku_ft_set_direct_io(bool direct_io_on);
 void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
...
@@ -118,7 +118,7 @@ enum ft_layout_version_e {
     FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection
     FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902
     FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
-    FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry
+    FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry, basements store key/vals separately on disk
     FT_NEXT_VERSION, // the version after the current version
     FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
     FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported
...
@@ -320,7 +320,7 @@ serialize_ftnode_partition_size (FTNODE node, int i)
         result += toku_bnc_nbytesinbuf(BNC(node, i));
     }
     else {
-        result += 4; // n_entries in buffer table
+        result += 4 + bn_data::HEADER_LENGTH; // n_entries in buffer table + basement header
         result += BLB_NBYTESINDATA(node, i);
     }
     result += 4; // checksum
@@ -380,10 +380,16 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) {
         wbuf_nocrc_char(&wb, ch);
         wbuf_nocrc_uint(&wb, bd->omt_size());

-        //
-        // iterate over leafentries and place them into the buffer
-        //
-        bd->omt_iterate<struct wbuf, wbufwriteleafentry>(&wb);
+        bd->prepare_to_serialize();
+        bd->serialize_header(&wb);
+        if (bd->need_to_serialize_each_leafentry_with_key()) {
+            //
+            // iterate over leafentries and place them into the buffer
+            //
+            bd->omt_iterate<struct wbuf, wbufwriteleafentry>(&wb);
+        } else {
+            bd->serialize_rest(&wb);
+        }
     }
     uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb));
     wbuf_nocrc_int(&wb, end_to_end_checksum);
@@ -592,9 +598,14 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
     // Create an array that will store the size of each basement.
     // This is the sum of the leaf sizes of all the leaves in that basement.
     // We don't know how many basements there will be, so we use num_le as the upper bound.
-    toku::scoped_malloc bn_sizes_buf(sizeof(size_t) * num_alloc);
-    size_t *bn_sizes = reinterpret_cast<size_t *>(bn_sizes_buf.get());
-    bn_sizes[0] = 0;
+
+    // Sum of all le sizes in a single basement
+    toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc);
+    size_t *bn_le_sizes = reinterpret_cast<size_t *>(bn_le_sizes_buf.get());
+
+    // Sum of all key sizes in a single basement
+    toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc);
+    size_t *bn_key_sizes = reinterpret_cast<size_t *>(bn_key_sizes_buf.get());

     // TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les).
     // Each entry is the number of leafentries in this basement. (Again, num_le is an overkill upper bound.)
@@ -611,17 +622,20 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
     for (uint32_t i = 0; i < num_le; i++) {
         uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]);
         le_sizes[i] = curr_le_size;
-        if ((bn_size_so_far + curr_le_size > basementnodesize) && (num_le_in_curr_bn != 0)) {
+        if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) {
            // cap off the current basement node to end with the element before i
            new_pivots[curr_pivot] = i-1;
            curr_pivot++;
            num_le_in_curr_bn = 0;
            bn_size_so_far = 0;
+           bn_le_sizes[curr_pivot] = 0;
+           bn_key_sizes[curr_pivot] = 0;
         }
         num_le_in_curr_bn++;
         num_les_this_bn[curr_pivot] = num_le_in_curr_bn;
+        bn_le_sizes[curr_pivot] += curr_le_size;
+        bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset
         bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i];
-        bn_sizes[curr_pivot] = bn_size_so_far;
     }
     // curr_pivot is now the total number of pivot keys in the leaf node
     int num_pivots = curr_pivot;
@@ -688,9 +702,6 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
         uint32_t num_les_to_copy = num_les_this_bn[i];
         invariant(num_les_to_copy == num_in_bn);

-        // construct mempool for this basement
-        size_t size_this_bn = bn_sizes[i];
-
         BN_DATA bd = BLB_DATA(node, i);
         bd->replace_contents_with_clone_of_sorted_array(
             num_les_to_copy,
@@ -698,7 +709,8 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
             &key_sizes[baseindex_this_bn],
             &leafpointers[baseindex_this_bn],
             &le_sizes[baseindex_this_bn],
-            size_this_bn
+            bn_key_sizes[i],  // Total key sizes
+            bn_le_sizes[i]    // total le sizes
             );
         BP_STATE(node,i) = PT_AVAIL;
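One subtlety in the hunks above: the accumulator bn_size_so_far already counted a uint32_t le_offset plus the key bytes per entry, but the old cap check added only the raw leafentry size; the new check makes the two consistent. A worked example with illustrative numbers (not from the commit):

    // Worked example of the per-entry charge against basementnodesize.
    #include <cstddef>
    #include <cstdint>

    int main() {
        uint32_t curr_le_size = 100;  // leafentry_disksize(...) result
        uint32_t key_size = 10;       // key_sizes[i]
        // Each entry adds leafentry + uint32_t le_offset + key to the cap:
        size_t charge = curr_le_size + sizeof(uint32_t) + key_size;  // 114 bytes
        // A basement is capped once bn_size_so_far + charge > basementnodesize.
        return charge == 114 ? 0 : 1;
    }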
@@ -1546,10 +1558,9 @@ deserialize_ftnode_partition(
         uint32_t num_entries = rbuf_int(&rb);
         // we are now at the first byte of first leafentry
         data_size -= rb.ndone; // remaining bytes of leafentry data

         BASEMENTNODE bn = BLB(node, childnum);
-        bn->data_buffer.initialize_from_data(num_entries, &rb.buf[rb.ndone], data_size);
-        rb.ndone += data_size;
+        bn->data_buffer.initialize_from_data(num_entries, &rb, data_size, node->layout_version_read_from_disk);
     }
     assert(rb.ndone == rb.size);
exit:
@@ -2101,8 +2112,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
         if (has_end_to_end_checksum) {
             data_size -= sizeof(uint32_t);
         }
-        bn->data_buffer.initialize_from_data(n_in_buf, &rb->buf[rb->ndone], data_size);
-        rb->ndone += data_size;
+        bn->data_buffer.initialize_from_data(n_in_buf, rb, data_size, node->layout_version_read_from_disk);
     }

     // Whatever this is must be less than the MSNs of every message above
...
@@ -98,6 +98,7 @@ struct memarena {
     char *buf;
     size_t buf_used, buf_size;
     size_t size_of_other_bufs; // the buf_size of all the other bufs.
+    size_t footprint_of_other_bufs; // the footprint of all the other bufs.
     char **other_bufs;
     int n_other_bufs;
 };
@@ -108,6 +109,7 @@ MEMARENA memarena_create_presized (size_t initial_size) {
     result->buf_used = 0;
     result->other_bufs = NULL;
     result->size_of_other_bufs = 0;
+    result->footprint_of_other_bufs = 0;
     result->n_other_bufs = 0;
     XMALLOC_N(result->buf_size, result->buf);
     return result;
@@ -128,6 +130,7 @@ void memarena_clear (MEMARENA ma) {
     // But reuse the main buffer
     ma->buf_used = 0;
     ma->size_of_other_bufs = 0;
+    ma->footprint_of_other_bufs = 0;
 }

 static size_t
@@ -151,6 +154,7 @@ void* malloc_in_memarena (MEMARENA ma, size_t size) {
             ma->other_bufs[old_n]=ma->buf;
             ma->n_other_bufs = old_n+1;
             ma->size_of_other_bufs += ma->buf_size;
+            ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used);
         }
         // Make a new one
         {
@@ -217,7 +221,9 @@ void memarena_move_buffers(MEMARENA dest, MEMARENA source) {
 #endif

     dest  ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size;
+    dest  ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used);
     source->size_of_other_bufs = 0;
+    source->footprint_of_other_bufs = 0;

     assert(other_bufs);
     dest->other_bufs = other_bufs;
@@ -247,3 +253,11 @@ memarena_total_size_in_use (MEMARENA m)
 {
     return m->size_of_other_bufs + m->buf_used;
 }
+
+size_t
+memarena_total_footprint (MEMARENA m)
+{
+    return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) +
+           sizeof(*m) +
+           m->n_other_bufs * sizeof(*m->other_bufs);
+}
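Note that footprint_of_other_bufs is maintained incrementally: toku_memory_footprint is called once when a buffer is retired (in malloc_in_memarena) or moved (in memarena_move_buffers), so memarena_total_footprint stays O(1) however many retired buffers the arena holds. A sketch of that invariant, with a stand-in page-rounding footprint function (the real toku_memory_footprint semantics are assumed, not shown in this diff):

    // Sketch of the incremental footprint bookkeeping (stand-in footprint fn).
    #include <cstddef>

    static size_t footprint(size_t used) {
        return (used + 4095) & ~static_cast<size_t>(4095);  // round up to 4 KiB
    }

    struct arena_sketch {
        size_t buf_used = 0;                 // bytes used in the live buffer
        size_t footprint_of_other_bufs = 0;  // updated once per retired buffer

        void retire_live_buf() {             // like malloc_in_memarena's slow path
            footprint_of_other_bufs += footprint(buf_used);
            buf_used = 0;                    // a fresh live buffer takes over
        }

        size_t total_footprint() const {     // O(1), like memarena_total_footprint
            return footprint_of_other_bufs + footprint(buf_used);
        }
    };

The rollback hunk below then switches rollback_memory_size to this function, presumably so cachetable pair attrs track estimated resident memory rather than total reserved buffer size.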
@@ -129,5 +129,6 @@ size_t memarena_total_memory_size (MEMARENA);
 size_t memarena_total_size_in_use (MEMARENA);
+size_t memarena_total_footprint (MEMARENA);

 #endif
@@ -146,7 +146,7 @@ PAIR_ATTR
 rollback_memory_size(ROLLBACK_LOG_NODE log) {
     size_t size = sizeof(*log);
     if (log->rollentry_arena) {
-        size += memarena_total_memory_size(log->rollentry_arena);
+        size += memarena_total_footprint(log->rollentry_arena);
     }
     return make_rollback_pair_attr(size);
 }
...
@@ -127,7 +127,7 @@ long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
 }

 static void
-test_serialize_leaf(int valsize, int nelts, double entropy) {
+test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
     //    struct ft_handle source_ft;
     struct ftnode *sn, *dn;
@@ -214,32 +214,63 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
         assert(size == 100);
     }
+    struct timeval total_start;
+    struct timeval total_end;
+    total_start.tv_sec = total_start.tv_usec = 0;
+    total_end.tv_sec = total_end.tv_usec = 0;
     struct timeval t[2];
-    gettimeofday(&t[0], NULL);
     FTNODE_DISK_DATA ndd = NULL;
-    r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, brt->ft, false);
-    assert(r==0);
-    gettimeofday(&t[1], NULL);
+    for (int i = 0; i < ser_runs; i++) {
+        gettimeofday(&t[0], NULL);
+        ndd = NULL;
+        sn->dirty = 1;
+        r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, brt->ft, false);
+        assert(r==0);
+        gettimeofday(&t[1], NULL);
+        total_start.tv_sec += t[0].tv_sec;
+        total_start.tv_usec += t[0].tv_usec;
+        total_end.tv_sec += t[1].tv_sec;
+        total_end.tv_usec += t[1].tv_usec;
+        toku_free(ndd);
+    }
     double dt;
-    dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
-    printf("serialize leaf: %0.05lf\n", dt);
+    dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+    dt *= 1000;
+    dt /= ser_runs;
+    printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
+
+    //reset
+    total_start.tv_sec = total_start.tv_usec = 0;
+    total_end.tv_sec = total_end.tv_usec = 0;

     struct ftnode_fetch_extra bfe;
-    fill_bfe_for_full_read(&bfe, brt_h);
-    gettimeofday(&t[0], NULL);
-    FTNODE_DISK_DATA ndd2 = NULL;
-    r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
-    assert(r==0);
-    gettimeofday(&t[1], NULL);
-    dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
-    printf("deserialize leaf: %0.05lf\n", dt);
-    printf("io time %lf decompress time %lf deserialize time %lf\n",
-           tokutime_to_seconds(bfe.io_time),
-           tokutime_to_seconds(bfe.decompress_time),
-           tokutime_to_seconds(bfe.deserialize_time)
+    for (int i = 0; i < deser_runs; i++) {
+        fill_bfe_for_full_read(&bfe, brt_h);
+        gettimeofday(&t[0], NULL);
+        FTNODE_DISK_DATA ndd2 = NULL;
+        r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
+        assert(r==0);
+        gettimeofday(&t[1], NULL);
+
+        total_start.tv_sec += t[0].tv_sec;
+        total_start.tv_usec += t[0].tv_usec;
+        total_end.tv_sec += t[1].tv_sec;
+        total_end.tv_usec += t[1].tv_usec;
+
+        toku_ftnode_free(&dn);
+        toku_free(ndd2);
+    }
+    dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
+    dt *= 1000;
+    dt /= deser_runs;
+    printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
+    printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
+           tokutime_to_seconds(bfe.io_time)*1000,
+           tokutime_to_seconds(bfe.decompress_time)*1000,
+           tokutime_to_seconds(bfe.deserialize_time)*1000,
+           deser_runs
         );
-    toku_ftnode_free(&dn);

     toku_ftnode_free(&sn);

     toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
@@ -247,14 +278,12 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
     toku_free(brt_h->h);
     toku_free(brt_h);
     toku_free(brt);
-    toku_free(ndd);
-    toku_free(ndd2);

     r = close(fd); assert(r != -1);
 }
 static void
-test_serialize_nonleaf(int valsize, int nelts, double entropy) {
+test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
     //    struct ft_handle source_ft;
     struct ftnode sn, *dn;
@@ -353,7 +382,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
     gettimeofday(&t[1], NULL);
     double dt;
     dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
-    printf("serialize nonleaf: %0.05lf\n", dt);
+    dt *= 1000;
+    printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);

     struct ftnode_fetch_extra bfe;
     fill_bfe_for_full_read(&bfe, brt_h);
@@ -363,11 +393,13 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
     assert(r==0);
     gettimeofday(&t[1], NULL);
     dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
-    printf("deserialize nonleaf: %0.05lf\n", dt);
-    printf("io time %lf decompress time %lf deserialize time %lf\n",
-           tokutime_to_seconds(bfe.io_time),
-           tokutime_to_seconds(bfe.decompress_time),
-           tokutime_to_seconds(bfe.deserialize_time)
+    dt *= 1000;
+    printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
+    printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
+           tokutime_to_seconds(bfe.io_time)*1000,
+           tokutime_to_seconds(bfe.decompress_time)*1000,
+           tokutime_to_seconds(bfe.deserialize_time)*1000,
+           deser_runs
         );

     toku_ftnode_free(&dn);
@@ -394,19 +426,32 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {

 int
 test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
-    long valsize, nelts;
+    const int DEFAULT_RUNS = 5;
+    long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
     double entropy = 0.3;

-    if (argc != 3) {
-        fprintf(stderr, "Usage: %s <valsize> <nelts>\n", argv[0]);
+    if (argc != 3 && argc != 5) {
+        fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
+        fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
         return 2;
     }
     valsize = strtol(argv[1], NULL, 0);
     nelts = strtol(argv[2], NULL, 0);
+    if (argc == 5) {
+        ser_runs = strtol(argv[3], NULL, 0);
+        deser_runs = strtol(argv[4], NULL, 0);
+    }
+
+    if (ser_runs <= 0) {
+        ser_runs = DEFAULT_RUNS;
+    }
+    if (deser_runs <= 0) {
+        deser_runs = DEFAULT_RUNS;
+    }

     initialize_dummymsn();
-    test_serialize_leaf(valsize, nelts, entropy);
-    test_serialize_nonleaf(valsize, nelts, entropy);
+    test_serialize_leaf(valsize, nelts, entropy, ser_runs, deser_runs);
+    test_serialize_nonleaf(valsize, nelts, entropy, ser_runs, deser_runs);

     return 0;
 }
@@ -189,7 +189,7 @@ doit (void) {
     r = toku_testsetup_root(t, node_root);
     assert(r==0);

-    char filler[900];
+    char filler[900-2*bn_data::HEADER_LENGTH];
     memset(filler, 0, sizeof(filler));
     // now we insert filler data so that a merge does not happen
     r = toku_testsetup_insert_to_leaf (
...
@@ -187,6 +187,13 @@ static inline void wbuf_uint (struct wbuf *w, uint32_t i) {
     wbuf_int(w, (int32_t)i);
 }

+static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t nbytes) {
+    assert(w->ndone + nbytes <= w->size);
+    uint8_t * dest = w->buf + w->ndone;
+    w->ndone += nbytes;
+    return dest;
+}
+
 static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) {
     const unsigned char *bytes = (const unsigned char *) bytes_bv;
 #if 0
...
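wbuf_nocrc_reserve_literal_bytes returns a raw slot inside the write buffer and advances ndone, which enables a reserve-then-backpatch pattern: leave room for a header or length field, write the variable-sized payload, then fill the slot in afterward. A minimal sketch of such a caller (hypothetical function; only the wbuf fields used above are assumed):

    // Hypothetical caller showing the reserve-then-backpatch pattern.
    #include <cstring>

    static void example_write_with_backpatch(struct wbuf *w) {
        // Reserve 4 bytes for a length we do not know yet.
        uint8_t *len_slot = wbuf_nocrc_reserve_literal_bytes(w, 4);
        uint32_t start = w->ndone;
        // ... write the variable-length payload into w here ...
        uint32_t payload_len = w->ndone - start;
        memcpy(len_slot, &payload_len, sizeof payload_len);  // backpatch
    }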
@@ -131,7 +131,7 @@ void toku_mempool_init(struct mempool *mp, void *base, size_t free_offset, size_
 void toku_mempool_construct(struct mempool *mp, size_t data_size) {
     if (data_size) {
         size_t mpsize = data_size + (data_size/4);    // allow 1/4 room for expansion (would be wasted if read-only)
-        mp->base = toku_xmalloc(mpsize);              // allocate buffer for mempool
+        mp->base = toku_xmalloc_aligned(64, mpsize);  // allocate buffer for mempool
         mp->size = mpsize;
         mp->free_offset = 0;  // address of first available memory for new data
         mp->frag_size = 0;    // all allocated space is now in use
@@ -142,6 +142,16 @@ void toku_mempool_construct(struct mempool *mp, size_t data_size) {
     }
 }

+void toku_mempool_realloc_larger(struct mempool *mp, size_t data_size) {
+    invariant(data_size > mp->free_offset);
+    size_t mpsize = data_size + (data_size/4);        // allow 1/4 room for expansion (would be wasted if read-only)
+    void* newmem = toku_xmalloc_aligned(64, mpsize);  // allocate new buffer for mempool
+    memcpy(newmem, mp->base, mp->free_offset);        // Copy old info
+    toku_free(mp->base);
+    mp->base = newmem;
+    mp->size = mpsize;
+}
+
 void toku_mempool_destroy(struct mempool *mp) {
     //    printf("mempool_destroy %p %p %lu %lu\n", mp, mp->base, mp->size, mp->frag_size);
@@ -150,27 +160,40 @@ void toku_mempool_destroy(struct mempool *mp) {
     toku_mempool_zero(mp);
 }

-void *toku_mempool_get_base(struct mempool *mp) {
+void *toku_mempool_get_base(const struct mempool *mp) {
     return mp->base;
 }

-size_t toku_mempool_get_size(struct mempool *mp) {
+void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset) {
+    return reinterpret_cast<void*>(reinterpret_cast<char*>(mp->base) + offset);
+}
+
+size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, void* p) {
+    paranoid_invariant(p >= mp->base);
+    return reinterpret_cast<char*>(p) - reinterpret_cast<char*>(mp->base);
+}
+
+size_t toku_mempool_get_size(const struct mempool *mp) {
     return mp->size;
 }

-size_t toku_mempool_get_frag_size(struct mempool *mp) {
+size_t toku_mempool_get_frag_size(const struct mempool *mp) {
     return mp->frag_size;
 }

-size_t toku_mempool_get_used_space(struct mempool *mp) {
+size_t toku_mempool_get_used_space(const struct mempool *mp) {
     return mp->free_offset - mp->frag_size;
 }

-size_t toku_mempool_get_free_space(struct mempool *mp) {
+void* toku_mempool_get_next_free_ptr(const struct mempool *mp) {
+    return toku_mempool_get_pointer_from_base_and_offset(mp, mp->free_offset);
+}
+
+size_t toku_mempool_get_free_space(const struct mempool *mp) {
     return mp->size - mp->free_offset;
 }

-size_t toku_mempool_get_allocated_space(struct mempool *mp) {
+size_t toku_mempool_get_allocated_space(const struct mempool *mp) {
     return mp->free_offset;
 }
@@ -211,10 +234,10 @@ size_t toku_mempool_footprint(struct mempool *mp) {
     return rval;
 }

-void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp) {
+void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp) {
     new_mp->frag_size = orig_mp->frag_size;
     new_mp->free_offset = orig_mp->free_offset;
     new_mp->size = orig_mp->free_offset;  // only make the cloned mempool store what is needed
-    new_mp->base = toku_xmalloc(new_mp->size);
+    new_mp->base = toku_xmalloc_aligned(64, new_mp->size);
     memcpy(new_mp->base, orig_mp->base, new_mp->size);
 }
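Taken together, the mempool additions support offset-based addressing across reallocation: hold an offset rather than a pointer, grow with toku_mempool_realloc_larger when free space runs out, then rebase through the new helpers. A sketch using only functions from this diff (the actual write and free_offset bump are elided):

    // Sketch: surviving a mempool realloc by storing offsets, not pointers.
    static void append_sketch(struct mempool *mp, size_t needed) {
        void *slot = toku_mempool_get_next_free_ptr(mp);
        size_t off = toku_mempool_get_offset_from_pointer_and_base(mp, slot);

        if (toku_mempool_get_free_space(mp) < needed) {
            // Growth reallocates and frees the old base, so 'slot' dangles...
            toku_mempool_realloc_larger(mp, toku_mempool_get_allocated_space(mp) + needed);
            // ...but the saved offset still names the same logical position:
            slot = toku_mempool_get_pointer_from_base_and_offset(mp, off);
        }
        // write 'needed' bytes at 'slot' and advance free_offset here
        (void)slot;
    }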