Commit b0ccec78 authored by Yoni Fogel

Refs Tokutek/ft-index#46 Add dmt (dynamic OMT)

Use the dmt in place of the omt in the bn_data class for storing leafentries.
This is an optimization for serial inserts and for mempool usage.
parent 5a61f344
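The dmt itself (util/dmt.h) is not shown in this commit view, so what follows is only a background sketch of the idea it relies on: a value "writer" reports how many bytes its serialized form needs, then writes itself into a slot the container carves out of one contiguous buffer, which is what lets serial inserts of variable-length klpairs avoid a separate allocation per pair. All names below (klpair_writer, packed_values) are hypothetical illustrations, not the real dmt API.

// Illustrative sketch only -- not the toku::dmt API, which is not part of this view.
#include <cstdint>
#include <cstring>
#include <vector>

struct klpair_writer {                          // hypothetical stand-in for dmt_functor<klpair_struct>
    uint32_t keylen;
    uint32_t le_offset;                         // offset of the leafentry in the mempool
    const void *keyp;

    size_t size_needed(void) const { return sizeof(uint32_t) + keylen; }
    void write_to(uint8_t *dest) const {
        memcpy(dest, &le_offset, sizeof(uint32_t));     // fixed part: le_offset
        memcpy(dest + sizeof(uint32_t), keyp, keylen);  // followed by the key bytes
    }
};

// Toy packed container: values are appended back to back into one flat buffer,
// with offsets recorded so each value can still be addressed by index.
struct packed_values {
    std::vector<uint8_t> buf;
    std::vector<uint32_t> offsets;

    void append(const klpair_writer &w) {
        offsets.push_back(static_cast<uint32_t>(buf.size()));
        buf.resize(buf.size() + w.size_needed());
        w.write_to(buf.data() + offsets.back());
    }
};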
......@@ -31,6 +31,7 @@ set(FT_SOURCES
checkpoint
compress
dbufio
dmt-wrapper
fifo
ft
ft-cachetable-wrappers
......
......@@ -91,9 +91,10 @@ PATENT RIGHTS GRANT:
#pragma once
#include <util/omt.h>
#include "leafentry.h"
#include <util/mempool.h>
#include "wbuf.h"
#include <util/dmt.h>
#include "leafentry.h"
#if 0 //for implementation
static int
......@@ -110,50 +111,80 @@ UU() verify_in_mempool(OMTVALUE lev, uint32_t UU(idx), void *mpv)
#endif
struct klpair_struct {
uint32_t keylen;
uint32_t le_offset; //Offset of leafentry (in leafentry mempool)
uint8_t key_le[0]; // key, followed by le
};
typedef struct klpair_struct *KLPAIR;
static inline LEAFENTRY get_le_from_klpair(KLPAIR klpair){
uint32_t keylen = klpair->keylen;
LEAFENTRY le = (LEAFENTRY)(klpair->key_le + keylen);
return le;
static constexpr uint32_t keylen_from_klpair_len(const uint32_t klpair_len) {
return klpair_len - __builtin_offsetof(klpair_struct, key_le);
}
template<typename omtcmp_t,
int (*h)(const DBT &, const omtcmp_t &)>
static int wrappy_fun_find(const KLPAIR &klpair, const omtcmp_t &extra) {
//TODO: kill this function when we split, and/or use toku_fill_dbt
typedef struct klpair_struct KLPAIR_S, *KLPAIR;
static_assert(__builtin_offsetof(klpair_struct, key_le) == 1*sizeof(uint32_t), "klpair alignment issues");
static_assert(__builtin_offsetof(klpair_struct, key_le) == sizeof(klpair_struct), "klpair size issues");
template<typename dmtcmp_t,
int (*h)(const DBT &, const dmtcmp_t &)>
static int wrappy_fun_find(const uint32_t klpair_len, const klpair_struct &klpair, const dmtcmp_t &extra) {
DBT kdbt;
kdbt.data = klpair->key_le;
kdbt.size = klpair->keylen;
kdbt.data = const_cast<void*>(reinterpret_cast<const void*>(klpair.key_le));
kdbt.size = keylen_from_klpair_len(klpair_len);
return h(kdbt, extra);
}
template<typename inner_iterate_extra_t>
struct wrapped_iterate_extra_t {
public:
inner_iterate_extra_t *inner;
const class bn_data * bd;
};
template<typename iterate_extra_t,
int (*h)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t idx, iterate_extra_t *const)>
static int wrappy_fun_iterate(const KLPAIR &klpair, const uint32_t idx, iterate_extra_t *const extra) {
uint32_t keylen = klpair->keylen;
void* key = klpair->key_le;
LEAFENTRY le = get_le_from_klpair(klpair);
return h(key, keylen, le, idx, extra);
static int wrappy_fun_iterate(const uint32_t klpair_len, const klpair_struct &klpair, const uint32_t idx, wrapped_iterate_extra_t<iterate_extra_t> *const extra) {
const void* key = &klpair.key_le;
LEAFENTRY le = extra->bd->get_le_from_klpair(&klpair);
return h(key, keylen_from_klpair_len(klpair_len), le, idx, extra->inner);
}
namespace toku {
template<>
class dmt_functor<klpair_struct> {
public:
size_t get_dmtdatain_t_size(void) const {
return sizeof(klpair_struct) + this->keylen;
}
void write_dmtdata_t_to(klpair_struct *const dest) const {
dest->le_offset = this->le_offset;
memcpy(dest->key_le, this->keyp, this->keylen);
}
dmt_functor(uint32_t _keylen, uint32_t _le_offset, const void* _keyp)
: keylen(_keylen), le_offset(_le_offset), keyp(_keyp) {}
dmt_functor(const uint32_t klpair_len, klpair_struct *const src)
: keylen(keylen_from_klpair_len(klpair_len)), le_offset(src->le_offset), keyp(src->key_le) {}
private:
const uint32_t keylen;
const uint32_t le_offset;
const void* keyp;
};
}
typedef toku::omt<KLPAIR> klpair_omt_t;
typedef toku::dmt<KLPAIR_S, KLPAIR> klpair_dmt_t;
// This class stores the data associated with a basement node
class bn_data {
public:
void init_zero(void);
void initialize_empty(void);
void initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size);
void initialize_from_data(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version);
// globals
uint64_t get_memory_size(void);
uint64_t get_disk_size(void);
void verify_mempool(void);
// Interact with "omt"
// Interact with "dmt"
uint32_t omt_size(void) const;
template<typename iterate_extra_t,
......@@ -165,14 +196,16 @@ public:
template<typename iterate_extra_t,
int (*f)(const void * key, const uint32_t keylen, const LEAFENTRY &, const uint32_t, iterate_extra_t *const)>
int omt_iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const {
return m_buffer.iterate_on_range< iterate_extra_t, wrappy_fun_iterate<iterate_extra_t, f> >(left, right, iterate_extra);
wrapped_iterate_extra_t<iterate_extra_t> wrapped_extra = { iterate_extra, this };
return m_buffer.iterate_on_range< wrapped_iterate_extra_t<iterate_extra_t>, wrappy_fun_iterate<iterate_extra_t, f> >(left, right, &wrapped_extra);
}
template<typename omtcmp_t,
int (*h)(const DBT &, const omtcmp_t &)>
int find_zero(const omtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
template<typename dmtcmp_t,
int (*h)(const DBT &, const dmtcmp_t &)>
int find_zero(const dmtcmp_t &extra, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
KLPAIR klpair = NULL;
int r = m_buffer.find_zero< omtcmp_t, wrappy_fun_find<omtcmp_t, h> >(extra, &klpair, idxp);
uint32_t klpair_len;
int r = m_buffer.find_zero< dmtcmp_t, wrappy_fun_find<dmtcmp_t, h> >(extra, &klpair_len, &klpair, idxp);
if (r == 0) {
if (value) {
*value = get_le_from_klpair(klpair);
......@@ -180,20 +213,21 @@ public:
if (key) {
paranoid_invariant(keylen != NULL);
*key = klpair->key_le;
*keylen = klpair->keylen;
*keylen = keylen_from_klpair_len(klpair_len);
}
else {
paranoid_invariant(keylen == NULL);
paranoid_invariant_null(keylen);
}
}
return r;
}
template<typename omtcmp_t,
int (*h)(const DBT &, const omtcmp_t &)>
int find(const omtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
template<typename dmtcmp_t,
int (*h)(const DBT &, const dmtcmp_t &)>
int find(const dmtcmp_t &extra, int direction, LEAFENTRY *const value, void** key, uint32_t* keylen, uint32_t *const idxp) const {
KLPAIR klpair = NULL;
int r = m_buffer.find< omtcmp_t, wrappy_fun_find<omtcmp_t, h> >(extra, direction, &klpair, idxp);
uint32_t klpair_len;
int r = m_buffer.find< dmtcmp_t, wrappy_fun_find<dmtcmp_t, h> >(extra, direction, &klpair_len, &klpair, idxp);
if (r == 0) {
if (value) {
*value = get_le_from_klpair(klpair);
......@@ -201,7 +235,7 @@ public:
if (key) {
paranoid_invariant(keylen != NULL);
*key = klpair->key_le;
*keylen = klpair->keylen;
*keylen = keylen_from_klpair_len(klpair_len);
}
else {
paranoid_invariant(keylen == NULL);
......@@ -232,7 +266,8 @@ public:
uint32_t* old_keylens,
LEAFENTRY* old_les,
size_t *le_sizes,
size_t mempool_size
size_t total_key_size,
size_t total_le_size
);
void clone(bn_data* orig_bn_data);
......@@ -243,14 +278,39 @@ public:
);
void get_space_for_overwrite(uint32_t idx, const void* keyp, uint32_t keylen, uint32_t old_size, uint32_t new_size, LEAFENTRY* new_le_space);
void get_space_for_insert(uint32_t idx, const void* keyp, uint32_t keylen, size_t size, LEAFENTRY* new_le_space);
LEAFENTRY get_le_from_klpair(const klpair_struct *klpair) const;
void prepare_to_serialize(void);
void serialize_header(struct wbuf *wb) const;
void serialize_rest(struct wbuf *wb) const;
bool need_to_serialize_each_leafentry_with_key(void) const;
static const uint32_t HEADER_LENGTH = 0
+ sizeof(uint32_t) // key_data_size
+ sizeof(uint32_t) // val_data_size
+ sizeof(uint32_t) // fixed_key_length
+ sizeof(uint8_t) // all_keys_same_length
+ sizeof(uint8_t) // keys_vals_separate
+ 0;
private:
// Private functions
KLPAIR mempool_malloc_from_omt(size_t size, void **maybe_free);
void omt_compress_kvspace(size_t added_size, void **maybe_free);
LEAFENTRY mempool_malloc_and_update_omt(size_t size, void **maybe_free);
void omt_compress_kvspace(size_t added_size, void **maybe_free, bool force_compress);
void add_key(uint32_t keylen);
void add_keys(uint32_t n_keys, uint32_t combined_keylen);
void remove_key(uint32_t keylen);
klpair_omt_t m_buffer; // pointers to individual leaf entries
klpair_dmt_t m_buffer; // pointers to individual leaf entries
struct mempool m_buffer_mempool; // storage for all leaf entries
friend class bndata_bugfix_test;
uint32_t klpair_disksize(const uint32_t klpair_len, const klpair_struct *klpair) const;
size_t m_disksize_of_keys;
void initialize_from_separate_keys_and_vals(uint32_t num_entries, struct rbuf *rb, uint32_t data_size, uint32_t version,
uint32_t key_data_size, uint32_t val_data_size, bool all_keys_same_length,
uint32_t fixed_key_length);
};
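The body of bn_data::serialize_header lives in bn_data.cc, which is not part of this view, so the following is only a hedged sketch of what a five-field header matching HEADER_LENGTH above could look like on the wire. The field order and the caller name are assumptions; wbuf_nocrc_uint and wbuf_nocrc_char are the same helpers used in serialize_ftnode_partition further down.

#include "wbuf.h"

// Hypothetical sketch of a header matching HEADER_LENGTH (field order assumed).
static void serialize_basement_header_sketch(struct wbuf *wb,
                                             uint32_t key_data_size,
                                             uint32_t val_data_size,
                                             uint32_t fixed_key_length,
                                             bool all_keys_same_length,
                                             bool keys_vals_separate) {
    wbuf_nocrc_uint(wb, key_data_size);                  // sizeof(uint32_t)
    wbuf_nocrc_uint(wb, val_data_size);                  // sizeof(uint32_t)
    wbuf_nocrc_uint(wb, fixed_key_length);               // sizeof(uint32_t)
    wbuf_nocrc_char(wb, all_keys_same_length ? 1 : 0);   // sizeof(uint8_t)
    wbuf_nocrc_char(wb, keys_vals_separate ? 1 : 0);     // sizeof(uint8_t)
}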
......@@ -1178,6 +1178,8 @@ typedef enum {
FT_PRO_NUM_STOP_LOCK_CHILD,
FT_PRO_NUM_STOP_CHILD_INMEM,
FT_PRO_NUM_DIDNT_WANT_PROMOTE,
FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, // how many basement nodes were deserialized with a fixed keysize
FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, // how many basement nodes were deserialized with a variable keysize
FT_STATUS_NUM_ROWS
} ft_status_entry;
......
......@@ -363,6 +363,8 @@ status_init(void)
STATUS_INIT(FT_PRO_NUM_STOP_LOCK_CHILD, PROMOTION_STOPPED_CHILD_LOCKED_OR_NOT_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was locked or not at all in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(FT_PRO_NUM_STOP_CHILD_INMEM, PROMOTION_STOPPED_CHILD_NOT_FULLY_IN_MEMORY, PARCOUNT, "promotion: stopped because the child was not fully in memory", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(FT_PRO_NUM_DIDNT_WANT_PROMOTE, PROMOTION_STOPPED_AFTER_LOCKING_CHILD, PARCOUNT, "promotion: stopped anyway, after locking the child", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, BASEMENT_DESERIALIZATION_FIXED_KEY, PARCOUNT, "basement nodes deserialized with fixed-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
STATUS_INIT(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, BASEMENT_DESERIALIZATION_VARIABLE_KEY, PARCOUNT, "basement nodes deserialized with variable-keysize", TOKU_ENGINE_STATUS|TOKU_GLOBAL_STATUS);
ft_status.initialized = true;
}
......@@ -389,6 +391,14 @@ toku_ft_get_status(FT_STATUS s) {
} \
} while (0)
void toku_note_deserialized_basement_node(bool fixed_key_size) {
if (fixed_key_size) {
STATUS_INC(FT_BASEMENT_DESERIALIZE_FIXED_KEYSIZE, 1);
} else {
STATUS_INC(FT_BASEMENT_DESERIALIZE_VARIABLE_KEYSIZE, 1);
}
}
bool is_entire_node_in_memory(FTNODE node) {
for (int i = 0; i < node->n_children; i++) {
if(BP_STATE(node,i) != PT_AVAIL) {
......@@ -595,6 +605,7 @@ ftnode_memory_size (FTNODE node)
int n_children = node->n_children;
retval += sizeof(*node);
retval += (n_children)*(sizeof(node->bp[0]));
retval += (n_children > 0 ? n_children-1 : 0)*(sizeof(node->childkeys[0]));
retval += node->totalchildkeylens;
// now calculate the sizes of the partitions
......@@ -1722,6 +1733,8 @@ toku_ft_bn_apply_cmd_once (
&new_le,
&numbytes_delta
);
// at this point, we cannot trust cmd->u.id.key to be valid.
// The dmt may have realloced its mempool and freed the one containing key.
newsize = new_le ? (leafentry_memsize(new_le) + + key_storage_size) : 0;
if (le && new_le) {
......@@ -1986,6 +1999,7 @@ toku_ft_bn_apply_cmd (
int deleted = 0;
if (!le_is_clean(storeddata)) { //If already clean, nothing to do.
toku_ft_bn_apply_cmd_once(bn, cmd, idx, storeddata, oldest_referenced_xid_known, gc_info, workdone, stats_to_update);
// at this point, we cannot trust cmd->u.id.key to be valid.
uint32_t new_omt_size = bn->data_buffer.omt_size();
if (new_omt_size != omt_size) {
paranoid_invariant(new_omt_size+1 == omt_size);
......
......@@ -351,6 +351,8 @@ int toku_ft_strerror_r(int error, char *buf, size_t buflen);
extern bool garbage_collection_debug;
void toku_note_deserialized_basement_node(bool fixed_key_size);
// This is a poor place to put global options like these.
void toku_ft_set_direct_io(bool direct_io_on);
void toku_ft_set_compress_buffers_before_eviction(bool compress_buffers);
......
......@@ -118,7 +118,7 @@ enum ft_layout_version_e {
FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection
FT_LAYOUT_VERSION_23 = 23, // Ming: Fix upgrade path #5902
FT_LAYOUT_VERSION_24 = 24, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry
FT_LAYOUT_VERSION_25 = 25, // SecretSquirrel: ROLLBACK_LOG_NODES (on disk and in memory) now just use blocknum (instead of blocknum + hash) to point to other log nodes. same for xstillopen log entry, basements store key/vals separately on disk
FT_NEXT_VERSION, // the version after the current version
FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported
......
......@@ -320,7 +320,7 @@ serialize_ftnode_partition_size (FTNODE node, int i)
result += toku_bnc_nbytesinbuf(BNC(node, i));
}
else {
result += 4; // n_entries in buffer table
result += 4 + bn_data::HEADER_LENGTH; // n_entries in buffer table + basement header
result += BLB_NBYTESINDATA(node, i);
}
result += 4; // checksum
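For reference, the extra overhead added above is fixed: bn_data::HEADER_LENGTH = 3 * sizeof(uint32_t) + 2 * sizeof(uint8_t) = 14 bytes per leaf partition, on top of the existing 4-byte n_entries count and 4-byte checksum.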
......@@ -380,10 +380,16 @@ serialize_ftnode_partition(FTNODE node, int i, struct sub_block *sb) {
wbuf_nocrc_char(&wb, ch);
wbuf_nocrc_uint(&wb, bd->omt_size());
bd->prepare_to_serialize();
bd->serialize_header(&wb);
if (bd->need_to_serialize_each_leafentry_with_key()) {
//
// iterate over leafentries and place them into the buffer
//
bd->omt_iterate<struct wbuf, wbufwriteleafentry>(&wb);
} else {
bd->serialize_rest(&wb);
}
}
uint32_t end_to_end_checksum = x1764_memory(sb->uncompressed_ptr, wbuf_get_woffset(&wb));
wbuf_nocrc_int(&wb, end_to_end_checksum);
......@@ -592,9 +598,14 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
// Create an array that will store the size of each basement.
// This is the sum of the leaf sizes of all the leaves in that basement.
// We don't know how many basements there will be, so we use num_le as the upper bound.
toku::scoped_malloc bn_sizes_buf(sizeof(size_t) * num_alloc);
size_t *bn_sizes = reinterpret_cast<size_t *>(bn_sizes_buf.get());
bn_sizes[0] = 0;
// Sum of all le sizes in a single basement
toku::scoped_calloc bn_le_sizes_buf(sizeof(size_t) * num_alloc);
size_t *bn_le_sizes = reinterpret_cast<size_t *>(bn_le_sizes_buf.get());
// Sum of all key sizes in a single basement
toku::scoped_calloc bn_key_sizes_buf(sizeof(size_t) * num_alloc);
size_t *bn_key_sizes = reinterpret_cast<size_t *>(bn_key_sizes_buf.get());
// TODO 4050: All these arrays should be combined into a single array of some bn_info struct (pivot, msize, num_les).
// Each entry is the number of leafentries in this basement. (Again, num_le is an overkill upper bound.)
......@@ -611,17 +622,20 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
for (uint32_t i = 0; i < num_le; i++) {
uint32_t curr_le_size = leafentry_disksize((LEAFENTRY) leafpointers[i]);
le_sizes[i] = curr_le_size;
if ((bn_size_so_far + curr_le_size > basementnodesize) && (num_le_in_curr_bn != 0)) {
if ((bn_size_so_far + curr_le_size + sizeof(uint32_t) + key_sizes[i] > basementnodesize) && (num_le_in_curr_bn != 0)) {
// cap off the current basement node to end with the element before i
new_pivots[curr_pivot] = i-1;
curr_pivot++;
num_le_in_curr_bn = 0;
bn_size_so_far = 0;
bn_le_sizes[curr_pivot] = 0;
bn_key_sizes[curr_pivot] = 0;
}
num_le_in_curr_bn++;
num_les_this_bn[curr_pivot] = num_le_in_curr_bn;
bn_le_sizes[curr_pivot] += curr_le_size;
bn_key_sizes[curr_pivot] += sizeof(uint32_t) + key_sizes[i]; // uint32_t le_offset
bn_size_so_far += curr_le_size + sizeof(uint32_t) + key_sizes[i];
bn_sizes[curr_pivot] = bn_size_so_far;
}
// curr_pivot is now the total number of pivot keys in the leaf node
int num_pivots = curr_pivot;
......@@ -688,9 +702,6 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
uint32_t num_les_to_copy = num_les_this_bn[i];
invariant(num_les_to_copy == num_in_bn);
// construct mempool for this basement
size_t size_this_bn = bn_sizes[i];
BN_DATA bd = BLB_DATA(node, i);
bd->replace_contents_with_clone_of_sorted_array(
num_les_to_copy,
......@@ -698,7 +709,8 @@ rebalance_ftnode_leaf(FTNODE node, unsigned int basementnodesize)
&key_sizes[baseindex_this_bn],
&leafpointers[baseindex_this_bn],
&le_sizes[baseindex_this_bn],
size_this_bn
bn_key_sizes[i], // Total key sizes
bn_le_sizes[i] // total le sizes
);
BP_STATE(node,i) = PT_AVAIL;
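As a worked example of the new accounting: a basement that ends up with two leafentries of 100 bytes whose keys are 8 and 12 bytes long contributes bn_le_sizes[i] = 200 and bn_key_sizes[i] = (4 + 8) + (4 + 12) = 28 (each key also carries a 4-byte le_offset), and those two totals are what replace_contents_with_clone_of_sorted_array above receives in place of the old single mempool size.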
......@@ -1548,8 +1560,7 @@ deserialize_ftnode_partition(
data_size -= rb.ndone; // remaining bytes of leafentry data
BASEMENTNODE bn = BLB(node, childnum);
bn->data_buffer.initialize_from_data(num_entries, &rb.buf[rb.ndone], data_size);
rb.ndone += data_size;
bn->data_buffer.initialize_from_data(num_entries, &rb, data_size, node->layout_version_read_from_disk);
}
assert(rb.ndone == rb.size);
exit:
......@@ -2101,8 +2112,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
if (has_end_to_end_checksum) {
data_size -= sizeof(uint32_t);
}
bn->data_buffer.initialize_from_data(n_in_buf, &rb->buf[rb->ndone], data_size);
rb->ndone += data_size;
bn->data_buffer.initialize_from_data(n_in_buf, rb, data_size, node->layout_version_read_from_disk);
}
// Whatever this is must be less than the MSNs of every message above
......
......@@ -98,6 +98,7 @@ struct memarena {
char *buf;
size_t buf_used, buf_size;
size_t size_of_other_bufs; // the buf_size of all the other bufs.
size_t footprint_of_other_bufs; // the footprint of all the other bufs.
char **other_bufs;
int n_other_bufs;
};
......@@ -108,6 +109,7 @@ MEMARENA memarena_create_presized (size_t initial_size) {
result->buf_used = 0;
result->other_bufs = NULL;
result->size_of_other_bufs = 0;
result->footprint_of_other_bufs = 0;
result->n_other_bufs = 0;
XMALLOC_N(result->buf_size, result->buf);
return result;
......@@ -128,6 +130,7 @@ void memarena_clear (MEMARENA ma) {
// But reuse the main buffer
ma->buf_used = 0;
ma->size_of_other_bufs = 0;
ma->footprint_of_other_bufs = 0;
}
static size_t
......@@ -151,6 +154,7 @@ void* malloc_in_memarena (MEMARENA ma, size_t size) {
ma->other_bufs[old_n]=ma->buf;
ma->n_other_bufs = old_n+1;
ma->size_of_other_bufs += ma->buf_size;
ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used);
}
// Make a new one
{
......@@ -217,7 +221,9 @@ void memarena_move_buffers(MEMARENA dest, MEMARENA source) {
#endif
dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size;
dest ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used);
source->size_of_other_bufs = 0;
source->footprint_of_other_bufs = 0;
assert(other_bufs);
dest->other_bufs = other_bufs;
......@@ -247,3 +253,11 @@ memarena_total_size_in_use (MEMARENA m)
{
return m->size_of_other_bufs + m->buf_used;
}
size_t
memarena_total_footprint (MEMARENA m)
{
return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) +
sizeof(*m) +
m->n_other_bufs * sizeof(*m->other_bufs);
}
......@@ -129,5 +129,6 @@ size_t memarena_total_memory_size (MEMARENA);
size_t memarena_total_size_in_use (MEMARENA);
size_t memarena_total_footprint (MEMARENA);
#endif
......@@ -146,7 +146,7 @@ PAIR_ATTR
rollback_memory_size(ROLLBACK_LOG_NODE log) {
size_t size = sizeof(*log);
if (log->rollentry_arena) {
size += memarena_total_memory_size(log->rollentry_arena);
size += memarena_total_footprint(log->rollentry_arena);
}
return make_rollback_pair_attr(size);
}
......
......@@ -127,7 +127,7 @@ long_key_cmp(DB *UU(e), const DBT *a, const DBT *b)
}
static void
test_serialize_leaf(int valsize, int nelts, double entropy) {
test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
// struct ft_handle source_ft;
struct ftnode *sn, *dn;
......@@ -214,32 +214,63 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
assert(size == 100);
}
struct timeval total_start;
struct timeval total_end;
total_start.tv_sec = total_start.tv_usec = 0;
total_end.tv_sec = total_end.tv_usec = 0;
struct timeval t[2];
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd = NULL;
for (int i = 0; i < ser_runs; i++) {
gettimeofday(&t[0], NULL);
ndd = NULL;
sn->dirty = 1;
r = toku_serialize_ftnode_to(fd, make_blocknum(20), sn, &ndd, true, brt->ft, false);
assert(r==0);
gettimeofday(&t[1], NULL);
total_start.tv_sec += t[0].tv_sec;
total_start.tv_usec += t[0].tv_usec;
total_end.tv_sec += t[1].tv_sec;
total_end.tv_usec += t[1].tv_usec;
toku_free(ndd);
}
double dt;
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
printf("serialize leaf: %0.05lf\n", dt);
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= ser_runs;
printf("serialize leaf(ms): %0.05lf (average of %d runs)\n", dt, ser_runs);
//reset
total_start.tv_sec = total_start.tv_usec = 0;
total_end.tv_sec = total_end.tv_usec = 0;
struct ftnode_fetch_extra bfe;
for (int i = 0; i < deser_runs; i++) {
fill_bfe_for_full_read(&bfe, brt_h);
gettimeofday(&t[0], NULL);
FTNODE_DISK_DATA ndd2 = NULL;
r = toku_deserialize_ftnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe);
assert(r==0);
gettimeofday(&t[1], NULL);
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
printf("deserialize leaf: %0.05lf\n", dt);
printf("io time %lf decompress time %lf deserialize time %lf\n",
tokutime_to_seconds(bfe.io_time),
tokutime_to_seconds(bfe.decompress_time),
tokutime_to_seconds(bfe.deserialize_time)
);
total_start.tv_sec += t[0].tv_sec;
total_start.tv_usec += t[0].tv_usec;
total_end.tv_sec += t[1].tv_sec;
total_end.tv_usec += t[1].tv_usec;
toku_ftnode_free(&dn);
toku_free(ndd2);
}
dt = (total_end.tv_sec - total_start.tv_sec) + ((total_end.tv_usec - total_start.tv_usec) / USECS_PER_SEC);
dt *= 1000;
dt /= deser_runs;
printf("deserialize leaf(ms): %0.05lf (average of %d runs)\n", dt, deser_runs);
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (average of %d runs)\n",
tokutime_to_seconds(bfe.io_time)*1000,
tokutime_to_seconds(bfe.decompress_time)*1000,
tokutime_to_seconds(bfe.deserialize_time)*1000,
deser_runs
);
toku_ftnode_free(&sn);
toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
......@@ -247,14 +278,12 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
toku_free(brt_h->h);
toku_free(brt_h);
toku_free(brt);
toku_free(ndd);
toku_free(ndd2);
r = close(fd); assert(r != -1);
}
static void
test_serialize_nonleaf(int valsize, int nelts, double entropy) {
test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int deser_runs) {
// struct ft_handle source_ft;
struct ftnode sn, *dn;
......@@ -353,7 +382,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
gettimeofday(&t[1], NULL);
double dt;
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
printf("serialize nonleaf: %0.05lf\n", dt);
dt *= 1000;
printf("serialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, ser_runs);
struct ftnode_fetch_extra bfe;
fill_bfe_for_full_read(&bfe, brt_h);
......@@ -363,11 +393,13 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
assert(r==0);
gettimeofday(&t[1], NULL);
dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC);
printf("deserialize nonleaf: %0.05lf\n", dt);
printf("io time %lf decompress time %lf deserialize time %lf\n",
tokutime_to_seconds(bfe.io_time),
tokutime_to_seconds(bfe.decompress_time),
tokutime_to_seconds(bfe.deserialize_time)
dt *= 1000;
printf("deserialize nonleaf(ms): %0.05lf (IGNORED RUNS=%d)\n", dt, deser_runs);
printf("io time(ms) %lf decompress time(ms) %lf deserialize time(ms) %lf (IGNORED RUNS=%d)\n",
tokutime_to_seconds(bfe.io_time)*1000,
tokutime_to_seconds(bfe.decompress_time)*1000,
tokutime_to_seconds(bfe.deserialize_time)*1000,
deser_runs
);
toku_ftnode_free(&dn);
......@@ -394,19 +426,32 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
long valsize, nelts;
const int DEFAULT_RUNS = 5;
long valsize, nelts, ser_runs = DEFAULT_RUNS, deser_runs = DEFAULT_RUNS;
double entropy = 0.3;
if (argc != 3) {
fprintf(stderr, "Usage: %s <valsize> <nelts>\n", argv[0]);
if (argc != 3 && argc != 5) {
fprintf(stderr, "Usage: %s <valsize> <nelts> [<serialize_runs> <deserialize_runs>]\n", argv[0]);
fprintf(stderr, "Default (and min) runs is %d\n", DEFAULT_RUNS);
return 2;
}
valsize = strtol(argv[1], NULL, 0);
nelts = strtol(argv[2], NULL, 0);
if (argc == 5) {
ser_runs = strtol(argv[3], NULL, 0);
deser_runs = strtol(argv[4], NULL, 0);
}
if (ser_runs <= 0) {
ser_runs = DEFAULT_RUNS;
}
if (deser_runs <= 0) {
deser_runs = DEFAULT_RUNS;
}
initialize_dummymsn();
test_serialize_leaf(valsize, nelts, entropy);
test_serialize_nonleaf(valsize, nelts, entropy);
test_serialize_leaf(valsize, nelts, entropy, ser_runs, deser_runs);
test_serialize_nonleaf(valsize, nelts, entropy, ser_runs, deser_runs);
return 0;
}
......@@ -189,7 +189,7 @@ doit (void) {
r = toku_testsetup_root(t, node_root);
assert(r==0);
char filler[900];
char filler[900-2*bn_data::HEADER_LENGTH];
memset(filler, 0, sizeof(filler));
// now we insert filler data so that a merge does not happen
r = toku_testsetup_insert_to_leaf (
......
......@@ -187,6 +187,13 @@ static inline void wbuf_uint (struct wbuf *w, uint32_t i) {
wbuf_int(w, (int32_t)i);
}
static inline uint8_t* wbuf_nocrc_reserve_literal_bytes(struct wbuf *w, uint32_t nbytes) {
assert(w->ndone + nbytes <= w->size);
uint8_t * dest = w->buf + w->ndone;
w->ndone += nbytes;
return dest;
}
static inline void wbuf_nocrc_literal_bytes(struct wbuf *w, bytevec bytes_bv, uint32_t nbytes) {
const unsigned char *bytes = (const unsigned char *) bytes_bv;
#if 0
......
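A hedged usage sketch for the new wbuf_nocrc_reserve_literal_bytes helper above; the caller shown (write_packed_keys) is hypothetical, but it illustrates the intended pattern of reserving a whole region once and then filling it in place, e.g. when packing keys back to back while serializing a basement node.

#include <string.h>
#include "wbuf.h"

// Hypothetical caller: copy n keys back to back into a region reserved up front.
static void write_packed_keys(struct wbuf *wb,
                              const void *const *keys, const uint32_t *keylens,
                              uint32_t n, uint32_t total_key_bytes) {
    uint8_t *dest = wbuf_nocrc_reserve_literal_bytes(wb, total_key_bytes);
    for (uint32_t i = 0; i < n; i++) {
        memcpy(dest, keys[i], keylens[i]);
        dest += keylens[i];
    }
}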
......@@ -131,7 +131,7 @@ void toku_mempool_init(struct mempool *mp, void *base, size_t free_offset, size_
void toku_mempool_construct(struct mempool *mp, size_t data_size) {
if (data_size) {
size_t mpsize = data_size + (data_size/4); // allow 1/4 room for expansion (would be wasted if read-only)
mp->base = toku_xmalloc(mpsize); // allocate buffer for mempool
mp->base = toku_xmalloc_aligned(64, mpsize); // allocate buffer for mempool
mp->size = mpsize;
mp->free_offset = 0; // address of first available memory for new data
mp->frag_size = 0; // all allocated space is now in use
......@@ -142,6 +142,16 @@ void toku_mempool_construct(struct mempool *mp, size_t data_size) {
}
}
void toku_mempool_realloc_larger(struct mempool *mp, size_t data_size) {
invariant(data_size > mp->free_offset);
size_t mpsize = data_size + (data_size/4); // allow 1/4 room for expansion (would be wasted if read-only)
void* newmem = toku_xmalloc_aligned(64, mpsize); // allocate new buffer for mempool
memcpy(newmem, mp->base, mp->free_offset); // Copy old info
toku_free(mp->base);
mp->base = newmem;
mp->size = mpsize;
}
void toku_mempool_destroy(struct mempool *mp) {
// printf("mempool_destroy %p %p %lu %lu\n", mp, mp->base, mp->size, mp->frag_size);
......@@ -150,27 +160,40 @@ void toku_mempool_destroy(struct mempool *mp) {
toku_mempool_zero(mp);
}
void *toku_mempool_get_base(struct mempool *mp) {
void *toku_mempool_get_base(const struct mempool *mp) {
return mp->base;
}
size_t toku_mempool_get_size(struct mempool *mp) {
void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset) {
return reinterpret_cast<void*>(reinterpret_cast<char*>(mp->base) + offset);
}
size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, void* p) {
paranoid_invariant(p >= mp->base);
return reinterpret_cast<char*>(p) - reinterpret_cast<char*>(mp->base);
}
size_t toku_mempool_get_size(const struct mempool *mp) {
return mp->size;
}
size_t toku_mempool_get_frag_size(struct mempool *mp) {
size_t toku_mempool_get_frag_size(const struct mempool *mp) {
return mp->frag_size;
}
size_t toku_mempool_get_used_space(struct mempool *mp) {
size_t toku_mempool_get_used_space(const struct mempool *mp) {
return mp->free_offset - mp->frag_size;
}
size_t toku_mempool_get_free_space(struct mempool *mp) {
void* toku_mempool_get_next_free_ptr(const struct mempool *mp) {
return toku_mempool_get_pointer_from_base_and_offset(mp, mp->free_offset);
}
size_t toku_mempool_get_free_space(const struct mempool *mp) {
return mp->size - mp->free_offset;
}
size_t toku_mempool_get_allocated_space(struct mempool *mp) {
size_t toku_mempool_get_allocated_space(const struct mempool *mp) {
return mp->free_offset;
}
......@@ -211,10 +234,10 @@ size_t toku_mempool_footprint(struct mempool *mp) {
return rval;
}
void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp) {
void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp) {
new_mp->frag_size = orig_mp->frag_size;
new_mp->free_offset = orig_mp->free_offset;
new_mp->size = orig_mp->free_offset; // only make the cloned mempool store what is needed
new_mp->base = toku_xmalloc(new_mp->size);
new_mp->base = toku_xmalloc_aligned(64, new_mp->size);
memcpy(new_mp->base, orig_mp->base, new_mp->size);
}
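The new offset helpers plus toku_mempool_realloc_larger are what allow klpairs to store a uint32_t le_offset instead of a raw LEAFENTRY pointer: reallocating the mempool moves its buffer and invalidates pointers, but offsets remain valid against the new base. A minimal sketch using only functions shown in this diff (the function name, sizes, and alignment chosen here are arbitrary):

#include <util/mempool.h>

static void offset_survives_realloc_example(void) {
    struct mempool mp;
    toku_mempool_construct(&mp, 64);

    void *le = toku_mempool_malloc(&mp, 48, 1);                   // pretend this is a leafentry
    size_t le_offset = toku_mempool_get_offset_from_pointer_and_base(&mp, le);

    toku_mempool_realloc_larger(&mp, 1 << 20);                    // buffer may move; `le` is now stale
    le = toku_mempool_get_pointer_from_base_and_offset(&mp, le_offset);  // recover a valid pointer

    toku_mempool_destroy(&mp);
}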
......@@ -123,26 +123,39 @@ void toku_mempool_init(struct mempool *mp, void *base, size_t free_offset, size_
*/
void toku_mempool_construct(struct mempool *mp, size_t data_size);
/* reallocate the mempool with a larger buffer, preserving its existing contents
*/
void toku_mempool_realloc_larger(struct mempool *mp, size_t data_size);
/* destroy the memory pool */
void toku_mempool_destroy(struct mempool *mp);
/* get the base address of the memory pool */
void *toku_mempool_get_base(struct mempool *mp);
void *toku_mempool_get_base(const struct mempool *mp);
/* get a pointer that is offset bytes past the base of the memory pool */
void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset);
/* get the offset from base of a pointer */
size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, void* p);
/* get a pointer to the first free byte (if any) */
void* toku_mempool_get_next_free_ptr(const struct mempool *mp);
/* get the size of the memory pool */
size_t toku_mempool_get_size(struct mempool *mp);
size_t toku_mempool_get_size(const struct mempool *mp);
/* get the amount of fragmented (wasted) space in the memory pool */
size_t toku_mempool_get_frag_size(struct mempool *mp);
size_t toku_mempool_get_frag_size(const struct mempool *mp);
/* get the amount of space that is holding useful data */
size_t toku_mempool_get_used_space(struct mempool *mp);
size_t toku_mempool_get_used_space(const struct mempool *mp);
/* get the amount of space that is available for new data */
size_t toku_mempool_get_free_space(struct mempool *mp);
size_t toku_mempool_get_free_space(const struct mempool *mp);
/* get the amount of space that has been allocated for use (wasted or not) */
size_t toku_mempool_get_allocated_space(struct mempool *mp);
size_t toku_mempool_get_allocated_space(const struct mempool *mp);
/* allocate a chunk of memory from the memory pool suitably aligned */
void *toku_mempool_malloc(struct mempool *mp, size_t size, int alignment);
......@@ -160,6 +173,8 @@ static inline int toku_mempool_inrange(struct mempool *mp, void *vp, size_t size
/* get memory footprint */
size_t toku_mempool_footprint(struct mempool *mp);
void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp);
void toku_mempool_clone(const struct mempool* orig_mp, struct mempool* new_mp);
#endif // UTIL_MEMPOOL_H