Commit 9ff84158 authored by John Esmet's avatar John Esmet

FT-259 Clean up memarena API / code. Use a memarena in the locktree to store each transaction's ranges instead of a hand-rolled buffer.
parent dc50ba27
......@@ -2548,7 +2548,7 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc
wbuf_nocrc_BLOCKNUM(&wb, log->previous);
wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount);
//Write down memarena size needed to restore
wbuf_nocrc_ulonglong(&wb, toku_memarena_total_size_in_use(log->rollentry_arena));
wbuf_nocrc_ulonglong(&wb, log->rollentry_arena.total_size_in_use());
//Store rollback logs
......@@ -2712,8 +2712,8 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p,
result->rollentry_resident_bytecount = rbuf_ulonglong(rb);
size_t arena_initial_size = rbuf_ulonglong(rb);
result->rollentry_arena = toku_memarena_create_presized(arena_initial_size);
if (0) { died1: toku_memarena_destroy(&result->rollentry_arena); goto died0; }
if (0) { died1: result->rollentry_arena.destroy(); goto died0; }
//Load rollback entries
lazy_assert(rb->size > 4);
......@@ -2725,7 +2725,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p,
bytevec item_vec;
rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4);
unsigned char* item_buf = (unsigned char*)item_vec;
r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena);
r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena);
if (r!=0) {
r = toku_db_badformat();
goto died1;
......@@ -798,7 +798,7 @@ generate_rollbacks (void) {
fprintf(cf, " }\n assert(0);\n return 0;\n");
fprintf(cf, "}\n");
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)");
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)");
fprintf(hf, ";\n");
fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n");
fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n");
......@@ -806,7 +806,7 @@ generate_rollbacks (void) {
fprintf(cf, " case RT_%s:\n", lt->name);
fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
fprintf(cf, " CAST_FROM_VOIDP(item, toku_memarena_malloc(ma, mem_needed));\n");
fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n");
fprintf(cf, " item->cmd = cmd;\n");
DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name));
fprintf(cf, " *itemp = item;\n");
......@@ -92,13 +92,14 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_portability.h>
#include "toku_assert.h"
#include "fttypes.h"
#include "memory.h"
#include <toku_htonl.h>
#include <string.h>
#include <util/memarena.h>
#include "ft/fttypes.h"
#include "portability/memory.h"
#include "portability/toku_assert.h"
#include "portability/toku_htonl.h"
#include "portability/toku_portability.h"
#include "util/memarena.h"
struct rbuf {
unsigned char *buf;
......@@ -122,11 +123,11 @@ static inline unsigned char rbuf_char (struct rbuf *r) {
return r->buf[r->ndone++];
static inline void rbuf_ma_uint8_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint8_t *num) {
static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) {
*num = rbuf_char(r);
static inline void rbuf_ma_bool (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), bool *b) {
static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) {
uint8_t n = rbuf_char(r);
*b = (n!=0);
......@@ -199,15 +200,15 @@ static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) {
BLOCKNUM result = make_blocknum(rbuf_longlong(r));
return result;
static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) {
static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), BLOCKNUM *blocknum) {
*blocknum = rbuf_blocknum(r);
static inline void rbuf_ma_uint32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint32_t *num) {
static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) {
*num = rbuf_int(r);
static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint64_t *num) {
static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) {
*num = rbuf_ulonglong(r);
......@@ -221,18 +222,18 @@ static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) {
txnid->child_id64 = rbuf_ulonglong(r);
static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) {
static inline void rbuf_ma_TXNID (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID *txnid) {
rbuf_TXNID(r, txnid);
static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) {
static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) {
rbuf_TXNID_PAIR(r, txnid);
static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) {
filenum->fileid = rbuf_int(r);
static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUM *filenum) {
static inline void rbuf_ma_FILENUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUM *filenum) {
rbuf_FILENUM(r, filenum);
......@@ -248,9 +249,9 @@ static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) {
// 2954
static inline void rbuf_ma_FILENUMS (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUMS *filenums) {
static inline void rbuf_ma_FILENUMS (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUMS *filenums) {
rbuf_ma_uint32_t(r, ma, &(filenums->num));
filenums->filenums = (FILENUM *) toku_memarena_malloc(ma, filenums->num * sizeof(FILENUM) );
filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM));
assert(filenums->filenums != NULL);
for (uint32_t i=0; i < filenums->num; i++) {
rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i]));
......@@ -267,11 +268,12 @@ static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
r->ndone = newndone;
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) {
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) {
bs->len = rbuf_int(r);
uint32_t newndone = r->ndone + bs->len;
assert(newndone <= r->size);
bs->data = (char *) toku_memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len);
bs->data = (char *) ma->malloc_from_arena(bs->len);
memcpy(bs->data, &r->buf[r->ndone], bs->len);
r->ndone = newndone;
......@@ -258,9 +258,9 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
child_log->newest_logentry = child_log->oldest_logentry = 0;
// Put all the memarena data into the parent.
if (toku_memarena_total_size_in_use(child_log->rollentry_arena) > 0) {
if (child_log->rollentry_arena.total_size_in_use() > 0) {
// If there are no bytes to move, then just leave things alone, and let the memory be reclaimed when the txn is closed.
toku_memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena);
// each txn tries to give back at most one rollback log node
// to the cache. All other rollback log nodes for this child
......@@ -120,13 +120,17 @@ toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) {
return 0;
// TODO: fix this name
// toku_rollback_malloc
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
return toku_memarena_malloc(log->rollentry_arena, size);
return log->rollentry_arena.malloc_from_arena(size);
// TODO: fix this name
// toku_rollback_memdup
void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) {
void *r=toku_malloc_in_rollback(log, len);
void *r = toku_malloc_in_rollback(log, len);
memcpy(r, v, len);
return r;
......@@ -145,8 +149,8 @@ static inline PAIR_ATTR make_rollback_pair_attr(long size) {
rollback_memory_size(ROLLBACK_LOG_NODE log) {
size_t size = sizeof(*log);
if (log->rollentry_arena) {
size += toku_memarena_total_footprint(log->rollentry_arena);
if (&log->rollentry_arena) {
size += log->rollentry_arena.total_footprint();
return make_rollback_pair_attr(size);
......@@ -175,12 +179,10 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
log->previous = make_blocknum(0);
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = NULL;
log->rollentry_resident_bytecount = 0;
static void rollback_initialize_for_txn(
......@@ -192,13 +194,14 @@ static void rollback_initialize_for_txn(
log->previous = previous;
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = toku_memarena_create();
log->rollentry_resident_bytecount = 0;
log->dirty = true;
// TODO: fix this name
void make_rollback_log_empty(ROLLBACK_LOG_NODE log) {
......@@ -165,7 +165,7 @@ struct rollback_log_node {
BLOCKNUM previous;
struct roll_entry *oldest_logentry;
struct roll_entry *newest_logentry;
MEMARENA rollentry_arena;
struct memarena rollentry_arena;
size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory.
PAIR ct_pair;
......@@ -258,18 +258,18 @@ void locktree::sto_append(const DBT *left_key, const DBT *right_key) {
keyrange range;
range.create(left_key, right_key);
buffer_mem = m_sto_buffer.get_num_bytes();
buffer_mem = m_sto_buffer.total_memory_size();
m_sto_buffer.append(left_key, right_key);
delta = m_sto_buffer.get_num_bytes() - buffer_mem;
delta = m_sto_buffer.total_memory_size() - buffer_mem;
if (m_mgr != nullptr) {
void locktree::sto_end(void) {
uint64_t num_bytes = m_sto_buffer.get_num_bytes();
uint64_t mem_size = m_sto_buffer.total_memory_size();
if (m_mgr != nullptr) {
......@@ -302,9 +302,8 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
// insert all of the ranges from the single txnid buffer into a new rangetree
range_buffer::iterator iter;
range_buffer::iterator iter(&m_sto_buffer);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
int r = acquire_lock_consolidated(&sto_lkr,
......@@ -575,9 +574,8 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) {
// locks are already released, otherwise we need to do it here.
bool released = sto_try_release(txnid);
if (!released) {
range_buffer::iterator iter;
range_buffer::iterator iter(ranges);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
const DBT *left_key = rec.get_left_key();
const DBT *right_key = rec.get_right_key();
......@@ -647,10 +645,10 @@ struct txnid_range_buffer {
TXNID txnid;
range_buffer buffer;
static int find_by_txnid(const struct txnid_range_buffer &other_buffer, const TXNID &txnid) {
if (txnid < other_buffer.txnid) {
static int find_by_txnid(struct txnid_range_buffer *const &other_buffer, const TXNID &txnid) {
if (txnid < other_buffer->txnid) {
return -1;
} else if (other_buffer.txnid == txnid) {
} else if (other_buffer->txnid == txnid) {
return 0;
} else {
return 1;
......@@ -666,7 +664,7 @@ struct txnid_range_buffer {
// has locks in a random/alternating order, then this does
// not work so well.
void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) {
omt<struct txnid_range_buffer, struct txnid_range_buffer *> range_buffers;
omt<struct txnid_range_buffer *, struct txnid_range_buffer *> range_buffers;
// prepare and acquire a locked keyrange on the entire locktree
......@@ -716,7 +714,6 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
// Try to find a range buffer for the current txnid. Create one if it doesn't exist.
// Then, append the new escalated range to the buffer.
uint32_t idx;
struct txnid_range_buffer new_range_buffer;
struct txnid_range_buffer *existing_range_buffer;
int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>(
......@@ -724,9 +721,10 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
if (r == DB_NOTFOUND) {
new_range_buffer.txnid = current_txnid;
new_range_buffer.buffer.append(escalated_left_key, escalated_right_key);
struct txnid_range_buffer *XMALLOC(new_range_buffer);
new_range_buffer->txnid = current_txnid;
new_range_buffer->buffer.append(escalated_left_key, escalated_right_key);
range_buffers.insert_at(new_range_buffer, idx);
} else {
......@@ -754,9 +752,8 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
const TXNID current_txnid = current_range_buffer->txnid;
range_buffer::iterator iter;
range_buffer::iterator iter(&current_range_buffer->buffer);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
keyrange range;
range.create(rec.get_left_key(), rec.get_right_key());
......@@ -771,6 +768,15 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
while (range_buffers.size() > 0) {
struct txnid_range_buffer *buffer;
int r = range_buffers.fetch(0, &buffer);
r = range_buffers.delete_at(0);
......@@ -137,7 +137,6 @@ namespace toku {
class locktree;
class locktree_manager;
class lock_request;
class memory_tracker;
class concurrent_tree;
typedef int (*lt_create_cb)(locktree *lt, void *extra);
......@@ -246,7 +245,6 @@ namespace toku {
// tracks the current number of locks and lock memory
uint64_t m_max_lock_memory;
uint64_t m_current_lock_memory;
memory_tracker *m_mem_tracker;
struct lt_counters m_lt_counters;
This diff is collapsed.
......@@ -91,128 +91,120 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_stdint.h>
#include <ft/ybt.h>
#include "ft/ybt.h"
#include "portability/toku_stdint.h"
#include "util/memarena.h"
namespace toku {
// a key range buffer represents a set of key ranges that can
// be stored, iterated over, and then destroyed all at once.
class range_buffer {
// Private in spirit: We fail POD asserts when we try to store range_buffers in an omt.
// So make it all public, but don't touch.
// the key range buffer is a bunch of records in a row.
// each record has the following header, followed by the
// left key and right key data payload, if applicable.
// a key range buffer represents a set of key ranges that can
// be stored, iterated over, and then destroyed all at once.
class range_buffer {
struct record_header {
bool left_neg_inf;
bool left_pos_inf;
bool right_pos_inf;
bool right_neg_inf;
uint32_t left_key_size;
uint32_t right_key_size;
// the key range buffer is a bunch of records in a row.
// each record has the following header, followed by the
// left key and right key data payload, if applicable.
// we limit keys to be 2^16, since we store lengths as 2 bytes.
static const size_t MAX_KEY_SIZE = 1 << 16;
bool left_is_infinite(void) const;
struct record_header {
bool left_neg_inf;
bool left_pos_inf;
bool right_pos_inf;
bool right_neg_inf;
uint16_t left_key_size;
uint16_t right_key_size;
bool right_is_infinite(void) const;
bool left_is_infinite(void) const;
void init(const DBT *left_key, const DBT *right_key);
static_assert(sizeof(record_header) == 12, "record header format is off");
bool right_is_infinite(void) const;
// the iterator abstracts reading over a buffer of variable length
// records one by one until there are no more left.
class iterator {
void init(const DBT *left_key, const DBT *right_key);
static_assert(sizeof(record_header) == 8, "record header format is off");
// a record represents the user-view of a serialized key range.
// it handles positive and negative infinity and the optimized
// point range case, where left and right points share memory.
class record {
// the iterator abstracts reading over a buffer of variable length
// records one by one until there are no more left.
class iterator {
// get a read-only pointer to the left key of this record's range
const DBT *get_left_key(void) const;
// get a read-only pointer to the right key of this record's range
const DBT *get_right_key(void) const;
iterator(const range_buffer *buffer);
// how big is this record? this tells us where the next record is
size_t size(void) const;
// a record represents the user-view of a serialized key range.
// it handles positive and negative infinity and the optimized
// point range case, where left and right points share memory.
class record {
// get a read-only pointer to the left key of this record's range
const DBT *get_left_key(void) const;
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
// get a read-only pointer to the right key of this record's range
const DBT *get_right_key(void) const;
record_header m_header;
DBT m_left_key;
DBT m_right_key;
void create(const range_buffer *buffer);
// how big is this record? this tells us where the next record is
size_t size(void) const;
// populate the given record object with the current record.
// the memory referred to by record is valid for only
// as long as the record exists.
bool current(record *rec);
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
// move the iterator to the next record in the buffer
void next(void);
record_header _header;
DBT _left_key;
DBT _right_key;
// the key range buffer we are iterating over, the current
// offset in that buffer, and the size of the current record.
const range_buffer *m_buffer;
size_t m_current_offset;
size_t m_current_size;
// populate the given record object with the current record.
// the memory referred to by record is valid for only
// as long as the record exists.
bool current(record *rec);
// allocate buffer space lazily instead of on creation. this way,
// no malloc/free is done if the transaction ends up taking no locks.
void create(void);
// move the iterator to the next record in the buffer
void next(void);
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
void append(const DBT *left_key, const DBT *right_key);
void reset_current_chunk();
// the key range buffer we are iterating over, the current
// offset in that buffer, and the size of the current record.
memarena::chunk_iterator _ma_chunk_iterator;
const void *_current_chunk_base;
size_t _current_chunk_offset;
size_t _current_chunk_max;
size_t _current_rec_size;
// is this range buffer empty?
bool is_empty(void) const;
// allocate buffer space lazily instead of on creation. this way,
// no malloc/free is done if the transaction ends up taking no locks.
void create(void);
// how many bytes are stored in this range buffer?
uint64_t get_num_bytes(void) const;
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
void append(const DBT *left_key, const DBT *right_key);
// how many ranges are stored in this range buffer?
int get_num_ranges(void) const;
// is this range buffer empty?
bool is_empty(void) const;
void destroy(void);
// how much memory is being used by this range buffer?
uint64_t total_memory_size(void) const;
char *m_buf;
size_t m_buf_size;
size_t m_buf_current;
int m_num_ranges;
// how many ranges are stored in this range buffer?
int get_num_ranges(void) const;
void append_range(const DBT *left_key, const DBT *right_key);
void destroy(void);
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
void append_point(const DBT *key);
memarena _arena;
int _num_ranges;
void maybe_grow(size_t size);
void append_range(const DBT *left_key, const DBT *right_key);
// the initial size of the buffer is the next power of 2
// greater than the first entry we insert into the buffer.
size_t get_initial_size(size_t n) const;
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
void append_point(const DBT *key);
} /* namespace toku */
......@@ -121,9 +121,8 @@ static void test_points(void) {
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
const DBT *expected_point = get_dbt_by_iteration(i);
invariant(compare_dbts(nullptr, expected_point, rec.get_left_key()) == 0);
......@@ -151,9 +150,8 @@ static void test_ranges(void) {
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
const DBT *expected_left = get_dbt_by_iteration(i);
const DBT *expected_right = get_dbt_by_iteration(i + 1);
......@@ -187,9 +185,8 @@ static void test_mixed(void) {
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
while (iter.current(&rec)) {
const DBT *expected_left = get_dbt_by_iteration(i);
const DBT *expected_right = get_dbt_by_iteration(i + 1);
......@@ -232,10 +229,10 @@ static void test_small_and_large_points(void) {
// Append a small dbt, the buf should be able to fit it.
buffer.append(&small_dbt, &small_dbt);
invariant(buffer.m_buf_size >= small_dbt.size);
invariant(buffer.total_memory_size() >= small_dbt.size);
// Append a large dbt, the buf should be able to fit it.
buffer.append(&large_dbt, &large_dbt);
invariant(buffer.m_buf_size >= (small_dbt.size + large_dbt.size));
invariant(buffer.total_memory_size() >= (small_dbt.size + large_dbt.size));
......@@ -2463,7 +2463,7 @@ struct iter_txn_row_locks_callback_extra {
const int r = lt_map->fetch(which_lt, &ranges);
current_db = locked_get_db_by_dict_id(env,>get_dict_id());
iter = toku::range_buffer::iterator(ranges.buffer);
DB_ENV *env;
......@@ -144,11 +144,11 @@ static void db_txn_note_row_lock(DB *db, DB_TXN *txn, const DBT *left_key, const
// add a new lock range to this txn's row lock buffer
size_t old_num_bytes = ranges.buffer->get_num_bytes();
size_t old_mem_size = ranges.buffer->total_memory_size();
ranges.buffer->append(left_key, right_key);
size_t new_num_bytes = ranges.buffer->get_num_bytes();
invariant(new_num_bytes > old_num_bytes);
lt->get_manager()->note_mem_used(new_num_bytes - old_num_bytes);
size_t new_mem_size = ranges.buffer->total_memory_size();
invariant(new_mem_size > old_mem_size);
lt->get_manager()->note_mem_used(new_mem_size - old_mem_size);
......@@ -201,17 +201,16 @@ void toku_db_txn_escalate_callback(TXNID txnid, const toku::locktree *lt, const
// We could theoretically steal the memory from the caller instead of copying
// it, but it's simpler to have a callback API that doesn't transfer memory ownership.
toku::range_buffer::iterator iter;
toku::range_buffer::iterator iter(&buffer);
toku::range_buffer::iterator::record rec;
while (iter.current(&rec)) {
ranges.buffer->append(rec.get_left_key(), rec.get_right_key());;
} else {
// In rare cases, we may not find the associated locktree, because we are
// racing with the transaction trying to add this locktree to the lt map
......@@ -315,7 +314,7 @@ void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) {
// release all of the locks this txn has ever successfully
// acquired and stored in the range buffer for this locktree
lt->release_locks(txnid, ranges->buffer);
......@@ -89,157 +89,142 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <algorithm>
#include <string.h>
#include <memory.h>
#include <util/memarena.h>
struct memarena {
char *buf;
size_t buf_used, buf_size;
size_t size_of_other_bufs; // the buf_size of all the other bufs.
size_t footprint_of_other_bufs; // the footprint of all the other bufs.
char **other_bufs;
int n_other_bufs;
MEMARENA toku_memarena_create_presized (size_t initial_size) {
result->buf_size = initial_size;
result->buf_used = 0;
result->other_bufs = NULL;
result->size_of_other_bufs = 0;
result->footprint_of_other_bufs = 0;
result->n_other_bufs = 0;
XMALLOC_N(result->buf_size, result->buf);
return result;
void memarena::create(size_t initial_size) {
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_size_of_other_chunks = 0;
_footprint_of_other_chunks = 0;
_n_other_chunks = 0;
MEMARENA toku_memarena_create (void) {
return toku_memarena_create_presized(1024);
_current_chunk.size = initial_size;
if (_current_chunk.size > 0) {
XMALLOC_N(_current_chunk.size, _current_chunk.buf);
void toku_memarena_clear (MEMARENA ma) {
// Free the other bufs.
int i;
for (i=0; i<ma->n_other_bufs; i++) {
void memarena::destroy(void) {
if (_current_chunk.buf) {
for (int i = 0; i < _n_other_chunks; i++) {
// But reuse the main buffer
ma->buf_used = 0;
ma->size_of_other_bufs = 0;
ma->footprint_of_other_bufs = 0;
if (_other_chunks) {
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_n_other_chunks = 0;
static size_t
round_to_page (size_t size) {
const size_t _PAGE_SIZE = 4096;
const size_t result = _PAGE_SIZE+((size-1)&~(_PAGE_SIZE-1));
assert(0==(result&(_PAGE_SIZE-1))); // make sure it's aligned
assert(result>=size); // make sure it's not too small
assert(result<size+_PAGE_SIZE); // make sure we didn't grow by more than a page.
return result;
static size_t round_to_page(size_t size) {
const size_t page_size = 4096;
const size_t r = page_size + ((size - 1) & ~(page_size - 1));
assert((r & (page_size - 1)) == 0); // make sure it's aligned
assert(r >= size); // make sure it's not too small
assert(r < size + page_size); // make sure we didn't grow by more than a page.
return r;
void* toku_memarena_malloc (MEMARENA ma, size_t size) {
if (ma->buf_size < ma->buf_used + size) {
static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024;
void *memarena::malloc_from_arena(size_t size) {
if (_current_chunk.buf == nullptr || _current_chunk.size < _current_chunk.used + size) {
// The existing block isn't big enough.
// Add the block to the vector of blocks.
if (ma->buf) {
int old_n = ma->n_other_bufs;
REALLOC_N(old_n+1, ma->other_bufs);
ma->n_other_bufs = old_n+1;
ma->size_of_other_bufs += ma->buf_size;
ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used);
if (_current_chunk.buf) {
invariant(_current_chunk.size > 0);
int old_n = _n_other_chunks;
XREALLOC_N(old_n + 1, _other_chunks);
_other_chunks[old_n] = _current_chunk;
_n_other_chunks = old_n + 1;
_size_of_other_chunks += _current_chunk.size;
_footprint_of_other_chunks += toku_memory_footprint(_current_chunk.buf, _current_chunk.used);
// Make a new one
size_t new_size = 2*ma->buf_size;
if (new_size<size) new_size=size;
new_size=round_to_page(new_size); // at least size, but round to the next page size
XMALLOC_N(new_size, ma->buf);
ma->buf_used = 0;
ma->buf_size = new_size;
// Make a new one. Grow the buffer size exponentially until we hit
// the max chunk size, but make it at least `size' bytes so the
// current allocation always fits.
size_t new_size = std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size);
if (new_size < size) {
new_size = size;
new_size = round_to_page(new_size); // at least size, but round to the next page size
XMALLOC_N(new_size, _current_chunk.buf);
_current_chunk.used = 0;
_current_chunk.size = new_size;
invariant(_current_chunk.buf != nullptr);
// allocate in the existing block.
char *result=ma->buf+ma->buf_used;
return result;
char *p = _current_chunk.buf + _current_chunk.used;
_current_chunk.used += size;
return p;
void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len) {
void *r=toku_memarena_malloc(ma, len);
return r;
void memarena::move_memory(memarena *dest) {
// Move memory to dest
XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks);
dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size;
dest->_footprint_of_other_chunks += _footprint_of_other_chunks + toku_memory_footprint(_current_chunk.buf, _current_chunk.used);
for (int i = 0; i < _n_other_chunks; i++) {
dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i];
dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk;
// Clear out this memarena's memory
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_size_of_other_chunks = 0;
_footprint_of_other_chunks = 0;
_n_other_chunks = 0;
void toku_memarena_destroy(MEMARENA *map) {
MEMARENA ma=*map;
if (ma->buf) {
int i;
for (i=0; i<ma->n_other_bufs; i++) {
if (ma->other_bufs) toku_free(ma->other_bufs);
*map = 0;
size_t memarena::total_memory_size(void) const {
return sizeof(*this) +
total_size_in_use() +
_n_other_chunks * sizeof(*_other_chunks);
void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source) {
int i;
char **other_bufs = dest->other_bufs;
static int move_counter = 0;
REALLOC_N(dest->n_other_bufs + source->n_other_bufs + 1, other_bufs);
dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size;
dest ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used);
source->size_of_other_bufs = 0;
source->footprint_of_other_bufs = 0;
dest->other_bufs = other_bufs;
for (i=0; i<source->n_other_bufs; i++) {
dest->other_bufs[dest->n_other_bufs++] = source->other_bufs[i];
dest->other_bufs[dest->n_other_bufs++] = source->buf;
source->n_other_bufs = 0;
source->other_bufs = 0;
source->buf = 0;
source->buf_size = 0;
source->buf_used = 0;
size_t memarena::total_size_in_use(void) const {
return _size_of_other_chunks + _current_chunk.used;
size_t memarena::total_footprint(void) const {
return sizeof(*this) +
_footprint_of_other_chunks +
toku_memory_footprint(_current_chunk.buf, _current_chunk.used) +
_n_other_chunks * sizeof(*_other_chunks);
toku_memarena_total_memory_size (MEMARENA m)
return (toku_memarena_total_size_in_use(m) +
sizeof(*m) +
m->n_other_bufs * sizeof(*m->other_bufs));
const void *memarena::chunk_iterator::current(size_t *used) const {
if (_chunk_idx < 0) {
*used = _ma->_current_chunk.used;
return _ma->_current_chunk.buf;
} else if (_chunk_idx < _ma->_n_other_chunks) {
*used = _ma->_other_chunks[_chunk_idx].used;
return _ma->_other_chunks[_chunk_idx].buf;
*used = 0;
return nullptr;
toku_memarena_total_size_in_use (MEMARENA m)
return m->size_of_other_bufs + m->buf_used;
void memarena::chunk_iterator::next() {
toku_memarena_total_footprint (MEMARENA m)
return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) +
sizeof(*m) +
m->n_other_bufs * sizeof(*m->other_bufs);
bool memarena::chunk_iterator::more() const {
if (_chunk_idx < 0) {
return _ma->_current_chunk.buf != nullptr;
return _chunk_idx < _ma->_n_other_chunks;
......@@ -92,43 +92,85 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* We have too many memory management tricks:
* memarena (this code) is for a collection of objects that cannot be moved.
* The pattern is allocate more and more stuff.
* Don't free items as you go.
* Free all the items at once.
* Then reuse the same buffer again.
* Allocated objects never move.
* A memarena (as currently implemented) is not suitable for interprocess memory sharing. No reason it couldn't be made to work though.
* A memarena is used to efficiently store a collection of objects that never move.
* The pattern is allocate more and more stuff and free all of the items at once.
* The underlying memory will store 1 or more objects per chunk. Each chunk is
* contiguously laid out in memory but chunks are not necessarily contiguous with
* each other.
// C-style memarena API: an opaque struct plus a pointer typedef used as the handle.
struct memarena;
typedef struct memarena *MEMARENA;
MEMARENA toku_memarena_create_presized (size_t initial_size);
// Effect: Create a memarena with initial size. In case of ENOMEM, aborts.
MEMARENA toku_memarena_create (void);
// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts.
void toku_memarena_clear (MEMARENA ma);
// Effect: Reset the internal state so that the allocated memory can be used again.
void* toku_memarena_malloc (MEMARENA ma, size_t size);
// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed.
// In case of ENOMEM, aborts.
void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len);
// NOTE(review): undocumented here; presumably copies LEN bytes from V into memory taken from MA. Confirm against the implementation.
void toku_memarena_destroy(MEMARENA *ma);
// NOTE(review): undocumented here; presumably releases the arena referred to by *MA. Confirm against the implementation.
void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source);
// Effect: Move all the memory from SOURCE into DEST. When SOURCE is closed the memory won't be freed. When DEST is closed, the memory will be freed. (Unless DEST moves its memory to another memarena...)
size_t toku_memarena_total_memory_size (MEMARENA);
// Effect: Calculate the amount of memory used by a memory arena.
size_t toku_memarena_total_size_in_use (MEMARENA);
// Effect: Return the size recorded for the other buffers plus the used bytes of the current buffer.
size_t toku_memarena_total_footprint (MEMARENA);
// Effect: Like total_memory_size, but buffer sizes are measured with toku_memory_footprint().
// A memarena hands out memory from a current chunk and keeps an array of
// other chunks; it also exposes size accounting and a read-only iterator
// over the chunks.
class memarena {
memarena() :
_footprint_of_other_chunks(0) {
// Effect: Create a memarena with the specified initial size
void create(size_t initial_size);
// NOTE(review): undocumented in this excerpt; presumably releases the arena's memory. Confirm against the implementation.
void destroy(void);
// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed.
// In case of ENOMEM, aborts.
void *malloc_from_arena(size_t size);
// Effect: Move all the memory from this memarena (the source) into DEST.
// When the source is closed the memory won't be freed.
// When DEST is closed, the memory will be freed, unless DEST moves its memory to another memarena...
void move_memory(memarena *dest);
// Effect: Calculate the amount of memory used by a memory arena.
size_t total_memory_size(void) const;
// Effect: Calculate the used space of the memory arena (ie: excludes unused space)
size_t total_size_in_use(void) const;
// Effect: Calculate the amount of memory used, according to toku_memory_footprint(),
// which is a more expensive but more accurate count of memory used.
size_t total_footprint(void) const;
// iterator over the underlying chunks that store objects in the memarena.
// a chunk is represented by a pointer to const memory and a usable byte count.
class chunk_iterator {
// starts the iteration at index -1, ie: at the arena's current chunk
chunk_iterator(const memarena *ma) :
_ma(ma), _chunk_idx(-1) {
// returns: base pointer to the current chunk
// *used set to the number of usable bytes
// if more() is false, returns nullptr and *used = 0
const void *current(size_t *used) const;
// requires: more() is true
void next();
// returns: true while the iterator refers to a chunk
bool more() const;
// -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk
// >= 0 represents the i'th chunk in the ma->_other_chunks array
const memarena *_ma;
int _chunk_idx;
// Descriptor for one chunk of arena memory.
struct arena_chunk {
arena_chunk() : buf(nullptr), used(0), size(0) { }
char *buf; // base address of the chunk, nullptr when there is none
size_t used; // bytes of buf counted as in use
size_t size; // presumably the capacity of buf in bytes; TODO confirm
// the chunk at iterator index -1
struct arena_chunk _current_chunk;
// array of the remaining chunks, holding _n_other_chunks entries
struct arena_chunk *_other_chunks;
int _n_other_chunks;
size_t _size_of_other_chunks; // the buf_size of all the other chunks.
size_t _footprint_of_other_chunks; // the footprint of all the other chunks.
// the unit test reads the chunk members directly
friend class memarena_unit_test;
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
#include <string.h>
#include "portability/toku_assert.h"
#include "util/memarena.h"
// Unit test driver for memarena. It is declared a friend of memarena and
// inspects the arena's chunk members directly.
class memarena_unit_test {
// byte value written to arena memory and then read back
static const int magic = 37;
// Calls fn(buf, used) with the base pointer and used byte count obtained
// from a chunk_iterator over ma.
// NOTE(review): the for-header below looks truncated in this excerpt (no
// increment expression and no closing parenthesis).
template <typename F>
void iterate_chunks(memarena *ma, F &fn) {
for (memarena::chunk_iterator it(ma); it.more(); {
size_t used = 0;
const void *buf = it.current(&used);
fn(buf, used);
// Checks the current chunk's size and used counters and, for a non-zero
// size, that the whole buffer can be written and read back.
// NOTE(review): no call that creates or destroys the arena is visible in
// this excerpt.
void test_create(size_t size) {
memarena ma;
invariant(ma._current_chunk.size == size);
invariant(ma._current_chunk.used == 0);
if (size == 0) {
} else {
// make sure memory was allocated ok by
// writing to buf and reading it back
memset(ma._current_chunk.buf, magic, size);
for (size_t i = 0; i < size; i++) {
const char *buf = reinterpret_cast<char *>(ma._current_chunk.buf);
invariant(buf[i] == magic);
// Allocates size bytes from the arena, then writes and reads back size
// bytes of the current chunk's buffer.
// NOTE(review): the check goes through ma._current_chunk.buf; the returned
// pointer v is not used in the lines visible here.
void test_malloc(size_t size) {
memarena ma;
void *v = ma.malloc_from_arena(size);
// make sure memory was allocated ok by
// writing to buf and reading it back
memset(ma._current_chunk.buf, magic, size);
for (size_t i = 0; i < size; i++) {
const char *c = reinterpret_cast<char *>(ma._current_chunk.buf);
invariant(c[i] == magic);
// Chunk callback: every used byte must equal its own address truncated to
// a char, which is the pattern the fill loops below write.
static void test_iterate_fn(const void *buf, size_t used) {
for (size_t i = 0; i < used; i++) {
const char *c = reinterpret_cast<const char *>(buf);
invariant(c[i] == (char) ((intptr_t) &c[i]));
// Fills the arena with 64-byte allocations carrying the address pattern,
// then verifies every chunk through iterate_chunks().
// NOTE(review): the loop bound is size / 64 while k advances by 64, so
// about size / 4096 allocations are made; confirm that this is intended.
void test_iterate(size_t size) {
memarena ma;
for (size_t k = 0; k < size / 64; k += 64) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
// NOTE(review): rest only decides whether one more allocation happens;
// that allocation is 64 bytes, not rest bytes.
size_t rest = size % 64;
if (rest != 0) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
iterate_chunks(&ma, test_iterate_fn);
// Fills ma the same way as test_iterate(), then verifies the chunks of a
// second arena, ma2.
// NOTE(review): no move_memory() call from ma to ma2 is visible in this
// excerpt; as shown, ma2 is iterated without receiving any memory.
void test_move_memory(size_t size) {
memarena ma;
for (size_t k = 0; k < size / 64; k += 64) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
size_t rest = size % 64;
if (rest != 0) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
memarena ma2;
iterate_chunks(&ma2, test_iterate_fn);
// Runs every test case with a fixed set of sizes.
void test() {
test_create(128 * 1024 * 1024);
test_malloc(64 * 1024 * 1024);
test_malloc((64 * 1024 * 1024) + 1);
test_iterate(128 * 1024);
test_iterate(64 * 1024 * 1024);
test_iterate((64 * 1024 * 1024) + 1);
test_move_memory(65 * 1024 * 1024);
test_move_memory(101 * 1024 * 1024);
// Test entry point: constructs the unit test object and returns 0.
// NOTE(review): no call to test.test() is visible in this excerpt.
int main(void) {
memarena_unit_test test;
return 0;
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment