Commit 5943bb41 authored by John Esmet's avatar John Esmet Committed by Yoni Fogel

fixes #5771 merge the single txnid optimization to main. single threaded write performance is up 20-50% in mysql and multithreaded performance is largely unchanged.


git-svn-id: file:///svn/toku/tokudb@51108 c7de825b-a66e-492c-adef-691d508d4ae1
parent f50474f9
......@@ -29,6 +29,7 @@ void concurrent_tree::locked_keyrange::prepare(concurrent_tree *tree) {
treenode *const root = &tree->m_root;
m_tree = tree;
m_subtree = root;
m_range = keyrange::get_infinite_range();
root->mutex_lock();
}
......@@ -83,12 +84,6 @@ void concurrent_tree::locked_keyrange::remove(const keyrange &range) {
}
}
uint64_t concurrent_tree::locked_keyrange::remove_all(uint64_t *mem_released) {
// in practice, remove_all is only okay on the root node, because you
// want to remove all of the elements in the tree, not some subtree.
//
// so we lazily enforce that you are removing from a non-empty root.
invariant(m_subtree->is_root());
invariant(!m_subtree->is_empty());
return m_subtree->recursive_remove(mem_released);
void concurrent_tree::locked_keyrange::remove_all(void) {
m_subtree->recursive_remove();
}
......@@ -36,11 +36,14 @@ class concurrent_tree {
// effect: prepare to acquire a locked keyrange over the given
// concurrent_tree, preventing other threads from preparing
// until this thread either does acquire() or release().
// rationale: this provides the user with a serialization point
// for descending / modifying the the tree.
// note: operations performed on a prepared keyrange are equivalent
// to ones performed on an acquired keyrange over -inf, +inf.
// rationale: this provides the user with a serialization point for descending
// or modifying the tree. it also provides a convenient way of
// doing serializable operations on the tree.
// There are two valid sequences of calls:
// - prepare, acquire, release
// - prepare, release
// - prepare, acquire, [operations], release
// - prepare, [operations], release
void prepare(concurrent_tree *tree);
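For illustration, a minimal sketch of both valid call sequences, assuming the toku locktree sources are on the include path and that `cmp`, `left`, and `right` are an already-created comparator and a pair of DBT keys (these names, and the TXNID 1001, are illustrative and not part of this diff):
#include "concurrent_tree.h"
using namespace toku;
// sketch only: exercises both documented sequences on a locked_keyrange
static void locked_keyrange_sequences(comparator *cmp, const DBT *left, const DBT *right) {
    concurrent_tree tree;
    tree.create(cmp);
    keyrange range;
    range.create(left, right);
    concurrent_tree::locked_keyrange lkr;
    // sequence 1: prepare, acquire, [operations], release
    lkr.prepare(&tree);
    lkr.acquire(range);
    lkr.insert(range, 1001);
    lkr.release();
    // sequence 2: prepare, [operations], release
    // a prepared-but-unacquired keyrange behaves as if acquired over
    // (-inf, +inf), so remove_all() here empties the whole tree.
    lkr.prepare(&tree);
    lkr.remove_all();
    lkr.release();
    range.destroy();
    tree.destroy();
}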
// requires: the locked keyrange was prepare()'d
......@@ -68,12 +71,9 @@ class concurrent_tree {
// rationale: caller is responsible for only removing existing ranges
void remove(const keyrange &range);
// effect: removes everything within this locked keyrange
// requires: the locked keyrange is over -inf, +inf
// returns: the number of nodes removed
// returns: *mem_released updated to the cumulative size of all keyranges destroyed
// rationale: we'd like a fast O(n) way of removing everything from the tree
uint64_t remove_all(uint64_t *mem_released);
// effect: removes all of the keys represented by this locked keyrange
// rationale: we'd like a fast way to empty out a tree
void remove_all(void);
private:
// the concurrent tree this locked keyrange is for
......
......@@ -11,12 +11,14 @@
#include <portability/toku_time.h>
#include "locktree.h"
#include "range_buffer.h"
// including the concurrent_tree here expands the templates
// and "defines" the implementation, so we do it here in
// the locktree source file instead of the header.
#include "concurrent_tree.h"
namespace toku {
// A locktree represents the set of row locks owned by all transactions
......@@ -41,7 +43,10 @@ void locktree::create(manager::memory_tracker *mem_tracker, DICTIONARY_ID dict_i
m_userdata = nullptr;
XCALLOC(m_rangetree);
m_rangetree->create(m_cmp);
reset_single_txnid_optimization(TXNID_NONE);
m_sto_txnid = TXNID_NONE;
m_sto_buffer.create();
m_sto_score = STO_SCORE_THRESHOLD;
m_lock_request_info.pending_lock_requests.create();
m_lock_request_info.mutex = TOKU_MUTEX_INITIALIZER;
......@@ -63,6 +68,7 @@ void locktree::destroy(void) {
m_rangetree->destroy();
toku_free(m_cmp);
toku_free(m_rangetree);
m_sto_buffer.destroy();
m_lock_request_info.pending_lock_requests.destroy();
}
......@@ -78,7 +84,7 @@ struct row_lock {
// caller does not own the range inside the returned row locks,
// so remove from the tree with care using them as keys.
static void iterate_and_get_overlapping_row_locks(
const concurrent_tree::locked_keyrange &lkr,
const concurrent_tree::locked_keyrange *lkr,
GrowableArray<row_lock> *row_locks) {
struct copy_fn_obj {
GrowableArray<row_lock> *row_locks;
......@@ -89,7 +95,7 @@ static void iterate_and_get_overlapping_row_locks(
}
} copy_fn;
copy_fn.row_locks = row_locks;
lkr.iterate(&copy_fn);
lkr->iterate(&copy_fn);
}
// given a txnid and a set of overlapping row locks, determine
......@@ -112,75 +118,140 @@ static bool determine_conflicting_txnids(const GrowableArray<row_lock> &row_lock
return conflicts_exist;
}
// given a row lock, what is the effective memory size?
// that is, how much memory does it take when stored in a tree?
static uint64_t effective_row_lock_memory_size(const row_lock &lock) {
// how much memory does a row lock take up in a concurrent tree?
static uint64_t row_lock_size_in_tree(const row_lock &lock) {
const uint64_t overhead = concurrent_tree::get_insertion_memory_overhead();
return lock.range.get_memory_size() + overhead;
}
// remove all row locks from the given lkr, then notify the memory tracker.
static void remove_all_row_locks(concurrent_tree::locked_keyrange *lkr,
locktree::manager::memory_tracker *mem_tracker) {
uint64_t mem_released = 0;
uint64_t num_removed = lkr->remove_all(&mem_released);
mem_released += num_removed * concurrent_tree::get_insertion_memory_overhead();
mem_tracker->note_mem_released(mem_released);
}
// remove and destroy the given row lock from the locked keyrange,
// then notify the memory tracker of the newly freed lock.
static void remove_row_lock(concurrent_tree::locked_keyrange *lkr,
static void remove_row_lock_from_tree(concurrent_tree::locked_keyrange *lkr,
const row_lock &lock, locktree::manager::memory_tracker *mem_tracker) {
const uint64_t mem_released = effective_row_lock_memory_size(lock);
const uint64_t mem_released = row_lock_size_in_tree(lock);
lkr->remove(lock.range);
mem_tracker->note_mem_released(mem_released);
}
// insert a row lock into the locked keyrange, then notify
// the memory tracker of this newly acquired lock.
static void insert_row_lock(concurrent_tree::locked_keyrange *lkr,
static void insert_row_lock_into_tree(concurrent_tree::locked_keyrange *lkr,
const row_lock &lock, locktree::manager::memory_tracker *mem_tracker) {
uint64_t mem_used = effective_row_lock_memory_size(lock);
uint64_t mem_used = row_lock_size_in_tree(lock);
lkr->insert(lock.range, lock.txnid);
mem_tracker->note_mem_used(mem_used);
}
void locktree::update_single_txnid_optimization(TXNID txnid) {
if (m_rangetree->is_empty()) {
// the optimization becomes possible for this txnid if the
// tree was previously empy before we touched it. the idea
// here is that if we are still the only one to have touched
// it by the time we commit, the optimization holds.
reset_single_txnid_optimization(txnid);
} else {
// the tree is not empty, so some txnid must have touched it.
invariant(m_single_txnid != TXNID_NONE);
// the optimization is not possible if the txnid has changed
if (m_single_txnid_optimization_possible && m_single_txnid != txnid) {
m_single_txnid_optimization_possible = false;
void locktree::sto_begin(TXNID txnid) {
invariant(m_sto_txnid == TXNID_NONE);
invariant(m_sto_buffer.is_empty());
m_sto_txnid = txnid;
}
void locktree::sto_append(const DBT *left_key, const DBT *right_key) {
uint64_t buffer_mem, delta;
keyrange range;
range.create(left_key, right_key);
buffer_mem = m_sto_buffer.get_num_bytes();
m_sto_buffer.append(left_key, right_key);
delta = m_sto_buffer.get_num_bytes() - buffer_mem;
m_mem_tracker->note_mem_used(delta);
}
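As a side note on the bookkeeping above, sto_append() charges the memory tracker only for the bytes this particular append adds to the buffer. A minimal sketch of that delta-accounting pattern, assuming the locktree sources; the helper name is hypothetical:
#include "range_buffer.h"
using namespace toku;
// sketch only: measure the buffer before and after the append and return
// the difference, which is what the locktree reports via note_mem_used()
static uint64_t append_and_measure_delta(range_buffer *buf,
                                         const DBT *left_key, const DBT *right_key) {
    uint64_t before = buf->get_num_bytes();
    buf->append(left_key, right_key);
    return buf->get_num_bytes() - before;
}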
void locktree::sto_end(void) {
uint64_t num_bytes = m_sto_buffer.get_num_bytes();
m_mem_tracker->note_mem_released(num_bytes);
m_sto_buffer.destroy();
m_sto_buffer.create();
m_sto_txnid = TXNID_NONE;
}
void locktree::sto_end_early(void *prepared_lkr) {
sto_migrate_buffer_ranges_to_tree(prepared_lkr);
sto_end();
m_sto_score = 0;
}
void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
// There should be something to migrate, and nothing in the rangetree.
invariant(!m_sto_buffer.is_empty());
invariant(m_rangetree->is_empty());
concurrent_tree sto_rangetree;
concurrent_tree::locked_keyrange sto_lkr;
sto_rangetree.create(m_cmp);
// insert all of the ranges from the single txnid buffer into a new rangetree
range_buffer::iterator iter;
range_buffer::iterator::record rec;
iter.create(&m_sto_buffer);
while (iter.current(&rec)) {
sto_lkr.prepare(&sto_rangetree);
int r = acquire_lock_consolidated(&sto_lkr,
m_sto_txnid, rec.get_left_key(), rec.get_right_key(), nullptr);
invariant_zero(r);
sto_lkr.release();
iter.next();
}
// Iterate the newly created rangetree and insert each range into the
// locktree's rangetree, on behalf of the old single txnid.
struct migrate_fn_obj {
concurrent_tree::locked_keyrange *dst_lkr;
bool fn(const keyrange &range, TXNID txnid) {
dst_lkr->insert(range, txnid);
return true;
}
} migrate_fn;
migrate_fn.dst_lkr = static_cast<concurrent_tree::locked_keyrange *>(prepared_lkr);
sto_lkr.prepare(&sto_rangetree);
sto_lkr.iterate(&migrate_fn);
sto_lkr.remove_all();
sto_lkr.release();
sto_rangetree.destroy();
invariant(!m_rangetree->is_empty());
}
// acquire a lock in the given key range, inclusive. if successful,
// return 0. otherwise, populate the conflicts txnid_set with the set of
// transactions that conflict with this request.
int locktree::acquire_lock(bool is_write_request, TXNID txnid,
bool locktree::sto_try_acquire(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key) {
if (m_rangetree->is_empty() && m_sto_buffer.is_empty() && m_sto_score >= STO_SCORE_THRESHOLD) {
// We can do the optimization because the rangetree is empty, and
// we know it's worth trying because the sto score is high enough.
sto_begin(txnid);
} else if (m_sto_txnid != TXNID_NONE) {
// We are currently doing the optimization. Check if we need to cancel
// it because a new txnid appeared, or if the current single txnid has
// taken too many locks already.
if (m_sto_txnid != txnid || m_sto_buffer.get_num_ranges() > STO_BUFFER_MAX_SIZE) {
sto_end_early(prepared_lkr);
}
}
// At this point the sto txnid is properly set. If it is valid, then
// this txnid can append its lock to the sto buffer successfully.
if (m_sto_txnid != TXNID_NONE) {
invariant(m_sto_txnid == txnid);
sto_append(left_key, right_key);
return true;
} else {
invariant(m_sto_buffer.is_empty());
return false;
}
}
// try to acquire a lock and consolidate it with existing locks if possible
// param: lkr, a prepared locked keyrange
// return: 0 on success, DB_LOCK_NOTGRANTED if conflicting locks exist.
int locktree::acquire_lock_consolidated(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) {
int r = 0;
concurrent_tree::locked_keyrange *lkr;
keyrange requested_range;
requested_range.create(left_key, right_key);
// we are only supporting write locks for simplicity
invariant(is_write_request);
// acquire and prepare a locked keyrange over the requested range.
// prepare is a serialzation point, so we take the opportunity to
// update the single txnid optimization bits.
concurrent_tree::locked_keyrange lkr;
lkr.prepare(m_rangetree);
update_single_txnid_optimization(txnid);
lkr.acquire(requested_range);
lkr = static_cast<concurrent_tree::locked_keyrange *>(prepared_lkr);
lkr->acquire(requested_range);
// copy out the set of overlapping row locks.
GrowableArray<row_lock> overlapping_row_locks;
......@@ -189,7 +260,8 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid,
size_t num_overlapping_row_locks = overlapping_row_locks.get_size();
// if any overlapping row locks conflict with this request, bail out.
bool conflicts_exist = determine_conflicting_txnids(overlapping_row_locks, txnid, conflicts);
bool conflicts_exist = determine_conflicting_txnids(
overlapping_row_locks, txnid, conflicts);
if (!conflicts_exist) {
// there are no conflicts, so all of the overlaps are for the requesting txnid.
// so, we must consolidate all existing overlapping ranges and the requested
......@@ -198,23 +270,43 @@ int locktree::acquire_lock(bool is_write_request, TXNID txnid,
row_lock overlapping_lock = overlapping_row_locks.fetch_unchecked(i);
invariant(overlapping_lock.txnid == txnid);
requested_range.extend(m_cmp, overlapping_lock.range);
remove_row_lock(&lkr, overlapping_lock, m_mem_tracker);
remove_row_lock_from_tree(lkr, overlapping_lock, m_mem_tracker);
}
row_lock new_lock = { .range = requested_range, .txnid = txnid };
insert_row_lock(&lkr, new_lock, m_mem_tracker);
insert_row_lock_into_tree(lkr, new_lock, m_mem_tracker);
} else {
r = DB_LOCK_NOTGRANTED;
}
lkr.release();
overlapping_row_locks.deinit();
requested_range.destroy();
overlapping_row_locks.deinit();
return r;
}
// if there were conflicts, the lock is not granted.
if (conflicts_exist) {
return DB_LOCK_NOTGRANTED;
} else {
return 0;
// acquire a lock in the given key range, inclusive. if successful,
// return 0. otherwise, populate the conflicts txnid_set with the set of
// transactions that conflict with this request.
int locktree::acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts) {
int r = 0;
// we are only supporting write locks for simplicity
invariant(is_write_request);
// acquire and prepare a locked keyrange over the requested range.
// prepare is a serialization point, so we take the opportunity to
// try the single txnid optimization first.
concurrent_tree::locked_keyrange lkr;
lkr.prepare(m_rangetree);
bool acquired = sto_try_acquire(&lkr, txnid, left_key, right_key);
if (!acquired) {
r = acquire_lock_consolidated(&lkr, txnid, left_key, right_key, conflicts);
}
lkr.release();
return r;
}
int locktree::try_acquire_lock(bool is_write_request, TXNID txnid,
......@@ -252,7 +344,7 @@ void locktree::get_conflicts(bool is_write_request, TXNID txnid,
// copy out the set of overlapping row locks and determine the conflicts
GrowableArray<row_lock> overlapping_row_locks;
overlapping_row_locks.init();
iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
iterate_and_get_overlapping_row_locks(&lkr, &overlapping_row_locks);
// we don't care if conflicts exist. we just want the conflicts set populated.
(void) determine_conflicting_txnids(overlapping_row_locks, txnid, conflicts);
......@@ -300,7 +392,7 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
// copy out the set of overlapping row locks.
GrowableArray<row_lock> overlapping_row_locks;
overlapping_row_locks.init();
iterate_and_get_overlapping_row_locks(lkr, &overlapping_row_locks);
iterate_and_get_overlapping_row_locks(&lkr, &overlapping_row_locks);
size_t num_overlapping_row_locks = overlapping_row_locks.get_size();
for (size_t i = 0; i < num_overlapping_row_locks; i++) {
......@@ -308,7 +400,7 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
// If this isn't our lock, that's ok, just don't remove it.
// See rationale above.
if (lock.txnid == txnid) {
remove_row_lock(&lkr, lock, m_mem_tracker);
remove_row_lock_from_tree(&lkr, lock, m_mem_tracker);
}
}
......@@ -317,41 +409,27 @@ void locktree::remove_overlapping_locks_for_txnid(TXNID txnid,
release_range.destroy();
}
// reset the optimization bit to possible for the given txnid
void locktree::reset_single_txnid_optimization(TXNID txnid) {
m_single_txnid = txnid;
m_single_txnid_optimization_possible = true;
inline bool locktree::sto_txnid_is_valid_unsafe(void) const {
return m_sto_txnid != TXNID_NONE;
}
inline bool locktree::unsafe_read_single_txnid_optimization_possible(void) const {
return m_single_txnid_optimization_possible;
inline int locktree::sto_get_score_unsafe(void) const {
return m_sto_score;
}
bool locktree::try_single_txnid_release_optimization(TXNID txnid) {
bool locktree::sto_try_release(TXNID txnid) {
bool released = false;
if (unsafe_read_single_txnid_optimization_possible()) {
if (sto_txnid_is_valid_unsafe()) {
// check the bit again with a prepared locked keyrange,
// which protects the optimization bits and rangetree data
concurrent_tree::locked_keyrange lkr;
lkr.prepare(m_rangetree);
if (m_single_txnid_optimization_possible) {
if (m_sto_txnid != TXNID_NONE) {
// this txnid better be the single txnid on this locktree,
// or else we are in big trouble (meaning the logic is broken)
invariant(m_single_txnid == txnid);
// acquire a locked range on -inf, +inf. this is just for
// readability's sake, since the prepared lkr already has
// the root locked, but the API says to do this so we do.
keyrange infinite_range = keyrange::get_infinite_range();
lkr.acquire(infinite_range);
// knowing that only our row locks exist in the locktree
// and that we have the entire thing locked, remove everything.
remove_all_row_locks(&lkr, m_mem_tracker);
// reset the optimization back to possible, with no txnid
// we set txnid to TXNID_NONE for invariant purposes.
reset_single_txnid_optimization(TXNID_NONE);
invariant(m_sto_txnid == txnid);
invariant(m_rangetree->is_empty());
sto_end();
released = true;
}
lkr.release();
......@@ -364,7 +442,7 @@ bool locktree::try_single_txnid_release_optimization(TXNID txnid) {
void locktree::release_locks(TXNID txnid, const range_buffer *ranges) {
// try the single txn optimization. if it worked, then all of the
// locks are already released, otherwise we need to do it here.
bool released = try_single_txnid_release_optimization(txnid);
bool released = sto_try_release(txnid);
if (!released) {
range_buffer::iterator iter;
range_buffer::iterator::record rec;
......@@ -375,6 +453,13 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) {
remove_overlapping_locks_for_txnid(txnid, left_key, right_key);
iter.next();
}
// Increase the sto score slightly. Eventually it will hit
// the threshold and we'll try the optimization again. This
// is how a previously multithreaded system transitions into
// a single threaded system that benefits from the optimization.
if (sto_get_score_unsafe() < STO_SCORE_THRESHOLD) {
toku_sync_fetch_and_add(&m_sto_score, 1);
}
}
}
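To make the score mechanism above concrete, here is a small standalone model. It is not the locktree code itself (which uses toku_sync_fetch_and_add and the real rangetree state); it only shows that after a forced migration resets the score to zero, it takes STO_SCORE_THRESHOLD releases before the optimization becomes eligible again:
#include <cassert>
static const int STO_SCORE_THRESHOLD = 100;   // value chosen by this commit
struct sto_model {
    int score = STO_SCORE_THRESHOLD;
    bool eligible(bool rangetree_empty) const {
        return rangetree_empty && score >= STO_SCORE_THRESHOLD;
    }
    void on_migration() { score = 0; }                              // optimization was cancelled
    void on_release()   { if (score < STO_SCORE_THRESHOLD) score++; }
};
int main(void) {
    sto_model m;
    m.on_migration();                       // a second txnid forced a migration
    int releases = 0;
    while (!m.eligible(true)) {             // count releases until we retry
        m.on_release();
        releases++;
    }
    assert(releases == STO_SCORE_THRESHOLD);
    return 0;
}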
......@@ -409,12 +494,12 @@ static int extract_first_n_row_locks(concurrent_tree::locked_keyrange *lkr,
// now that the ranges have been copied out, complete
// the extraction by removing the ranges from the tree.
// use remove_row_lock() so we properly track the
// use remove_row_lock_from_tree() so we properly track the
// amount of memory and number of locks freed.
int num_extracted = extract_fn.num_extracted;
invariant(num_extracted <= num_to_extract);
for (int i = 0; i < num_extracted; i++) {
remove_row_lock(lkr, row_locks[i], mem_tracker);
remove_row_lock_from_tree(lkr, row_locks[i], mem_tracker);
}
return num_extracted;
......@@ -437,6 +522,13 @@ void locktree::escalate(void) {
lkr.prepare(m_rangetree);
lkr.acquire(infinite_range);
// if we're in the single txnid optimization, simply call it off.
// if you have to run escalation, you probably don't care about
// the optimization anyway, and this makes things easier.
if (m_sto_txnid != TXNID_NONE) {
sto_end_early(&lkr);
}
// extract and remove batches of row locks from the locktree
int num_extracted;
static const int num_row_locks_per_batch = 128;
......@@ -487,7 +579,7 @@ void locktree::escalate(void) {
size_t new_num_locks = escalated_locks.get_size();
for (size_t i = 0; i < new_num_locks; i++) {
row_lock lock = escalated_locks.fetch_unchecked(i);
insert_row_lock(&lkr, lock, m_mem_tracker);
insert_row_lock_into_tree(&lkr, lock, m_mem_tracker);
lock.range.destroy();
}
......
......@@ -168,6 +168,12 @@ class locktree {
};
ENSURE_POD(memory_tracker);
// effect: calls the private function run_escalation(), only ok to
// do for tests.
// rationale: to get better stress test coverage, we want a way to
// deterministically trigger lock escalation.
void run_escalation_for_test(void);
private:
static const uint64_t DEFAULT_MAX_LOCK_MEMORY = 64L * 1024 * 1024;
static const uint64_t DEFAULT_LOCK_WAIT_TIME = 0;
......@@ -240,54 +246,148 @@ class locktree {
struct lt_lock_request_info m_lock_request_info;
// the following is an optimization for locktrees that contain
// locks for only a single txnid. in this case, we can just
// delete everything from the locktree when that txnid unlocks.
// The following fields and members prefixed with "sto_" are for
// the single txnid optimization, intended to speed up the case
// when only one transaction is using the locktree. If we know
// the locktree has only one transaction, then acquiring locks
// takes O(1) work and releasing all locks takes O(1) work.
//
// How do we know that the locktree only has a single txnid?
// What do we do if it does?
//
// When a txn with txnid T requests a lock:
// - If the tree is empty, the optimization is possible. Set the single
// txnid to T, and insert the lock range into the buffer.
// - If the tree is not empty, check if the single txnid is T. If so,
// append the lock range to the buffer. Otherwise, migrate all of
// the locks in the buffer into the rangetree on behalf of txnid T,
// and invalidate the single txnid.
//
// When a txn with txnid T releases its locks:
// - If the single txnid is valid, it must be for T. Destroy the buffer.
// - If it's not valid, release locks the normal way in the rangetree.
//
// To carry out the optimization we need to record a single txnid
// and a range buffer for each locktree, each protected by the root
// lock of the locktree's rangetree. The root lock for a rangetree
// is grabbed by preparing a locked keyrange on the rangetree.
TXNID m_sto_txnid;
range_buffer m_sto_buffer;
// The single txnid optimization speeds up the case when only one
// transaction is using the locktree. But it has the potential to
// hurt the case when more than one txnid exists.
//
// how do we know that this locktree only has a single txnid?
// There are two things we need to do to make the optimization only
// optimize the case we care about, and not hurt the general case.
//
// when a txn requests a lock:
// - if the tree is empty, set the single txnid to that txn, set
// the optimization to true.
// - if the tree is not empty, then some txnid has inserted into
// the tree before and its txnid is m_single_txnid. set the bit
// to false if that txnid is different than the one about to insert.
// - if the txnid never changes (ie: only one txnid inserts into
// a locktree) then the bit stays true and the optimization happens.
// Bound the worst-case latency for lock migration when the
// optimization stops working:
// - Idea: Stop the optimization and migrate immediately if we notice
// the single txnid has taken many locks in the range buffer.
// - Implementation: Enforce a max size on the single txnid range buffer.
// - Analysis: Choosing the perfect max value, M, is difficult to do
// without some feedback from the field. Intuition tells us that M should
// not be so small that the optimization is worthless, and it should not
// be so big that it's unreasonable to have to wait behind a thread doing
// the work of converting M buffer locks into rangetree locks.
//
// when a txn releases its locks
// - check the optimization bit. if it is set, take the locktree's mutex
// and then check it agian. if it is still set, then perform the optimizaiton.
// - if the bit was not set, then carry out release locks as usual.
// Prevent concurrent-transaction workloads from trying the optimization
// in vain:
// - Idea: Don't even bother trying the optimization if we think the
// system is in a concurrent-transaction state.
// - Implementation: Do something even simpler than detecting whether the
// system is in a concurrent-transaction state. Just keep a "score" value
// and some threshold. If at any time the locktree is eligible for the
// optimization, only do it if the score is at this threshold. When you
// actually do the optimization but someone has to migrate locks in the buffer
// (expensive), then reset the score back to zero. Each time a txn
// releases locks, the score is incremented by 1.
// - Analysis: If you let the threshold be "C", then at most 1 / C txns will
// do the optimization in a concurrent-transaction system. Similarly, it
// takes at most C txns to start using the single txnid optimization, which
// is good when the system transitions from multithreaded to single threaded.
//
// the single txnid and the optimizable possible bit are both protected
// by the root lock on the concurrent tree. the way this is implemented
// is by a locked keyrange function called prepare(), which grabs
// the root lock and returns. once acquire/release() is called, the root
// lock is unlocked if necessary. so prepare() acts as a serialization
// point where we can safely read and modify these bits.
TXNID m_single_txnid;
bool m_single_txnid_optimization_possible;
// effect: If the single txnid is possible, assert that it
// is for the given txnid and then release all of
// the locks in the locktree.
// returns: True if locks were released, false otherwise
bool try_single_txnid_release_optimization(TXNID txnid);
// effect: Checks if the single txnid bit is set and, if so,
// sets it to false iff the given txnid differs
// from the current known single txnid.
void update_single_txnid_optimization(TXNID txnid);
// effect: Sets the single txnid bit to be true for the given txnid
void reset_single_txnid_optimization(TXNID txnid);
// STO_BUFFER_MAX_SIZE:
//
// We choose the max value to be 1 million since most transactions take fewer
// than 1 million locks and we can create a rangetree of 1 million elements in
// less than a second. So we can be pretty confident that this threshold
// enables the optimization almost always, and prevents super pathological
// latency issues for the first lock taken by a second thread.
//
// STO_SCORE_THRESHOLD:
//
// A simple first guess at a good value for the score threshold is 100.
// By our analysis, we'd end up doing the optimization in vain for
// around 1% of all transactions, which seems reasonable. Further,
// if the system goes single threaded, it ought to be pretty quick
// for 100 transactions to go by, so we won't have to wait long before
// we start doing the single txnid optimization again.
static const int STO_BUFFER_MAX_SIZE = 1 * 1024 * 1024;
static const int STO_SCORE_THRESHOLD = 100;
int m_sto_score;
// effect: begins the single txnid optimization, setting m_sto_txnid
// to the given txnid.
// requires: m_sto_txnid is invalid
void sto_begin(TXNID txnid);
// effect: append a range to the sto buffer
// requires: m_sto_txnid is valid
void sto_append(const DBT *left_key, const DBT *right_key);
// effect: ends the single txnid optimization, releasing any memory
// stored in the sto buffer, notifying the tracker, and
// invalidating m_sto_txnid.
// requires: m_sto_txnid is valid
void sto_end(void);
// params: prepared_lkr is a void * to a prepared locked keyrange. see below.
// effect: ends the single txnid optimization early, migrating buffer locks
// into the rangetree, calling sto_end(), and then setting the
// sto_score back to zero.
// requires: m_sto_txnid is valid
void sto_end_early(void *prepared_lkr);
// params: prepared_lkr is a void * to a prepared locked keyrange. we can't use
// the real type because the compiler won't allow us to forward declare
// concurrent_tree::locked_keyrange without including concurrent_tree.h,
// which we cannot do here because it is a template implementation.
// requires: the prepared locked keyrange is for the locktree's rangetree
// requires: m_sto_txnid is valid
// effect: migrates each lock in the single txnid buffer into the locktree's
// rangetree, notifying the memory tracker as necessary.
void sto_migrate_buffer_ranges_to_tree(void *prepared_lkr);
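The void * indirection described above can be sketched in isolation. The names below (`widget`, `helper`) are hypothetical and only illustrate the pattern of keeping a template type out of a header by passing an opaque pointer and casting it back where the template implementation is visible:
#include <vector>
// --- header view: the heavy template type is never named here ---
class widget {
public:
    void do_work(void *opaque_helper);    // really a helper<int> *
};
// --- source view: the template is available, so restore the real type ---
template <typename T>
struct helper {
    std::vector<T> items;
};
void widget::do_work(void *opaque_helper) {
    helper<int> *h = static_cast<helper<int> *>(opaque_helper);
    h->items.push_back(42);
}
int main(void) {
    helper<int> h;
    widget w;
    w.do_work(&h);    // h.items now holds one element
    return 0;
}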
// effect: If m_sto_txnid is valid, then release the txnid's locks
// by ending the optimization.
// requires: If m_sto_txnid is valid, it is equal to the given txnid
// returns: True if locks were released for this txnid
bool sto_try_release(TXNID txnid);
// params: prepared_lkr is a void * to a prepared locked keyrange. see above.
// requires: the prepared locked keyrange is for the locktree's rangetree
// effect: If m_sto_txnid is valid and equal to the given txnid, then
// append a range onto the buffer. Otherwise, if m_sto_txnid is valid
// but not equal to this txnid, then migrate the buffer's locks
// into the rangetree and end the optimization, setting the score
// back to zero.
// returns: true if the lock was acquired for this txnid
bool sto_try_acquire(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key);
// Effect:
// Provides a hook for a helgrind suppression.
// Returns:
// m_single_txnid_optimization_possible
bool unsafe_read_single_txnid_optimization_possible(void) const;
// true if m_sto_txnid is not TXNID_NONE
bool sto_txnid_is_valid_unsafe(void) const;
// Effect:
// Provides a hook for a helgrind suppression.
// Returns:
// m_sto_score
int sto_get_score_unsafe(void) const;
// effect: Creates a locktree that uses the given memory tracker
// to report memory usage and honor memory constraints.
......@@ -299,12 +399,15 @@ class locktree {
void remove_overlapping_locks_for_txnid(TXNID txnid,
const DBT *left_key, const DBT *right_key);
int try_acquire_lock(bool is_write_request, TXNID txnid,
int acquire_lock_consolidated(void *prepared_lkr, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
int acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
int try_acquire_lock(bool is_write_request, TXNID txnid,
const DBT *left_key, const DBT *right_key, txnid_set *conflicts);
void escalate();
friend class locktree_unit_test;
......
......@@ -196,6 +196,11 @@ void locktree::manager::release_lt(locktree *lt) {
}
}
// test-only version of lock escalation
void locktree::manager::run_escalation_for_test(void) {
run_escalation();
}
// effect: escalates the locks in each locktree
// requires: manager's mutex is held
void locktree::manager::run_escalation(void) {
......
......@@ -116,6 +116,7 @@ void range_buffer::create(void) {
m_buf = nullptr;
m_buf_size = 0;
m_buf_current = 0;
m_num_ranges = 0;
}
void range_buffer::append(const DBT *left_key, const DBT *right_key) {
......@@ -125,6 +126,19 @@ void range_buffer::append(const DBT *left_key, const DBT *right_key) {
} else {
append_range(left_key, right_key);
}
m_num_ranges++;
}
bool range_buffer::is_empty(void) const {
return m_buf == nullptr;
}
uint64_t range_buffer::get_num_bytes(void) const {
return m_buf_current;
}
int range_buffer::get_num_ranges(void) const {
return m_num_ranges;
}
void range_buffer::destroy(void) {
......
......@@ -98,12 +98,22 @@ class range_buffer {
// if the keys are equal, then only one copy is stored.
void append(const DBT *left_key, const DBT *right_key);
// is this range buffer empty?
bool is_empty(void) const;
// how many bytes are stored in this range buffer?
uint64_t get_num_bytes(void) const;
// how many ranges are stored in this range buffer?
int get_num_ranges(void) const;
void destroy(void);
private:
char *m_buf;
size_t m_buf_size;
size_t m_buf_current;
int m_num_ranges;
void append_range(const DBT *left_key, const DBT *right_key);
......
......@@ -18,13 +18,6 @@ void concurrent_tree_unit_test::test_lkr_remove_all(void) {
const uint64_t min = 0;
const uint64_t max = 20;
// determine how much memory should be released
keyrange example_keyrange;
example_keyrange.create(get_dbt(0), get_dbt(0));
const uint64_t num_elements = max + 1;
const uint64_t expected_mem_released =
example_keyrange.get_memory_size() * num_elements;
// remove_all should work regardless of how the
// data was inserted into the tree, so we test it
// on a tree whose elements were populated starting
......@@ -44,10 +37,7 @@ void concurrent_tree_unit_test::test_lkr_remove_all(void) {
// remove_all() from the locked keyrange and assert that
// the number of elements and memory removed is correct.
uint64_t mem_released = 0;
uint64_t num_removed = lkr.remove_all(&mem_released);
invariant(num_removed == num_elements);
invariant(mem_released == expected_mem_released);
lkr.remove_all();
invariant(lkr.m_subtree->is_empty());
invariant(tree.is_empty());
......
......@@ -51,7 +51,7 @@ void lock_request_unit_test::test_start_pending(void) {
invariant(compare_dbts(nullptr, &request.m_right_key_copy, one) == 0);
// release the range lock for txnid b
lt->remove_overlapping_locks_for_txnid(txnid_b, zero, two);
locktree_unit_test::locktree_test_release_lock(lt, txnid_b, zero, two);
// now retry the lock requests.
// it should transition the request to successfully complete.
......@@ -60,7 +60,7 @@ void lock_request_unit_test::test_start_pending(void) {
invariant(request.m_state == lock_request::state::COMPLETE);
invariant(request.m_complete_r == 0);
lt->remove_overlapping_locks_for_txnid(txnid_a, one, one);
locktree_unit_test::locktree_test_release_lock(lt, txnid_a, one, one);
request.destroy();
mgr.release_lt(lt);
......
......@@ -8,6 +8,7 @@
#define TOKU_LOCK_REQUEST_UNIT_TEST_H
#include "test.h"
#include "locktree_unit_test.h"
#include "lock_request.h"
......@@ -37,7 +38,7 @@ class lock_request_unit_test {
// lt->release_locks(), not individually using lt->remove_overlapping_locks_for_txnid).
void release_lock_and_retry_requests(locktree *lt,
TXNID txnid, const DBT *left_key, const DBT * right_key) {
lt->remove_overlapping_locks_for_txnid(txnid, left_key, right_key);
locktree_unit_test::locktree_test_release_lock(lt, txnid, left_key, right_key);
lock_request::retry_all_lock_requests(lt);
}
};
......
......@@ -77,10 +77,9 @@ void locktree_unit_test::test_conflicts(void) {
#undef ACQUIRE_LOCK
}
invariant(num_row_locks(lt) == 2);
lt->remove_overlapping_locks_for_txnid(txnid_a, one, one);
lt->remove_overlapping_locks_for_txnid(txnid_a, three, four);
invariant(num_row_locks(lt) == 0);
invariant(no_row_locks(lt));
}
mgr.release_lt(lt);
......
......@@ -29,6 +29,16 @@ void locktree_unit_test::test_overlapping_relock(void) {
int r;
TXNID txnid_a = 1001;
// because of the single txnid optimization, there is no consolidation
// of read or write ranges until there are at least two txnids in
// the locktree. so here we add some arbitrary txnid to get a point
// lock [100, 100] so that the test below can expect to actually
// do something. at the end of the test, we release 100, 100.
const TXNID the_other_txnid = 9999;
const DBT *hundred = get_dbt(100);
r = lt->acquire_write_lock(the_other_txnid, hundred, hundred, nullptr);
invariant(r == 0);
for (int test_run = 0; test_run < 2; test_run++) {
// test_run == 0 means test with read lock
// test_run == 1 means test with write lock
......@@ -40,19 +50,22 @@ void locktree_unit_test::test_overlapping_relock(void) {
// ensure only [1,2] exists in the tree
r = ACQUIRE_LOCK(txnid_a, one, one, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_a, two, two, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 2);
r = ACQUIRE_LOCK(txnid_a, one, two, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
struct verify_fn_obj {
bool saw_the_other;
TXNID expected_txnid;
keyrange *expected_range;
comparator *cmp;
bool fn(const keyrange &range, TXNID txnid) {
if (txnid == the_other_txnid) {
invariant(!saw_the_other);
saw_the_other = true;
return true;
}
invariant(txnid == expected_txnid);
keyrange::comparison c = range.compare(cmp, *expected_range);
invariant(c == keyrange::comparison::EQUALS);
......@@ -61,55 +74,52 @@ void locktree_unit_test::test_overlapping_relock(void) {
} verify_fn;
verify_fn.cmp = lt->m_cmp;
#define do_verify() \
do { verify_fn.saw_the_other = false; locktree_iterate<verify_fn_obj>(lt, &verify_fn); } while (0)
keyrange range;
range.create(one, two);
verify_fn.expected_txnid = txnid_a;
verify_fn.expected_range = &range;
locktree_iterate<verify_fn_obj>(lt, &verify_fn);
do_verify();
// unlocking [1,1] should remove the only range,
// the other unlocks should do nothing.
invariant(num_row_locks(lt) == 1);
lt->remove_overlapping_locks_for_txnid(txnid_a, one, one);
invariant(num_row_locks(lt) == 0);
lt->remove_overlapping_locks_for_txnid(txnid_a, two, two);
invariant(num_row_locks(lt) == 0);
lt->remove_overlapping_locks_for_txnid(txnid_a, one, two);
invariant(num_row_locks(lt) == 0);
// try overlapping from the right
r = ACQUIRE_LOCK(txnid_a, one, three, nullptr);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_a, two, five, nullptr);
invariant(num_row_locks(lt) == 1);
verify_fn.expected_txnid = txnid_a;
range.create(one, five);
verify_fn.expected_range = &range;
locktree_iterate<verify_fn_obj>(lt, &verify_fn);
do_verify();
// now overlap from the left
r = ACQUIRE_LOCK(txnid_a, zero, four, nullptr);
invariant(num_row_locks(lt) == 1);
verify_fn.expected_txnid = txnid_a;
range.create(zero, five);
verify_fn.expected_range = &range;
locktree_iterate<verify_fn_obj>(lt, &verify_fn);
do_verify();
// now relock in a range that is already dominated
r = ACQUIRE_LOCK(txnid_a, five, five, nullptr);
invariant(num_row_locks(lt) == 1);
verify_fn.expected_txnid = txnid_a;
range.create(zero, five);
verify_fn.expected_range = &range;
locktree_iterate<verify_fn_obj>(lt, &verify_fn);
do_verify();
// release one of the locks we acquired. this should clean up the whole range.
lt->remove_overlapping_locks_for_txnid(txnid_a, zero, four);
invariant(num_row_locks(lt) == 0);
#undef ACQUIRE_LOCK
}
// remove the other txnid's lock now
lt->remove_overlapping_locks_for_txnid(the_other_txnid, hundred, hundred);
mgr.release_lt(lt);
mgr.destroy();
}
......
......@@ -36,60 +36,35 @@ void locktree_unit_test::test_simple_lock(void) {
// four txns, four points
r = ACQUIRE_LOCK(txnid_a, one, one, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_b, two, two, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 2);
r = ACQUIRE_LOCK(txnid_c, three, three, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 3);
r = ACQUIRE_LOCK(txnid_d, four, four, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 4);
lt->remove_overlapping_locks_for_txnid(txnid_a, one, one);
invariant(num_row_locks(lt) == 3);
lt->remove_overlapping_locks_for_txnid(txnid_b, two, two);
invariant(num_row_locks(lt) == 2);
lt->remove_overlapping_locks_for_txnid(txnid_c, three, three);
invariant(num_row_locks(lt) == 1);
lt->remove_overlapping_locks_for_txnid(txnid_d, four, four);
invariant(num_row_locks(lt) == 0);
locktree_test_release_lock(lt, txnid_a, one, one);
locktree_test_release_lock(lt, txnid_b, two, two);
locktree_test_release_lock(lt, txnid_c, three, three);
locktree_test_release_lock(lt, txnid_d, four, four);
invariant(no_row_locks(lt));
// two txns, two ranges
r = ACQUIRE_LOCK(txnid_c, one, two, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_b, three, four, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 2);
lt->remove_overlapping_locks_for_txnid(txnid_c, one, two);
invariant(num_row_locks(lt) == 1);
lt->remove_overlapping_locks_for_txnid(txnid_b, three, four);
invariant(num_row_locks(lt) == 0);
// one txn, one range, one point
r = ACQUIRE_LOCK(txnid_a, two, three, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_a, four, four, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 2);
lt->remove_overlapping_locks_for_txnid(txnid_a, two, three);
invariant(num_row_locks(lt) == 1);
lt->remove_overlapping_locks_for_txnid(txnid_a, four, four);
invariant(num_row_locks(lt) == 0);
locktree_test_release_lock(lt, txnid_c, one, two);
locktree_test_release_lock(lt, txnid_b, three, four);
invariant(no_row_locks(lt));
// two txns, one range, one point
r = ACQUIRE_LOCK(txnid_c, three, four, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 1);
r = ACQUIRE_LOCK(txnid_d, one, one, nullptr);
invariant(r == 0);
invariant(num_row_locks(lt) == 2);
lt->remove_overlapping_locks_for_txnid(txnid_c, three, four);
invariant(num_row_locks(lt) == 1);
lt->remove_overlapping_locks_for_txnid(txnid_d, one, one);
invariant(num_row_locks(lt) == 0);
locktree_test_release_lock(lt, txnid_c, three, four);
locktree_test_release_lock(lt, txnid_d, one, one);
invariant(no_row_locks(lt));
#undef ACQUIRE_LOCK
}
......@@ -124,7 +99,7 @@ void locktree_unit_test::test_simple_lock(void) {
for (int64_t i = 0; i < num_locks; i++) {
k.data = (void *) &keys[i];
lt->remove_overlapping_locks_for_txnid(txnid_a, &k, &k);
locktree_test_release_lock(lt, txnid_a, &k, &k);
}
toku_free(keys);
......
......@@ -58,20 +58,10 @@ void locktree_unit_test::test_single_txnid_optimization(void) {
lock_and_append_point_for_txnid_a(zero);
maybe_point_locks_for_txnid_b(2);
// txnid b does not take a lock on iteration 3
if (where != 3) {
invariant(num_row_locks(lt) == 4);
} else {
invariant(num_row_locks(lt) == 3);
}
lt->release_locks(txnid_a, &buffer);
// txnid b does not take a lock on iteration 3
if (where != 3) {
invariant(num_row_locks(lt) == 1);
struct verify_fn_obj {
TXNID expected_txnid;
keyrange *expected_range;
......
......@@ -56,20 +56,19 @@ class locktree_unit_test {
ltr.release();
}
static size_t num_row_locks(const locktree *lt) {
struct count_fn_obj {
size_t count;
bool fn(const keyrange &range, TXNID txnid) {
(void) range;
(void) txnid;
count++;
return true;
static bool no_row_locks(const locktree *lt) {
return lt->m_rangetree->is_empty() && lt->m_sto_buffer.is_empty();
}
} count_fn;
count_fn.count = 0;
locktree_iterate<count_fn_obj>(lt, &count_fn);
return count_fn.count;
static void locktree_test_release_lock(locktree *lt, TXNID txnid, const DBT *left_key, const DBT *right_key) {
range_buffer buffer;
buffer.create();
buffer.append(left_key, right_key);
lt->release_locks(txnid, &buffer);
buffer.destroy();
}
friend class lock_request_unit_test;
};
} /* namespace toku */
......
......@@ -266,31 +266,23 @@ treenode *treenode::remove_root_of_subtree() {
return this;
}
uint64_t treenode::recursive_remove(uint64_t *mem_released) {
// remove left and right subtrees
uint64_t nodes_removed = 0;
void treenode::recursive_remove(void) {
treenode *left = m_left_child.ptr;
if (left) {
nodes_removed += left->recursive_remove(mem_released);
left->recursive_remove();
}
m_left_child.set(nullptr);
treenode *right = m_right_child.ptr;
if (right) {
nodes_removed += right->recursive_remove(mem_released);
right->recursive_remove();
}
m_right_child.set(nullptr);
// note the amount of memory to-be released by this node
if (mem_released) {
*mem_released += m_range.get_memory_size();
}
// we do not take locks on the way down, so we know non-root nodes
// are unlocked here and the caller is required to pass a locked
// root, so this free is correct.
treenode::free(this);
return nodes_removed + 1;
}
treenode *treenode::remove(const keyrange &range) {
......
......@@ -86,10 +86,7 @@ class treenode {
// effect: removes this node and all of its children, recursively
// requires: every node at and below this node is unlocked
// returns: the number of nodes removed
// returns: *mem_released is the total amount of keyrange memory released.
// mem_released does not account for treenode insertion overhead.
uint64_t recursive_remove(uint64_t *mem_released);
void recursive_remove(void);
private:
......
......@@ -343,7 +343,9 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
# libtokudb.so.
# We link the test with util directly so that the test code itself can use
# some of those things (i.e. kibbutz in the threaded tests).
target_link_libraries(${base}.tdb util ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
# We link the locktree so that threaded stress tests can call some
# functions (ie: lock escalation) directly.
target_link_libraries(${base}.tdb util locktree ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
set_property(TARGET ${base}.tdb APPEND PROPERTY
COMPILE_DEFINITIONS "ENVDIR=\"dir.${bin}\";USE_TDB;IS_TDB=1;TOKUDB=1")
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "dir.${bin}")
......@@ -378,7 +380,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
function(add_custom_executable prefix binary source)
add_executable(${prefix}_${binary} ${source})
target_link_libraries(${prefix}_${binary} util ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
target_link_libraries(${prefix}_${binary} util locktree ${LIBTOKUDB} ${LIBTOKUPORTABILITY})
set_target_properties(${prefix}_${binary} PROPERTIES
COMPILE_DEFINITIONS "ENVDIR=\"dir.${prefix}_${source}.tdb\";USE_TDB;IS_TDB=1;TOKUDB=1")
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "dir.${prefix}_${source}.tdb")
......
......@@ -40,9 +40,18 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) {
myargs[0].operation_extra = &soe[0];
myargs[0].operation = scan_op;
// make the lock escalation thread.
// it should sleep somewhere between 10 and 20
// seconds between each escalation.
struct lock_escalation_op_extra eoe;
eoe.min_sleep_time_micros = 10UL * (1000 * 1000);
eoe.max_sleep_time_micros = 20UL * (1000 * 1000);
myargs[1].operation_extra = &eoe;
myargs[1].operation = lock_escalation_op;
// make the threads that update the db
struct update_op_args uoe = get_update_op_args(cli_args, NULL);
for (int i = 1; i < 1 + cli_args->num_update_threads; ++i) {
for (int i = 2; i < 2 + cli_args->num_update_threads; ++i) {
myargs[i].operation_extra = &uoe;
myargs[i].operation = update_op;
myargs[i].do_prepare = false;
......@@ -50,7 +59,7 @@ stress_table(DB_ENV *env, DB **dbp, struct cli_args *cli_args) {
// doing sequential updates. the rest of the threads
// will take point write locks on update as usual.
// this ensures both ranges and points are stressed.
myargs[i].prelock_updates = i < 4 ? true : false;
myargs[i].prelock_updates = i < 5 ? true : false;
}
run_workers(myargs, num_threads, cli_args->num_seconds, false, cli_args);
......
......@@ -34,6 +34,8 @@
#include <util/rwlock.h>
#include <util/kibbutz.h>
#include <src/ydb-internal.h>
#include <ft/ybt.h>
using namespace toku;
......@@ -888,6 +890,27 @@ static int UU() verify_op(DB_TXN* UU(txn), ARG UU(arg), void* UU(operation_extra
return r;
}
struct lock_escalation_op_extra {
// sleep somewhere between these times before running escalation.
// this will add some chaos into the mix.
uint64_t min_sleep_time_micros;
uint64_t max_sleep_time_micros;
};
static int UU() lock_escalation_op(DB_TXN *UU(txn), ARG arg, void* operation_extra, void *UU(stats_extra)) {
struct lock_escalation_op_extra *CAST_FROM_VOIDP(extra, operation_extra);
if (extra->max_sleep_time_micros > 0) {
invariant(extra->max_sleep_time_micros >= extra->min_sleep_time_micros);
uint64_t extra_sleep_time = (extra->max_sleep_time_micros - extra->min_sleep_time_micros) + 1;
uint64_t sleep_time = extra->min_sleep_time_micros + (myrandom_r(arg->random_data) % extra_sleep_time);
usleep(sleep_time);
}
if (!arg->cli->nolocktree) {
toku_env_run_lock_escalation_for_test(arg->env);
}
return 0;
}
static int UU() scan_op(DB_TXN *txn, ARG UU(arg), void* operation_extra, void *UU(stats_extra)) {
struct scan_op_extra* CAST_FROM_VOIDP(extra, operation_extra);
for (int i = 0; run_test && i < arg->cli->num_DBs; i++) {
......
......@@ -95,6 +95,12 @@ struct __toku_db_env_internal {
int tmpdir_lockfd;
};
// test-only environment function for running lock escalation
static inline void toku_env_run_lock_escalation_for_test(DB_ENV *env) {
toku::locktree::manager *mgr = &env->i->ltm;
mgr->run_escalation_for_test();
}
// Common error handling macros and panic detection
#define MAYBE_RETURN_ERROR(cond, status) if (cond) return status;
#define HANDLE_PANICKED_ENV(env) if (toku_env_is_panicked(env)) { sleep(1); return EINVAL; }
......