Commit d2d3de2e authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:4920], [t:4953], separate ydb lock from hot indexing and checkpointing

git-svn-id: file:///svn/toku/tokudb@44202 c7de825b-a66e-492c-adef-691d508d4ae1
parent 464a7a7a
......@@ -20,8 +20,7 @@
* to set all the "pending" bits and to create the checkpoint-in-progress versions
* of the header and translation table (btt).
* The following operations must take the multi_operation_lock:
* - insertion into multiple indexes
* - "replace-into" (matching delete and insert on a single key)
* - any set of operations that must be atomic with respect to begin checkpoint
*
* - checkpoint_safe_lock
* This is a new reader-writer lock.
......@@ -35,13 +34,6 @@
* The application can use this lock to disable checkpointing during other sensitive
* operations, such as making a backup copy of the database.
*
* - ydb_big_lock
* This is the existing lock used to serialize all access to tokudb.
* This lock is held by the checkpoint function only for as long as is required to
* to set all the "pending" bits and to create the checkpoint-in-progress versions
* of the header and translation table (btt).
*
* Once the "pending" bits are set and the snapshots are taken of the header and btt,
* most normal database operations are permitted to resume.
*
......@@ -118,10 +110,6 @@ static LSN last_completed_checkpoint_lsn;
static toku_pthread_rwlock_t checkpoint_safe_lock;
static toku_pthread_rwlock_t multi_operation_lock;
// Call through function pointers because this layer has no access to ydb lock functions.
static void (*ydb_lock)(void) = NULL;
static void (*ydb_unlock)(void) = NULL;
static BOOL initialized = FALSE; // sanity check
static volatile BOOL locked_mo = FALSE; // true when the multi_operation write lock is held (by checkpoint)
static volatile BOOL locked_cs = FALSE; // true when the checkpoint_safe write lock is held (by checkpoint)
......@@ -220,9 +208,7 @@ toku_checkpoint_safe_client_unlock(void) {
// Initialize the checkpoint mechanism, must be called before any client operations.
void
toku_checkpoint_init(void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void)) {
ydb_lock = ydb_lock_callback;
ydb_unlock = ydb_unlock_callback;
toku_checkpoint_init(void) {
multi_operation_lock_init();
checkpoint_safe_lock_init();
initialized = TRUE;
......@@ -280,7 +266,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
}
multi_operation_checkpoint_lock();
SET_CHECKPOINT_FOOTPRINT(20);
ydb_lock();
toku_ft_open_close_lock();
SET_CHECKPOINT_FOOTPRINT(30);
......@@ -289,7 +274,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
toku_ft_open_close_unlock();
multi_operation_checkpoint_unlock();
ydb_unlock();
SET_CHECKPOINT_FOOTPRINT(40);
if (r==0) {
......
......@@ -58,7 +58,7 @@ void toku_multi_operation_client_unlock(void);
// Initialize the checkpoint mechanism, must be called before any client operations.
// Must pass in function pointers to take/release ydb lock.
void toku_checkpoint_init(void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void));
void toku_checkpoint_init(void);
void toku_checkpoint_destroy(void);
......
......@@ -5423,35 +5423,28 @@ int toku_dump_ft (FILE *f, FT_HANDLE brt) {
return r;
}
int toku_ft_layer_init(void (*ydb_lock_callback)(void),
void (*ydb_unlock_callback)(void)) {
int toku_ft_layer_init(void) {
int r = 0;
//Portability must be initialized first
r = toku_portability_init();
if (r) { goto exit; }
toku_checkpoint_init(ydb_lock_callback, ydb_unlock_callback);
toku_checkpoint_init();
r = toku_ft_serialize_layer_init();
if (r) { goto exit; }
toku_ft_serialize_layer_init();
toku_mutex_init(&ft_open_close_lock, NULL);
exit:
return r;
}
int toku_ft_layer_destroy(void) {
int r = 0;
void toku_ft_layer_destroy(void) {
toku_mutex_destroy(&ft_open_close_lock);
if (r == 0)
r = toku_ft_serialize_layer_destroy();
if (r==0)
toku_checkpoint_destroy();
toku_ft_serialize_layer_destroy();
toku_checkpoint_destroy();
//Portability must be cleaned up last
if (r==0)
r = toku_portability_destroy();
return r;
toku_portability_destroy();
}
void toku_ft_open_close_lock(void) {
......
......@@ -238,14 +238,12 @@ struct ftstat64_s {
int
toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat) __attribute__ ((warn_unused_result));
int toku_ft_layer_init(void (*ydb_lock_callback)(void),
void (*ydb_unlock_callback)(void))
__attribute__ ((warn_unused_result));
int toku_ft_layer_init(void) __attribute__ ((warn_unused_result));
void toku_ft_open_close_lock(void);
void toku_ft_open_close_unlock(void);
int toku_ft_layer_destroy(void) __attribute__ ((warn_unused_result));
int toku_ft_serialize_layer_init(void) __attribute__ ((warn_unused_result));
int toku_ft_serialize_layer_destroy(void) __attribute__ ((warn_unused_result));
void toku_ft_layer_destroy(void);
void toku_ft_serialize_layer_init(void);
void toku_ft_serialize_layer_destroy(void);
void toku_maybe_truncate_cachefile (CACHEFILE cf, int fd, u_int64_t size_used);
// Effect: truncate file if overallocated by at least 32MiB
......
......@@ -783,7 +783,7 @@ toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft_h, TOKU
// (old_ft_h may be one of many handles to the dictionary.)
// txn that created the loader
// Requires:
// ydb_lock is held.
// multi operation lock is held.
// The brt is open. (which implies there can be no zombies.)
// The new file must be a valid dictionary.
// The block size and flags in the new file must match the existing BRT.
......
......@@ -67,17 +67,15 @@ static inline void do_toku_trace(const char *cp, int len) {
static int num_cores = 0; // cache the number of cores for the parallelization
static struct toku_thread_pool *ft_pool = NULL;
int
void
toku_ft_serialize_layer_init(void) {
num_cores = toku_os_get_number_active_processors();
int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r);
return 0;
}
int
void
toku_ft_serialize_layer_destroy(void) {
toku_thread_pool_destroy(&ft_pool);
return 0;
}
enum {FILE_CHANGE_INCREMENT = (16<<20)};
......
......@@ -172,6 +172,7 @@ struct tokutxn {
// Protected by the txn manager lock:
TOKUTXN_STATE state;
struct toku_list prepared_txns_link; // list of prepared transactions
uint32_t num_pin;
};
static inline int
......
......@@ -14,19 +14,14 @@
static int recovery_main(int argc, const char *const argv[]);
static void dummy(void) {}
int
main(int argc, const char *const argv[]) {
{
int rr = toku_ft_layer_init(dummy, dummy);
int rr = toku_ft_layer_init();
assert(rr==0);
}
int r = recovery_main(argc, argv);
{
int rr = toku_ft_layer_destroy();
assert(rr==0);
}
toku_ft_layer_destroy();
return r;
}
......
......@@ -234,16 +234,13 @@ default_parse_args (int argc, const char *argv[]) {
int test_main(int argc, const char *argv[]);
static void dummy(void) {}
int
main(int argc, const char *argv[]) {
initialize_dummymsn();
int rinit = toku_ft_layer_init(dummy, dummy);
int rinit = toku_ft_layer_init();
CKERR(rinit);
int r = test_main(argc, argv);
int rdestroy = toku_ft_layer_destroy();
CKERR(rdestroy);
toku_ft_layer_destroy();
return r;
}
......
......@@ -170,7 +170,8 @@ toku_txn_create_txn (
.txnid64 = xid,
.ancestor_txnid64 = (parent_tokutxn ? parent_tokutxn->ancestor_txnid64 : xid),
.xids = xids,
.roll_info = roll_info
.roll_info = roll_info,
.num_pin = 0
};
......
......@@ -27,6 +27,8 @@ struct txn_manager {
time_t oldest_living_starttime; // timestamp in seconds of when txn with oldest_living_xid started
struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover.
struct toku_list prepared_and_returned_txns; // transactions that have been prepared and unresolved, and have been returned through txn_recover. We need this list so that we can restart the recovery.
toku_cond_t wait_for_unpin_of_txn;
};
static TXN_MANAGER_STATUS_S txn_manager_status;
......@@ -201,6 +203,8 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
txn_manager->oldest_living_starttime = 0;
toku_list_init(&txn_manager->prepared_txns);
toku_list_init(&txn_manager->prepared_and_returned_txns);
toku_cond_init(&txn_manager->wait_for_unpin_of_txn, 0);
*txn_managerp = txn_manager;
}
......@@ -210,6 +214,7 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager) {
toku_omt_destroy(&txn_manager->live_root_txns);
toku_omt_destroy(&txn_manager->snapshot_txnids);
toku_omt_destroy(&txn_manager->live_list_reverse);
toku_cond_destroy(&txn_manager->wait_for_unpin_of_txn);
toku_free(txn_manager);
}
......@@ -692,6 +697,16 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link);
}
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_ABORTING;
toku_mutex_unlock(&txn_manager->txn_manager_lock);
}
......@@ -702,6 +717,16 @@ void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link);
}
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_COMMITTING;
toku_mutex_unlock(&txn_manager->txn_manager_lock);
}
......@@ -749,6 +774,20 @@ exit:
}
// needed for hot indexing
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER UU(txn_manager), TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
txn->num_pin++;
}
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
assert(txn->num_pin > 0);
txn->num_pin--;
if (txn->num_pin == 0) {
toku_cond_broadcast(&txn_manager->wait_for_unpin_of_txn);
}
}
void toku_txn_manager_suspend(TXN_MANAGER txn_manager) {
toku_mutex_lock(&txn_manager->txn_manager_lock);
}
......
......@@ -78,6 +78,9 @@ int toku_txn_manager_recover_txn(
u_int32_t flags
);
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_suspend(TXN_MANAGER txn_manager);
void toku_txn_manager_resume(TXN_MANAGER txn_manager);
......
......@@ -47,10 +47,9 @@ toku_portability_init(void) {
return r;
}
int
void
toku_portability_destroy(void) {
toku_memory_shutdown();
return 0;
}
int
......
../../windows/tests/test.h
\ No newline at end of file
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
#include <toku_portability.h>
#include <toku_assert.h>
#define CKERR(r) ({ int __r = r; if (__r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, __r, strerror(r)); assert(__r==0); })
#define CKERR2(r,r2) do { if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); } while (0)
#define CKERR2s(r,r2,r3) do { if (r!=r2 && r!=r3) fprintf(stderr, "%s:%d error %d %s, expected %d or %d\n", __FILE__, __LINE__, r, strerror(r), r2,r3); assert(r==r2||r==r3); } while (0)
#define DEBUG_LINE do { \
fprintf(stderr, "%s() %s:%d\n", __FUNCTION__, __FILE__, __LINE__); \
fflush(stderr); \
} while (0)
int test_main(int argc, char *const argv[]);
int
main(int argc, char *const argv[]) {
int ri = toku_portability_init();
assert(ri==0);
int r = test_main(argc, argv);
toku_portability_destroy();
return r;
}
......@@ -93,7 +93,7 @@ toku_ydb_lock(void) {
// Update status
STATUS_VALUE(YDB_LOCK_TAKEN)++;
if (new_num_waiters > STATUS_VALUE(YDB_MAX_WAITERS))
STATUS_VALUE(YDB_MAX_WAITERS) = new_num_waiters;
STATUS_VALUE(YDB_MAX_WAITERS) = new_num_waiters;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
}
......@@ -105,7 +105,7 @@ ydb_unlock_internal(unsigned long useconds) {
tokutime_t time_held = now - ydb_big_lock.acquired_time;
STATUS_VALUE(YDB_TOTAL_TIME_YDB_LOCK_HELD) += time_held;
if (time_held > STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD))
STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD) = time_held;
STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD) = time_held;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
toku_mutex_unlock(&ydb_big_lock.lock);
......@@ -113,7 +113,7 @@ ydb_unlock_internal(unsigned long useconds) {
int new_num_waiters = __sync_add_and_fetch(&STATUS_VALUE(YDB_NUM_WAITERS_NOW), -1);
if (new_num_waiters > 0 && useconds > 0) {
__sync_add_and_fetch(&STATUS_VALUE(YDB_TOTAL_SLEEP_TIME), useconds);
__sync_add_and_fetch(&STATUS_VALUE(YDB_TOTAL_SLEEP_TIME), useconds);
usleep(useconds);
}
}
......
......@@ -12,6 +12,7 @@
#define TOKU_INDEXER_INTERNAL_H
#include <ft/txn_state.h>
#include <toku_pthread.h>
// the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array.
// the array is a resizeable array with max size "max_keys" and current size "current_keys".
......@@ -24,7 +25,8 @@ struct indexer_commit_keys {
struct __toku_indexer_internal {
DB_ENV *env;
DB_TXN *txn;
DB_TXN *txn;
toku_mutex_t indexer_lock;
DB *src_db;
int N;
DB **dest_dbs; /* [N] */
......
......@@ -24,6 +24,7 @@
#include <ft/ule.h>
#include <ft/xids.h>
#include "ft/txn_manager.h"
#include <ft/checkpoint.h>
#include "ydb_row_lock.h"
#include "indexer-internal.h"
......@@ -77,13 +78,12 @@ static int indexer_append_xid(DB_INDEXER *indexer, TXNID xid, XIDS *xids_result)
static BOOL indexer_find_prev_xr(DB_INDEXER *indexer, ULEHANDLE ule, uint64_t xrindex, uint64_t *prev_xrindex);
static int indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRHANDLE uxr, DBT *hotkey, DBT *hotval);
static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids);
static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn);
static int indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids);
static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids);
static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn);
static int indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids);
static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids);
static int indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid);
static TOKUTXN_STATE indexer_xid_state(DB_INDEXER *indexer, TXNID xid);
static int indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid, TOKUTXN txn);
// initialize undo globals located in the indexer private object
......@@ -177,43 +177,113 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
return result;
}
static void fill_prov_info(
ULEHANDLE ule,
TXNID* prov_ids,
TOKUTXN_STATE* prov_states,
TOKUTXN* prov_txns,
DB_INDEXER *indexer
)
{
uint32_t num_provisional = ule_get_num_provisional(ule);
uint32_t num_committed = ule_get_num_committed(ule);
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
toku_txn_manager_suspend(txn_manager);
for (uint32_t i = 0; i < num_provisional; i++) {
UXRHANDLE uxr = ule_get_uxr(ule, num_committed+i);
prov_ids[i] = uxr_get_txnid(uxr);
if (indexer->i->test_xid_state) {
prov_states[i] = indexer->i->test_xid_state(indexer, prov_ids[i]);
prov_txns[i] = NULL;
}
else {
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
txn_manager,
prov_ids[i],
&txn
);
prov_txns[i] = txn;
if (txn) {
prov_states[i] = toku_txn_get_state(txn);
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
// pin
toku_txn_manager_pin_live_txn_unlocked(txn_manager, txn);
}
}
else {
prov_states[i] = TOKUTXN_RETIRED;
}
}
}
toku_txn_manager_resume(txn_manager);
}
static void release_txns(
ULEHANDLE ule,
TOKUTXN_STATE* prov_states,
TOKUTXN* prov_txns,
DB_INDEXER *indexer
)
{
uint32_t num_provisional = ule_get_num_provisional(ule);
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
BOOL some_txn_pinned = FALSE;
if (indexer->i->test_xid_state) {
goto exit;
}
// see if any txn pinned before bothering to grab txn_manager lock
for (u_int32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
assert(prov_txns[i]);
some_txn_pinned = TRUE;
}
}
if (some_txn_pinned) {
toku_txn_manager_suspend(txn_manager);
for (u_int32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
toku_txn_manager_unpin_live_txn_unlocked(txn_manager, prov_txns[i]);
}
}
toku_txn_manager_resume(txn_manager);
}
exit:
return;
}
static int
indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
int result = 0;
BOOL txn_manager_suspended = FALSE;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(indexer->i->env->i->logger);
uint32_t num_committed = ule_get_num_committed(ule);
uint32_t num_provisional = ule_get_num_provisional(ule);
indexer_commit_keys_set_empty(&indexer->i->commit_keys);
toku_txn_manager_suspend(txn_manager);
txn_manager_suspended = TRUE;
// init the xids to the root xid
XIDS xids = xids_get_root_xids();
// scan the provisional stack from the outermost to the innermost transaction record
uint32_t num_committed = ule_get_num_committed(ule);
uint32_t num_provisional = ule_get_num_provisional(ule);
BOOL outermost_retired = FALSE;
TXNID outermost_xid = TXNID_NONE;
TOKUTXN_STATE outermost_xid_state = TOKUTXN_RETIRED;
if (num_provisional) {
outermost_xid = uxr_get_txnid(ule_get_uxr(ule, num_committed));
outermost_xid_state = indexer_xid_state(indexer, outermost_xid);
outermost_retired = outermost_xid_state == TOKUTXN_RETIRED;
}
else {
outermost_retired = TRUE;
}
if (outermost_retired) {
toku_txn_manager_resume(txn_manager);
txn_manager_suspended = FALSE;
TXNID prov_ids[num_provisional];
TOKUTXN_STATE prov_states[num_provisional];
TOKUTXN prov_txns[num_provisional];
memset(prov_txns, 0, sizeof(prov_txns));
if (num_provisional == 0) {
goto exit;
}
fill_prov_info(ule, prov_ids, prov_states, prov_txns, indexer);
TXNID outermost_xid_state = prov_states[0];
// scan the provisional stack from the outermost to the innermost transaction record
TOKUTXN curr_txn = NULL;
for (uint64_t xrindex = num_committed; xrindex < num_committed + num_provisional; xrindex++) {
// get the ith transaction record
UXRHANDLE uxr = ule_get_uxr(ule, xrindex);
TXNID this_xid = uxr_get_txnid(uxr);
TOKUTXN_STATE this_xid_state = outermost_retired ? TOKUTXN_RETIRED : indexer_xid_state(indexer, this_xid);
TOKUTXN_STATE this_xid_state = prov_states[xrindex - num_committed];
if (this_xid_state == TOKUTXN_ABORTING) {
break; // nothing to do once we reach a transaction that is aborting
......@@ -221,10 +291,15 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
if (xrindex == num_committed) { // if this is the outermost xr
result = indexer_set_xid(indexer, this_xid, &xids); // always add the outermost xid to the XIDS list
curr_txn = prov_txns[xrindex - num_committed];
} else {
switch (this_xid_state) {
case TOKUTXN_LIVE:
result = indexer_append_xid(indexer, this_xid, &xids); // append a live xid to the XIDS list
curr_txn = prov_txns[xrindex - num_committed];
if (!indexer->i->test_xid_state) {
assert(curr_txn);
}
break;
case TOKUTXN_PREPARING:
assert(0); // not allowed
......@@ -242,9 +317,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
assert(this_xid_state == TOKUTXN_RETIRED);
}
if (uxr_is_placeholder(uxr))
if (uxr_is_placeholder(uxr)) {
continue; // skip placeholders
}
// undo
uint64_t prev_xrindex;
BOOL prev_xrindex_found = indexer_find_prev_xr(indexer, ule, xrindex, &prev_xrindex);
......@@ -261,9 +336,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
case TOKUTXN_LIVE:
case TOKUTXN_PREPARING:
assert(this_xid_state != TOKUTXN_ABORTING);
result = indexer_ft_delete_provisional(indexer, hotdb, &indexer->i->hotkey, xids);
result = indexer_ft_delete_provisional(indexer, hotdb, &indexer->i->hotkey, xids, curr_txn);
if (result == 0)
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, outermost_xid);
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, prov_ids[0], curr_txn);
break;
case TOKUTXN_COMMITTING:
case TOKUTXN_RETIRED:
......@@ -293,9 +368,10 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
case TOKUTXN_LIVE:
case TOKUTXN_PREPARING:
assert(this_xid_state != TOKUTXN_ABORTING);
result = indexer_ft_insert_provisional(indexer, hotdb, &indexer->i->hotkey, &indexer->i->hotval, xids);
if (result == 0)
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, outermost_xid);
result = indexer_ft_insert_provisional(indexer, hotdb, &indexer->i->hotkey, &indexer->i->hotval, xids, curr_txn);
if (result == 0) {
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, prov_ids[0], prov_txns[0]);
}
break;
case TOKUTXN_COMMITTING:
case TOKUTXN_RETIRED:
......@@ -319,10 +395,14 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
for (int i = 0; result == 0 && i < indexer_commit_keys_valid(&indexer->i->commit_keys); i++) {
result = indexer_ft_commit(indexer, hotdb, &indexer->i->commit_keys.keys[i], xids);
}
// be careful with this in the future. Right now, only exit path
// is BEFORE we call fill_prov_info, so this happens before exit
// If in the future we add a way to exit after fill_prov_info,
// then this will need to be handled below exit
release_txns(ule, prov_states, prov_txns, indexer);
exit:
xids_destroy(&xids);
if (txn_manager_suspended) {
toku_txn_manager_resume(toku_logger_get_txn_manager(indexer->i->env->i->logger));
}
return result;
}
......@@ -396,46 +476,14 @@ indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRH
return result;
}
// return the state of a transaction given a transaction id. if the transaction no longer exists,
// then return TOKUTXN_RETIRED.
static TOKUTXN_STATE
indexer_xid_state(DB_INDEXER *indexer, TXNID xid) {
TOKUTXN_STATE result;
// TEST
if (indexer->i->test_xid_state) {
result = indexer->i->test_xid_state(indexer, xid);
} else {
DB_ENV *env = indexer->i->env;
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
xid,
&txn
);
if (txn)
result = toku_txn_get_state(txn);
else
result = TOKUTXN_RETIRED;
}
return result;
}
// Take a write lock on the given key for the outermost xid in the xids list.
static int
indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid) {
indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid, TOKUTXN txn) {
int result = 0;
// TEST
if (indexer->i->test_lock_key) {
result = indexer->i->test_lock_key(indexer, outermost_live_xid, hotdb, key);
} else {
DB_ENV *env = indexer->i->env;
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
outermost_live_xid,
&txn
);
assert(txn != NULL);
result = toku_grab_write_lock(hotdb, key, txn);
}
return result;
......@@ -460,26 +508,10 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u
return prev_found;
}
// get the innermost live txn from the xids stack. the xid on the top of the xids stack must be live
// when calling this function. the indexer_append_xid only appends live xid's onto the stack.
static TOKUTXN
indexer_get_innermost_live_txn(DB_INDEXER *indexer, XIDS xids) {
DB_ENV *env = indexer->i->env;
uint8_t num_xids = xids_get_num_xids(xids);
TXNID xid = xids_get_xid(xids, (u_int8_t)(num_xids-1));
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
xid,
&txn
);
return txn;
}
// inject "delete" message into brt with logging in recovery and rollback logs,
// and making assocation between txn and brt
static int
indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) {
indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) {
int result = 0;
// TEST
if (indexer->i->test_delete_provisional) {
......@@ -487,9 +519,16 @@ indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
TOKUTXN txn = indexer_get_innermost_live_txn(indexer, xids);
assert(txn != NULL);
// Not sure if this is really necessary, as
// the hot index DB should have to be checkpointed
// upon commit of the hot index transaction, but
// it is safe to do this
// this question apples to delete_committed, insert_provisional
// and insert_committed
toku_multi_operation_client_lock();
result = toku_ft_maybe_delete (hotdb->i->ft_handle, hotkey, txn, FALSE, ZERO_LSN, TRUE);
toku_multi_operation_client_unlock();
}
}
return result;
......@@ -504,8 +543,14 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
result = indexer->i->test_delete_committed(indexer, hotdb, hotkey, xids);
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0)
if (result == 0) {
// MO lock needed because toku_ft_root_put_cmd must be atomic
// with respect to checkpointing
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids);
toku_multi_operation_client_unlock();
}
}
return result;
}
......@@ -513,7 +558,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
// inject "insert" message into brt with logging in recovery and rollback logs,
// and making assocation between txn and brt
static int
indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids) {
indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) {
int result = 0;
// TEST
if (indexer->i->test_insert_provisional) {
......@@ -521,12 +566,14 @@ indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) {
TOKUTXN txn = indexer_get_innermost_live_txn(indexer, xids);
assert(txn != NULL);
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_maybe_insert (hotdb->i->ft_handle, hotkey, hotval, txn, FALSE, ZERO_LSN, TRUE, FT_INSERT);
toku_multi_operation_client_unlock();
}
}
return result;
return result;
}
// send an insert message into the tree without rollback or recovery logging
......@@ -539,8 +586,14 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
result = indexer->i->test_insert_committed(indexer, hotdb, hotkey, hotval, xids);
} else {
result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0)
if (result == 0) {
// MO lock needed because toku_ft_root_put_cmd must be atomic
// with respect to checkpointing
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT);
toku_multi_operation_client_unlock();
}
}
return result;
}
......
......@@ -113,7 +113,8 @@ disassociate_indexer_from_hot_dbs(DB_INDEXER *indexer) {
static void
free_indexer_resources(DB_INDEXER *indexer) {
if ( indexer->i ) {
if ( indexer->i ) {
toku_mutex_destroy(&indexer->i->indexer_lock);
if ( indexer->i->lec ) { le_cursor_close(indexer->i->lec); }
if ( indexer->i->fnums ) {
toku_free(indexer->i->fnums);
......@@ -135,6 +136,16 @@ free_indexer(DB_INDEXER *indexer) {
}
}
void
toku_indexer_lock(DB_INDEXER* indexer) {
toku_mutex_lock(&indexer->i->indexer_lock);
}
void
toku_indexer_unlock(DB_INDEXER* indexer) {
toku_mutex_unlock(&indexer->i->indexer_lock);
}
int
toku_indexer_create_indexer(DB_ENV *env,
DB_TXN *txn,
......@@ -180,6 +191,8 @@ toku_indexer_create_indexer(DB_ENV *env,
indexer->close = close_indexer;
indexer->abort = abort_indexer;
toku_mutex_init(&indexer->i->indexer_lock, NULL);
//
// create and close a dummy loader to get redirection going for the hot indexer
// This way, if the hot index aborts, but other transactions have references to the
......@@ -273,7 +286,7 @@ build_index(DB_INDEXER *indexer) {
BOOL done = FALSE;
for (uint64_t loop_count = 0; !done; loop_count++) {
toku_ydb_lock();
toku_indexer_lock(indexer);
result = le_cursor_next(indexer->i->lec, &le);
if (result != 0) {
......@@ -295,10 +308,7 @@ build_index(DB_INDEXER *indexer) {
toku_ule_free(ule);
}
// if there is lock contention, then sleep for 1 millisecond after the unlock
// note: the value 1000 was empirically determined to provide good query performance
// during hotindexing
toku_ydb_unlock_and_yield(1000);
toku_indexer_unlock(indexer);
if (result == 0)
result = maybe_call_poll_func(indexer, loop_count);
......
......@@ -16,6 +16,12 @@ extern "C" {
#endif
// locking and unlocking functions to synchronize cursor position with
// XXX_multiple APIs
void toku_indexer_lock(DB_INDEXER* indexer);
void toku_indexer_unlock(DB_INDEXER* indexer);
// The indexer populates multiple destination db's from the contents of one source db.
// While the indexes are being built by the indexer, the application may continue to
// change the contents of the source db. The changes will be reflected into the destination
......
......@@ -9,6 +9,8 @@
#include <sys/stat.h>
#include "key-val.h"
toku_mutex_t put_lock;
enum {NUM_INDEXER_INDEXES=1};
static const int NUM_DBS = NUM_INDEXER_INDEXES + 1; // 1 for source DB
static const int NUM_ROWS = 100000;
......@@ -83,6 +85,7 @@ static void * client(void *arg)
dbt_init(&val, &v, sizeof(v));
while ( retry++ < 10 ) {
toku_mutex_lock(&put_lock);
rr = env->put_multiple(env,
cs->dbs[0],
txn,
......@@ -93,6 +96,7 @@ static void * client(void *arg)
dest_keys,
dest_vals,
cs->flags);
toku_mutex_unlock(&put_lock);
if ( rr == 0 ) break;
sleep(0);
}
......@@ -257,12 +261,14 @@ static void test_indexer(DB *src, DB **dbs)
CKERR(r);
if ( verbose ) printf("test_indexer create_indexer\n");
toku_mutex_lock(&put_lock);
r = env->create_indexer(env, txn, &indexer, src, NUM_DBS-1, &dbs[1], db_flags, 0);
CKERR(r);
r = indexer->set_error_callback(indexer, NULL, NULL);
CKERR(r);
r = indexer->set_poll_function(indexer, poll_print, NULL);
CKERR(r);
toku_mutex_unlock(&put_lock);
// start threads doing additional inserts - no lock issues since indexer already created
r = toku_pthread_create(&client_threads[0], 0, client, (void *)&client_specs[0]); CKERR(r);
......@@ -283,8 +289,10 @@ static void test_indexer(DB *src, DB **dbs)
}
if ( verbose ) printf("test_indexer close\n");
toku_mutex_lock(&put_lock);
r = indexer->close(indexer);
CKERR(r);
toku_mutex_unlock(&put_lock);
r = txn->commit(txn, DB_TXN_SYNC);
CKERR(r);
......@@ -306,6 +314,7 @@ static void test_indexer(DB *src, DB **dbs)
static void run_test(void)
{
int r;
toku_mutex_init(&put_lock, NULL);
r = system("rm -rf " ENVDIR); CKERR(r);
r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
r = toku_os_mkdir(ENVDIR "/log", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
......@@ -350,6 +359,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
r = dbs[i]->close(dbs[i], 0); CKERR(r);
}
toku_mutex_destroy(&put_lock);
r = env->close(env, 0); CKERR(r);
}
......
......@@ -426,8 +426,7 @@ main(int argc, char * const argv[])
toku_os_initialize_settings(1);
r = test_main(argc, argv);
#if IS_TDB && TOKU_WINDOWS
int rdestroy = toku_ydb_destroy();
CKERR(rdestroy);
toku_ydb_destroy();
#endif
return r;
}
......
......@@ -193,22 +193,22 @@ int
toku_ydb_init(void) {
int r = 0;
//Lower level must be initialized first.
if (r==0)
r = toku_ft_layer_init(toku_ydb_lock, toku_ydb_unlock);
if (r==0)
if (r==0) {
r = toku_ft_layer_init();
}
if (r==0) {
toku_ydb_lock_init();
}
return r;
}
// Do not clean up resources if env is panicked, just exit ugly
int
void
toku_ydb_destroy(void) {
int r = 0;
if (env_is_panicked == 0) {
toku_ydb_lock_destroy();
r = toku_ft_layer_destroy();
toku_ft_layer_destroy();
}
return r;
}
static int
......
......@@ -14,7 +14,7 @@ extern "C" {
int toku_ydb_init(void);
// Called when the ydb library is unloaded.
int toku_ydb_destroy(void);
void toku_ydb_destroy(void);
// db_env_create for the trace library
int db_env_create_toku10(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));
......
......@@ -21,8 +21,7 @@ static void __attribute__((constructor)) libtokudb_init(void) {
static void __attribute__((destructor)) libtokudb_destroy(void) {
// printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);
int r = toku_ydb_destroy();
assert(r==0);
toku_ydb_destroy();
}
#endif
......@@ -40,7 +39,7 @@ BOOL WINAPI DllMain(HINSTANCE h, DWORD reason, LPVOID reserved) {
r = toku_ydb_init();
break;
case DLL_PROCESS_DETACH:
r = toku_ydb_destroy();
toku_ydb_destroy();
break;
case DLL_THREAD_ATTACH:
//TODO: Any new thread code if necessary, i.e. allocate per-thread
......
......@@ -374,6 +374,39 @@ do_del_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT keys[], DB *s
return r;
}
//
// if a hot index is in progress, gets the indexer
// also verifies that there is at most one hot index
// in progress. If it finds more than one, then returns EINVAL
//
static int
get_indexer_if_exists(
uint32_t num_dbs,
DB **db_array,
DB_INDEXER** indexerp
)
{
int r = 0;
DB_INDEXER* first_indexer = NULL;
for (uint32_t i = 0; i < num_dbs; i++) {
DB_INDEXER* indexer = toku_db_get_indexer(db_array[i]);
if (indexer) {
if (!first_indexer) {
first_indexer = indexer;
}
else {
if (first_indexer != indexer) {
r = EINVAL;
}
}
}
}
if (r == 0) {
*indexerp = first_indexer;
}
return r;
}
int
env_del_multiple(
DB_ENV *env,
......@@ -388,6 +421,7 @@ env_del_multiple(
{
int r;
DBT del_keys[num_dbs];
DB_INDEXER* indexer = NULL;
HANDLE_PANICKED_ENV(env);
......@@ -401,6 +435,10 @@ env_del_multiple(
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
{
uint32_t lock_flags[num_dbs];
......@@ -442,7 +480,10 @@ env_del_multiple(
brts[which_db] = db->i->ft_handle;
}
toku_ydb_lock();
if (indexer) {
toku_indexer_lock(indexer);
}
toku_multi_operation_client_lock();
if (num_dbs == 1) {
r = log_del_single(txn, brts[0], &del_keys[0]);
}
......@@ -452,7 +493,10 @@ env_del_multiple(
if (r == 0)
r = do_del_multiple(txn, num_dbs, db_array, del_keys, src_db, src_key);
}
toku_ydb_unlock();
toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
cleanup:
if (r == 0)
......@@ -527,6 +571,7 @@ env_put_multiple_internal(
int r;
DBT put_keys[num_dbs];
DBT put_vals[num_dbs];
DB_INDEXER* indexer = NULL;
HANDLE_PANICKED_ENV(env);
......@@ -544,6 +589,10 @@ env_put_multiple_internal(
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
for (uint32_t which_db = 0; which_db < num_dbs; which_db++) {
DB *db = db_array[which_db];
......@@ -587,7 +636,10 @@ env_put_multiple_internal(
brts[which_db] = db->i->ft_handle;
}
toku_ydb_lock();
if (indexer) {
toku_indexer_lock(indexer);
}
toku_multi_operation_client_lock();
if (num_dbs == 1) {
r = log_put_single(txn, brts[0], &put_keys[0], &put_vals[0]);
}
......@@ -597,7 +649,10 @@ env_put_multiple_internal(
if (r == 0) {
r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, src_db, src_key);
}
toku_ydb_unlock();
toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
cleanup:
if (r == 0)
......@@ -617,6 +672,7 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
int r = 0;
HANDLE_PANICKED_ENV(env);
DB_INDEXER* indexer = NULL;
if (!txn) {
r = EINVAL;
......@@ -628,6 +684,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
{
uint32_t n_del_dbs = 0;
......@@ -731,9 +791,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
n_put_dbs++;
}
}
// grab the ydb lock for the actual work that
// depends on it
toku_ydb_lock();
if (indexer) {
toku_indexer_lock(indexer);
}
toku_multi_operation_client_lock();
if (r == 0 && n_del_dbs > 0) {
if (n_del_dbs == 1)
r = log_del_single(txn, del_fts[0], &del_keys[0]);
......@@ -751,7 +812,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
if (r == 0)
r = do_put_multiple(txn, n_put_dbs, put_dbs, put_keys, put_vals, src_db, new_src_key);
}
toku_ydb_unlock();
toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
}
cleanup:
......
......@@ -206,7 +206,7 @@ int toku_set_func_fclose(int (*)(FILE*));
int toku_set_func_read(ssize_t (*)(int, void *, size_t));
int toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t));
int toku_portability_init (void);
int toku_portability_destroy (void);
void toku_portability_destroy (void);
#if defined(__cplusplus) || defined(__cilkplusplus)
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment