Commit 48aa0d2f authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:4920], [t:4953], separate ydb lock from hot indexing and checkpointing

git-svn-id: file:///svn/toku/tokudb@44202 c7de825b-a66e-492c-adef-691d508d4ae1
parent 6ccd2251
...@@ -20,8 +20,7 @@ ...@@ -20,8 +20,7 @@
* to set all the "pending" bits and to create the checkpoint-in-progress versions * to set all the "pending" bits and to create the checkpoint-in-progress versions
* of the header and translation table (btt). * of the header and translation table (btt).
* The following operations must take the multi_operation_lock: * The following operations must take the multi_operation_lock:
* - insertion into multiple indexes * - any set of operations that must be atomic with respect to begin checkpoint
* - "replace-into" (matching delete and insert on a single key)
* *
* - checkpoint_safe_lock * - checkpoint_safe_lock
* This is a new reader-writer lock. * This is a new reader-writer lock.
...@@ -35,13 +34,6 @@ ...@@ -35,13 +34,6 @@
* The application can use this lock to disable checkpointing during other sensitive * The application can use this lock to disable checkpointing during other sensitive
* operations, such as making a backup copy of the database. * operations, such as making a backup copy of the database.
* *
* - ydb_big_lock
* This is the existing lock used to serialize all access to tokudb.
* This lock is held by the checkpoint function only for as long as is required to
* to set all the "pending" bits and to create the checkpoint-in-progress versions
* of the header and translation table (btt).
*
* Once the "pending" bits are set and the snapshots are taken of the header and btt, * Once the "pending" bits are set and the snapshots are taken of the header and btt,
* most normal database operations are permitted to resume. * most normal database operations are permitted to resume.
* *
...@@ -118,10 +110,6 @@ static LSN last_completed_checkpoint_lsn; ...@@ -118,10 +110,6 @@ static LSN last_completed_checkpoint_lsn;
static toku_pthread_rwlock_t checkpoint_safe_lock; static toku_pthread_rwlock_t checkpoint_safe_lock;
static toku_pthread_rwlock_t multi_operation_lock; static toku_pthread_rwlock_t multi_operation_lock;
// Call through function pointers because this layer has no access to ydb lock functions.
static void (*ydb_lock)(void) = NULL;
static void (*ydb_unlock)(void) = NULL;
static BOOL initialized = FALSE; // sanity check static BOOL initialized = FALSE; // sanity check
static volatile BOOL locked_mo = FALSE; // true when the multi_operation write lock is held (by checkpoint) static volatile BOOL locked_mo = FALSE; // true when the multi_operation write lock is held (by checkpoint)
static volatile BOOL locked_cs = FALSE; // true when the checkpoint_safe write lock is held (by checkpoint) static volatile BOOL locked_cs = FALSE; // true when the checkpoint_safe write lock is held (by checkpoint)
...@@ -220,9 +208,7 @@ toku_checkpoint_safe_client_unlock(void) { ...@@ -220,9 +208,7 @@ toku_checkpoint_safe_client_unlock(void) {
// Initialize the checkpoint mechanism, must be called before any client operations. // Initialize the checkpoint mechanism, must be called before any client operations.
void void
toku_checkpoint_init(void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void)) { toku_checkpoint_init(void) {
ydb_lock = ydb_lock_callback;
ydb_unlock = ydb_unlock_callback;
multi_operation_lock_init(); multi_operation_lock_init();
checkpoint_safe_lock_init(); checkpoint_safe_lock_init();
initialized = TRUE; initialized = TRUE;
...@@ -280,7 +266,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger, ...@@ -280,7 +266,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
} }
multi_operation_checkpoint_lock(); multi_operation_checkpoint_lock();
SET_CHECKPOINT_FOOTPRINT(20); SET_CHECKPOINT_FOOTPRINT(20);
ydb_lock();
toku_ft_open_close_lock(); toku_ft_open_close_lock();
SET_CHECKPOINT_FOOTPRINT(30); SET_CHECKPOINT_FOOTPRINT(30);
...@@ -289,7 +274,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger, ...@@ -289,7 +274,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
toku_ft_open_close_unlock(); toku_ft_open_close_unlock();
multi_operation_checkpoint_unlock(); multi_operation_checkpoint_unlock();
ydb_unlock();
SET_CHECKPOINT_FOOTPRINT(40); SET_CHECKPOINT_FOOTPRINT(40);
if (r==0) { if (r==0) {
......
...@@ -58,7 +58,7 @@ void toku_multi_operation_client_unlock(void); ...@@ -58,7 +58,7 @@ void toku_multi_operation_client_unlock(void);
// Initialize the checkpoint mechanism, must be called before any client operations. // Initialize the checkpoint mechanism, must be called before any client operations.
// Must pass in function pointers to take/release ydb lock. // Must pass in function pointers to take/release ydb lock.
void toku_checkpoint_init(void (*ydb_lock_callback)(void), void (*ydb_unlock_callback)(void)); void toku_checkpoint_init(void);
void toku_checkpoint_destroy(void); void toku_checkpoint_destroy(void);
......
...@@ -5423,35 +5423,28 @@ int toku_dump_ft (FILE *f, FT_HANDLE brt) { ...@@ -5423,35 +5423,28 @@ int toku_dump_ft (FILE *f, FT_HANDLE brt) {
return r; return r;
} }
int toku_ft_layer_init(void (*ydb_lock_callback)(void), int toku_ft_layer_init(void) {
void (*ydb_unlock_callback)(void)) {
int r = 0; int r = 0;
//Portability must be initialized first //Portability must be initialized first
r = toku_portability_init(); r = toku_portability_init();
if (r) { goto exit; } if (r) { goto exit; }
toku_checkpoint_init(ydb_lock_callback, ydb_unlock_callback); toku_checkpoint_init();
r = toku_ft_serialize_layer_init(); toku_ft_serialize_layer_init();
if (r) { goto exit; }
toku_mutex_init(&ft_open_close_lock, NULL); toku_mutex_init(&ft_open_close_lock, NULL);
exit: exit:
return r; return r;
} }
int toku_ft_layer_destroy(void) { void toku_ft_layer_destroy(void) {
int r = 0;
toku_mutex_destroy(&ft_open_close_lock); toku_mutex_destroy(&ft_open_close_lock);
if (r == 0) toku_ft_serialize_layer_destroy();
r = toku_ft_serialize_layer_destroy(); toku_checkpoint_destroy();
if (r==0)
toku_checkpoint_destroy();
//Portability must be cleaned up last //Portability must be cleaned up last
if (r==0) toku_portability_destroy();
r = toku_portability_destroy();
return r;
} }
void toku_ft_open_close_lock(void) { void toku_ft_open_close_lock(void) {
......
...@@ -238,14 +238,12 @@ struct ftstat64_s { ...@@ -238,14 +238,12 @@ struct ftstat64_s {
int int
toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat) __attribute__ ((warn_unused_result)); toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat) __attribute__ ((warn_unused_result));
int toku_ft_layer_init(void (*ydb_lock_callback)(void), int toku_ft_layer_init(void) __attribute__ ((warn_unused_result));
void (*ydb_unlock_callback)(void))
__attribute__ ((warn_unused_result));
void toku_ft_open_close_lock(void); void toku_ft_open_close_lock(void);
void toku_ft_open_close_unlock(void); void toku_ft_open_close_unlock(void);
int toku_ft_layer_destroy(void) __attribute__ ((warn_unused_result)); void toku_ft_layer_destroy(void);
int toku_ft_serialize_layer_init(void) __attribute__ ((warn_unused_result)); void toku_ft_serialize_layer_init(void);
int toku_ft_serialize_layer_destroy(void) __attribute__ ((warn_unused_result)); void toku_ft_serialize_layer_destroy(void);
void toku_maybe_truncate_cachefile (CACHEFILE cf, int fd, u_int64_t size_used); void toku_maybe_truncate_cachefile (CACHEFILE cf, int fd, u_int64_t size_used);
// Effect: truncate file if overallocated by at least 32MiB // Effect: truncate file if overallocated by at least 32MiB
......
...@@ -783,7 +783,7 @@ toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft_h, TOKU ...@@ -783,7 +783,7 @@ toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft_h, TOKU
// (old_ft_h may be one of many handles to the dictionary.) // (old_ft_h may be one of many handles to the dictionary.)
// txn that created the loader // txn that created the loader
// Requires: // Requires:
// ydb_lock is held. // multi operation lock is held.
// The brt is open. (which implies there can be no zombies.) // The brt is open. (which implies there can be no zombies.)
// The new file must be a valid dictionary. // The new file must be a valid dictionary.
// The block size and flags in the new file must match the existing BRT. // The block size and flags in the new file must match the existing BRT.
......
...@@ -67,17 +67,15 @@ static inline void do_toku_trace(const char *cp, int len) { ...@@ -67,17 +67,15 @@ static inline void do_toku_trace(const char *cp, int len) {
static int num_cores = 0; // cache the number of cores for the parallelization static int num_cores = 0; // cache the number of cores for the parallelization
static struct toku_thread_pool *ft_pool = NULL; static struct toku_thread_pool *ft_pool = NULL;
int void
toku_ft_serialize_layer_init(void) { toku_ft_serialize_layer_init(void) {
num_cores = toku_os_get_number_active_processors(); num_cores = toku_os_get_number_active_processors();
int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r); int r = toku_thread_pool_create(&ft_pool, num_cores); lazy_assert_zero(r);
return 0;
} }
int void
toku_ft_serialize_layer_destroy(void) { toku_ft_serialize_layer_destroy(void) {
toku_thread_pool_destroy(&ft_pool); toku_thread_pool_destroy(&ft_pool);
return 0;
} }
enum {FILE_CHANGE_INCREMENT = (16<<20)}; enum {FILE_CHANGE_INCREMENT = (16<<20)};
......
...@@ -172,6 +172,7 @@ struct tokutxn { ...@@ -172,6 +172,7 @@ struct tokutxn {
// Protected by the txn manager lock: // Protected by the txn manager lock:
TOKUTXN_STATE state; TOKUTXN_STATE state;
struct toku_list prepared_txns_link; // list of prepared transactions struct toku_list prepared_txns_link; // list of prepared transactions
uint32_t num_pin;
}; };
static inline int static inline int
......
...@@ -14,19 +14,14 @@ ...@@ -14,19 +14,14 @@
static int recovery_main(int argc, const char *const argv[]); static int recovery_main(int argc, const char *const argv[]);
static void dummy(void) {}
int int
main(int argc, const char *const argv[]) { main(int argc, const char *const argv[]) {
{ {
int rr = toku_ft_layer_init(dummy, dummy); int rr = toku_ft_layer_init();
assert(rr==0); assert(rr==0);
} }
int r = recovery_main(argc, argv); int r = recovery_main(argc, argv);
{ toku_ft_layer_destroy();
int rr = toku_ft_layer_destroy();
assert(rr==0);
}
return r; return r;
} }
......
...@@ -234,16 +234,13 @@ default_parse_args (int argc, const char *argv[]) { ...@@ -234,16 +234,13 @@ default_parse_args (int argc, const char *argv[]) {
int test_main(int argc, const char *argv[]); int test_main(int argc, const char *argv[]);
static void dummy(void) {}
int int
main(int argc, const char *argv[]) { main(int argc, const char *argv[]) {
initialize_dummymsn(); initialize_dummymsn();
int rinit = toku_ft_layer_init(dummy, dummy); int rinit = toku_ft_layer_init();
CKERR(rinit); CKERR(rinit);
int r = test_main(argc, argv); int r = test_main(argc, argv);
int rdestroy = toku_ft_layer_destroy(); toku_ft_layer_destroy();
CKERR(rdestroy);
return r; return r;
} }
......
...@@ -170,7 +170,8 @@ toku_txn_create_txn ( ...@@ -170,7 +170,8 @@ toku_txn_create_txn (
.txnid64 = xid, .txnid64 = xid,
.ancestor_txnid64 = (parent_tokutxn ? parent_tokutxn->ancestor_txnid64 : xid), .ancestor_txnid64 = (parent_tokutxn ? parent_tokutxn->ancestor_txnid64 : xid),
.xids = xids, .xids = xids,
.roll_info = roll_info .roll_info = roll_info,
.num_pin = 0
}; };
......
...@@ -27,6 +27,8 @@ struct txn_manager { ...@@ -27,6 +27,8 @@ struct txn_manager {
time_t oldest_living_starttime; // timestamp in seconds of when txn with oldest_living_xid started time_t oldest_living_starttime; // timestamp in seconds of when txn with oldest_living_xid started
struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover. struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover.
struct toku_list prepared_and_returned_txns; // transactions that have been prepared and unresolved, and have been returned through txn_recover. We need this list so that we can restart the recovery. struct toku_list prepared_and_returned_txns; // transactions that have been prepared and unresolved, and have been returned through txn_recover. We need this list so that we can restart the recovery.
toku_cond_t wait_for_unpin_of_txn;
}; };
static TXN_MANAGER_STATUS_S txn_manager_status; static TXN_MANAGER_STATUS_S txn_manager_status;
...@@ -201,6 +203,8 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) { ...@@ -201,6 +203,8 @@ void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
txn_manager->oldest_living_starttime = 0; txn_manager->oldest_living_starttime = 0;
toku_list_init(&txn_manager->prepared_txns); toku_list_init(&txn_manager->prepared_txns);
toku_list_init(&txn_manager->prepared_and_returned_txns); toku_list_init(&txn_manager->prepared_and_returned_txns);
toku_cond_init(&txn_manager->wait_for_unpin_of_txn, 0);
*txn_managerp = txn_manager; *txn_managerp = txn_manager;
} }
...@@ -210,6 +214,7 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager) { ...@@ -210,6 +214,7 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager) {
toku_omt_destroy(&txn_manager->live_root_txns); toku_omt_destroy(&txn_manager->live_root_txns);
toku_omt_destroy(&txn_manager->snapshot_txnids); toku_omt_destroy(&txn_manager->snapshot_txnids);
toku_omt_destroy(&txn_manager->live_list_reverse); toku_omt_destroy(&txn_manager->live_list_reverse);
toku_cond_destroy(&txn_manager->wait_for_unpin_of_txn);
toku_free(txn_manager); toku_free(txn_manager);
} }
...@@ -692,6 +697,16 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn) { ...@@ -692,6 +697,16 @@ void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
invalidate_xa_xid(&txn->xa_xid); invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link); toku_list_remove(&txn->prepared_txns_link);
} }
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_ABORTING; txn->state = TOKUTXN_ABORTING;
toku_mutex_unlock(&txn_manager->txn_manager_lock); toku_mutex_unlock(&txn_manager->txn_manager_lock);
} }
...@@ -702,6 +717,16 @@ void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn) { ...@@ -702,6 +717,16 @@ void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
invalidate_xa_xid(&txn->xa_xid); invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link); toku_list_remove(&txn->prepared_txns_link);
} }
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_COMMITTING; txn->state = TOKUTXN_COMMITTING;
toku_mutex_unlock(&txn_manager->txn_manager_lock); toku_mutex_unlock(&txn_manager->txn_manager_lock);
} }
...@@ -749,6 +774,20 @@ exit: ...@@ -749,6 +774,20 @@ exit:
} }
// needed for hot indexing // needed for hot indexing
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER UU(txn_manager), TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
txn->num_pin++;
}
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
assert(txn->num_pin > 0);
txn->num_pin--;
if (txn->num_pin == 0) {
toku_cond_broadcast(&txn_manager->wait_for_unpin_of_txn);
}
}
void toku_txn_manager_suspend(TXN_MANAGER txn_manager) { void toku_txn_manager_suspend(TXN_MANAGER txn_manager) {
toku_mutex_lock(&txn_manager->txn_manager_lock); toku_mutex_lock(&txn_manager->txn_manager_lock);
} }
......
...@@ -78,6 +78,9 @@ int toku_txn_manager_recover_txn( ...@@ -78,6 +78,9 @@ int toku_txn_manager_recover_txn(
u_int32_t flags u_int32_t flags
); );
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_suspend(TXN_MANAGER txn_manager); void toku_txn_manager_suspend(TXN_MANAGER txn_manager);
void toku_txn_manager_resume(TXN_MANAGER txn_manager); void toku_txn_manager_resume(TXN_MANAGER txn_manager);
......
...@@ -47,10 +47,9 @@ toku_portability_init(void) { ...@@ -47,10 +47,9 @@ toku_portability_init(void) {
return r; return r;
} }
int void
toku_portability_destroy(void) { toku_portability_destroy(void) {
toku_memory_shutdown(); toku_memory_shutdown();
return 0;
} }
int int
......
...@@ -19,8 +19,7 @@ main(int argc, char *const argv[]) { ...@@ -19,8 +19,7 @@ main(int argc, char *const argv[]) {
int ri = toku_portability_init(); int ri = toku_portability_init();
assert(ri==0); assert(ri==0);
int r = test_main(argc, argv); int r = test_main(argc, argv);
int rd = toku_portability_destroy(); toku_portability_destroy();
assert(rd==0);
return r; return r;
} }
...@@ -93,7 +93,7 @@ toku_ydb_lock(void) { ...@@ -93,7 +93,7 @@ toku_ydb_lock(void) {
// Update status // Update status
STATUS_VALUE(YDB_LOCK_TAKEN)++; STATUS_VALUE(YDB_LOCK_TAKEN)++;
if (new_num_waiters > STATUS_VALUE(YDB_MAX_WAITERS)) if (new_num_waiters > STATUS_VALUE(YDB_MAX_WAITERS))
STATUS_VALUE(YDB_MAX_WAITERS) = new_num_waiters; STATUS_VALUE(YDB_MAX_WAITERS) = new_num_waiters;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime; STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
} }
...@@ -105,7 +105,7 @@ ydb_unlock_internal(unsigned long useconds) { ...@@ -105,7 +105,7 @@ ydb_unlock_internal(unsigned long useconds) {
tokutime_t time_held = now - ydb_big_lock.acquired_time; tokutime_t time_held = now - ydb_big_lock.acquired_time;
STATUS_VALUE(YDB_TOTAL_TIME_YDB_LOCK_HELD) += time_held; STATUS_VALUE(YDB_TOTAL_TIME_YDB_LOCK_HELD) += time_held;
if (time_held > STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD)) if (time_held > STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD))
STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD) = time_held; STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD) = time_held;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime; STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
toku_mutex_unlock(&ydb_big_lock.lock); toku_mutex_unlock(&ydb_big_lock.lock);
...@@ -113,7 +113,7 @@ ydb_unlock_internal(unsigned long useconds) { ...@@ -113,7 +113,7 @@ ydb_unlock_internal(unsigned long useconds) {
int new_num_waiters = __sync_add_and_fetch(&STATUS_VALUE(YDB_NUM_WAITERS_NOW), -1); int new_num_waiters = __sync_add_and_fetch(&STATUS_VALUE(YDB_NUM_WAITERS_NOW), -1);
if (new_num_waiters > 0 && useconds > 0) { if (new_num_waiters > 0 && useconds > 0) {
__sync_add_and_fetch(&STATUS_VALUE(YDB_TOTAL_SLEEP_TIME), useconds); __sync_add_and_fetch(&STATUS_VALUE(YDB_TOTAL_SLEEP_TIME), useconds);
usleep(useconds); usleep(useconds);
} }
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#define TOKU_INDEXER_INTERNAL_H #define TOKU_INDEXER_INTERNAL_H
#include <ft/txn_state.h> #include <ft/txn_state.h>
#include <toku_pthread.h>
// the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array. // the indexer_commit_keys is an ordered set of keys described by a DBT in the keys array.
// the array is a resizeable array with max size "max_keys" and current size "current_keys". // the array is a resizeable array with max size "max_keys" and current size "current_keys".
...@@ -24,7 +25,8 @@ struct indexer_commit_keys { ...@@ -24,7 +25,8 @@ struct indexer_commit_keys {
struct __toku_indexer_internal { struct __toku_indexer_internal {
DB_ENV *env; DB_ENV *env;
DB_TXN *txn; DB_TXN *txn;
toku_mutex_t indexer_lock;
DB *src_db; DB *src_db;
int N; int N;
DB **dest_dbs; /* [N] */ DB **dest_dbs; /* [N] */
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <ft/ule.h> #include <ft/ule.h>
#include <ft/xids.h> #include <ft/xids.h>
#include "ft/txn_manager.h" #include "ft/txn_manager.h"
#include <ft/checkpoint.h>
#include "ydb_row_lock.h" #include "ydb_row_lock.h"
#include "indexer-internal.h" #include "indexer-internal.h"
...@@ -77,13 +78,12 @@ static int indexer_append_xid(DB_INDEXER *indexer, TXNID xid, XIDS *xids_result) ...@@ -77,13 +78,12 @@ static int indexer_append_xid(DB_INDEXER *indexer, TXNID xid, XIDS *xids_result)
static BOOL indexer_find_prev_xr(DB_INDEXER *indexer, ULEHANDLE ule, uint64_t xrindex, uint64_t *prev_xrindex); static BOOL indexer_find_prev_xr(DB_INDEXER *indexer, ULEHANDLE ule, uint64_t xrindex, uint64_t *prev_xrindex);
static int indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRHANDLE uxr, DBT *hotkey, DBT *hotval); static int indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRHANDLE uxr, DBT *hotkey, DBT *hotval);
static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids); static int indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn);
static int indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids); static int indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids);
static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids); static int indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn);
static int indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids); static int indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids);
static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids); static int indexer_ft_commit(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids);
static int indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid); static int indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid, TOKUTXN txn);
static TOKUTXN_STATE indexer_xid_state(DB_INDEXER *indexer, TXNID xid);
// initialize undo globals located in the indexer private object // initialize undo globals located in the indexer private object
...@@ -177,43 +177,113 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -177,43 +177,113 @@ indexer_undo_do_committed(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
return result; return result;
} }
static void fill_prov_info(
ULEHANDLE ule,
TXNID* prov_ids,
TOKUTXN_STATE* prov_states,
TOKUTXN* prov_txns,
DB_INDEXER *indexer
)
{
uint32_t num_provisional = ule_get_num_provisional(ule);
uint32_t num_committed = ule_get_num_committed(ule);
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
toku_txn_manager_suspend(txn_manager);
for (uint32_t i = 0; i < num_provisional; i++) {
UXRHANDLE uxr = ule_get_uxr(ule, num_committed+i);
prov_ids[i] = uxr_get_txnid(uxr);
if (indexer->i->test_xid_state) {
prov_states[i] = indexer->i->test_xid_state(indexer, prov_ids[i]);
prov_txns[i] = NULL;
}
else {
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
txn_manager,
prov_ids[i],
&txn
);
prov_txns[i] = txn;
if (txn) {
prov_states[i] = toku_txn_get_state(txn);
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
// pin
toku_txn_manager_pin_live_txn_unlocked(txn_manager, txn);
}
}
else {
prov_states[i] = TOKUTXN_RETIRED;
}
}
}
toku_txn_manager_resume(txn_manager);
}
static void release_txns(
ULEHANDLE ule,
TOKUTXN_STATE* prov_states,
TOKUTXN* prov_txns,
DB_INDEXER *indexer
)
{
uint32_t num_provisional = ule_get_num_provisional(ule);
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
BOOL some_txn_pinned = FALSE;
if (indexer->i->test_xid_state) {
goto exit;
}
// see if any txn pinned before bothering to grab txn_manager lock
for (u_int32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
assert(prov_txns[i]);
some_txn_pinned = TRUE;
}
}
if (some_txn_pinned) {
toku_txn_manager_suspend(txn_manager);
for (u_int32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_LIVE) {
toku_txn_manager_unpin_live_txn_unlocked(txn_manager, prov_txns[i]);
}
}
toku_txn_manager_resume(txn_manager);
}
exit:
return;
}
static int static int
indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
int result = 0; int result = 0;
BOOL txn_manager_suspended = FALSE; uint32_t num_committed = ule_get_num_committed(ule);
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(indexer->i->env->i->logger); uint32_t num_provisional = ule_get_num_provisional(ule);
indexer_commit_keys_set_empty(&indexer->i->commit_keys); indexer_commit_keys_set_empty(&indexer->i->commit_keys);
toku_txn_manager_suspend(txn_manager);
txn_manager_suspended = TRUE;
// init the xids to the root xid // init the xids to the root xid
XIDS xids = xids_get_root_xids(); XIDS xids = xids_get_root_xids();
// scan the provisional stack from the outermost to the innermost transaction record TXNID prov_ids[num_provisional];
uint32_t num_committed = ule_get_num_committed(ule); TOKUTXN_STATE prov_states[num_provisional];
uint32_t num_provisional = ule_get_num_provisional(ule); TOKUTXN prov_txns[num_provisional];
BOOL outermost_retired = FALSE; memset(prov_txns, 0, sizeof(prov_txns));
TXNID outermost_xid = TXNID_NONE; if (num_provisional == 0) {
TOKUTXN_STATE outermost_xid_state = TOKUTXN_RETIRED; goto exit;
if (num_provisional) {
outermost_xid = uxr_get_txnid(ule_get_uxr(ule, num_committed));
outermost_xid_state = indexer_xid_state(indexer, outermost_xid);
outermost_retired = outermost_xid_state == TOKUTXN_RETIRED;
}
else {
outermost_retired = TRUE;
}
if (outermost_retired) {
toku_txn_manager_resume(txn_manager);
txn_manager_suspended = FALSE;
} }
fill_prov_info(ule, prov_ids, prov_states, prov_txns, indexer);
TXNID outermost_xid_state = prov_states[0];
// scan the provisional stack from the outermost to the innermost transaction record
TOKUTXN curr_txn = NULL;
for (uint64_t xrindex = num_committed; xrindex < num_committed + num_provisional; xrindex++) { for (uint64_t xrindex = num_committed; xrindex < num_committed + num_provisional; xrindex++) {
// get the ith transaction record // get the ith transaction record
UXRHANDLE uxr = ule_get_uxr(ule, xrindex); UXRHANDLE uxr = ule_get_uxr(ule, xrindex);
TXNID this_xid = uxr_get_txnid(uxr); TXNID this_xid = uxr_get_txnid(uxr);
TOKUTXN_STATE this_xid_state = outermost_retired ? TOKUTXN_RETIRED : indexer_xid_state(indexer, this_xid); TOKUTXN_STATE this_xid_state = prov_states[xrindex - num_committed];
if (this_xid_state == TOKUTXN_ABORTING) { if (this_xid_state == TOKUTXN_ABORTING) {
break; // nothing to do once we reach a transaction that is aborting break; // nothing to do once we reach a transaction that is aborting
...@@ -221,10 +291,15 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -221,10 +291,15 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
if (xrindex == num_committed) { // if this is the outermost xr if (xrindex == num_committed) { // if this is the outermost xr
result = indexer_set_xid(indexer, this_xid, &xids); // always add the outermost xid to the XIDS list result = indexer_set_xid(indexer, this_xid, &xids); // always add the outermost xid to the XIDS list
curr_txn = prov_txns[xrindex - num_committed];
} else { } else {
switch (this_xid_state) { switch (this_xid_state) {
case TOKUTXN_LIVE: case TOKUTXN_LIVE:
result = indexer_append_xid(indexer, this_xid, &xids); // append a live xid to the XIDS list result = indexer_append_xid(indexer, this_xid, &xids); // append a live xid to the XIDS list
curr_txn = prov_txns[xrindex - num_committed];
if (!indexer->i->test_xid_state) {
assert(curr_txn);
}
break; break;
case TOKUTXN_PREPARING: case TOKUTXN_PREPARING:
assert(0); // not allowed assert(0); // not allowed
...@@ -242,9 +317,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -242,9 +317,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
assert(this_xid_state == TOKUTXN_RETIRED); assert(this_xid_state == TOKUTXN_RETIRED);
} }
if (uxr_is_placeholder(uxr)) if (uxr_is_placeholder(uxr)) {
continue; // skip placeholders continue; // skip placeholders
}
// undo // undo
uint64_t prev_xrindex; uint64_t prev_xrindex;
BOOL prev_xrindex_found = indexer_find_prev_xr(indexer, ule, xrindex, &prev_xrindex); BOOL prev_xrindex_found = indexer_find_prev_xr(indexer, ule, xrindex, &prev_xrindex);
...@@ -261,9 +336,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -261,9 +336,9 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
case TOKUTXN_LIVE: case TOKUTXN_LIVE:
case TOKUTXN_PREPARING: case TOKUTXN_PREPARING:
assert(this_xid_state != TOKUTXN_ABORTING); assert(this_xid_state != TOKUTXN_ABORTING);
result = indexer_ft_delete_provisional(indexer, hotdb, &indexer->i->hotkey, xids); result = indexer_ft_delete_provisional(indexer, hotdb, &indexer->i->hotkey, xids, curr_txn);
if (result == 0) if (result == 0)
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, outermost_xid); result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, prov_ids[0], curr_txn);
break; break;
case TOKUTXN_COMMITTING: case TOKUTXN_COMMITTING:
case TOKUTXN_RETIRED: case TOKUTXN_RETIRED:
...@@ -293,9 +368,10 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -293,9 +368,10 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
case TOKUTXN_LIVE: case TOKUTXN_LIVE:
case TOKUTXN_PREPARING: case TOKUTXN_PREPARING:
assert(this_xid_state != TOKUTXN_ABORTING); assert(this_xid_state != TOKUTXN_ABORTING);
result = indexer_ft_insert_provisional(indexer, hotdb, &indexer->i->hotkey, &indexer->i->hotval, xids); result = indexer_ft_insert_provisional(indexer, hotdb, &indexer->i->hotkey, &indexer->i->hotval, xids, curr_txn);
if (result == 0) if (result == 0) {
result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, outermost_xid); result = indexer_lock_key(indexer, hotdb, &indexer->i->hotkey, prov_ids[0], prov_txns[0]);
}
break; break;
case TOKUTXN_COMMITTING: case TOKUTXN_COMMITTING:
case TOKUTXN_RETIRED: case TOKUTXN_RETIRED:
...@@ -319,10 +395,14 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) { ...@@ -319,10 +395,14 @@ indexer_undo_do_provisional(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule) {
for (int i = 0; result == 0 && i < indexer_commit_keys_valid(&indexer->i->commit_keys); i++) { for (int i = 0; result == 0 && i < indexer_commit_keys_valid(&indexer->i->commit_keys); i++) {
result = indexer_ft_commit(indexer, hotdb, &indexer->i->commit_keys.keys[i], xids); result = indexer_ft_commit(indexer, hotdb, &indexer->i->commit_keys.keys[i], xids);
} }
// be careful with this in the future. Right now, only exit path
// is BEFORE we call fill_prov_info, so this happens before exit
// If in the future we add a way to exit after fill_prov_info,
// then this will need to be handled below exit
release_txns(ule, prov_states, prov_txns, indexer);
exit:
xids_destroy(&xids); xids_destroy(&xids);
if (txn_manager_suspended) {
toku_txn_manager_resume(toku_logger_get_txn_manager(indexer->i->env->i->logger));
}
return result; return result;
} }
...@@ -396,46 +476,14 @@ indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRH ...@@ -396,46 +476,14 @@ indexer_generate_hot_key_val(DB_INDEXER *indexer, DB *hotdb, ULEHANDLE ule, UXRH
return result; return result;
} }
// return the state of a transaction given a transaction id. if the transaction no longer exists,
// then return TOKUTXN_RETIRED.
static TOKUTXN_STATE
indexer_xid_state(DB_INDEXER *indexer, TXNID xid) {
TOKUTXN_STATE result;
// TEST
if (indexer->i->test_xid_state) {
result = indexer->i->test_xid_state(indexer, xid);
} else {
DB_ENV *env = indexer->i->env;
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
xid,
&txn
);
if (txn)
result = toku_txn_get_state(txn);
else
result = TOKUTXN_RETIRED;
}
return result;
}
// Take a write lock on the given key for the outermost xid in the xids list. // Take a write lock on the given key for the outermost xid in the xids list.
static int static int
indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid) { indexer_lock_key(DB_INDEXER *indexer, DB *hotdb, DBT *key, TXNID outermost_live_xid, TOKUTXN txn) {
int result = 0; int result = 0;
// TEST // TEST
if (indexer->i->test_lock_key) { if (indexer->i->test_lock_key) {
result = indexer->i->test_lock_key(indexer, outermost_live_xid, hotdb, key); result = indexer->i->test_lock_key(indexer, outermost_live_xid, hotdb, key);
} else { } else {
DB_ENV *env = indexer->i->env;
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
outermost_live_xid,
&txn
);
assert(txn != NULL);
result = toku_grab_write_lock(hotdb, key, txn); result = toku_grab_write_lock(hotdb, key, txn);
} }
return result; return result;
...@@ -460,26 +508,10 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u ...@@ -460,26 +508,10 @@ indexer_find_prev_xr(DB_INDEXER *UU(indexer), ULEHANDLE ule, uint64_t xrindex, u
return prev_found; return prev_found;
} }
// get the innermost live txn from the xids stack. the xid on the top of the xids stack must be live
// when calling this function. the indexer_append_xid only appends live xid's onto the stack.
static TOKUTXN
indexer_get_innermost_live_txn(DB_INDEXER *indexer, XIDS xids) {
DB_ENV *env = indexer->i->env;
uint8_t num_xids = xids_get_num_xids(xids);
TXNID xid = xids_get_xid(xids, (u_int8_t)(num_xids-1));
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
toku_logger_get_txn_manager(env->i->logger),
xid,
&txn
);
return txn;
}
// inject "delete" message into brt with logging in recovery and rollback logs, // inject "delete" message into brt with logging in recovery and rollback logs,
// and making assocation between txn and brt // and making assocation between txn and brt
static int static int
indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids) { indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xids, TOKUTXN txn) {
int result = 0; int result = 0;
// TEST // TEST
if (indexer->i->test_delete_provisional) { if (indexer->i->test_delete_provisional) {
...@@ -487,9 +519,16 @@ indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS ...@@ -487,9 +519,16 @@ indexer_ft_delete_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) { if (result == 0) {
TOKUTXN txn = indexer_get_innermost_live_txn(indexer, xids);
assert(txn != NULL); assert(txn != NULL);
// Not sure if this is really necessary, as
// the hot index DB should have to be checkpointed
// upon commit of the hot index transaction, but
// it is safe to do this
// this question apples to delete_committed, insert_provisional
// and insert_committed
toku_multi_operation_client_lock();
result = toku_ft_maybe_delete (hotdb->i->ft_handle, hotkey, txn, FALSE, ZERO_LSN, TRUE); result = toku_ft_maybe_delete (hotdb->i->ft_handle, hotkey, txn, FALSE, ZERO_LSN, TRUE);
toku_multi_operation_client_unlock();
} }
} }
return result; return result;
...@@ -504,8 +543,14 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi ...@@ -504,8 +543,14 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
result = indexer->i->test_delete_committed(indexer, hotdb, hotkey, xids); result = indexer->i->test_delete_committed(indexer, hotdb, hotkey, xids);
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) if (result == 0) {
// MO lock needed because toku_ft_root_put_cmd must be atomic
// with respect to checkpointing
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids); result = toku_ft_send_delete(db_struct_i(hotdb)->ft_handle, hotkey, xids);
toku_multi_operation_client_unlock();
}
} }
return result; return result;
} }
...@@ -513,7 +558,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi ...@@ -513,7 +558,7 @@ indexer_ft_delete_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, XIDS xi
// inject "insert" message into brt with logging in recovery and rollback logs, // inject "insert" message into brt with logging in recovery and rollback logs,
// and making assocation between txn and brt // and making assocation between txn and brt
static int static int
indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids) { indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *hotval, XIDS xids, TOKUTXN txn) {
int result = 0; int result = 0;
// TEST // TEST
if (indexer->i->test_insert_provisional) { if (indexer->i->test_insert_provisional) {
...@@ -521,12 +566,14 @@ indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT * ...@@ -521,12 +566,14 @@ indexer_ft_insert_provisional(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) { if (result == 0) {
TOKUTXN txn = indexer_get_innermost_live_txn(indexer, xids);
assert(txn != NULL); assert(txn != NULL);
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_maybe_insert (hotdb->i->ft_handle, hotkey, hotval, txn, FALSE, ZERO_LSN, TRUE, FT_INSERT); result = toku_ft_maybe_insert (hotdb->i->ft_handle, hotkey, hotval, txn, FALSE, ZERO_LSN, TRUE, FT_INSERT);
toku_multi_operation_client_unlock();
} }
} }
return result; return result;
} }
// send an insert message into the tree without rollback or recovery logging // send an insert message into the tree without rollback or recovery logging
...@@ -539,8 +586,14 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho ...@@ -539,8 +586,14 @@ indexer_ft_insert_committed(DB_INDEXER *indexer, DB *hotdb, DBT *hotkey, DBT *ho
result = indexer->i->test_insert_committed(indexer, hotdb, hotkey, hotval, xids); result = indexer->i->test_insert_committed(indexer, hotdb, hotkey, hotval, xids);
} else { } else {
result = toku_ydb_check_avail_fs_space(indexer->i->env); result = toku_ydb_check_avail_fs_space(indexer->i->env);
if (result == 0) if (result == 0) {
// MO lock needed because toku_ft_root_put_cmd must be atomic
// with respect to checkpointing
// comment/question in indexer_ft_delete_provisional applies
toku_multi_operation_client_lock();
result = toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT); result = toku_ft_send_insert(db_struct_i(hotdb)->ft_handle, hotkey, hotval, xids, FT_INSERT);
toku_multi_operation_client_unlock();
}
} }
return result; return result;
} }
......
...@@ -113,7 +113,8 @@ disassociate_indexer_from_hot_dbs(DB_INDEXER *indexer) { ...@@ -113,7 +113,8 @@ disassociate_indexer_from_hot_dbs(DB_INDEXER *indexer) {
static void static void
free_indexer_resources(DB_INDEXER *indexer) { free_indexer_resources(DB_INDEXER *indexer) {
if ( indexer->i ) { if ( indexer->i ) {
toku_mutex_destroy(&indexer->i->indexer_lock);
if ( indexer->i->lec ) { le_cursor_close(indexer->i->lec); } if ( indexer->i->lec ) { le_cursor_close(indexer->i->lec); }
if ( indexer->i->fnums ) { if ( indexer->i->fnums ) {
toku_free(indexer->i->fnums); toku_free(indexer->i->fnums);
...@@ -135,6 +136,16 @@ free_indexer(DB_INDEXER *indexer) { ...@@ -135,6 +136,16 @@ free_indexer(DB_INDEXER *indexer) {
} }
} }
void
toku_indexer_lock(DB_INDEXER* indexer) {
toku_mutex_lock(&indexer->i->indexer_lock);
}
void
toku_indexer_unlock(DB_INDEXER* indexer) {
toku_mutex_unlock(&indexer->i->indexer_lock);
}
int int
toku_indexer_create_indexer(DB_ENV *env, toku_indexer_create_indexer(DB_ENV *env,
DB_TXN *txn, DB_TXN *txn,
...@@ -180,6 +191,8 @@ toku_indexer_create_indexer(DB_ENV *env, ...@@ -180,6 +191,8 @@ toku_indexer_create_indexer(DB_ENV *env,
indexer->close = close_indexer; indexer->close = close_indexer;
indexer->abort = abort_indexer; indexer->abort = abort_indexer;
toku_mutex_init(&indexer->i->indexer_lock, NULL);
// //
// create and close a dummy loader to get redirection going for the hot indexer // create and close a dummy loader to get redirection going for the hot indexer
// This way, if the hot index aborts, but other transactions have references to the // This way, if the hot index aborts, but other transactions have references to the
...@@ -273,7 +286,7 @@ build_index(DB_INDEXER *indexer) { ...@@ -273,7 +286,7 @@ build_index(DB_INDEXER *indexer) {
BOOL done = FALSE; BOOL done = FALSE;
for (uint64_t loop_count = 0; !done; loop_count++) { for (uint64_t loop_count = 0; !done; loop_count++) {
toku_ydb_lock(); toku_indexer_lock(indexer);
result = le_cursor_next(indexer->i->lec, &le); result = le_cursor_next(indexer->i->lec, &le);
if (result != 0) { if (result != 0) {
...@@ -295,10 +308,7 @@ build_index(DB_INDEXER *indexer) { ...@@ -295,10 +308,7 @@ build_index(DB_INDEXER *indexer) {
toku_ule_free(ule); toku_ule_free(ule);
} }
// if there is lock contention, then sleep for 1 millisecond after the unlock toku_indexer_unlock(indexer);
// note: the value 1000 was empirically determined to provide good query performance
// during hotindexing
toku_ydb_unlock_and_yield(1000);
if (result == 0) if (result == 0)
result = maybe_call_poll_func(indexer, loop_count); result = maybe_call_poll_func(indexer, loop_count);
......
...@@ -16,6 +16,12 @@ extern "C" { ...@@ -16,6 +16,12 @@ extern "C" {
#endif #endif
// locking and unlocking functions to synchronize cursor position with
// XXX_multiple APIs
void toku_indexer_lock(DB_INDEXER* indexer);
void toku_indexer_unlock(DB_INDEXER* indexer);
// The indexer populates multiple destination db's from the contents of one source db. // The indexer populates multiple destination db's from the contents of one source db.
// While the indexes are being built by the indexer, the application may continue to // While the indexes are being built by the indexer, the application may continue to
// change the contents of the source db. The changes will be reflected into the destination // change the contents of the source db. The changes will be reflected into the destination
......
...@@ -9,6 +9,8 @@ ...@@ -9,6 +9,8 @@
#include <sys/stat.h> #include <sys/stat.h>
#include "key-val.h" #include "key-val.h"
toku_mutex_t put_lock;
enum {NUM_INDEXER_INDEXES=1}; enum {NUM_INDEXER_INDEXES=1};
static const int NUM_DBS = NUM_INDEXER_INDEXES + 1; // 1 for source DB static const int NUM_DBS = NUM_INDEXER_INDEXES + 1; // 1 for source DB
static const int NUM_ROWS = 100000; static const int NUM_ROWS = 100000;
...@@ -83,6 +85,7 @@ static void * client(void *arg) ...@@ -83,6 +85,7 @@ static void * client(void *arg)
dbt_init(&val, &v, sizeof(v)); dbt_init(&val, &v, sizeof(v));
while ( retry++ < 10 ) { while ( retry++ < 10 ) {
toku_mutex_lock(&put_lock);
rr = env->put_multiple(env, rr = env->put_multiple(env,
cs->dbs[0], cs->dbs[0],
txn, txn,
...@@ -93,6 +96,7 @@ static void * client(void *arg) ...@@ -93,6 +96,7 @@ static void * client(void *arg)
dest_keys, dest_keys,
dest_vals, dest_vals,
cs->flags); cs->flags);
toku_mutex_unlock(&put_lock);
if ( rr == 0 ) break; if ( rr == 0 ) break;
sleep(0); sleep(0);
} }
...@@ -257,12 +261,14 @@ static void test_indexer(DB *src, DB **dbs) ...@@ -257,12 +261,14 @@ static void test_indexer(DB *src, DB **dbs)
CKERR(r); CKERR(r);
if ( verbose ) printf("test_indexer create_indexer\n"); if ( verbose ) printf("test_indexer create_indexer\n");
toku_mutex_lock(&put_lock);
r = env->create_indexer(env, txn, &indexer, src, NUM_DBS-1, &dbs[1], db_flags, 0); r = env->create_indexer(env, txn, &indexer, src, NUM_DBS-1, &dbs[1], db_flags, 0);
CKERR(r); CKERR(r);
r = indexer->set_error_callback(indexer, NULL, NULL); r = indexer->set_error_callback(indexer, NULL, NULL);
CKERR(r); CKERR(r);
r = indexer->set_poll_function(indexer, poll_print, NULL); r = indexer->set_poll_function(indexer, poll_print, NULL);
CKERR(r); CKERR(r);
toku_mutex_unlock(&put_lock);
// start threads doing additional inserts - no lock issues since indexer already created // start threads doing additional inserts - no lock issues since indexer already created
r = toku_pthread_create(&client_threads[0], 0, client, (void *)&client_specs[0]); CKERR(r); r = toku_pthread_create(&client_threads[0], 0, client, (void *)&client_specs[0]); CKERR(r);
...@@ -283,8 +289,10 @@ static void test_indexer(DB *src, DB **dbs) ...@@ -283,8 +289,10 @@ static void test_indexer(DB *src, DB **dbs)
} }
if ( verbose ) printf("test_indexer close\n"); if ( verbose ) printf("test_indexer close\n");
toku_mutex_lock(&put_lock);
r = indexer->close(indexer); r = indexer->close(indexer);
CKERR(r); CKERR(r);
toku_mutex_unlock(&put_lock);
r = txn->commit(txn, DB_TXN_SYNC); r = txn->commit(txn, DB_TXN_SYNC);
CKERR(r); CKERR(r);
...@@ -306,6 +314,7 @@ static void test_indexer(DB *src, DB **dbs) ...@@ -306,6 +314,7 @@ static void test_indexer(DB *src, DB **dbs)
static void run_test(void) static void run_test(void)
{ {
int r; int r;
toku_mutex_init(&put_lock, NULL);
r = system("rm -rf " ENVDIR); CKERR(r); r = system("rm -rf " ENVDIR); CKERR(r);
r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
r = toku_os_mkdir(ENVDIR "/log", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); r = toku_os_mkdir(ENVDIR "/log", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
...@@ -350,6 +359,7 @@ static void run_test(void) ...@@ -350,6 +359,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) { for(int i=0;i<NUM_DBS;i++) {
r = dbs[i]->close(dbs[i], 0); CKERR(r); r = dbs[i]->close(dbs[i], 0); CKERR(r);
} }
toku_mutex_destroy(&put_lock);
r = env->close(env, 0); CKERR(r); r = env->close(env, 0); CKERR(r);
} }
......
...@@ -426,8 +426,7 @@ main(int argc, char * const argv[]) ...@@ -426,8 +426,7 @@ main(int argc, char * const argv[])
toku_os_initialize_settings(1); toku_os_initialize_settings(1);
r = test_main(argc, argv); r = test_main(argc, argv);
#if IS_TDB && TOKU_WINDOWS #if IS_TDB && TOKU_WINDOWS
int rdestroy = toku_ydb_destroy(); toku_ydb_destroy();
CKERR(rdestroy);
#endif #endif
return r; return r;
} }
......
...@@ -193,22 +193,22 @@ int ...@@ -193,22 +193,22 @@ int
toku_ydb_init(void) { toku_ydb_init(void) {
int r = 0; int r = 0;
//Lower level must be initialized first. //Lower level must be initialized first.
if (r==0) if (r==0) {
r = toku_ft_layer_init(toku_ydb_lock, toku_ydb_unlock); r = toku_ft_layer_init();
if (r==0) }
if (r==0) {
toku_ydb_lock_init(); toku_ydb_lock_init();
}
return r; return r;
} }
// Do not clean up resources if env is panicked, just exit ugly // Do not clean up resources if env is panicked, just exit ugly
int void
toku_ydb_destroy(void) { toku_ydb_destroy(void) {
int r = 0;
if (env_is_panicked == 0) { if (env_is_panicked == 0) {
toku_ydb_lock_destroy(); toku_ydb_lock_destroy();
r = toku_ft_layer_destroy(); toku_ft_layer_destroy();
} }
return r;
} }
static int static int
......
...@@ -14,7 +14,7 @@ extern "C" { ...@@ -14,7 +14,7 @@ extern "C" {
int toku_ydb_init(void); int toku_ydb_init(void);
// Called when the ydb library is unloaded. // Called when the ydb library is unloaded.
int toku_ydb_destroy(void); void toku_ydb_destroy(void);
// db_env_create for the trace library // db_env_create for the trace library
int db_env_create_toku10(DB_ENV **, u_int32_t) __attribute__((__visibility__("default"))); int db_env_create_toku10(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));
......
...@@ -21,8 +21,7 @@ static void __attribute__((constructor)) libtokudb_init(void) { ...@@ -21,8 +21,7 @@ static void __attribute__((constructor)) libtokudb_init(void) {
static void __attribute__((destructor)) libtokudb_destroy(void) { static void __attribute__((destructor)) libtokudb_destroy(void) {
// printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__); // printf("%s:%s:%d\n", __FILE__, __FUNCTION__, __LINE__);
int r = toku_ydb_destroy(); toku_ydb_destroy();
assert(r==0);
} }
#endif #endif
...@@ -40,7 +39,7 @@ BOOL WINAPI DllMain(HINSTANCE h, DWORD reason, LPVOID reserved) { ...@@ -40,7 +39,7 @@ BOOL WINAPI DllMain(HINSTANCE h, DWORD reason, LPVOID reserved) {
r = toku_ydb_init(); r = toku_ydb_init();
break; break;
case DLL_PROCESS_DETACH: case DLL_PROCESS_DETACH:
r = toku_ydb_destroy(); toku_ydb_destroy();
break; break;
case DLL_THREAD_ATTACH: case DLL_THREAD_ATTACH:
//TODO: Any new thread code if necessary, i.e. allocate per-thread //TODO: Any new thread code if necessary, i.e. allocate per-thread
......
...@@ -374,6 +374,39 @@ do_del_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT keys[], DB *s ...@@ -374,6 +374,39 @@ do_del_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT keys[], DB *s
return r; return r;
} }
//
// if a hot index is in progress, gets the indexer
// also verifies that there is at most one hot index
// in progress. If it finds more than one, then returns EINVAL
//
static int
get_indexer_if_exists(
uint32_t num_dbs,
DB **db_array,
DB_INDEXER** indexerp
)
{
int r = 0;
DB_INDEXER* first_indexer = NULL;
for (uint32_t i = 0; i < num_dbs; i++) {
DB_INDEXER* indexer = toku_db_get_indexer(db_array[i]);
if (indexer) {
if (!first_indexer) {
first_indexer = indexer;
}
else {
if (first_indexer != indexer) {
r = EINVAL;
}
}
}
}
if (r == 0) {
*indexerp = first_indexer;
}
return r;
}
int int
env_del_multiple( env_del_multiple(
DB_ENV *env, DB_ENV *env,
...@@ -388,6 +421,7 @@ env_del_multiple( ...@@ -388,6 +421,7 @@ env_del_multiple(
{ {
int r; int r;
DBT del_keys[num_dbs]; DBT del_keys[num_dbs];
DB_INDEXER* indexer = NULL;
HANDLE_PANICKED_ENV(env); HANDLE_PANICKED_ENV(env);
...@@ -401,6 +435,10 @@ env_del_multiple( ...@@ -401,6 +435,10 @@ env_del_multiple(
} }
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn); HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
{ {
uint32_t lock_flags[num_dbs]; uint32_t lock_flags[num_dbs];
...@@ -442,7 +480,10 @@ env_del_multiple( ...@@ -442,7 +480,10 @@ env_del_multiple(
brts[which_db] = db->i->ft_handle; brts[which_db] = db->i->ft_handle;
} }
toku_ydb_lock(); if (indexer) {
toku_indexer_lock(indexer);
}
toku_multi_operation_client_lock();
if (num_dbs == 1) { if (num_dbs == 1) {
r = log_del_single(txn, brts[0], &del_keys[0]); r = log_del_single(txn, brts[0], &del_keys[0]);
} }
...@@ -452,7 +493,10 @@ env_del_multiple( ...@@ -452,7 +493,10 @@ env_del_multiple(
if (r == 0) if (r == 0)
r = do_del_multiple(txn, num_dbs, db_array, del_keys, src_db, src_key); r = do_del_multiple(txn, num_dbs, db_array, del_keys, src_db, src_key);
} }
toku_ydb_unlock(); toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
cleanup: cleanup:
if (r == 0) if (r == 0)
...@@ -527,6 +571,7 @@ env_put_multiple_internal( ...@@ -527,6 +571,7 @@ env_put_multiple_internal(
int r; int r;
DBT put_keys[num_dbs]; DBT put_keys[num_dbs];
DBT put_vals[num_dbs]; DBT put_vals[num_dbs];
DB_INDEXER* indexer = NULL;
HANDLE_PANICKED_ENV(env); HANDLE_PANICKED_ENV(env);
...@@ -544,6 +589,10 @@ env_put_multiple_internal( ...@@ -544,6 +589,10 @@ env_put_multiple_internal(
} }
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn); HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
for (uint32_t which_db = 0; which_db < num_dbs; which_db++) { for (uint32_t which_db = 0; which_db < num_dbs; which_db++) {
DB *db = db_array[which_db]; DB *db = db_array[which_db];
...@@ -587,7 +636,10 @@ env_put_multiple_internal( ...@@ -587,7 +636,10 @@ env_put_multiple_internal(
brts[which_db] = db->i->ft_handle; brts[which_db] = db->i->ft_handle;
} }
toku_ydb_lock(); if (indexer) {
toku_indexer_lock(indexer);
}
toku_multi_operation_client_lock();
if (num_dbs == 1) { if (num_dbs == 1) {
r = log_put_single(txn, brts[0], &put_keys[0], &put_vals[0]); r = log_put_single(txn, brts[0], &put_keys[0], &put_vals[0]);
} }
...@@ -597,7 +649,10 @@ env_put_multiple_internal( ...@@ -597,7 +649,10 @@ env_put_multiple_internal(
if (r == 0) { if (r == 0) {
r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, src_db, src_key); r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, src_db, src_key);
} }
toku_ydb_unlock(); toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
cleanup: cleanup:
if (r == 0) if (r == 0)
...@@ -617,6 +672,7 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, ...@@ -617,6 +672,7 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
int r = 0; int r = 0;
HANDLE_PANICKED_ENV(env); HANDLE_PANICKED_ENV(env);
DB_INDEXER* indexer = NULL;
if (!txn) { if (!txn) {
r = EINVAL; r = EINVAL;
...@@ -628,6 +684,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, ...@@ -628,6 +684,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
} }
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn); HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
r = get_indexer_if_exists(num_dbs, db_array, &indexer);
if (r) {
goto cleanup;
}
{ {
uint32_t n_del_dbs = 0; uint32_t n_del_dbs = 0;
...@@ -731,9 +791,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, ...@@ -731,9 +791,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
n_put_dbs++; n_put_dbs++;
} }
} }
// grab the ydb lock for the actual work that if (indexer) {
// depends on it toku_indexer_lock(indexer);
toku_ydb_lock(); }
toku_multi_operation_client_lock();
if (r == 0 && n_del_dbs > 0) { if (r == 0 && n_del_dbs > 0) {
if (n_del_dbs == 1) if (n_del_dbs == 1)
r = log_del_single(txn, del_fts[0], &del_keys[0]); r = log_del_single(txn, del_fts[0], &del_keys[0]);
...@@ -751,7 +812,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, ...@@ -751,7 +812,10 @@ env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
if (r == 0) if (r == 0)
r = do_put_multiple(txn, n_put_dbs, put_dbs, put_keys, put_vals, src_db, new_src_key); r = do_put_multiple(txn, n_put_dbs, put_dbs, put_keys, put_vals, src_db, new_src_key);
} }
toku_ydb_unlock(); toku_multi_operation_client_unlock();
if (indexer) {
toku_indexer_unlock(indexer);
}
} }
cleanup: cleanup:
......
...@@ -206,7 +206,7 @@ int toku_set_func_fclose(int (*)(FILE*)); ...@@ -206,7 +206,7 @@ int toku_set_func_fclose(int (*)(FILE*));
int toku_set_func_read(ssize_t (*)(int, void *, size_t)); int toku_set_func_read(ssize_t (*)(int, void *, size_t));
int toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)); int toku_set_func_pread (ssize_t (*)(int, void *, size_t, off_t));
int toku_portability_init (void); int toku_portability_init (void);
int toku_portability_destroy (void); void toku_portability_destroy (void);
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment