Commit 3d295670 authored by Barry Perlman's avatar Barry Perlman Committed by Yoni Fogel

Addresses #2075, #2094 refs[t:2075] refs[t:2094]

Merge 2.2.0 scheduling statistics to 3.0.0 (#2075).
Restore #2094 fix to 3.0.0 (cachefiles_lock).
Command line for merge (executed in tokudb.2037b, ../tokudb.2.2.0 is local sandbox image of 2.2.0 release): svn merge -r14900:HEAD ../tokudb.2.2.0

git-svn-id: file:///svn/toku/tokudb.2037b@15629 c7de825b-a66e-492c-adef-691d508d4ae1
parent ebc7c205
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -171,8 +187,9 @@ struct __toku_db_env {
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
void *app_private; /* 32-bit offset=36 size=4, 64=bit offset=72 size=8 */
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void* __toku_dummy0[25];
void* __toku_dummy0[24];
char __toku_dummy1[64];
void *api1_internal; /* 32-bit offset=212 size=4, 64=bit offset=360 size=8 */
void* __toku_dummy2[7];
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -172,9 +188,10 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
void *app_private; /* 32-bit offset=44 size=4, 64=bit offset=88 size=8 */
void* __toku_dummy0[25];
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void* __toku_dummy0[24];
char __toku_dummy1[96];
void *api1_internal; /* 32-bit offset=244 size=4, 64=bit offset=392 size=8 */
void* __toku_dummy2[7];
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -173,9 +189,10 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
void *app_private; /* 32-bit offset=44 size=4, 64=bit offset=88 size=8 */
void* __toku_dummy0[40];
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void* __toku_dummy0[39];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[7];
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -173,8 +189,9 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void* __toku_dummy0[2];
void* __toku_dummy0[1];
void *app_private; /* 32-bit offset=52 size=4, 64=bit offset=104 size=8 */
void* __toku_dummy1[38];
char __toku_dummy2[128];
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -175,8 +191,9 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void* __toku_dummy0[2];
void* __toku_dummy0[1];
void *app_private; /* 32-bit offset=52 size=4, 64=bit offset=104 size=8 */
void* __toku_dummy1[39];
char __toku_dummy2[144];
......
......@@ -342,29 +342,44 @@ int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__un
printf(" u_int64_t bt_fsize; /* how big is the underlying file */\n");
printf("} DB_BTREE_STAT64;\n");
//engine status info
printf("typedef struct __toku_engine_status {\n");
printf(" char now[26]; /* time of engine status query (i.e. now) */ \n");
printf(" u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */ \n");
printf(" u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */ \n");
printf(" u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */ \n");
printf(" u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */ \n");
printf(" u_int64_t processor_freq_mhz; /* clock frequency in MHz */ \n");
printf(" u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */ \n");
printf(" u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */ \n");
printf(" u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */ \n");
printf(" u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */ \n");
printf(" u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */ \n");
printf(" u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */ \n");
printf(" u_int64_t total_clients; /* total number of separate client threads that use ydb lock */ \n");
printf(" u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */ \n");
printf(" u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */ \n");
printf(" u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */ \n");
printf(" u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */ \n");
printf(" u_int32_t checkpoint_period; /* delay between automatic checkpoints */ \n");
printf(" u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ \n");
printf(" char checkpoint_time_begin[26]; /* time of last checkpoint begin */ \n");
printf(" char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */ \n");
printf(" char checkpoint_time_end[26]; /* time of last checkpoint end */ \n");
printf(" u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */ \n");
printf(" u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */ \n");
printf(" u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */ \n");
printf(" u_int64_t cachetable_hit; /* how many cache hits */ \n");
printf(" u_int64_t cachetable_miss; /* how many cache misses */ \n");
printf(" u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */ \n");
printf(" u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */ \n");
printf(" u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */ \n");
printf(" u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ \n");
printf(" u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ \n");
printf(" u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ \n");
printf(" u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */ \n");
printf(" u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */ \n");
printf(" u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ \n");
printf(" u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */ \n");
printf(" int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */ \n");
printf(" int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */ \n");
printf(" int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */ \n");
printf(" int64_t get_and_pin_footprint; /* state of get_and_pin procedure */ \n");
printf(" u_int32_t range_locks_max; /* max total number of range locks */ \n");
printf(" u_int32_t range_locks_max_per_db; /* max range locks per dictionary */ \n");
printf(" u_int32_t range_locks_curr; /* total range locks currently in use */ \n");
......@@ -374,13 +389,6 @@ int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__un
printf(" u_int64_t aborts; /* ydb txn abort operations */ \n");
printf(" u_int64_t point_queries; /* ydb point queries */ \n");
printf(" u_int64_t sequential_queries; /* ydb sequential queries */ \n");
// printf(" struct timeval checkpoint_tbegin; /* time of last checkpoint begin */ \n");
// printf(" struct timeval checkpoint_tend; /* time of last checkpoint end */ \n");
// printf(" DB_LSN lsn_of_last_checkpoint_begin; \n");
// printf(" DB_LSN lsn_of_last_checkpoint_end; \n");
printf("} ENGINE_STATUS;\n");
......@@ -405,6 +413,7 @@ int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__un
"int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */",
"int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */",
"int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */",
"int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */",
"int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */",
NULL};
print_struct("db_env", 1, db_env_fields32, db_env_fields64, sizeof(db_env_fields32)/sizeof(db_env_fields32[0]), extra);
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -175,6 +191,7 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void *app_private;
void *api1_internal;
......
......@@ -24,11 +24,13 @@ u_int32_t cachesize = 127*1024*1024;
static int do_mysql = 0;
static u_int64_t start_range = 0, end_range = 0;
static int n_experiments = 2;
static int verbose = 0;
static const char *log_dir = NULL;
static int print_usage (const char *argv0) {
fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --nohwc] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR]\n", argv0);
fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --nohwc] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR] [--verbose]\n", argv0);
fprintf(stderr, " --hwc run heavy weight cursors (this is the default)\n");
fprintf(stderr, " --verify-lwc means to run the light weight cursor and the heavyweight cursor to verify that they get the same answer.\n");
fprintf(stderr, " --flatten Flatten only using special flatten function\n");
......@@ -42,6 +44,7 @@ static int print_usage (const char *argv0) {
fprintf(stderr, " --mysql compare keys that are mysql big int not null types\n");
fprintf(stderr, " --env DIR put db files in DIR instead of default\n");
fprintf(stderr, " --log_dir LOGDIR put the logs in LOGDIR\n");
fprintf(stderr, " --verbose print verbose information\n");
return 1;
}
......@@ -111,6 +114,8 @@ static void parse_args (int argc, const char *argv[]) {
log_dir = *argv;
} else if (strcmp(*argv, "--mysql") == 0) {
do_mysql = 1;
} else if (strcmp(*argv, "--verbose") == 0) {
verbose = 1;
} else if (strcmp(*argv, "--range") == 0 && argc > 2) {
run_mode = RUN_RANGE;
argc--; argv++;
......@@ -195,6 +200,7 @@ static void scanscan_shutdown (void) {
r = tid->commit(tid, 0); assert(r==0);
}
r = env->close(env, 0); assert(r==0);
env = NULL;
#if 0 && defined TOKUDB
{
......@@ -207,6 +213,20 @@ static void scanscan_shutdown (void) {
#endif
}
static void print_engine_status(void) {
#if defined TOKUDB
if (verbose) {
int buffsize = 1024 * 32;
char buff[buffsize];
env->get_engine_status_text(env, buff, buffsize);
printf("Engine status:\n");
printf(buff);
}
#endif
}
static void scanscan_hwc (void) {
int r;
int counter=0;
......@@ -233,6 +253,7 @@ static void scanscan_hwc (void) {
double thistime = gettime();
double tdiff = thistime-prevtime;
printf("Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", totalbytes, rowcounter, tdiff, 1e-6*totalbytes/tdiff);
print_engine_status();
}
}
......@@ -279,6 +300,7 @@ static void scanscan_lwc (void) {
double thistime = gettime();
double tdiff = thistime-prevtime;
printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff);
print_engine_status();
}
}
......@@ -327,6 +349,7 @@ static void scanscan_range (void) {
if (limitcount>0 && rowcounter>=limitcount) break;
}
#endif
print_engine_status();
}
r = dbc->c_close(dbc);
......@@ -429,6 +452,7 @@ static void scanscan_heaviside (void) {
double thistime = gettime();
double tdiff = thistime-prevtime;
printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff);
print_engine_status();
}
}
......@@ -484,6 +508,7 @@ static void scanscan_verify (void) {
double thistime = gettime();
double tdiff = thistime-prevtime;
printf("verify %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", v.totalbytes, v.rowcounter, tdiff, 1e-6*v.totalbytes/tdiff);
print_engine_status();
}
}
......
......@@ -44,25 +44,41 @@ typedef struct __toku_db_btree_stat64 {
} DB_BTREE_STAT64;
typedef struct __toku_engine_status {
char now[26]; /* time of engine status query (i.e. now) */
u_int32_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int32_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released */
u_int64_t max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
u_int64_t processor_freq_mhz; /* clock frequency in MHz */
u_int64_t max_requested_sleep; /* max sleep time requested, can be larger than max possible */
u_int64_t times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
u_int64_t total_sleepers; /* total number of times a client slept for ydb lock scheduling */
u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
u_int64_t total_waiters; /* total number of times a client thread waited for ydb lock */
u_int64_t total_clients; /* total number of separate client threads that use ydb lock */
u_int64_t time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
u_int64_t max_time_ydb_lock_held; /* max time a client thread held the ydb lock */
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
u_int64_t logger_lock_ctr; /* how many times has logger lock been taken/released */
u_int32_t checkpoint_period; /* delay between automatic checkpoints */
u_int32_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
u_int32_t cachetable_lock_ctr; /* how many times has cachetable lock been taken/released */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has get_and_pin been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin() returned with a node */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
int64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
int64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
......@@ -175,6 +191,7 @@ struct __toku_db_env {
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*set_default_dup_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (val) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*) /* Fill in status struct */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* lookup existing iname */;
void *app_private;
void *api1_internal;
......
......@@ -20,6 +20,7 @@
#include "minicron.h"
#include "log-internal.h"
#if !defined(TOKU_CACHETABLE_DO_EVICT_FROM_WRITER)
#error
#endif
......@@ -54,14 +55,15 @@ static u_int64_t cachetable_wait_reading; // how many times does get_and_pin()
static u_int64_t cachetable_wait_writing; // how many times does get_and_pin() wait for a node to be written?
static u_int64_t cachetable_puts; // how many times has a newly created node been put into the cachetable?
static u_int64_t cachetable_prefetches; // how many times has a block been prefetched into the cachetable?
static u_int64_t cachetable_maybe_get_and_pins; // how many times has get_and_pin been called?
static u_int64_t cachetable_maybe_get_and_pin_hits; // how many times has get_and_pin() returned with a node?
static u_int64_t cachetable_maybe_get_and_pins; // how many times has maybe_get_and_pin(_clean) been called?
static u_int64_t cachetable_maybe_get_and_pin_hits; // how many times has get_and_pin(_clean) returned with a node?
#if TOKU_DO_WAIT_TIME
static u_int64_t cachetable_misstime;
static u_int64_t cachetable_waittime;
static u_int64_t cachetable_misstime; // time spent waiting for disk read
static u_int64_t cachetable_waittime; // time spent waiting for another thread to release lock (e.g. prefetch, writing)
#endif
static u_int32_t cachetable_lock_ctr = 0;
static u_int64_t cachetable_lock_taken = 0;
static u_int64_t cachetable_lock_released = 0;
enum ctpair_state {
CTPAIR_INVALID = 0, // invalid
......@@ -134,6 +136,10 @@ static inline void ctpair_destroy(PAIR p) {
}
// The cachetable is as close to an ENV as we get.
// There are 3 locks, must be taken in this order
// openfd_mutex
// cachetable_mutex
// cachefiles_mutex
struct cachetable {
enum typ_tag tag;
u_int32_t n_in_table; // number of pairs in the hash table
......@@ -147,6 +153,7 @@ struct cachetable {
int64_t size_writing; // the sum of the sizes of the pairs being written
TOKULOGGER logger;
toku_pthread_mutex_t *mutex; // coarse lock that protects the cachetable, the cachefiles, and the pairs
toku_pthread_mutex_t cachefiles_mutex; // lock that protects the cachefiles list
struct workqueue wq; // async work queue
THREADPOOL threadpool; // pool of worker threads
LSN lsn_of_checkpoint_in_progress;
......@@ -158,18 +165,28 @@ struct cachetable {
};
// Lock the cachetable
static inline void cachefiles_lock(CACHETABLE ct) {
int r = toku_pthread_mutex_lock(&ct->cachefiles_mutex); assert(r == 0);
}
// Unlock the cachetable
static inline void cachefiles_unlock(CACHETABLE ct) {
int r = toku_pthread_mutex_unlock(&ct->cachefiles_mutex); assert(r == 0);
}
// Lock the cachetable
static inline void cachetable_lock(CACHETABLE ct __attribute__((unused))) {
#if DO_CACHETABLE_LOCK
int r = toku_pthread_mutex_lock(ct->mutex); assert(r == 0);
cachetable_lock_ctr++;
cachetable_lock_taken++;
#endif
}
// Unlock the cachetable
static inline void cachetable_unlock(CACHETABLE ct __attribute__((unused))) {
#if DO_CACHETABLE_LOCK
cachetable_lock_ctr++;
cachetable_lock_released++;
int r = toku_pthread_mutex_unlock(ct->mutex); assert(r == 0);
#endif
}
......@@ -256,6 +273,7 @@ int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN UU(initial_l
toku_init_workers(&ct->wq, &ct->threadpool);
ct->mutex = workqueue_lock_ref(&ct->wq);
int r = toku_pthread_mutex_init(&ct->openfd_mutex, NULL); assert(r == 0);
r = toku_pthread_mutex_init(&ct->cachefiles_mutex, 0); assert(r == 0);
toku_minicron_setup(&ct->checkpointer, 0, checkpoint_thread, ct); // default is no checkpointing
r = toku_leaflock_create(&ct->leaflock_pool); assert(r==0);
*result = ct;
......@@ -311,16 +329,18 @@ restart:
// This function can only be called if the brt is still open, so file must
// still be open and cannot be in the is_closing state.
int toku_cachefile_of_filenum (CACHETABLE ct, FILENUM filenum, CACHEFILE *cf) {
cachefiles_lock(ct);
CACHEFILE extant;
int r = ENOENT;
for (extant = ct->cachefiles; extant; extant=extant->next) {
if (extant->filenum.fileid==filenum.fileid) {
assert(!extant->is_closing);
*cf = extant;
r = 0;
r = 0;
break;
}
}
cachefiles_unlock(ct);
return r;
}
......@@ -350,15 +370,18 @@ int toku_cachetable_openfd_with_filenum (CACHEFILE *cfptr, CACHETABLE ct, int fd
r = toku_pthread_mutex_lock(&ct->openfd_mutex); // purpose is to make this function single-threaded
assert(r==0);
cachetable_lock(ct);
cachefiles_lock(ct);
for (extant = ct->cachefiles; extant; extant=extant->next) {
if (memcmp(&extant->fileid, &fileid, sizeof(fileid))==0) {
//File is already open (and in cachetable as extant)
cachefile_refup(extant);
if (extant->is_closing) {
// if another thread is closing this file, wait until the close is fully complete
cachefiles_unlock(ct); //Cannot hold cachefiles lock over the cond_wait
r = toku_pthread_cond_wait(&extant->openfd_wait, ct->mutex);
assert(r == 0);
break; // other thread has closed this file, go create a new cachefile
cachefiles_lock(ct);
goto try_again; // other thread has closed this file, go create a new cachefile
}
r = close(fd);
assert(r == 0);
......@@ -406,6 +429,7 @@ int toku_cachetable_openfd_with_filenum (CACHEFILE *cfptr, CACHETABLE ct, int fd
r = 0;
}
exit:
cachefiles_unlock(ct);
{
int rm = toku_pthread_mutex_unlock(&ct->openfd_mutex);
assert (rm == 0);
......@@ -484,16 +508,23 @@ toku_cachefile_truncate (CACHEFILE cf, toku_off_t new_size) {
return r;
}
static CACHEFILE remove_cf_from_list (CACHEFILE cf, CACHEFILE list) {
static CACHEFILE remove_cf_from_list_locked (CACHEFILE cf, CACHEFILE list) {
if (list==0) return 0;
else if (list==cf) {
return list->next;
} else {
list->next = remove_cf_from_list(cf, list->next);
list->next = remove_cf_from_list_locked(cf, list->next);
return list;
}
}
static void remove_cf_from_cachefiles_list (CACHEFILE cf) {
CACHETABLE ct = cf->cachetable;
cachefiles_lock(ct);
ct->cachefiles = remove_cf_from_list_locked(cf, ct->cachefiles);
cachefiles_unlock(ct);
}
static int cachetable_flush_cachefile (CACHETABLE, CACHEFILE cf);
int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger, char **error_string, BOOL oplsn_valid, LSN oplsn) {
......@@ -518,7 +549,7 @@ int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger, char **error_string
// - toku_cachefile_of_iname_and_add_reference()
if ((r = cachetable_flush_cachefile(ct, cf))) {
error:
cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles);
remove_cf_from_cachefiles_list(cf);
if (cf->refcount > 0) {
int rs;
assert(cf->refcount == 1); // toku_cachetable_openfd() is single-threaded
......@@ -560,7 +591,7 @@ int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger, char **error_string
cf->begin_checkpoint_userdata = NULL;
cf->end_checkpoint_userdata = NULL;
cf->userdata = NULL;
cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles);
remove_cf_from_cachefiles_list(cf);
// refcount could be non-zero if another thread is trying to open this cachefile,
// but is blocked in toku_cachetable_openfd() waiting for us to finish closing it.
if (cf->refcount > 0) {
......@@ -1154,7 +1185,7 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
cachetable_wait_writing++;
#if TOKU_DO_WAIT_TIME
do_wait_time = 1;
t0 = get_tnow();
t0 = get_tnow();
#endif
}
if (p->checkpoint_pending) {
......@@ -1216,7 +1247,7 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
get_and_pin_footprint = 10;
rwlock_write_lock(&p->rwlock, ct->mutex);
#if TOKU_DO_WAIT_TIME
uint64_t t0 = get_tnow();
uint64_t t0 = get_tnow();
#endif
r = cachetable_fetch_pair(ct, cachefile, p);
if (r) {
......@@ -1601,6 +1632,7 @@ toku_cachetable_close (CACHETABLE *ctp) {
cachetable_unlock(ct);
toku_destroy_workers(&ct->wq, &ct->threadpool);
r = toku_leaflock_destroy(&ct->leaflock_pool); assert(r==0);
r = toku_pthread_mutex_destroy(&ct->cachefiles_mutex); assert(r == 0);
toku_free(ct->table);
toku_free(ct);
*ctp = 0;
......@@ -1685,6 +1717,7 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
{
CACHEFILE cf;
assert(ct->cachefiles_in_checkpoint==NULL);
cachefiles_lock(ct);
for (cf = ct->cachefiles; cf; cf=cf->next) {
assert(!cf->is_closing); //Closing requires ydb lock (or in checkpoint). Cannot happen.
assert(cf->refcount>0); //Must have a reference if not closing.
......@@ -1695,6 +1728,7 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
ct->cachefiles_in_checkpoint = cf;
cf->for_checkpoint = TRUE;
}
cachefiles_unlock(ct);
}
if (logger) {
......@@ -1714,12 +1748,14 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
{
//Must loop through ALL open files (even if not included in checkpoint).
CACHEFILE cf;
cachefiles_lock(ct);
for (cf = ct->cachefiles; cf; cf=cf->next) {
if (cf->log_fassociate_during_checkpoint) {
int r = cf->log_fassociate_during_checkpoint(cf, cf->userdata);
assert(r==0);
}
}
cachefiles_unlock(ct);
}
}
......@@ -1751,12 +1787,14 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
//Once marked as pending, we own write locks on the pairs, which means the writer threads can't conflict.
{
CACHEFILE cf;
cachefiles_lock(ct);
for (cf = ct->cachefiles_in_checkpoint; cf; cf=cf->next_in_checkpoint) {
if (cf->begin_checkpoint_userdata) {
int r = cf->begin_checkpoint_userdata(cf, ct->lsn_of_checkpoint_in_progress, cf->userdata);
assert(r==0);
}
}
cachefiles_unlock(ct);
}
cachetable_unlock(ct);
......@@ -2057,9 +2095,12 @@ toku_cachefile_size_in_memory(CACHEFILE cf)
}
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s) {
s->lock_ctr = cachetable_lock_ctr;
s->lock_taken = cachetable_lock_taken;
s->lock_released = cachetable_lock_released;
s->hit = cachetable_hit;
s->miss = cachetable_miss;
s->misstime = cachetable_misstime;
s->waittime = cachetable_waittime;
s->wait_reading = cachetable_wait_reading;
s->wait_writing = cachetable_wait_writing;
s->puts = cachetable_puts;
......@@ -2069,4 +2110,5 @@ void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s) {
s->size_current = ct->size_current;
s->size_limit = ct->size_limit;
s->size_writing = ct->size_writing;
s->get_and_pin_footprint = get_and_pin_footprint;
}
......@@ -249,19 +249,24 @@ void toku_cachetable_maybe_flush_some(CACHETABLE ct);
u_int64_t toku_cachefile_size_in_memory(CACHEFILE cf);
typedef struct cachetable_status {
u_int32_t lock_ctr;
u_int64_t lock_taken;
u_int64_t lock_released;
u_int64_t hit;
u_int64_t miss;
u_int64_t misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t wait_reading;
u_int64_t wait_writing;
u_int64_t puts; // how many times has a newly created node been put into the cachetable?
u_int64_t prefetches; // how many times has a block been prefetched into the cachetable?
u_int64_t maybe_get_and_pins; // how many times has get_and_pin been called?
u_int64_t maybe_get_and_pin_hits; // how many times has get_and_pin() returned with a node?
u_int64_t maybe_get_and_pins; // how many times has maybe_get_and_pin(_clean) been called?
u_int64_t maybe_get_and_pin_hits; // how many times has maybe_get_and_pin(_clean) returned with a node?
int64_t size_current; // the sum of the sizes of the nodes represented in the cachetable
int64_t size_limit; // the limit to the sum of the node sizes
int64_t size_writing; // the sum of the sizes of the nodes being written
u_int64_t get_and_pin_footprint;
} CACHETABLE_STATUS_S, *CACHETABLE_STATUS;
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
......
......@@ -93,7 +93,7 @@ toku_rollback_fcreate (FILENUM filenum,
char *fname = fixup_fname(&bs_fname);
//Remove reference to the fd in the cachetable
CACHEFILE cf;
CACHEFILE cf = NULL;
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
assert(r == 0);
{
......
......@@ -215,7 +215,3 @@ toku_ydb_unlock(void) {
}
int
toku_ydb_lock_ctr(void) {
return status.ydb_lock_ctr;
}
......@@ -87,13 +87,16 @@ typedef struct {
u_int64_t total_time_ydb_lock_held;/* total time client threads held the ydb lock */
} SCHEDULE_STATUS_S, *SCHEDULE_STATUS;
int toku_ydb_lock_init(void);
int toku_ydb_lock_destroy(void);
void toku_ydb_lock(void);
void toku_ydb_unlock(void);
int toku_ydb_lock_ctr(void);
void toku_ydb_lock_get_status(SCHEDULE_STATUS statp);
/* *********************************************************
Error handling
......
......@@ -1117,8 +1117,27 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
else {
time_t now = time(NULL);
format_time(&now, engstat->now);
engstat->ydb_lock_ctr = toku_ydb_lock_ctr(); // is ydb lock held? how many times taken/released?
engstat->logger_lock_ctr = toku_logger_get_lock_ctr(); // is logger lock held? how many times taken/released?
engstat->logger_lock_ctr = toku_logger_get_lock_ctr();
{
SCHEDULE_STATUS_S schedstat;
toku_ydb_lock_get_status(&schedstat);
engstat->ydb_lock_ctr = schedstat.ydb_lock_ctr; /* how many times has ydb lock been taken/released */
engstat->max_possible_sleep = schedstat.max_possible_sleep; /* max possible sleep time for ydb lock scheduling (constant) */
engstat->processor_freq_mhz = schedstat.processor_freq_mhz; /* clock frequency in MHz */
engstat->max_requested_sleep = schedstat.max_requested_sleep; /* max sleep time requested, can be larger than max possible */
engstat->times_max_sleep_used = schedstat.times_max_sleep_used; /* number of times the max_possible_sleep was used to sleep */
engstat->total_sleepers = schedstat.total_sleepers; /* total number of times a client slept for ydb lock scheduling */
engstat->total_sleep_time = schedstat.total_sleep_time; /* total time spent sleeping for ydb lock scheduling */
engstat->max_waiters = schedstat.max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock */
engstat->total_waiters = schedstat.total_waiters; /* total number of times a client thread waited for ydb lock */
engstat->total_clients = schedstat.total_clients; /* total number of separate client threads that use ydb lock */
engstat->time_ydb_lock_held_unavailable = schedstat.time_ydb_lock_held_unavailable; /* number of times a thread migrated and theld is unavailable */
engstat->total_time_ydb_lock_held = schedstat.total_time_ydb_lock_held;/* total time client threads held the ydb lock */
engstat->max_time_ydb_lock_held = schedstat.max_time_ydb_lock_held; /* max time client threads held the ydb lock */
}
env_checkpointing_get_period(env, &(engstat->checkpoint_period)); // do not take ydb lock (take minicron lock, but that's a very ephemeral low-level lock)
{
CHECKPOINT_STATUS_S cpstat;
......@@ -1131,18 +1150,22 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
{
CACHETABLE_STATUS_S ctstat;
toku_cachetable_get_status(env->i->cachetable, &ctstat);
engstat->cachetable_lock_ctr = ctstat.lock_ctr;
engstat->cachetable_hit = ctstat.hit;
engstat->cachetable_miss = ctstat.miss;
engstat->cachetable_wait_reading = ctstat.wait_reading;
engstat->cachetable_wait_writing = ctstat.wait_writing;
engstat->puts = ctstat.puts;
engstat->prefetches = ctstat.prefetches;
engstat->maybe_get_and_pins = ctstat.maybe_get_and_pins;
engstat->maybe_get_and_pin_hits = ctstat.maybe_get_and_pin_hits;
engstat->cachetable_size_current = ctstat.size_current;
engstat->cachetable_size_limit = ctstat.size_limit;
engstat->cachetable_size_writing = ctstat.size_writing;
engstat->cachetable_lock_taken = ctstat.lock_taken;
engstat->cachetable_lock_released = ctstat.lock_released;
engstat->cachetable_hit = ctstat.hit;
engstat->cachetable_miss = ctstat.miss;
engstat->cachetable_misstime = ctstat.misstime;
engstat->cachetable_waittime = ctstat.waittime;
engstat->cachetable_wait_reading = ctstat.wait_reading;
engstat->cachetable_wait_writing = ctstat.wait_writing;
engstat->puts = ctstat.puts;
engstat->prefetches = ctstat.prefetches;
engstat->maybe_get_and_pins = ctstat.maybe_get_and_pins;
engstat->maybe_get_and_pin_hits = ctstat.maybe_get_and_pin_hits;
engstat->cachetable_size_current = ctstat.size_current;
engstat->cachetable_size_limit = ctstat.size_limit;
engstat->cachetable_size_writing = ctstat.size_writing;
engstat->get_and_pin_footprint = ctstat.get_and_pin_footprint;
}
{
toku_ltm* ltm = env->i->ltm;
......@@ -1162,6 +1185,71 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
return r;
}
// Fill buff with text description of engine status up to bufsiz bytes.
// Intended for use by test programs that do not have the handlerton available.
static int
env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
ENGINE_STATUS engstat;
int r = env_get_engine_status(env, &engstat);
int n = 0; // number of characters printed so far
n += snprintf(buff + n, bufsiz - n, "now %s \n", engstat.now);
n += snprintf(buff + n, bufsiz - n, "ydb_lock_ctr %"PRIu64"\n", engstat.ydb_lock_ctr);
n += snprintf(buff + n, bufsiz - n, "max_possible_sleep %"PRIu64"\n", engstat.max_possible_sleep);
n += snprintf(buff + n, bufsiz - n, "processor_freq_mhz %"PRIu64"\n", engstat.processor_freq_mhz);
n += snprintf(buff + n, bufsiz - n, "max_requested_sleep %"PRIu64"\n", engstat.max_requested_sleep);
n += snprintf(buff + n, bufsiz - n, "times_max_sleep_used %"PRIu64"\n", engstat.times_max_sleep_used);
n += snprintf(buff + n, bufsiz - n, "total_sleepers %"PRIu64"\n", engstat.total_sleepers);
n += snprintf(buff + n, bufsiz - n, "total_sleep_time %"PRIu64"\n", engstat.total_sleep_time);
n += snprintf(buff + n, bufsiz - n, "max_waiters %"PRIu64"\n", engstat.max_waiters);
n += snprintf(buff + n, bufsiz - n, "total_waiters %"PRIu64"\n", engstat.total_waiters);
n += snprintf(buff + n, bufsiz - n, "total_clients %"PRIu64"\n", engstat.total_clients);
n += snprintf(buff + n, bufsiz - n, "time_ydb_lock_held_unavailable %"PRIu64"\n", engstat.time_ydb_lock_held_unavailable);
n += snprintf(buff + n, bufsiz - n, "max_time_ydb_lock_held %"PRIu64"\n", engstat.max_time_ydb_lock_held);
n += snprintf(buff + n, bufsiz - n, "total_time_ydb_lock_held %"PRIu64"\n", engstat.total_time_ydb_lock_held);
n += snprintf(buff + n, bufsiz - n, "logger_lock_ctr %"PRIu64"\n", engstat.logger_lock_ctr);
n += snprintf(buff + n, bufsiz - n, "checkpoint_period %d \n", engstat.checkpoint_period);
n += snprintf(buff + n, bufsiz - n, "checkpoint_footprint %d \n", engstat.checkpoint_footprint);
n += snprintf(buff + n, bufsiz - n, "checkpoint_time_begin %s \n", engstat.checkpoint_time_begin);
n += snprintf(buff + n, bufsiz - n, "checkpoint_time_begin_complete %s \n", engstat.checkpoint_time_begin_complete);
n += snprintf(buff + n, bufsiz - n, "checkpoint_time_end %s \n", engstat.checkpoint_time_end);
n += snprintf(buff + n, bufsiz - n, "cachetable_lock_taken %"PRIu64"\n", engstat.cachetable_lock_taken);
n += snprintf(buff + n, bufsiz - n, "cachetable_lock_released %"PRIu64"\n", engstat.cachetable_lock_released);
n += snprintf(buff + n, bufsiz - n, "cachetable_hit %"PRIu64"\n", engstat.cachetable_hit);
n += snprintf(buff + n, bufsiz - n, "cachetable_miss %"PRIu64"\n", engstat.cachetable_miss);
n += snprintf(buff + n, bufsiz - n, "cachetable_misstime %"PRIu64"\n", engstat.cachetable_misstime);
n += snprintf(buff + n, bufsiz - n, "cachetable_waittime %"PRIu64"\n", engstat.cachetable_waittime);
n += snprintf(buff + n, bufsiz - n, "cachetable_wait_reading %"PRIu64"\n", engstat.cachetable_wait_reading);
n += snprintf(buff + n, bufsiz - n, "cachetable_wait_writing %"PRIu64"\n", engstat.cachetable_wait_writing);
n += snprintf(buff + n, bufsiz - n, "puts %"PRIu64"\n", engstat.puts);
n += snprintf(buff + n, bufsiz - n, "prefetches %"PRIu64"\n", engstat.prefetches);
n += snprintf(buff + n, bufsiz - n, "maybe_get_and_pins %"PRIu64"\n", engstat.maybe_get_and_pins);
n += snprintf(buff + n, bufsiz - n, "maybe_get_and_pin_hits %"PRIu64"\n", engstat.maybe_get_and_pin_hits);
n += snprintf(buff + n, bufsiz - n, "cachetable_size_current %"PRId64"\n", engstat.cachetable_size_current);
n += snprintf(buff + n, bufsiz - n, "cachetable_size_limit %"PRId64"\n", engstat.cachetable_size_limit);
n += snprintf(buff + n, bufsiz - n, "cachetable_size_writing %"PRId64"\n", engstat.cachetable_size_writing);
n += snprintf(buff + n, bufsiz - n, "get_and_pin_footprint %"PRId64"\n", engstat.get_and_pin_footprint);
n += snprintf(buff + n, bufsiz - n, "range_locks_max %d \n", engstat.range_locks_max);
n += snprintf(buff + n, bufsiz - n, "range_locks_max_per_db %d \n", engstat.range_locks_max_per_db);
n += snprintf(buff + n, bufsiz - n, "range_locks_curr %d \n", engstat.range_locks_curr);
n += snprintf(buff + n, bufsiz - n, "inserts %"PRIu64"\n", engstat.inserts);
n += snprintf(buff + n, bufsiz - n, "deletes %"PRIu64"\n", engstat.deletes);
n += snprintf(buff + n, bufsiz - n, "commits %"PRIu64"\n", engstat.commits);
n += snprintf(buff + n, bufsiz - n, "aborts %"PRIu64"\n", engstat.aborts);
n += snprintf(buff + n, bufsiz - n, "point_queries %"PRIu64"\n", engstat.point_queries);
n += snprintf(buff + n, bufsiz - n, "sequential_queries %"PRIu64"\n", engstat.sequential_queries);
// n += snprintf(buff + n, bufsiz - n, " %"PRIu64"\n", engstat.);
if (n > bufsiz) {
char * errmsg = "BUFFER TOO SMALL\n";
int len = strlen(errmsg) + 1;
(void) snprintf(buff + (bufsiz - 1) - len, len, errmsg);
}
return r;
}
static int locked_txn_begin(DB_ENV * env, DB_TXN * stxn, DB_TXN ** txn, u_int32_t flags);
static int toku_db_lt_panic(DB* db, int r);
......@@ -1191,6 +1279,7 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) {
result->checkpointing_begin_atomic_operation = env_checkpointing_begin_atomic_operation;
result->checkpointing_end_atomic_operation = env_checkpointing_end_atomic_operation;
result->get_engine_status = env_get_engine_status;
result->get_engine_status_text = env_get_engine_status_text;
result->get_iname = env_get_iname;
SENV(open);
SENV(close);
......@@ -3785,7 +3874,6 @@ toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags) {
int r;
num_inserts++;
u_int32_t lock_flags = get_prelocked_flags(flags, txn, db);
flags &= ~lock_flags;
BOOL do_locking = (BOOL)(db->i->lt && !(lock_flags&DB_PRELOCKED_WRITE));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment