Commit 8ec25fb7 authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:4260], [t:4239], merge to main

git-svn-id: file:///svn/toku/tokudb@37751 c7de825b-a66e-492c-adef-691d508d4ae1
parent f090e12c
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -331,6 +350,7 @@ typedef enum { ...@@ -331,6 +350,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64 #define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4 #define DB_DBT_MALLOC 4
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64 #define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4 #define DB_DBT_MALLOC 4
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64 #define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4 #define DB_DBT_MALLOC 4
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 128 #define DB_DBT_DUPOK 128
#define DB_DBT_MALLOC 4 #define DB_DBT_MALLOC 4
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2 #define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8 #define DB_DBT_MALLOC 8
......
...@@ -160,6 +160,7 @@ static void print_defines (void) { ...@@ -160,6 +160,7 @@ static void print_defines (void) {
printf("#define DB_PRELOCKED_WRITE 0x00400000\n"); // private tokudb printf("#define DB_PRELOCKED_WRITE 0x00400000\n"); // private tokudb
printf("#define DB_PRELOCKED_FILE_READ 0x00200000\n"); // private tokudb printf("#define DB_PRELOCKED_FILE_READ 0x00200000\n"); // private tokudb
printf("#define DB_IS_HOT_INDEX 0x00100000\n"); // private tokudb printf("#define DB_IS_HOT_INDEX 0x00100000\n"); // private tokudb
printf("#define DBC_DISABLE_PREFETCHING 0x20000000\n"); // private tokudb
{ {
//dbt flags //dbt flags
...@@ -609,6 +610,25 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ ...@@ -609,6 +610,25 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/\n"); printf(" uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/\n");
printf(" uint64_t msg_num; /* how many messages injected at root*/\n"); printf(" uint64_t msg_num; /* how many messages injected at root*/\n");
printf(" uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/\n"); printf(" uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/\n");
printf(" uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */\n");
printf(" uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_basements_decompressed_prefetch;\n");
printf(" uint64_t num_basements_decompressed_write;\n");
printf(" uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */\n");
printf(" uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_msg_buffer_decompressed_prefetch;\n");
printf(" uint64_t num_msg_buffer_decompressed_write;\n");
printf(" uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */\n");
printf(" uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */\n");
printf(" uint64_t num_pivots_fetched_write; /* ... for a write */\n");
printf(" uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */\n");
printf(" uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_basements_fetched_prefetch;\n");
printf(" uint64_t num_basements_fetched_write;\n");
printf(" uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */\n");
printf(" uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_msg_buffer_fetched_prefetch;\n");
printf(" uint64_t num_msg_buffer_fetched_write;\n");
printf(" u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ \n"); printf(" u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ \n");
printf(" u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ \n"); printf(" u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ \n");
printf(" u_int64_t le_max_memsize; /* max memsize of any packed le */ \n"); printf(" u_int64_t le_max_memsize; /* max memsize of any packed le */ \n");
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2 #define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8 #define DB_DBT_MALLOC 8
......
...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status { ...@@ -214,6 +214,25 @@ typedef struct __toku_engine_status {
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/ uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/ uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/ uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */ u_int64_t le_max_memsize; /* max memsize of any packed le */
...@@ -332,6 +351,7 @@ typedef enum { ...@@ -332,6 +351,7 @@ typedef enum {
#define DB_PRELOCKED_WRITE 0x00400000 #define DB_PRELOCKED_WRITE 0x00400000
#define DB_PRELOCKED_FILE_READ 0x00200000 #define DB_PRELOCKED_FILE_READ 0x00200000
#define DB_IS_HOT_INDEX 0x00100000 #define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1 #define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2 #define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8 #define DB_DBT_MALLOC 8
......
...@@ -79,6 +79,14 @@ struct brtnode_fetch_extra { ...@@ -79,6 +79,14 @@ struct brtnode_fetch_extra {
brt_search_t* search; brt_search_t* search;
DBT *range_lock_left_key, *range_lock_right_key; DBT *range_lock_left_key, *range_lock_right_key;
BOOL left_is_neg_infty, right_is_pos_infty; BOOL left_is_neg_infty, right_is_pos_infty;
// states if we should try to aggressively fetch basement nodes
// that are not specifically needed for current query,
// but may be needed for other cursor operations user is doing
// For example, if we have not disabled prefetching,
// and the user is doing a dictionary wide scan, then
// even though a query may only want one basement node,
// we fetch all basement nodes in a leaf node.
BOOL disable_prefetching;
// this value will be set during the fetch_callback call by toku_brtnode_fetch_callback or toku_brtnode_pf_req_callback // this value will be set during the fetch_callback call by toku_brtnode_fetch_callback or toku_brtnode_pf_req_callback
// the callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it // the callbacks need to evaluate this anyway, so we cache it here so the search code does not reevaluate it
int child_to_read; int child_to_read;
...@@ -485,6 +493,7 @@ struct brtenv { ...@@ -485,6 +493,7 @@ struct brtenv {
long long checksum_number; long long checksum_number;
}; };
void toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe);
extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint); extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint);
extern int toku_brtnode_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, PAIR_ATTR *sizep, int*dirty, void*extraargs); extern int toku_brtnode_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, PAIR_ATTR *sizep, int*dirty, void*extraargs);
extern void toku_brtnode_pe_est_callback(void* brtnode_pv, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); extern void toku_brtnode_pe_est_callback(void* brtnode_pv, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs);
...@@ -522,6 +531,7 @@ struct brt_cursor { ...@@ -522,6 +531,7 @@ struct brt_cursor {
BOOL left_is_neg_infty, right_is_pos_infty; BOOL left_is_neg_infty, right_is_pos_infty;
BOOL is_snapshot_read; // true if query is read_committed, false otherwise BOOL is_snapshot_read; // true if query is read_committed, false otherwise
BOOL is_leaf_mode; BOOL is_leaf_mode;
BOOL disable_prefetching;
TOKUTXN ttxn; TOKUTXN ttxn;
struct brt_cursor_leaf_info leaf_info; struct brt_cursor_leaf_info leaf_info;
}; };
...@@ -541,6 +551,7 @@ static inline void fill_bfe_for_full_read(struct brtnode_fetch_extra *bfe, struc ...@@ -541,6 +551,7 @@ static inline void fill_bfe_for_full_read(struct brtnode_fetch_extra *bfe, struc
bfe->left_is_neg_infty = FALSE; bfe->left_is_neg_infty = FALSE;
bfe->right_is_pos_infty = FALSE; bfe->right_is_pos_infty = FALSE;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = FALSE;
} }
// //
...@@ -556,7 +567,8 @@ static inline void fill_bfe_for_subset_read( ...@@ -556,7 +567,8 @@ static inline void fill_bfe_for_subset_read(
DBT *left, DBT *left,
DBT *right, DBT *right,
BOOL left_is_neg_infty, BOOL left_is_neg_infty,
BOOL right_is_pos_infty BOOL right_is_pos_infty,
BOOL disable_prefetching
) )
{ {
bfe->type = brtnode_fetch_subset; bfe->type = brtnode_fetch_subset;
...@@ -567,6 +579,7 @@ static inline void fill_bfe_for_subset_read( ...@@ -567,6 +579,7 @@ static inline void fill_bfe_for_subset_read(
bfe->left_is_neg_infty = left_is_neg_infty; bfe->left_is_neg_infty = left_is_neg_infty;
bfe->right_is_pos_infty = right_is_pos_infty; bfe->right_is_pos_infty = right_is_pos_infty;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = disable_prefetching;
} }
// //
...@@ -584,6 +597,7 @@ static inline void fill_bfe_for_min_read(struct brtnode_fetch_extra *bfe, struct ...@@ -584,6 +597,7 @@ static inline void fill_bfe_for_min_read(struct brtnode_fetch_extra *bfe, struct
bfe->left_is_neg_infty = FALSE; bfe->left_is_neg_infty = FALSE;
bfe->right_is_pos_infty = FALSE; bfe->right_is_pos_infty = FALSE;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = FALSE;
} }
static inline void destroy_bfe_for_prefetch(struct brtnode_fetch_extra *bfe) { static inline void destroy_bfe_for_prefetch(struct brtnode_fetch_extra *bfe) {
...@@ -628,6 +642,7 @@ static inline void fill_bfe_for_prefetch(struct brtnode_fetch_extra *bfe, ...@@ -628,6 +642,7 @@ static inline void fill_bfe_for_prefetch(struct brtnode_fetch_extra *bfe,
bfe->left_is_neg_infty = c->left_is_neg_infty; bfe->left_is_neg_infty = c->left_is_neg_infty;
bfe->right_is_pos_infty = c->right_is_pos_infty; bfe->right_is_pos_infty = c->right_is_pos_infty;
bfe->child_to_read = -1; bfe->child_to_read = -1;
bfe->disable_prefetching = c->disable_prefetching;
} }
struct ancestors { struct ancestors {
...@@ -787,6 +802,25 @@ struct brt_status { ...@@ -787,6 +802,25 @@ struct brt_status {
uint64_t msg_bytes_max; // how many bytes of messages currently in trees (estimate) uint64_t msg_bytes_max; // how many bytes of messages currently in trees (estimate)
uint64_t msg_num; // how many messages injected at root uint64_t msg_num; // how many messages injected at root
uint64_t msg_num_broadcast; // how many broadcast messages injected at root uint64_t msg_num_broadcast; // how many broadcast messages injected at root
uint64_t num_basements_decompressed_normal; // how many basement nodes were decompressed because they were the target of a query
uint64_t num_basements_decompressed_aggressive; // ... because they were between lc and rc
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; // how many msg buffers were decompressed because they were the target of a query
uint64_t num_msg_buffer_decompressed_aggressive; // ... because they were between lc and rc
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; // how many pivots were fetched for a query
uint64_t num_pivots_fetched_prefetch; // ... for a prefetch
uint64_t num_pivots_fetched_write; // ... for a write
uint64_t num_basements_fetched_normal; // how many basement nodes were fetched because they were the target of a query
uint64_t num_basements_fetched_aggressive; // ... because they were between lc and rc
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; // how many msg buffers were fetched because they were the target of a query
uint64_t num_msg_buffer_fetched_aggressive; // ... because they were between lc and rc
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
}; };
void toku_brt_get_status(BRT_STATUS); void toku_brt_get_status(BRT_STATUS);
......
...@@ -1266,28 +1266,29 @@ static void setup_brtnode_partitions(BRTNODE node, struct brtnode_fetch_extra* b ...@@ -1266,28 +1266,29 @@ static void setup_brtnode_partitions(BRTNODE node, struct brtnode_fetch_extra* b
//printf("node height %d, blocknum %"PRId64", type %d lc %d rc %d\n", node->height, node->thisnodename.b, bfe->type, lc, rc); //printf("node height %d, blocknum %"PRId64", type %d lc %d rc %d\n", node->height, node->thisnodename.b, bfe->type, lc, rc);
for (int i = 0; i < node->n_children; i++) { for (int i = 0; i < node->n_children; i++) {
BP_INIT_UNTOUCHED_CLOCK(node,i); BP_INIT_UNTOUCHED_CLOCK(node,i);
if (data_in_memory) { if (data_in_memory) {
BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc)) BP_STATE(node, i) = ((toku_bfe_wants_child_available(bfe, i) || (lc <= i && i <= rc))
? PT_AVAIL : PT_COMPRESSED); ? PT_AVAIL : PT_COMPRESSED);
} else { } else {
BP_STATE(node, i) = PT_ON_DISK; BP_STATE(node, i) = PT_ON_DISK;
} }
BP_WORKDONE(node,i) = 0; BP_WORKDONE(node,i) = 0;
switch (BP_STATE(node,i)) {
case PT_AVAIL: switch (BP_STATE(node,i)) {
case PT_AVAIL:
setup_available_brtnode_partition(node, i); setup_available_brtnode_partition(node, i);
BP_TOUCH_CLOCK(node,i); BP_TOUCH_CLOCK(node,i);
continue; continue;
case PT_COMPRESSED: case PT_COMPRESSED:
set_BSB(node, i, sub_block_creat()); set_BSB(node, i, sub_block_creat());
continue; continue;
case PT_ON_DISK: case PT_ON_DISK:
set_BNULL(node, i); set_BNULL(node, i);
continue; continue;
case PT_INVALID: case PT_INVALID:
break; break;
} }
assert(FALSE); assert(FALSE);
} }
} }
...@@ -1371,18 +1372,18 @@ check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_blo ...@@ -1371,18 +1372,18 @@ check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_blo
} }
static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnode, static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnode,
BLOCKNUM blocknum, BLOCKNUM blocknum,
u_int32_t fullhash, u_int32_t fullhash,
struct brtnode_fetch_extra *bfe, struct brtnode_fetch_extra *bfe,
struct rbuf *rb, struct rbuf *rb,
int fd) int fd)
// If we have enough information in the rbuf to construct a header, then do so. // If we have enough information in the rbuf to construct a header, then do so.
// Also fetch in the basement node if needed. // Also fetch in the basement node if needed.
// Return 0 if it worked. If something goes wrong (including that we are looking at some old data format that doesn't have partitions) then return nonzero. // Return 0 if it worked. If something goes wrong (including that we are looking at some old data format that doesn't have partitions) then return nonzero.
{ {
int r; int r;
BRTNODE node = toku_xmalloc(sizeof(*node)); BRTNODE node = toku_xmalloc(sizeof(*node));
// fill in values that are known and not stored in rb // fill in values that are known and not stored in rb
node->fullhash = fullhash; node->fullhash = fullhash;
node->thisnodename = blocknum; node->thisnodename = blocknum;
...@@ -1408,13 +1409,14 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod ...@@ -1408,13 +1409,14 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod
r = EINVAL; r = EINVAL;
goto cleanup; goto cleanup;
} }
node->layout_version = node->layout_version_read_from_disk; node->layout_version = node->layout_version_read_from_disk;
node->layout_version_original = rbuf_int(rb); node->layout_version_original = rbuf_int(rb);
node->build_id = rbuf_int(rb); node->build_id = rbuf_int(rb);
node->n_children = rbuf_int(rb); node->n_children = rbuf_int(rb);
// Guaranteed to have been able to read up to here. If n_children is too big, we may have a problem, so check that we won't overflow while // Guaranteed to have been able to read up to here. If n_children
// reading the partition locations. // is too big, we may have a problem, so check that we won't overflow
// while reading the partition locations.
unsigned int nhsize = serialize_node_header_size(node); // we can do this because n_children is filled in. unsigned int nhsize = serialize_node_header_size(node); // we can do this because n_children is filled in.
unsigned int needed_size = nhsize + 12; // we need 12 more so that we can read the compressed block size information that follows for the nodeinfo. unsigned int needed_size = nhsize + 12; // we need 12 more so that we can read the compressed block size information that follows for the nodeinfo.
if (needed_size > rb->size) { if (needed_size > rb->size) {
...@@ -1442,10 +1444,11 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod ...@@ -1442,10 +1444,11 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod
sb_node_info.compressed_size = rbuf_int(rb); // we'll be able to read these because we checked the size earlier. sb_node_info.compressed_size = rbuf_int(rb); // we'll be able to read these because we checked the size earlier.
sb_node_info.uncompressed_size = rbuf_int(rb); sb_node_info.uncompressed_size = rbuf_int(rb);
if (rb->size-rb->ndone < sb_node_info.compressed_size + 8) { if (rb->size-rb->ndone < sb_node_info.compressed_size + 8) {
r = EINVAL; // we won't r = EINVAL; // we won't
goto cleanup; goto cleanup;
} }
// We got the entire header and node info! // We got the entire header and node info!
toku_brt_status_update_pivot_fetch_reason(bfe);
// Finish reading the compressed sub_block // Finish reading the compressed sub_block
bytevec* cp = (bytevec*)&sb_node_info.compressed_ptr; bytevec* cp = (bytevec*)&sb_node_info.compressed_ptr;
...@@ -1458,7 +1461,7 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod ...@@ -1458,7 +1461,7 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod
// Now decompress the subblock // Now decompress the subblock
sb_node_info.uncompressed_ptr = toku_xmalloc(sb_node_info.uncompressed_size); sb_node_info.uncompressed_ptr = toku_xmalloc(sb_node_info.uncompressed_size);
assert(sb_node_info.uncompressed_ptr); assert(sb_node_info.uncompressed_ptr);
toku_decompress( toku_decompress(
sb_node_info.uncompressed_ptr, sb_node_info.uncompressed_ptr,
sb_node_info.uncompressed_size, sb_node_info.uncompressed_size,
...@@ -1471,7 +1474,9 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod ...@@ -1471,7 +1474,9 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod
toku_free(sb_node_info.uncompressed_ptr); toku_free(sb_node_info.uncompressed_ptr);
sb_node_info.uncompressed_ptr = NULL; sb_node_info.uncompressed_ptr = NULL;
// Now we have the brtnode_info. We have a bunch more stuff in the rbuf, so we might be able to store the compressed data for some objects. // Now we have the brtnode_info. We have a bunch more stuff in the
// rbuf, so we might be able to store the compressed data for some
// objects.
// We can proceed to deserialize the individual subblocks. // We can proceed to deserialize the individual subblocks.
assert(bfe->type == brtnode_fetch_none || bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_all || bfe->type == brtnode_fetch_prefetch); assert(bfe->type == brtnode_fetch_none || bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_all || bfe->type == brtnode_fetch_prefetch);
...@@ -1480,29 +1485,17 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod ...@@ -1480,29 +1485,17 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod
// for partitions staying compressed, create sub_block // for partitions staying compressed, create sub_block
setup_brtnode_partitions(node, bfe, false); setup_brtnode_partitions(node, bfe, false);
// determine the range to preetch if (bfe->type != brtnode_fetch_none) {
int lc, rc; PAIR_ATTR attr;
if (bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_prefetch) { toku_brtnode_pf_callback(node, bfe, fd, &attr);
lc = toku_bfe_leftmost_child_wanted(bfe, node);
rc = toku_bfe_rightmost_child_wanted(bfe, node);
} else {
lc = -1;
rc = -1;
} }
// handle clock
cilk_for (int i = 0; i < node->n_children; i++) { for (int i = 0; i < node->n_children; i++) {
assert(BP_STATE(node, i) == PT_ON_DISK);
// We only touch the clock for basement nodes that the bfe wants,
// and not basement nodes that the are being prefetched
if (toku_bfe_wants_child_available(bfe, i)) { if (toku_bfe_wants_child_available(bfe, i)) {
assert(BP_STATE(node,i) == PT_AVAIL);
BP_TOUCH_CLOCK(node,i); BP_TOUCH_CLOCK(node,i);
} }
if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) {
assert(BP_STATE(node,i) == PT_ON_DISK);
toku_deserialize_bp_from_disk(node, i, fd, bfe);
}
} }
*brtnode = node; *brtnode = node;
r = 0; r = 0;
......
...@@ -538,16 +538,28 @@ void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename ...@@ -538,16 +538,28 @@ void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename
//printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced); //printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
} }
// Record, in brt_status, why a node's pivots were just fetched.
// The reason is taken from bfe->type:
//   brtnode_fetch_prefetch -> num_pivots_fetched_prefetch
//   brtnode_fetch_all      -> num_pivots_fetched_write
//   brtnode_fetch_subset   -> num_pivots_fetched_query
// Any other fetch type leaves every counter untouched.
void
toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe)
{
    uint64_t *counter;

    // Select the counter first; the increment happens once, at the end.
    if (bfe->type == brtnode_fetch_prefetch) {
        counter = &brt_status.num_pivots_fetched_prefetch;
    } else if (bfe->type == brtnode_fetch_all) {
        counter = &brt_status.num_pivots_fetched_write;
    } else if (bfe->type == brtnode_fetch_subset) {
        counter = &brt_status.num_pivots_fetched_query;
    } else {
        // No pivot counter is kept for this fetch type.
        return;
    }
    (*counter)++;
}
//fd is protected (must be holding fdlock) //fd is protected (must be holding fdlock)
int toku_brtnode_fetch_callback (CACHEFILE UU(cachefile), int fd, BLOCKNUM nodename, u_int32_t fullhash, int toku_brtnode_fetch_callback (CACHEFILE UU(cachefile), int fd, BLOCKNUM nodename, u_int32_t fullhash,
void **brtnode_pv, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { void **brtnode_pv, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) {
assert(extraargs); assert(extraargs);
assert(*brtnode_pv == NULL); assert(*brtnode_pv == NULL);
struct brtnode_fetch_extra *bfe = (struct brtnode_fetch_extra *)extraargs; struct brtnode_fetch_extra *bfe = (struct brtnode_fetch_extra *)extraargs;
BRTNODE *result=(BRTNODE*)brtnode_pv; BRTNODE *result=(BRTNODE*)brtnode_pv;
// deserialize the node, must pass the bfe in because we cannot evaluate what piece of the // deserialize the node, must pass the bfe in because we cannot
// the node is necessary until we get it at least partially into memory // evaluate what piece of the node is necessary until we get it at
// least partially into memory
int r = toku_deserialize_brtnode_from(fd, nodename, fullhash, result, bfe); int r = toku_deserialize_brtnode_from(fd, nodename, fullhash, result, bfe);
if (r == 0) { if (r == 0) {
*sizep = make_brtnode_pair_attr(*result); *sizep = make_brtnode_pair_attr(*result);
...@@ -781,6 +793,9 @@ BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs) { ...@@ -781,6 +793,9 @@ BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs) {
brt_status_update_partial_fetch(BP_STATE(node, bfe->child_to_read)); brt_status_update_partial_fetch(BP_STATE(node, bfe->child_to_read));
} }
else if (bfe->type == brtnode_fetch_prefetch) { else if (bfe->type == brtnode_fetch_prefetch) {
// makes no sense to have prefetching disabled
// and still call this function
assert(!bfe->disable_prefetching);
int lc = toku_bfe_leftmost_child_wanted(bfe, node); int lc = toku_bfe_leftmost_child_wanted(bfe, node);
int rc = toku_bfe_rightmost_child_wanted(bfe, node); int rc = toku_bfe_rightmost_child_wanted(bfe, node);
for (int i = lc; i <= rc; ++i) { for (int i = lc; i <= rc; ++i) {
...@@ -797,6 +812,71 @@ BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs) { ...@@ -797,6 +812,71 @@ BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs) {
return retval; return retval;
} }
// Bump the brt_status counter describing one partition that is about to be
// made available by a partial fetch.
//
//   bfe     - fetch request; bfe->type and bfe->child_to_read determine the
//             reason the partition is wanted
//   i       - index of the partition being brought in
//   state   - the partition's current state; must be PT_COMPRESSED (it will
//             be decompressed) or PT_ON_DISK (it will be fetched)
//   is_leaf - TRUE selects the num_basements_* counters, FALSE selects the
//             num_msg_buffer_* counters
//
// The reason is classified as:
//   prefetch   - bfe->type == brtnode_fetch_prefetch
//   write      - bfe->type == brtnode_fetch_all
//   normal     - otherwise, and i is the child the request targets
//   aggressive - otherwise, and i is some other child
static void
brt_status_update_partial_fetch_reason(
    struct brtnode_fetch_extra *bfe,
    int i,
    int state,
    BOOL is_leaf
    )
{
    enum { WHY_PREFETCH, WHY_WRITE, WHY_NORMAL, WHY_AGGRESSIVE, WHY_COUNT };

    invariant(state == PT_COMPRESSED || state == PT_ON_DISK);

    // Column 0 = partition read from disk, column 1 = partition decompressed.
    uint64_t *const basement_counters[WHY_COUNT][2] = {
        [WHY_PREFETCH]   = { &brt_status.num_basements_fetched_prefetch,
                             &brt_status.num_basements_decompressed_prefetch },
        [WHY_WRITE]      = { &brt_status.num_basements_fetched_write,
                             &brt_status.num_basements_decompressed_write },
        [WHY_NORMAL]     = { &brt_status.num_basements_fetched_normal,
                             &brt_status.num_basements_decompressed_normal },
        [WHY_AGGRESSIVE] = { &brt_status.num_basements_fetched_aggressive,
                             &brt_status.num_basements_decompressed_aggressive },
    };
    uint64_t *const msg_buffer_counters[WHY_COUNT][2] = {
        [WHY_PREFETCH]   = { &brt_status.num_msg_buffer_fetched_prefetch,
                             &brt_status.num_msg_buffer_decompressed_prefetch },
        [WHY_WRITE]      = { &brt_status.num_msg_buffer_fetched_write,
                             &brt_status.num_msg_buffer_decompressed_write },
        [WHY_NORMAL]     = { &brt_status.num_msg_buffer_fetched_normal,
                             &brt_status.num_msg_buffer_decompressed_normal },
        [WHY_AGGRESSIVE] = { &brt_status.num_msg_buffer_fetched_aggressive,
                             &brt_status.num_msg_buffer_decompressed_aggressive },
    };

    // Work out why this partition is wanted.
    int why;
    if (bfe->type == brtnode_fetch_prefetch) {
        why = WHY_PREFETCH;
    } else if (bfe->type == brtnode_fetch_all) {
        why = WHY_WRITE;
    } else if (i == bfe->child_to_read) {
        why = WHY_NORMAL;
    } else {
        why = WHY_AGGRESSIVE;
    }

    // Anything that is not PT_COMPRESSED is counted as a fetch.
    const int decompressed = (state == PT_COMPRESSED) ? 1 : 0;

    uint64_t *counter = is_leaf ? basement_counters[why][decompressed]
                                : msg_buffer_counters[why][decompressed];
    (*counter)++;
}
// callback for partially reading a node // callback for partially reading a node
// could have just used toku_brtnode_fetch_callback, but wanted to separate the two cases to separate functions // could have just used toku_brtnode_fetch_callback, but wanted to separate the two cases to separate functions
int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAIR_ATTR* sizep) { int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAIR_ATTR* sizep) {
...@@ -807,7 +887,10 @@ int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAI ...@@ -807,7 +887,10 @@ int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAI
assert((bfe->type == brtnode_fetch_subset) || (bfe->type == brtnode_fetch_all) || (bfe->type == brtnode_fetch_prefetch)); assert((bfe->type == brtnode_fetch_subset) || (bfe->type == brtnode_fetch_all) || (bfe->type == brtnode_fetch_prefetch));
// determine the range to prefetch // determine the range to prefetch
int lc, rc; int lc, rc;
if (bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_prefetch) { if (!bfe->disable_prefetching &&
(bfe->type == brtnode_fetch_subset || bfe->type == brtnode_fetch_prefetch)
)
{
lc = toku_bfe_leftmost_child_wanted(bfe, node); lc = toku_bfe_leftmost_child_wanted(bfe, node);
rc = toku_bfe_rightmost_child_wanted(bfe, node); rc = toku_bfe_rightmost_child_wanted(bfe, node);
} else { } else {
...@@ -821,6 +904,7 @@ int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAI ...@@ -821,6 +904,7 @@ int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAI
continue; continue;
} }
if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) { if ((lc <= i && i <= rc) || toku_bfe_wants_child_available(bfe, i)) {
brt_status_update_partial_fetch_reason(bfe, i, BP_STATE(node, i), (node->height == 0));
if (BP_STATE(node,i) == PT_COMPRESSED) { if (BP_STATE(node,i) == PT_COMPRESSED) {
cilk_spawn toku_deserialize_bp_from_compressed(node, i, &bfe->h->descriptor, bfe->h->compare_fun); cilk_spawn toku_deserialize_bp_from_compressed(node, i, &bfe->h->descriptor, bfe->h->compare_fun);
} }
...@@ -3843,7 +3927,8 @@ int toku_brt_cursor ( ...@@ -3843,7 +3927,8 @@ int toku_brt_cursor (
BRT brt, BRT brt,
BRT_CURSOR *cursorptr, BRT_CURSOR *cursorptr,
TOKUTXN ttxn, TOKUTXN ttxn,
BOOL is_snapshot_read BOOL is_snapshot_read,
BOOL disable_prefetching
) )
{ {
if (is_snapshot_read) { if (is_snapshot_read) {
...@@ -3868,6 +3953,7 @@ int toku_brt_cursor ( ...@@ -3868,6 +3953,7 @@ int toku_brt_cursor (
cursor->is_snapshot_read = is_snapshot_read; cursor->is_snapshot_read = is_snapshot_read;
cursor->is_leaf_mode = FALSE; cursor->is_leaf_mode = FALSE;
cursor->ttxn = ttxn; cursor->ttxn = ttxn;
cursor->disable_prefetching = disable_prefetching;
toku_list_push(&brt->cursors, &cursor->cursors_link); toku_list_push(&brt->cursors, &cursor->cursors_link);
*cursorptr = cursor; *cursorptr = cursor;
return 0; return 0;
...@@ -4543,7 +4629,7 @@ brt_node_maybe_prefetch(BRT brt, BRTNODE node, int childnum, BRT_CURSOR brtcurso ...@@ -4543,7 +4629,7 @@ brt_node_maybe_prefetch(BRT brt, BRTNODE node, int childnum, BRT_CURSOR brtcurso
// if we want to prefetch in the tree // if we want to prefetch in the tree
// then prefetch the next children if there are any // then prefetch the next children if there are any
if (*doprefetch && brt_cursor_prefetching(brtcursor)) { if (*doprefetch && brt_cursor_prefetching(brtcursor) && !brtcursor->disable_prefetching) {
int rc = brt_cursor_rightmost_child_wanted(brtcursor, brt, node); int rc = brt_cursor_rightmost_child_wanted(brtcursor, brt, node);
for (int i = childnum + 1; (i <= childnum + TOKU_DO_PREFETCH) && (i <= rc); i++) { for (int i = childnum + 1; (i <= childnum + TOKU_DO_PREFETCH) && (i <= rc); i++) {
BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i); BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i);
...@@ -4612,7 +4698,8 @@ brt_search_child(BRT brt, BRTNODE node, int childnum, brt_search_t *search, BRT_ ...@@ -4612,7 +4698,8 @@ brt_search_child(BRT brt, BRTNODE node, int childnum, brt_search_t *search, BRT_
&brtcursor->range_lock_left_key, &brtcursor->range_lock_left_key,
&brtcursor->range_lock_right_key, &brtcursor->range_lock_right_key,
brtcursor->left_is_neg_infty, brtcursor->left_is_neg_infty,
brtcursor->right_is_pos_infty brtcursor->right_is_pos_infty,
brtcursor->disable_prefetching
); );
{ {
int rr = toku_pin_brtnode(brt, childblocknum, fullhash, int rr = toku_pin_brtnode(brt, childblocknum, fullhash,
...@@ -4858,7 +4945,8 @@ try_again: ...@@ -4858,7 +4945,8 @@ try_again:
&brtcursor->range_lock_left_key, &brtcursor->range_lock_left_key,
&brtcursor->range_lock_right_key, &brtcursor->range_lock_right_key,
brtcursor->left_is_neg_infty, brtcursor->left_is_neg_infty,
brtcursor->right_is_pos_infty brtcursor->right_is_pos_infty,
brtcursor->disable_prefetching
); );
r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node); r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
assert(r==0 || r== TOKUDB_TRY_AGAIN); assert(r==0 || r== TOKUDB_TRY_AGAIN);
...@@ -5015,7 +5103,7 @@ int ...@@ -5015,7 +5103,7 @@ int
toku_brt_flatten(BRT brt, TOKUTXN ttxn) toku_brt_flatten(BRT brt, TOKUTXN ttxn)
{ {
BRT_CURSOR tmp_cursor; BRT_CURSOR tmp_cursor;
int r = toku_brt_cursor(brt, &tmp_cursor, ttxn, FALSE); int r = toku_brt_cursor(brt, &tmp_cursor, ttxn, FALSE, FALSE);
if (r!=0) return r; if (r!=0) return r;
brt_search_t search; brt_search_init(&search, brt_cursor_compare_one, BRT_SEARCH_LEFT, 0, tmp_cursor->brt); brt_search_t search; brt_search_init(&search, brt_cursor_compare_one, BRT_SEARCH_LEFT, 0, tmp_cursor->brt);
r = brt_cursor_search(tmp_cursor, &search, brt_flatten_getf, NULL, FALSE); r = brt_cursor_search(tmp_cursor, &search, brt_flatten_getf, NULL, FALSE);
...@@ -5281,7 +5369,7 @@ toku_brt_lookup (BRT brt, DBT *k, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v) ...@@ -5281,7 +5369,7 @@ toku_brt_lookup (BRT brt, DBT *k, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v)
int r, rr; int r, rr;
BRT_CURSOR cursor; BRT_CURSOR cursor;
rr = toku_brt_cursor(brt, &cursor, NULL, FALSE); rr = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
if (rr != 0) return rr; if (rr != 0) return rr;
int op = DB_SET; int op = DB_SET;
......
...@@ -186,7 +186,7 @@ int toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra ...@@ -186,7 +186,7 @@ int toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra
//int show_brt_blocknumbers(BRT); //int show_brt_blocknumbers(BRT);
typedef struct brt_cursor *BRT_CURSOR; typedef struct brt_cursor *BRT_CURSOR;
int toku_brt_cursor (BRT, BRT_CURSOR*, TOKUTXN, BOOL) __attribute__ ((warn_unused_result)); int toku_brt_cursor (BRT, BRT_CURSOR*, TOKUTXN, BOOL, BOOL) __attribute__ ((warn_unused_result));
void toku_brt_cursor_set_leaf_mode(BRT_CURSOR); void toku_brt_cursor_set_leaf_mode(BRT_CURSOR);
int toku_brt_cursor_is_leaf_mode(BRT_CURSOR); int toku_brt_cursor_is_leaf_mode(BRT_CURSOR);
void toku_brt_cursor_set_range_lock(BRT_CURSOR, const DBT *, const DBT *, BOOL, BOOL); void toku_brt_cursor_set_range_lock(BRT_CURSOR, const DBT *, const DBT *, BOOL, BOOL);
......
...@@ -23,7 +23,7 @@ le_cursor_create(LE_CURSOR *le_cursor_result, BRT brt, TOKUTXN txn) { ...@@ -23,7 +23,7 @@ le_cursor_create(LE_CURSOR *le_cursor_result, BRT brt, TOKUTXN txn) {
if (le_cursor == NULL) if (le_cursor == NULL)
result = errno; result = errno;
else { else {
result = toku_brt_cursor(brt, &le_cursor->brt_cursor, txn, FALSE); result = toku_brt_cursor(brt, &le_cursor->brt_cursor, txn, FALSE, FALSE);
if (result == 0) { if (result == 0) {
// TODO move the leaf mode to the brt cursor constructor // TODO move the leaf mode to the brt cursor constructor
toku_brt_cursor_set_leaf_mode(le_cursor->brt_cursor); toku_brt_cursor_set_leaf_mode(le_cursor->brt_cursor);
......
...@@ -19,7 +19,6 @@ int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) { ...@@ -19,7 +19,6 @@ int64_key_cmp (DB *db UU(), const DBT *a, const DBT *b) {
return 0; return 0;
} }
#if 0
static void static void
test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
int r; int r;
...@@ -33,8 +32,29 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { ...@@ -33,8 +32,29 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
memset(&cursor->range_lock_right_key, 0 , sizeof(DBT)); memset(&cursor->range_lock_right_key, 0 , sizeof(DBT));
cursor->left_is_neg_infty = TRUE; cursor->left_is_neg_infty = TRUE;
cursor->right_is_pos_infty = TRUE; cursor->right_is_pos_infty = TRUE;
cursor->disable_prefetching = FALSE;
struct brtnode_fetch_extra bfe; struct brtnode_fetch_extra bfe;
// quick test to see that we have the right behavior when we set
// disable_prefetching to TRUE
cursor->disable_prefetching = TRUE;
fill_bfe_for_prefetch(&bfe, brt_h, cursor);
r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe);
assert(r==0);
assert(dn->n_children == 3);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_ON_DISK);
destroy_bfe_for_prefetch(&bfe);
toku_brtnode_free(&dn);
// now enable prefetching again
cursor->disable_prefetching = FALSE;
fill_bfe_for_prefetch(&bfe, brt_h, cursor); fill_bfe_for_prefetch(&bfe, brt_h, cursor);
r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe);
...@@ -138,7 +158,6 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { ...@@ -138,7 +158,6 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
toku_free(cursor); toku_free(cursor);
} }
#endif
static void static void
test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
...@@ -168,11 +187,38 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { ...@@ -168,11 +187,38 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
&left, &left,
&right, &right,
FALSE, FALSE,
FALSE,
FALSE FALSE
); );
// fake the childnum to read
// set disable_prefetching ON
bfe.child_to_read = 2;
bfe.disable_prefetching = TRUE;
r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe);
assert(r==0);
assert(dn->n_children == 3);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
// need to call this twice because we had a subset read before, that touched the clock
toku_brtnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, NULL);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_brtnode_pe_callback(dn, make_pair_attr(0xffffffff), &attr, NULL);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_COMPRESSED);
r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr);
assert(BP_STATE(dn,0) == PT_ON_DISK);
assert(BP_STATE(dn,1) == PT_ON_DISK);
assert(BP_STATE(dn,2) == PT_AVAIL);
toku_brtnode_free(&dn);
// fake the childnum to read // fake the childnum to read
bfe.child_to_read = 2; bfe.child_to_read = 2;
bfe.disable_prefetching = FALSE;
r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe);
assert(r==0); assert(r==0);
assert(dn->n_children == 3); assert(dn->n_children == 3);
...@@ -217,7 +263,6 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { ...@@ -217,7 +263,6 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) {
assert(BP_STATE(dn,2) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK);
toku_brtnode_free(&dn); toku_brtnode_free(&dn);
toku_free(cursor); toku_free(cursor);
} }
...@@ -307,7 +352,7 @@ test_prefetching(void) { ...@@ -307,7 +352,7 @@ test_prefetching(void) {
r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE);
assert(r==0); assert(r==0);
//test_prefetch_read(fd, brt, brt_h); test_prefetch_read(fd, brt, brt_h);
test_subset_read(fd, brt, brt_h); test_subset_read(fd, brt, brt_h);
kv_pair_free(sn.childkeys[0]); kv_pair_free(sn.childkeys[0]);
......
...@@ -158,7 +158,8 @@ test2(int fd, struct brt_header *brt_h, BRTNODE *dn) { ...@@ -158,7 +158,8 @@ test2(int fd, struct brt_header *brt_h, BRTNODE *dn) {
&left, &left,
&right, &right,
TRUE, TRUE,
TRUE TRUE,
FALSE
); );
r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe_subset); r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe_subset);
......
...@@ -52,7 +52,7 @@ static void test_sub_block(int n) { ...@@ -52,7 +52,7 @@ static void test_sub_block(int n) {
assert(error == 0); assert(error == 0);
BRT_CURSOR cursor; BRT_CURSOR cursor;
error = toku_brt_cursor(brt, &cursor, NULL, FALSE); error = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(error == 0); assert(error == 0);
for (i=0; ; i++) { for (i=0; ; i++) {
......
...@@ -52,7 +52,7 @@ static void test_multiple_brt_cursor_dbts(int n, DB *db) { ...@@ -52,7 +52,7 @@ static void test_multiple_brt_cursor_dbts(int n, DB *db) {
} }
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE); r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE, FALSE);
assert(r == 0); assert(r == 0);
} }
......
...@@ -20,7 +20,7 @@ static void assert_cursor_notfound(BRT brt, int position) { ...@@ -20,7 +20,7 @@ static void assert_cursor_notfound(BRT brt, int position) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
int r; int r;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
struct check_pair pair = {0,0,0,0,0}; struct check_pair pair = {0,0,0,0,0};
...@@ -36,7 +36,7 @@ static void assert_cursor_value(BRT brt, int position, long long value) { ...@@ -36,7 +36,7 @@ static void assert_cursor_value(BRT brt, int position, long long value) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
int r; int r;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("key: "); if (test_cursor_debug && verbose) printf("key: ");
...@@ -53,7 +53,7 @@ static void assert_cursor_first_last(BRT brt, long long firstv, long long lastv) ...@@ -53,7 +53,7 @@ static void assert_cursor_first_last(BRT brt, long long firstv, long long lastv)
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
int r; int r;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("first key: "); if (test_cursor_debug && verbose) printf("first key: ");
...@@ -251,7 +251,7 @@ static void assert_cursor_walk(BRT brt, int n) { ...@@ -251,7 +251,7 @@ static void assert_cursor_walk(BRT brt, int n) {
int i; int i;
int r; int r;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("key: "); if (test_cursor_debug && verbose) printf("key: ");
...@@ -317,7 +317,7 @@ static void assert_cursor_rwalk(BRT brt, int n) { ...@@ -317,7 +317,7 @@ static void assert_cursor_rwalk(BRT brt, int n) {
int i; int i;
int r; int r;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("key: "); if (test_cursor_debug && verbose) printf("key: ");
...@@ -403,7 +403,7 @@ static void assert_cursor_walk_inorder(BRT brt, int n) { ...@@ -403,7 +403,7 @@ static void assert_cursor_walk_inorder(BRT brt, int n) {
int r; int r;
char *prevkey = 0; char *prevkey = 0;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("key: "); if (test_cursor_debug && verbose) printf("key: ");
...@@ -505,7 +505,7 @@ static void test_brt_cursor_split(int n, DB *db) { ...@@ -505,7 +505,7 @@ static void test_brt_cursor_split(int n, DB *db) {
assert(r==0); assert(r==0);
} }
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
if (test_cursor_debug && verbose) printf("key: "); if (test_cursor_debug && verbose) printf("key: ");
...@@ -570,7 +570,7 @@ static void test_multiple_brt_cursors(int n, DB *db) { ...@@ -570,7 +570,7 @@ static void test_multiple_brt_cursors(int n, DB *db) {
int i; int i;
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE); r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE, FALSE);
assert(r == 0); assert(r == 0);
} }
...@@ -620,7 +620,7 @@ static void test_multiple_brt_cursor_walk(int n, DB *db) { ...@@ -620,7 +620,7 @@ static void test_multiple_brt_cursor_walk(int n, DB *db) {
int c; int c;
/* create the cursors */ /* create the cursors */
for (c=0; c<ncursors; c++) { for (c=0; c<ncursors; c++) {
r = toku_brt_cursor(brt, &cursors[c], NULL, FALSE); r = toku_brt_cursor(brt, &cursors[c], NULL, FALSE, FALSE);
assert(r == 0); assert(r == 0);
} }
...@@ -707,7 +707,7 @@ static void test_brt_cursor_set(int n, int cursor_op, DB *db) { ...@@ -707,7 +707,7 @@ static void test_brt_cursor_set(int n, int cursor_op, DB *db) {
assert(r == 0); assert(r == 0);
} }
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
/* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */ /* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */
...@@ -780,7 +780,7 @@ static void test_brt_cursor_set_range(int n, DB *db) { ...@@ -780,7 +780,7 @@ static void test_brt_cursor_set_range(int n, DB *db) {
assert(r == 0); assert(r == 0);
} }
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(r==0); assert(r==0);
/* pick random keys v in 0 <= v < 10*n, the cursor should point /* pick random keys v in 0 <= v < 10*n, the cursor should point
...@@ -830,7 +830,7 @@ static void test_brt_cursor_delete(int n, DB *db) { ...@@ -830,7 +830,7 @@ static void test_brt_cursor_delete(int n, DB *db) {
error = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, test_brt_cursor_keycompare, db); error = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, test_brt_cursor_keycompare, db);
assert(error == 0); assert(error == 0);
error = toku_brt_cursor(brt, &cursor, NULL, FALSE); error = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE);
assert(error == 0); assert(error == 0);
DBT key, val; DBT key, val;
......
...@@ -261,7 +261,7 @@ static void test_cursor_last_empty(void) { ...@@ -261,7 +261,7 @@ static void test_cursor_last_empty(void) {
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items(); //printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
r = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, toku_builtin_compare_fun, null_db); assert(r==0); r = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, toku_builtin_compare_fun, null_db); assert(r==0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items(); //printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
{ {
struct check_pair pair = {0,0,0,0,0}; struct check_pair pair = {0,0,0,0,0};
r = toku_brt_cursor_get(cursor, NULL, lookup_checkf, &pair, DB_LAST); r = toku_brt_cursor_get(cursor, NULL, lookup_checkf, &pair, DB_LAST);
...@@ -297,7 +297,7 @@ static void test_cursor_next (void) { ...@@ -297,7 +297,7 @@ static void test_cursor_next (void) {
r = toku_brt_insert(brt, toku_fill_dbt(&kbt, "hello", 6), toku_fill_dbt(&vbt, "there", 6), null_txn); r = toku_brt_insert(brt, toku_fill_dbt(&kbt, "hello", 6), toku_fill_dbt(&vbt, "there", 6), null_txn);
r = toku_brt_insert(brt, toku_fill_dbt(&kbt, "byebye", 7), toku_fill_dbt(&vbt, "byenow", 7), null_txn); r = toku_brt_insert(brt, toku_fill_dbt(&kbt, "byebye", 7), toku_fill_dbt(&vbt, "byenow", 7), null_txn);
if (verbose) printf("%s:%d calling toku_brt_cursor(...)\n", __FILE__, __LINE__); if (verbose) printf("%s:%d calling toku_brt_cursor(...)\n", __FILE__, __LINE__);
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
toku_init_dbt(&kbt); toku_init_dbt(&kbt);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items(); //printf("%s:%d %d alloced\n", __FILE__, __LINE__, toku_get_n_items_malloced()); toku_print_malloced_items();
toku_init_dbt(&vbt); toku_init_dbt(&vbt);
...@@ -389,7 +389,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) { ...@@ -389,7 +389,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) {
} }
{ {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
for (i=0; i<2; i++) { for (i=0; i<2; i++) {
unsigned char a[4],b[4]; unsigned char a[4],b[4];
...@@ -429,7 +429,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) { ...@@ -429,7 +429,7 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) {
toku_cachetable_verify(ct); toku_cachetable_verify(ct);
} }
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
for (i=0; i<N; i++) { for (i=0; i<N; i++) {
unsigned char a[4],b[4]; unsigned char a[4],b[4];
...@@ -573,7 +573,7 @@ static void test_brt_delete_present(int n) { ...@@ -573,7 +573,7 @@ static void test_brt_delete_present(int n) {
/* cursor should not find anything */ /* cursor should not find anything */
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE);
assert(r == 0); assert(r == 0);
{ {
...@@ -704,7 +704,7 @@ static void test_brt_delete_cursor_first(int n) { ...@@ -704,7 +704,7 @@ static void test_brt_delete_cursor_first(int n) {
/* cursor should find the last key: n-1 */ /* cursor should find the last key: n-1 */
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE);
assert(r == 0); assert(r == 0);
{ {
...@@ -805,7 +805,7 @@ static void test_new_brt_cursor_create_close (void) { ...@@ -805,7 +805,7 @@ static void test_new_brt_cursor_create_close (void) {
int i; int i;
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE); assert(r == 0); r = toku_brt_cursor(brt, &cursors[i], NULL, FALSE, FALSE); assert(r == 0);
} }
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
...@@ -839,7 +839,7 @@ static void test_new_brt_cursor_first(int n) { ...@@ -839,7 +839,7 @@ static void test_new_brt_cursor_first(int n) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
toku_init_dbt(&key); key.flags = DB_DBT_REALLOC; toku_init_dbt(&key); key.flags = DB_DBT_REALLOC;
toku_init_dbt(&val); val.flags = DB_DBT_REALLOC; toku_init_dbt(&val); val.flags = DB_DBT_REALLOC;
...@@ -891,7 +891,7 @@ static void test_new_brt_cursor_last(int n) { ...@@ -891,7 +891,7 @@ static void test_new_brt_cursor_last(int n) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
toku_init_dbt(&key); key.flags = DB_DBT_REALLOC; toku_init_dbt(&key); key.flags = DB_DBT_REALLOC;
toku_init_dbt(&val); val.flags = DB_DBT_REALLOC; toku_init_dbt(&val); val.flags = DB_DBT_REALLOC;
...@@ -943,7 +943,7 @@ static void test_new_brt_cursor_next(int n) { ...@@ -943,7 +943,7 @@ static void test_new_brt_cursor_next(int n) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
for (i=0; ; i++) { for (i=0; ; i++) {
int kk = toku_htonl(i); int kk = toku_htonl(i);
...@@ -986,7 +986,7 @@ static void test_new_brt_cursor_prev(int n) { ...@@ -986,7 +986,7 @@ static void test_new_brt_cursor_prev(int n) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
for (i=n-1; ; i--) { for (i=n-1; ; i--) {
int kk = toku_htonl(i); int kk = toku_htonl(i);
...@@ -1029,7 +1029,7 @@ static void test_new_brt_cursor_current(int n) { ...@@ -1029,7 +1029,7 @@ static void test_new_brt_cursor_current(int n) {
BRT_CURSOR cursor=0; BRT_CURSOR cursor=0;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
for (i=0; ; i++) { for (i=0; ; i++) {
{ {
...@@ -1113,7 +1113,7 @@ static void test_new_brt_cursor_set_range(int n) { ...@@ -1113,7 +1113,7 @@ static void test_new_brt_cursor_set_range(int n) {
r = toku_brt_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0); r = toku_brt_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0);
} }
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
/* pick random keys v in 0 <= v < 10*n, the cursor should point /* pick random keys v in 0 <= v < 10*n, the cursor should point
to the smallest key in the tree that is >= v */ to the smallest key in the tree that is >= v */
...@@ -1170,7 +1170,7 @@ static void test_new_brt_cursor_set(int n, int cursor_op, DB *db) { ...@@ -1170,7 +1170,7 @@ static void test_new_brt_cursor_set(int n, int cursor_op, DB *db) {
r = toku_brt_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0); r = toku_brt_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0);
} }
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
/* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */ /* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
......
...@@ -58,7 +58,7 @@ static void verify_dbfile(int n, const char *name) { ...@@ -58,7 +58,7 @@ static void verify_dbfile(int n, const char *name) {
if (verbose) traceit("Verified brt internals"); if (verbose) traceit("Verified brt internals");
BRT_CURSOR cursor = NULL; BRT_CURSOR cursor = NULL;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
int i; int i;
for (i=0; ; i++) { for (i=0; ; i++) {
......
...@@ -251,7 +251,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c ...@@ -251,7 +251,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c
r = toku_brt_open(t, name, 0, 0, ct, null_txn, 0); assert(r==0); r = toku_brt_open(t, name, 0, 0, ct, null_txn, 0); assert(r==0);
BRT_CURSOR cursor = NULL; BRT_CURSOR cursor = NULL;
r = toku_brt_cursor(t, &cursor, NULL, FALSE); assert(r == 0); r = toku_brt_cursor(t, &cursor, NULL, FALSE, FALSE); assert(r == 0);
int i; int i;
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
......
...@@ -23,7 +23,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute ...@@ -23,7 +23,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute
r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0); r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
r = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, test_brt_cursor_keycompare, db); assert(r==0); r = toku_open_brt(fname, 1, &brt, 1<<12, 1<<9, ct, null_txn, test_brt_cursor_keycompare, db); assert(r==0);
r = toku_brt_cursor(brt, &cursor, NULL, FALSE); assert(r==0); r = toku_brt_cursor(brt, &cursor, NULL, FALSE, FALSE); assert(r==0);
int i; int i;
for (i=0; i<1000; i++) { for (i=0; i<1000; i++) {
......
...@@ -59,7 +59,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ ...@@ -59,7 +59,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_
BRT_CURSOR c; BRT_CURSOR c;
char lkey[100],rkey[100]; char lkey[100],rkey[100];
DBT lk, rk; DBT lk, rk;
r = toku_brt_cursor(t, &c, null_txn, FALSE); assert(r == 0); r = toku_brt_cursor(t, &c, null_txn, FALSE, FALSE); assert(r == 0);
snprintf(lkey, 100, "hello%d", i); snprintf(lkey, 100, "hello%d", i);
snprintf(rkey, 100, "hello%d", i + 100); snprintf(rkey, 100, "hello%d", i + 100);
toku_brt_cursor_set_range_lock(c, toku_fill_dbt(&lk, lkey, 1+strlen(lkey)), toku_brt_cursor_set_range_lock(c, toku_fill_dbt(&lk, lkey, 1+strlen(lkey)),
......
...@@ -13,17 +13,17 @@ ...@@ -13,17 +13,17 @@
static void static void
verify_val(DBT const *a, DBT const *b, void *c) { verify_val(DBT const *a, DBT const *b, void *c) {
assert(a->size == sizeof(int)); assert(a->size == sizeof(u_int64_t));
assert(b->size == sizeof(int)); assert(b->size == sizeof(u_int64_t));
int* expected = (int *)c; u_int64_t* expected = (u_int64_t *)c;
assert(*expected == *(int *)a->data); assert(*expected == *(u_int64_t *)a->data);
assert(*expected == *(int *)b->data); assert(*expected == *(u_int64_t *)b->data);
} }
static int static int
verify_fwd_fast(DBT const *a, DBT const *b, void *c) { verify_fwd_fast(DBT const *a, DBT const *b, void *c) {
verify_val(a,b,c); verify_val(a,b,c);
int* expected = (int *)c; u_int64_t* expected = (u_int64_t *)c;
*expected = *expected + 1; *expected = *expected + 1;
return TOKUDB_CURSOR_CONTINUE; return TOKUDB_CURSOR_CONTINUE;
} }
...@@ -31,7 +31,7 @@ verify_fwd_fast(DBT const *a, DBT const *b, void *c) { ...@@ -31,7 +31,7 @@ verify_fwd_fast(DBT const *a, DBT const *b, void *c) {
static int static int
verify_fwd_slow(DBT const *a, DBT const *b, void *c) { verify_fwd_slow(DBT const *a, DBT const *b, void *c) {
verify_val(a,b,c); verify_val(a,b,c);
int* expected = (int *)c; u_int64_t* expected = (u_int64_t *)c;
*expected = *expected + 1; *expected = *expected + 1;
return 0; return 0;
} }
...@@ -39,7 +39,7 @@ verify_fwd_slow(DBT const *a, DBT const *b, void *c) { ...@@ -39,7 +39,7 @@ verify_fwd_slow(DBT const *a, DBT const *b, void *c) {
static int static int
verify_bwd_fast(DBT const *a, DBT const *b, void *c) { verify_bwd_fast(DBT const *a, DBT const *b, void *c) {
verify_val(a,b,c); verify_val(a,b,c);
int* expected = (int *)c; u_int64_t* expected = (u_int64_t *)c;
*expected = *expected - 1; *expected = *expected - 1;
return TOKUDB_CURSOR_CONTINUE; return TOKUDB_CURSOR_CONTINUE;
} }
...@@ -47,20 +47,61 @@ verify_bwd_fast(DBT const *a, DBT const *b, void *c) { ...@@ -47,20 +47,61 @@ verify_bwd_fast(DBT const *a, DBT const *b, void *c) {
static int static int
verify_bwd_slow(DBT const *a, DBT const *b, void *c) { verify_bwd_slow(DBT const *a, DBT const *b, void *c) {
verify_val(a,b,c); verify_val(a,b,c);
int* expected = (int *)c; u_int64_t* expected = (u_int64_t *)c;
*expected = *expected - 1; *expected = *expected - 1;
return 0; return 0;
} }
/*
 * Snapshot copies of selected engine-status counters.
 * init_eng_stat_vars() fills them from env->get_engine_status(), and
 * check_eng_stat_vars_unchanged() asserts that a later reading still
 * matches them.
 */
u_int64_t num_pivots_fetched_prefetch;
u_int64_t num_basements_decompressed_aggressive;
u_int64_t num_basements_decompressed_prefetch;
u_int64_t num_basements_fetched_aggressive;
u_int64_t num_basements_fetched_prefetch;
/*
 * Capture the current values of the prefetch/aggressive fetch and
 * decompression counters from the engine status into the file-scope
 * snapshot variables of the same names.
 *
 * env: environment whose get_engine_status() method is queried.
 *
 * A non-zero return from get_engine_status() is handed to CKERR().
 */
static void
init_eng_stat_vars(DB_ENV* env) {
    ENGINE_STATUS snapshot;
    int rc = env->get_engine_status(env, &snapshot, NULL, 0);
    CKERR(rc);

    /* Basement-node counters. */
    num_basements_fetched_prefetch        = snapshot.num_basements_fetched_prefetch;
    num_basements_fetched_aggressive      = snapshot.num_basements_fetched_aggressive;
    num_basements_decompressed_prefetch   = snapshot.num_basements_decompressed_prefetch;
    num_basements_decompressed_aggressive = snapshot.num_basements_decompressed_aggressive;

    /* Pivot counter. */
    num_pivots_fetched_prefetch           = snapshot.num_pivots_fetched_prefetch;
}
static void static void
test_bulk_fetch (int n, BOOL prelock) { check_eng_stat_vars_unchanged(DB_ENV* env) {
if (verbose) printf("test_rand_insert:%d \n", n); ENGINE_STATUS engstat;
int r = env->get_engine_status(env, &engstat, NULL, 0);
CKERR(r);
assert(num_pivots_fetched_prefetch == engstat.num_pivots_fetched_prefetch);
assert(num_basements_decompressed_aggressive == engstat.num_basements_decompressed_aggressive);
assert(num_basements_decompressed_prefetch == engstat.num_basements_decompressed_prefetch);
assert(num_basements_fetched_aggressive == engstat.num_basements_fetched_aggressive);
assert(num_basements_fetched_prefetch == engstat.num_basements_fetched_prefetch);
}
/*
 * Print the prefetch/aggressive fetch and decompression counters from the
 * current engine status to stdout, one "name value" pair per line.
 *
 * env: environment whose get_engine_status() method is queried.
 *
 * A non-zero return from get_engine_status() is handed to CKERR().
 */
static void
print_relevant_eng_stat_vars(DB_ENV* env) {
    ENGINE_STATUS engstat;
    int r = env->get_engine_status(env, &engstat, NULL, 0);
    CKERR(r);
    /* The counters are unsigned 64-bit values, so they must be printed with
     * PRIu64; PRId64 would mismatch the argument type and print large
     * values as negative numbers. */
    printf("num_pivots_fetched_prefetch %"PRIu64" \n", engstat.num_pivots_fetched_prefetch);
    printf("num_basements_decompressed_aggressive %"PRIu64" \n", engstat.num_basements_decompressed_aggressive);
    printf("num_basements_decompressed_prefetch %"PRIu64" \n", engstat.num_basements_decompressed_prefetch);
    printf("num_basements_fetched_aggressive %"PRIu64" \n", engstat.num_basements_fetched_aggressive);
    printf("num_basements_fetched_prefetch %"PRIu64" \n", engstat.num_basements_fetched_prefetch);
}
static void
test_bulk_fetch (u_int64_t n, BOOL prelock, BOOL disable_prefetching) {
if (verbose) printf("test_rand_insert:%"PRId64" \n", n);
DB_TXN * const null_txn = 0; DB_TXN * const null_txn = 0;
const char * const fname = "test.bulk_fetch.brt"; const char * const fname = "test.bulk_fetch.brt";
int r; int r;
r = system("rm -rf " ENVDIR); r = system("rm -rf " ENVDIR);
CKERR(r); CKERR(r);
...@@ -69,7 +110,8 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -69,7 +110,8 @@ test_bulk_fetch (int n, BOOL prelock) {
/* create the dup database file */ /* create the dup database file */
DB_ENV *env; DB_ENV *env;
r = db_env_create(&env, 0); assert(r == 0); r = db_env_create(&env, 0); assert(r == 0);
r=env->set_default_bt_compare(env, int_dbt_cmp); CKERR(r); r=env->set_default_bt_compare(env, int64_dbt_cmp); CKERR(r);
r = env->set_cachesize(env, 0, (u_int32_t)n, 1); assert(r == 0);
r = env->open(env, ENVDIR, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0); r = env->open(env, ENVDIR, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0);
DB *db; DB *db;
...@@ -79,11 +121,13 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -79,11 +121,13 @@ test_bulk_fetch (int n, BOOL prelock) {
assert(r == 0); assert(r == 0);
r = db->set_pagesize(db, 4096); r = db->set_pagesize(db, 4096);
assert(r == 0); assert(r == 0);
r = db->set_readpagesize(db, 1024);
assert(r == 0);
r = db->open(db, null_txn, fname, "main", DB_BTREE, DB_CREATE, 0666); r = db->open(db, null_txn, fname, "main", DB_BTREE, DB_CREATE, 0666);
assert(r == 0); assert(r == 0);
int keys[n]; u_int64_t keys[n];
int i; u_int64_t i;
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
keys[i] = i; keys[i] = i;
} }
...@@ -100,7 +144,11 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -100,7 +144,11 @@ test_bulk_fetch (int n, BOOL prelock) {
DBC* cursor; DBC* cursor;
// verify fast // verify fast
r = db->cursor(db, NULL, &cursor, 0); u_int32_t flags = disable_prefetching ? DBC_DISABLE_PREFETCHING : 0;
if (disable_prefetching) {
init_eng_stat_vars(env);
}
r = db->cursor(db, NULL, &cursor, flags);
CKERR(r); CKERR(r);
if (prelock) { if (prelock) {
r = cursor->c_pre_acquire_range_lock( r = cursor->c_pre_acquire_range_lock(
...@@ -110,15 +158,24 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -110,15 +158,24 @@ test_bulk_fetch (int n, BOOL prelock) {
); );
CKERR(r); CKERR(r);
} }
int expected = 0; u_int64_t expected = 0;
while (r != DB_NOTFOUND) { while (r != DB_NOTFOUND) {
r = cursor->c_getf_next(cursor, 0, verify_fwd_fast, &expected); r = cursor->c_getf_next(cursor, 0, verify_fwd_fast, &expected);
assert(r==0 || r==DB_NOTFOUND); assert(r==0 || r==DB_NOTFOUND);
} }
r = cursor->c_close(cursor); CKERR(r); r = cursor->c_close(cursor); CKERR(r);
if (disable_prefetching) {
check_eng_stat_vars_unchanged(env);
}
if (verbose) {
print_relevant_eng_stat_vars(env);
}
// verify slow // verify slow
r = db->cursor(db, NULL, &cursor, 0); if (disable_prefetching) {
init_eng_stat_vars(env);
}
r = db->cursor(db, NULL, &cursor, flags);
CKERR(r); CKERR(r);
if (prelock) { if (prelock) {
r = cursor->c_pre_acquire_range_lock( r = cursor->c_pre_acquire_range_lock(
...@@ -134,9 +191,18 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -134,9 +191,18 @@ test_bulk_fetch (int n, BOOL prelock) {
assert(r==0 || r==DB_NOTFOUND); assert(r==0 || r==DB_NOTFOUND);
} }
r = cursor->c_close(cursor); CKERR(r); r = cursor->c_close(cursor); CKERR(r);
if (disable_prefetching) {
check_eng_stat_vars_unchanged(env);
}
if (verbose) {
print_relevant_eng_stat_vars(env);
}
// now do backwards // now do backwards
r = db->cursor(db, NULL, &cursor, 0); if (disable_prefetching) {
init_eng_stat_vars(env);
}
r = db->cursor(db, NULL, &cursor, flags);
CKERR(r); CKERR(r);
if (prelock) { if (prelock) {
r = cursor->c_pre_acquire_range_lock( r = cursor->c_pre_acquire_range_lock(
...@@ -152,9 +218,18 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -152,9 +218,18 @@ test_bulk_fetch (int n, BOOL prelock) {
assert(r==0 || r==DB_NOTFOUND); assert(r==0 || r==DB_NOTFOUND);
} }
r = cursor->c_close(cursor); CKERR(r); r = cursor->c_close(cursor); CKERR(r);
if (disable_prefetching) {
check_eng_stat_vars_unchanged(env);
}
if (verbose) {
print_relevant_eng_stat_vars(env);
}
// verify slow // verify slow
r = db->cursor(db, NULL, &cursor, 0); if (disable_prefetching) {
init_eng_stat_vars(env);
}
r = db->cursor(db, NULL, &cursor, flags);
CKERR(r); CKERR(r);
if (prelock) { if (prelock) {
r = cursor->c_pre_acquire_range_lock( r = cursor->c_pre_acquire_range_lock(
...@@ -170,6 +245,12 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -170,6 +245,12 @@ test_bulk_fetch (int n, BOOL prelock) {
assert(r==0 || r==DB_NOTFOUND); assert(r==0 || r==DB_NOTFOUND);
} }
r = cursor->c_close(cursor); CKERR(r); r = cursor->c_close(cursor); CKERR(r);
if (disable_prefetching) {
check_eng_stat_vars_unchanged(env);
}
if (verbose) {
print_relevant_eng_stat_vars(env);
}
r = db->close(db, 0); CKERR(r); r = db->close(db, 0); CKERR(r);
...@@ -179,7 +260,9 @@ test_bulk_fetch (int n, BOOL prelock) { ...@@ -179,7 +260,9 @@ test_bulk_fetch (int n, BOOL prelock) {
int int
test_main(int argc, char *const argv[]) { test_main(int argc, char *const argv[]) {
parse_args(argc, argv); parse_args(argc, argv);
test_bulk_fetch(10000, FALSE); test_bulk_fetch(10000, FALSE, TRUE);
test_bulk_fetch(10000, TRUE); test_bulk_fetch(10000, TRUE, TRUE);
test_bulk_fetch(10000, FALSE, FALSE);
test_bulk_fetch(10000, TRUE, FALSE);
return 0; return 0;
} }
...@@ -2091,6 +2091,25 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st ...@@ -2091,6 +2091,25 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st
engstat->msg_bytes_max = brt_stat.msg_bytes_max; engstat->msg_bytes_max = brt_stat.msg_bytes_max;
engstat->msg_num = brt_stat.msg_num; engstat->msg_num = brt_stat.msg_num;
engstat->msg_num_broadcast = brt_stat.msg_num_broadcast; engstat->msg_num_broadcast = brt_stat.msg_num_broadcast;
engstat->num_basements_decompressed_normal = brt_stat.num_basements_decompressed_normal;
engstat->num_basements_decompressed_aggressive = brt_stat.num_basements_decompressed_aggressive;
engstat->num_basements_decompressed_prefetch = brt_stat.num_basements_decompressed_prefetch;
engstat->num_basements_decompressed_write = brt_stat.num_basements_decompressed_write;
engstat->num_msg_buffer_decompressed_normal = brt_stat.num_msg_buffer_decompressed_normal;
engstat->num_msg_buffer_decompressed_aggressive = brt_stat.num_msg_buffer_decompressed_aggressive;
engstat->num_msg_buffer_decompressed_prefetch = brt_stat.num_msg_buffer_decompressed_prefetch;
engstat->num_msg_buffer_decompressed_write = brt_stat.num_msg_buffer_decompressed_write;
engstat->num_pivots_fetched_query = brt_stat.num_pivots_fetched_query;
engstat->num_pivots_fetched_prefetch = brt_stat.num_pivots_fetched_prefetch;
engstat->num_pivots_fetched_write = brt_stat.num_pivots_fetched_write;
engstat->num_basements_fetched_normal = brt_stat.num_basements_fetched_normal;
engstat->num_basements_fetched_aggressive = brt_stat.num_basements_fetched_aggressive;
engstat->num_basements_fetched_prefetch = brt_stat.num_basements_fetched_prefetch;
engstat->num_basements_fetched_write = brt_stat.num_basements_fetched_write;
engstat->num_msg_buffer_fetched_normal = brt_stat.num_msg_buffer_fetched_normal;
engstat->num_msg_buffer_fetched_aggressive = brt_stat.num_msg_buffer_fetched_aggressive;
engstat->num_msg_buffer_fetched_prefetch = brt_stat.num_msg_buffer_fetched_prefetch;
engstat->num_msg_buffer_fetched_write = brt_stat.num_msg_buffer_fetched_write;
} }
{ {
u_int64_t fsync_count, fsync_time; u_int64_t fsync_count, fsync_time;
...@@ -2353,6 +2372,25 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { ...@@ -2353,6 +2372,25 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "msg_bytes_max %"PRIu64"\n", engstat.msg_bytes_max); n += snprintf(buff + n, bufsiz - n, "msg_bytes_max %"PRIu64"\n", engstat.msg_bytes_max);
n += snprintf(buff + n, bufsiz - n, "msg_num %"PRIu64"\n", engstat.msg_num); n += snprintf(buff + n, bufsiz - n, "msg_num %"PRIu64"\n", engstat.msg_num);
n += snprintf(buff + n, bufsiz - n, "msg_num_broadcast %"PRIu64"\n", engstat.msg_num_broadcast); n += snprintf(buff + n, bufsiz - n, "msg_num_broadcast %"PRIu64"\n", engstat.msg_num_broadcast);
n += snprintf(buff + n, bufsiz - n, "num_basements_decompressed_normal %"PRIu64"\n", engstat.num_basements_decompressed_normal);
n += snprintf(buff + n, bufsiz - n, "num_basements_decompressed_aggressive %"PRIu64"\n", engstat.num_basements_decompressed_aggressive);
n += snprintf(buff + n, bufsiz - n, "num_basements_decompressed_prefetch %"PRIu64"\n", engstat.num_basements_decompressed_prefetch);
n += snprintf(buff + n, bufsiz - n, "num_basements_decompressed_write %"PRIu64"\n", engstat.num_basements_decompressed_write);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_decompressed_normal %"PRIu64"\n", engstat.num_msg_buffer_decompressed_normal);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_decompressed_aggressive %"PRIu64"\n", engstat.num_msg_buffer_decompressed_aggressive);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_decompressed_prefetch %"PRIu64"\n", engstat.num_msg_buffer_decompressed_prefetch);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_decompressed_write %"PRIu64"\n", engstat.num_msg_buffer_decompressed_write);
n += snprintf(buff + n, bufsiz - n, "num_pivots_fetched_query %"PRIu64"\n", engstat.num_pivots_fetched_query);
n += snprintf(buff + n, bufsiz - n, "num_pivots_fetched_prefetch %"PRIu64"\n", engstat.num_pivots_fetched_prefetch);
n += snprintf(buff + n, bufsiz - n, "num_pivots_fetched_write %"PRIu64"\n", engstat.num_pivots_fetched_write);
n += snprintf(buff + n, bufsiz - n, "num_basements_fetched_normal %"PRIu64"\n", engstat.num_basements_fetched_normal);
n += snprintf(buff + n, bufsiz - n, "num_basements_fetched_aggressive %"PRIu64"\n", engstat.num_basements_fetched_aggressive);
n += snprintf(buff + n, bufsiz - n, "num_basements_fetched_prefetch %"PRIu64"\n", engstat.num_basements_fetched_prefetch);
n += snprintf(buff + n, bufsiz - n, "num_basements_fetched_write %"PRIu64"\n", engstat.num_basements_fetched_write);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_fetched_normal %"PRIu64"\n", engstat.num_msg_buffer_fetched_normal);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_fetched_aggressive %"PRIu64"\n", engstat.num_msg_buffer_fetched_aggressive);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_fetched_prefetch %"PRIu64"\n", engstat.num_msg_buffer_fetched_prefetch);
n += snprintf(buff + n, bufsiz - n, "num_msg_buffer_fetched_write %"PRIu64"\n", engstat.num_msg_buffer_fetched_write);
n += snprintf(buff + n, bufsiz - n, "multi_inserts %"PRIu64"\n", engstat.multi_inserts); n += snprintf(buff + n, bufsiz - n, "multi_inserts %"PRIu64"\n", engstat.multi_inserts);
n += snprintf(buff + n, bufsiz - n, "multi_inserts_fail %"PRIu64"\n", engstat.multi_inserts_fail); n += snprintf(buff + n, bufsiz - n, "multi_inserts_fail %"PRIu64"\n", engstat.multi_inserts_fail);
n += snprintf(buff + n, bufsiz - n, "multi_deletes %"PRIu64"\n", engstat.multi_deletes); n += snprintf(buff + n, bufsiz - n, "multi_deletes %"PRIu64"\n", engstat.multi_deletes);
...@@ -4189,7 +4227,7 @@ toku_c_count(DBC *cursor, db_recno_t *count, u_int32_t flags) { ...@@ -4189,7 +4227,7 @@ toku_c_count(DBC *cursor, db_recno_t *count, u_int32_t flags) {
// lock_flags |= DB_PRELOCKED // lock_flags |= DB_PRELOCKED
//} //}
r = toku_db_cursor(cursor->dbp, dbc_struct_i(cursor)->txn, &count_cursor, 0, 0); r = toku_db_cursor(cursor->dbp, dbc_struct_i(cursor)->txn, &count_cursor, DBC_DISABLE_PREFETCHING, 0);
if (r != 0) goto finish; if (r != 0) goto finish;
r = toku_c_getf_set(count_cursor, lock_flags, &currentkey, ydb_getf_do_nothing, NULL); r = toku_c_getf_set(count_cursor, lock_flags, &currentkey, ydb_getf_do_nothing, NULL);
...@@ -4218,7 +4256,7 @@ db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTIO ...@@ -4218,7 +4256,7 @@ db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTIO
DBC *c; DBC *c;
uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW); uint32_t create_flags = flags & (DB_ISOLATION_FLAGS | DB_RMW);
flags &= ~DB_ISOLATION_FLAGS; flags &= ~DB_ISOLATION_FLAGS;
int r = toku_db_cursor(db, txn, &c, create_flags, 1); int r = toku_db_cursor(db, txn, &c, create_flags | DBC_DISABLE_PREFETCHING, 1);
if (r==0) { if (r==0) {
r = toku_c_getf_set(c, flags, key, f, extra); r = toku_c_getf_set(c, flags, key, f, extra);
int r2 = toku_c_close(c); int r2 = toku_c_close(c);
...@@ -4464,7 +4502,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar ...@@ -4464,7 +4502,7 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
DB_ENV* env = db->dbenv; DB_ENV* env = db->dbenv;
int r; int r;
size_t result_size = sizeof(DBC)+sizeof(struct __toku_dbc_internal); // internal stuff stuck on the end size_t result_size = sizeof(DBC)+sizeof(struct __toku_dbc_internal); // internal stuff stuck on the end
if (flags & ~(DB_SERIALIZABLE | DB_INHERIT_ISOLATION | DB_RMW)) { if (flags & ~(DB_SERIALIZABLE | DB_INHERIT_ISOLATION | DB_RMW | DBC_DISABLE_PREFETCHING)) {
return toku_ydb_do_error( return toku_ydb_do_error(
env, env,
EINVAL, EINVAL,
...@@ -4527,7 +4565,8 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar ...@@ -4527,7 +4565,8 @@ toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporar
db->i->brt, db->i->brt,
&dbc_struct_i(result)->c, &dbc_struct_i(result)->c,
txn ? db_txn_struct_i(txn)->tokutxn : NULL, txn ? db_txn_struct_i(txn)->tokutxn : NULL,
is_snapshot_read is_snapshot_read,
((flags & DBC_DISABLE_PREFETCHING) != 0)
); );
assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW); assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW);
if (r == 0) { if (r == 0) {
...@@ -4565,7 +4604,7 @@ toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags) { ...@@ -4565,7 +4604,7 @@ toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags) {
DBC *dbc; DBC *dbc;
r = toku_db_cursor(db, txn, &dbc, iso_flags, 1); r = toku_db_cursor(db, txn, &dbc, iso_flags | DBC_DISABLE_PREFETCHING, 1);
if (r!=0) return r; if (r!=0) return r;
u_int32_t c_get_flags = DB_SET; u_int32_t c_get_flags = DB_SET;
r = toku_c_get(dbc, key, data, c_get_flags | lock_flags); r = toku_c_get(dbc, key, data, c_get_flags | lock_flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment