#261 auto detect index scans to fix perf problem with partitions

5f034b7a · Rich Prohaska · 94d73529 · 5f034b7a · 5f034b7a · 5f034b7a
Commit 5f034b7a authored Jul 18, 2014 by Rich Prohaska
4 changed files
--- a/mysql-test/suite/tokudb.bugs/r/part_index_scan.result
+++ b/mysql-test/suite/tokudb.bugs/r/part_index_scan.result
+set default_storage_engine='tokudb';
+drop table if exists t,t1,t2,t3;
+CREATE TABLE `t` (
+`num` int(10) unsigned auto_increment NOT NULL,
+`val` varchar(32) DEFAULT NULL,
+PRIMARY KEY (`num`)
+);
+INSERT INTO t values (null,null);
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+SELECT count(*) FROM t;
+count(*)
+8388608
+CREATE TABLE `t1` (
+`num` int(10) unsigned NOT NULL,
+`val` varchar(32) DEFAULT NULL,
+PRIMARY KEY (`num`)
+);
+CREATE TABLE `t2` (
+`num` int(10) unsigned NOT NULL,
+`val` varchar(32) DEFAULT NULL,
+PRIMARY KEY (`num`)
+) 
+PARTITION BY HASH (num) PARTITIONS 10;
+CREATE TABLE `t3` (
+`num` int(10) unsigned NOT NULL,
+`val` varchar(32) DEFAULT NULL,
+PRIMARY KEY (`num`)
+)
+PARTITION BY RANGE (num)
+(PARTITION p0 VALUES LESS THAN (1000000),
+PARTITION p1 VALUES LESS THAN (2000000),
+PARTITION p2 VALUES LESS THAN (3000000),
+PARTITION p3 VALUES LESS THAN (4000000),
+PARTITION p4 VALUES LESS THAN (5000000),
+PARTITION p5 VALUES LESS THAN (6000000),
+PARTITION p6 VALUES LESS THAN (7000000),
+PARTITION p7 VALUES LESS THAN (8000000),
+PARTITION px VALUES LESS THAN MAXVALUE);
+insert into t1 select * from t;
+insert into t2 select * from t;
+insert into t3 select * from t;
+select count(*) from t1;
+count(*)
+8388608
+select count(*) from t2;
+count(*)
+8388608
+1
+select count(*) from t3;
+count(*)
+8388608
+1
+select count(*) from t1 where num>7000000;
+count(*)
+1847274
+select count(*) from t2 where num>7000000;
+count(*)
+1847274
+1
+select count(*) from t3 where num>7000000;
+count(*)
+1847274
+1
+drop table if exists t,t1,t2,t3;
--- a/mysql-test/suite/tokudb.bugs/t/part_index_scan.test
+++ b/mysql-test/suite/tokudb.bugs/t/part_index_scan.test
+# verify that index scans on partitions are not slow
+# due to tokudb bulk fetch not being used
+source include/have_tokudb.inc;
+set default_storage_engine='tokudb';
+disable_warnings;
+drop table if exists t,t1,t2,t3;
+
+CREATE TABLE `t` (
+  `num` int(10) unsigned auto_increment NOT NULL,
+  `val` varchar(32) DEFAULT NULL,
+  PRIMARY KEY (`num`)
+);
+
+# put 8M rows into t
+INSERT INTO t values (null,null);
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+INSERT INTO t SELECT null,null FROM t;
+SELECT count(*) FROM t;
+
+CREATE TABLE `t1` (
+  `num` int(10) unsigned NOT NULL,
+  `val` varchar(32) DEFAULT NULL,
+  PRIMARY KEY (`num`)
+);
+
+CREATE TABLE `t2` (
+  `num` int(10) unsigned NOT NULL,
+  `val` varchar(32) DEFAULT NULL,
+  PRIMARY KEY (`num`)
+) 
+PARTITION BY HASH (num) PARTITIONS 10;
+
+CREATE TABLE `t3` (
+  `num` int(10) unsigned NOT NULL,
+  `val` varchar(32) DEFAULT NULL,
+  PRIMARY KEY (`num`)
+)
+PARTITION BY RANGE (num)
+(PARTITION p0 VALUES LESS THAN (1000000),
+ PARTITION p1 VALUES LESS THAN (2000000),
+ PARTITION p2 VALUES LESS THAN (3000000),
+ PARTITION p3 VALUES LESS THAN (4000000),
+ PARTITION p4 VALUES LESS THAN (5000000),
+ PARTITION p5 VALUES LESS THAN (6000000),
+ PARTITION p6 VALUES LESS THAN (7000000),
+ PARTITION p7 VALUES LESS THAN (8000000),
+ PARTITION px VALUES LESS THAN MAXVALUE);
+
+insert into t1 select * from t;
+insert into t2 select * from t;
+insert into t3 select * from t;
+
+# verify that full index scans on partitioned tables t2 and t3 are comparable to a non-partitioned table t1
+let $s = `select to_seconds(now())`;
+select count(*) from t1;
+let $t1 = `select to_seconds(now()) - $s`;
+# echo $t1;
+
+let $s = `select to_seconds(now())`;
+select count(*) from t2;
+let $t2 = `select to_seconds(now()) - $s`;
+# echo $t2;
+let $d = `select abs($t2 - $t1) <= $t1`;
+echo $d;
+
+let $s = `select to_seconds(now())`;
+select count(*) from t3;
+let $t3 = `select to_seconds(now()) - $s`;
+# echo $t3;
+let $d = `select abs($t3 - $t1) <= $t1`;
+echo $d;
+
+let $s = `select to_seconds(now())`;
+select count(*) from t1 where num>7000000;
+let $t1 = `select to_seconds(now()) - $s`;
+# echo $t1;
+
+let $s = `select to_seconds(now())`;
+select count(*) from t2 where num>7000000;
+let $t2 = `select to_seconds(now()) - $s`;
+# echo $t2;
+let $d = `select abs($t2 - $t1) <= $t1`;
+echo $d;
+
+let $s = `select to_seconds(now())`;
+select count(*) from t3 where num>7000000;
+let $t3 = `select to_seconds(now()) - $s`;
+# echo $t3;
+let $d = `select abs($t3 - $t1) <= $t1`;
+echo $d;
+
+enable_warnings;
+drop table if exists t,t1,t2,t3;
--- a/storage/tokudb/ha_tokudb.cc
+++ b/storage/tokudb/ha_tokudb.cc
@@ -4504,6 +4504,7 @@ int ha_tokudb::index_init(uint keynr, bool sorted) {
    }
    invalidate_bulk_fetch();
    doing_bulk_fetch = false;
+    maybe_index_scan = false;
    error = 0;
 exit:
    TOKUDB_HANDLER_DBUG_RETURN(error);
@@ -5246,86 +5247,91 @@ int ha_tokudb::fill_range_query_buf(
 }

 int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_key_read) {
-    int error = 0; 
-    uint32_t flags = SET_PRELOCK_FLAG(0);
-    THD* thd = ha_thd();
-    tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(thd, tokudb_hton);;
-    bool need_val;
+    int error = 0;
    HANDLE_INVALID_CURSOR();

-    // we need to read the val of what we retrieve if
-    // we do NOT have a covering index AND we are using a clustering secondary
-    // key
-    need_val = (do_key_read == 0) && 
-                (tokudb_active_index == primary_key || 
-                 key_is_clustering(&table->key_info[tokudb_active_index])
-                       );
-
-    if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) {
-        error = read_data_from_range_query_buff(buf, need_val, do_key_read);
-    }
-    else if (icp_went_out_of_range) {
-        icp_went_out_of_range = false;
-        error = HA_ERR_END_OF_FILE;
+    if (maybe_index_scan) {
+        maybe_index_scan = false;
+        if (!range_lock_grabbed) {
+            error = prepare_index_scan();
+        }
    }
-    else {
-        invalidate_bulk_fetch();
-        if (doing_bulk_fetch) {
-            struct smart_dbt_bf_info bf_info;
-            bf_info.ha = this;
-            // you need the val if you have a clustering index and key_read is not 0;
-            bf_info.direction = direction;
-            bf_info.thd = ha_thd();
-            bf_info.need_val = need_val;
-            bf_info.buf = buf;
-            bf_info.key_to_compare = key_to_compare;
-            //
-            // call c_getf_next with purpose of filling in range_query_buff
-            //
-            rows_fetched_using_bulk_fetch = 0;
-            // it is expected that we can do ICP in the smart_dbt_bf_callback
-            // as a result, it's possible we don't return any data because
-            // none of the rows matched the index condition. Therefore, we need
-            // this while loop. icp_out_of_range will be set if we hit a row that
-            // the index condition states is out of our range. When that hits,
-            // we know all the data in the buffer is the last data we will retrieve
-            while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) {
-                if (direction > 0) {
-                    error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info);
-                } else {
-                    error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info);
-                }
-            }
-            // if there is no data set and we went out of range, 
-            // then there is nothing to return
-            if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) {
-                icp_went_out_of_range = false;
-                error = HA_ERR_END_OF_FILE;
-            }
-            if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) {
-                bulk_fetch_iteration++;
-            }
+    
+    if (!error) {
+        uint32_t flags = SET_PRELOCK_FLAG(0);

-            error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index);
-            if (error) { goto cleanup; }
-            
-            //
-            // now that range_query_buff is filled, read an element
-            //
+        // we need to read the val of what we retrieve if
+        // we do NOT have a covering index AND we are using a clustering secondary
+        // key
+        bool need_val = (do_key_read == 0) && 
+            (tokudb_active_index == primary_key || key_is_clustering(&table->key_info[tokudb_active_index]));
+
+        if ((bytes_used_in_range_query_buff - curr_range_query_buff_offset) > 0) {
            error = read_data_from_range_query_buff(buf, need_val, do_key_read);
        }
+        else if (icp_went_out_of_range) {
+            icp_went_out_of_range = false;
+            error = HA_ERR_END_OF_FILE;
+        }
        else {
-            struct smart_dbt_info info;
-            info.ha = this;
-            info.buf = buf;
-            info.keynr = tokudb_active_index;
+            invalidate_bulk_fetch();
+            if (doing_bulk_fetch) {
+                struct smart_dbt_bf_info bf_info;
+                bf_info.ha = this;
+                // need_val (computed above) is true only when key_read is 0 and the index is the primary key or a clustering key
+                bf_info.direction = direction;
+                bf_info.thd = ha_thd();
+                bf_info.need_val = need_val;
+                bf_info.buf = buf;
+                bf_info.key_to_compare = key_to_compare;
+                //
+                // call c_getf_next with purpose of filling in range_query_buff
+                //
+                rows_fetched_using_bulk_fetch = 0;
+                // it is expected that we can do ICP in the smart_dbt_bf_callback
+                // as a result, it's possible we don't return any data because
+                // none of the rows matched the index condition. Therefore, we need
+                // this while loop. icp_out_of_range will be set if we hit a row that
+                // the index condition states is out of our range. When that hits,
+                // we know all the data in the buffer is the last data we will retrieve
+                while (bytes_used_in_range_query_buff == 0 && !icp_went_out_of_range && error == 0) {
+                    if (direction > 0) {
+                        error = cursor->c_getf_next(cursor, flags, smart_dbt_bf_callback, &bf_info);
+                    } else {
+                        error = cursor->c_getf_prev(cursor, flags, smart_dbt_bf_callback, &bf_info);
+                    }
+                }
+                // if there is no data set and we went out of range, 
+                // then there is nothing to return
+                if (bytes_used_in_range_query_buff == 0 && icp_went_out_of_range) {
+                    icp_went_out_of_range = false;
+                    error = HA_ERR_END_OF_FILE;
+                }
+                if (bulk_fetch_iteration < HA_TOKU_BULK_FETCH_ITERATION_MAX) {
+                    bulk_fetch_iteration++;
+                }

-            if (direction > 0) {
-                error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
-            } else {
-                error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
+                error = handle_cursor_error(error, HA_ERR_END_OF_FILE,tokudb_active_index);
+                if (error) { goto cleanup; }
+            
+                //
+                // now that range_query_buff is filled, read an element
+                //
+                error = read_data_from_range_query_buff(buf, need_val, do_key_read);
+            }
+            else {
+                struct smart_dbt_info info;
+                info.ha = this;
+                info.buf = buf;
+                info.keynr = tokudb_active_index;
+                
+                if (direction > 0) {
+                    error = cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
+                } else {
+                    error = cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK(do_key_read), &info);
+                }
+                error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index);
            }
-            error = handle_cursor_error(error, HA_ERR_END_OF_FILE, tokudb_active_index);
        }
    }

@@ -5337,12 +5343,15 @@ int ha_tokudb::get_next(uchar* buf, int direction, DBT* key_to_compare, bool do_
    // read the full row by doing a point query into the 
    // main table.
    //
-    
    if (!error && !do_key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) {
        error = read_full_row(buf);
    }
-    trx->stmt_progress.queried++;
-    track_progress(thd);
+
+    if (!error) {
+        tokudb_trx_data* trx = (tokudb_trx_data *) thd_get_ha_data(ha_thd(), tokudb_hton);
+        trx->stmt_progress.queried++;
+        track_progress(ha_thd());
+    }
 cleanup:
    return error;
 }
@@ -5411,8 +5420,7 @@ int ha_tokudb::index_first(uchar * buf) {
    info.buf = buf;
    info.keynr = tokudb_active_index;

-    error = cursor->c_getf_first(cursor, flags,
-            SMART_DBT_CALLBACK(key_read), &info);
+    error = cursor->c_getf_first(cursor, flags, SMART_DBT_CALLBACK(key_read), &info);
    error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index);

    //
@@ -5422,9 +5430,11 @@ int ha_tokudb::index_first(uchar * buf) {
    if (!error && !key_read && (tokudb_active_index != primary_key) && !key_is_clustering(&table->key_info[tokudb_active_index])) {
        error = read_full_row(buf);
    }
-    trx->stmt_progress.queried++;
+    if (trx) {
+        trx->stmt_progress.queried++;
+    }
    track_progress(thd);
-    
+    maybe_index_scan = true;    
 cleanup:
    TOKUDB_HANDLER_DBUG_RETURN(error);
 }
@@ -5454,8 +5464,7 @@ int ha_tokudb::index_last(uchar * buf) {
    info.buf = buf;
    info.keynr = tokudb_active_index;

-    error = cursor->c_getf_last(cursor, flags,
-            SMART_DBT_CALLBACK(key_read), &info);
+    error = cursor->c_getf_last(cursor, flags, SMART_DBT_CALLBACK(key_read), &info);
    error = handle_cursor_error(error,HA_ERR_END_OF_FILE,tokudb_active_index);
    //
    // still need to get entire contents of the row if operation done on
@@ -5469,6 +5478,7 @@ int ha_tokudb::index_last(uchar * buf) {
        trx->stmt_progress.queried++;
    }
    track_progress(thd);
+    maybe_index_scan = true;
 cleanup:
    TOKUDB_HANDLER_DBUG_RETURN(error);
 }

--- a/storage/tokudb/ha_tokudb.h
+++ b/storage/tokudb/ha_tokudb.h
@@ -251,6 +251,7 @@ class ha_tokudb : public handler {
    uint64_t bulk_fetch_iteration;
    uint64_t rows_fetched_using_bulk_fetch;
    bool doing_bulk_fetch;
+    bool maybe_index_scan;

    //
    // buffer used to temporarily store a "packed key"