Commit 42282c10 authored by Mattias Jonsson's avatar Mattias Jonsson

Bug#13694811: THE OPTIMIZER WRONGLY USES THE FIRST INNODB

                           PARTITION STATISTICS

Problem was the fix for bug#11756867; It always used the first
partitions, and stopped after it checked 10 [sub]partitions.
(or until it found a partition which would contain a match).

This results in bad statistics for tables where the first 10 partitions
don't represent the majority of the data (like when the first 10
partitions only contained a few rows in total).

The solution was to take statisics from the partitions containing
the most rows instead:

Added an array of partition ids which is sorted by number of records
in descending order.

this array is used in records_in_range to cover as many records as
possible in as few calls as possible.

Also changed the limit of how many partitions to use for the statistics
from a static max of 10 partitions, into a dynamic model:
Maximum number of partitions is now log2(total number of partitions)
taken from the ordered array.
It will continue calling partitions records_in_range until it has 
checked:
(total rows in matching partitions) * (maximum number of partitions)
/ (number of used partitions)

Also reverted the changes for ha_partition::scan_time() and
ha_partition::estimate_rows_upper_bound() to before
the fix of  bug#11756867. Since they are not as slow as
records_in_range.
parent 4af45463
drop table if exists t1, t2;
#
# Bug#13694811: THE OPTIMIZER WRONGLY USES THE FIRST
# INNODB PARTITION STATISTICS
#
CREATE TABLE t1
(a INT,
b varchar(64),
PRIMARY KEY (a),
KEY (b))
ENGINE = InnoDB
PARTITION BY RANGE (a)
SUBPARTITION BY HASH (a) SUBPARTITIONS 10
(PARTITION pNeg VALUES LESS THAN (0),
PARTITION p0 VALUES LESS THAN (1000),
PARTITION pMAX VALUES LESS THAN MAXVALUE);
# Only one row in the first 10 subpartitions
INSERT INTO t1 VALUES (-1, 'Only negative pk value');
INSERT INTO t1 VALUES (0, 'Mod Zero'), (1, 'One'), (2, 'Two'), (3, 'Three'),
(10, 'Zero'), (11, 'Mod One'), (12, 'Mod Two'), (13, 'Mod Three'),
(20, '0'), (21, '1'), (22, '2'), (23, '3'),
(4, '4'), (5, '5'), (6, '6'), (7, '7'), (8, '8'), (9, '9');
INSERT INTO t1 SELECT a + 30, b FROM t1 WHERE a >= 0;
ANALYZE TABLE t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
EXPLAIN SELECT b FROM t1 WHERE b between 'L' and 'N' AND a > -100;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range PRIMARY,b b 67 NULL 18 Using where; Using index
DROP TABLE t1;
#
# Bug#56287: crash when using Partition datetime in sub in query
#
CREATE TABLE t1
......
This diff is collapsed.
......@@ -7,6 +7,35 @@ drop table if exists t1, t2;
let $MYSQLD_DATADIR= `SELECT @@datadir`;
--echo #
--echo # Bug#13694811: THE OPTIMIZER WRONGLY USES THE FIRST
--echo # INNODB PARTITION STATISTICS
--echo #
CREATE TABLE t1
(a INT,
b varchar(64),
PRIMARY KEY (a),
KEY (b))
ENGINE = InnoDB
PARTITION BY RANGE (a)
SUBPARTITION BY HASH (a) SUBPARTITIONS 10
(PARTITION pNeg VALUES LESS THAN (0),
PARTITION p0 VALUES LESS THAN (1000),
PARTITION pMAX VALUES LESS THAN MAXVALUE);
--echo # Only one row in the first 10 subpartitions
INSERT INTO t1 VALUES (-1, 'Only negative pk value');
INSERT INTO t1 VALUES (0, 'Mod Zero'), (1, 'One'), (2, 'Two'), (3, 'Three'),
(10, 'Zero'), (11, 'Mod One'), (12, 'Mod Two'), (13, 'Mod Three'),
(20, '0'), (21, '1'), (22, '2'), (23, '3'),
(4, '4'), (5, '5'), (6, '6'), (7, '7'), (8, '8'), (9, '9');
INSERT INTO t1 SELECT a + 30, b FROM t1 WHERE a >= 0;
ANALYZE TABLE t1;
EXPLAIN SELECT b FROM t1 WHERE b between 'L' and 'N' AND a > -100;
DROP TABLE t1;
--echo #
--echo # Bug#56287: crash when using Partition datetime in sub in query
--echo #
......
This diff is collapsed.
......@@ -199,6 +199,12 @@ private:
ha_rows m_bulk_inserted_rows;
/** used for prediction of start_bulk_insert rows */
enum_monotonicity_info m_part_func_monotonicity_info;
/** Sorted array of partition ids in descending order of number of rows. */
uint32 *m_part_ids_sorted_by_num_of_records;
/* Compare function for my_qsort2, for reversed order. */
static int compare_number_of_records(ha_partition *me,
const uint32 *a,
const uint32 *b);
public:
handler *clone(const char *name, MEM_ROOT *mem_root);
virtual void set_part_info(partition_info *part_info)
......@@ -582,15 +588,9 @@ public:
*/
private:
/*
Helper function to get the minimum number of partitions to use for
the optimizer hints/cost calls.
*/
void partitions_optimizer_call_preparations(uint *num_used_parts,
uint *check_min_num,
uint *first);
ha_rows estimate_rows(bool is_records_in_range, uint inx,
key_range *min_key, key_range *max_key);
/* Helper functions for optimizer hints. */
ha_rows min_rows_for_estimate();
uint get_biggest_used_partition(uint *part_index);
public:
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment