Commit 44630e09 authored by Mattias Jonsson's avatar Mattias Jonsson

Bug#37721: ORDER BY when WHERE contains non-partitioned

index column

There was actually two problems
1) when clustered pk, order by non pk index should also
compare with pk as last resort to differ keys from each
other
2) bug in the index search handling in ha_partition (was
found when extending the test case

Solution to 1 was to include the pk in key compare if
clustered pk and search on other index.

Solution for 2 was to remove the optimization from
ordered scan to unordered scan if clustered pk.
parent e300184c
# Bug#37721, test of ORDER BY on PK and WHERE on INDEX
CREATE TABLE t1 (
a INT,
b INT,
PRIMARY KEY (a),
INDEX (b))
ENGINE InnoDB
PARTITION BY HASH(a)
PARTITIONS 3;
INSERT INTO t1 VALUES (0,0),(4,0),(2,0);
SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
a
0
2
4
SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
a
4
2
0
ALTER TABLE t1 DROP INDEX b;
SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
a
0
2
4
SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
a
4
2
0
DROP TABLE t1;
CREATE TABLE t1 (
a VARCHAR(600),
b VARCHAR(600),
PRIMARY KEY (a),
INDEX (b))
ENGINE InnoDB
PARTITION BY KEY(a)
PARTITIONS 3;
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'1'),repeat('0',257));
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'3'),repeat('0',257));
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'2'),repeat('0',257));
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
right(a,1)
1
2
3
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
right(a,1)
3
2
1
ALTER TABLE t1 DROP INDEX b;
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
right(a,1)
1
2
3
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
right(a,1)
3
2
1
DROP TABLE t1;
# Bug#32948 # Bug#32948
CREATE TABLE t1 (c1 INT, PRIMARY KEY (c1)) ENGINE=INNODB; CREATE TABLE t1 (c1 INT, PRIMARY KEY (c1)) ENGINE=INNODB;
CREATE TABLE t2 (c1 INT, PRIMARY KEY (c1), CREATE TABLE t2 (c1 INT, PRIMARY KEY (c1),
......
--source include/have_partition.inc --source include/have_partition.inc
--source include/have_innodb.inc --source include/have_innodb.inc
#
# Bug37721: ORDER BY when WHERE contains non-partitioned index column
# wrong order since it did not use pk as second compare
--echo # Bug#37721, test of ORDER BY on PK and WHERE on INDEX
CREATE TABLE t1 (
a INT,
b INT,
PRIMARY KEY (a),
INDEX (b))
ENGINE InnoDB
PARTITION BY HASH(a)
PARTITIONS 3;
# This will give the middle partition the highest value
INSERT INTO t1 VALUES (0,0),(4,0),(2,0);
SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
ALTER TABLE t1 DROP INDEX b;
SELECT a FROM t1 WHERE b = 0 ORDER BY a ASC;
SELECT a FROM t1 WHERE b = 0 ORDER BY a DESC;
DROP TABLE t1;
CREATE TABLE t1 (
a VARCHAR(600),
b VARCHAR(600),
PRIMARY KEY (a),
INDEX (b))
ENGINE InnoDB
PARTITION BY KEY(a)
PARTITIONS 3;
# This will give the middle partition the highest value
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'1'),repeat('0',257));
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'3'),repeat('0',257));
INSERT INTO t1 VALUES (concat(repeat('MySQL',100),'2'),repeat('0',257));
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
ALTER TABLE t1 DROP INDEX b;
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a ASC;
SELECT right(a,1) FROM t1 WHERE b = repeat('0',257) ORDER BY a DESC;
DROP TABLE t1;
#
# Bug#32948 - FKs allowed to reference partitioned table # Bug#32948 - FKs allowed to reference partitioned table
# #
-- echo # Bug#32948 -- echo # Bug#32948
......
...@@ -239,7 +239,8 @@ void ha_partition::init_handler_variables() ...@@ -239,7 +239,8 @@ void ha_partition::init_handler_variables()
m_rec_length= 0; m_rec_length= 0;
m_last_part= 0; m_last_part= 0;
m_rec0= 0; m_rec0= 0;
m_curr_key_info= 0; m_curr_key_info[0]= NULL;
m_curr_key_info[1]= NULL;
/* /*
this allows blackhole to work properly this allows blackhole to work properly
*/ */
...@@ -3604,11 +3605,24 @@ int ha_partition::index_init(uint inx, bool sorted) ...@@ -3604,11 +3605,24 @@ int ha_partition::index_init(uint inx, bool sorted)
handler **file; handler **file;
DBUG_ENTER("ha_partition::index_init"); DBUG_ENTER("ha_partition::index_init");
DBUG_PRINT("info", ("inx %u sorted %u", inx, sorted));
active_index= inx; active_index= inx;
m_part_spec.start_part= NO_CURRENT_PART_ID; m_part_spec.start_part= NO_CURRENT_PART_ID;
m_start_key.length= 0; m_start_key.length= 0;
m_ordered= sorted; m_ordered= sorted;
m_curr_key_info= table->key_info+inx; m_curr_key_info[0]= table->key_info+inx;
if (m_pkey_is_clustered && table->s->primary_key != MAX_KEY)
{
/*
if PK is clustered, then the key cmp must use the pk to
differentiate between equal key in given index.
*/
DBUG_PRINT("info", ("Clustered pk, using pk as secondary cmp"));
m_curr_key_info[1]= table->key_info+table->s->primary_key;
m_curr_key_info[2]= NULL;
}
else
m_curr_key_info[1]= NULL;
/* /*
Some handlers only read fields as specified by the bitmap for the Some handlers only read fields as specified by the bitmap for the
read set. For partitioned handlers we always require that the read set. For partitioned handlers we always require that the
...@@ -3633,9 +3647,13 @@ int ha_partition::index_init(uint inx, bool sorted) ...@@ -3633,9 +3647,13 @@ int ha_partition::index_init(uint inx, bool sorted)
TODO: handle COUNT(*) queries via unordered scan. TODO: handle COUNT(*) queries via unordered scan.
*/ */
uint i; uint i;
for (i= 0; i < m_curr_key_info->key_parts; i++) KEY **key_info= m_curr_key_info;
do
{
for (i= 0; i < (*key_info)->key_parts; i++)
bitmap_set_bit(table->read_set, bitmap_set_bit(table->read_set,
m_curr_key_info->key_part[i].field->field_index); (*key_info)->key_part[i].field->field_index);
} while (*(++key_info));
} }
file= m_file; file= m_file;
do do
...@@ -3692,10 +3710,10 @@ int ha_partition::index_end() ...@@ -3692,10 +3710,10 @@ int ha_partition::index_end()
Read one record in an index scan and start an index scan Read one record in an index scan and start an index scan
SYNOPSIS SYNOPSIS
index_read() index_read_map()
buf Read row in MySQL Row Format buf Read row in MySQL Row Format
key Key parts in consecutive order key Key parts in consecutive order
key_len Total length of key parts keypart_map Which part of key is used
find_flag What type of key condition is used find_flag What type of key condition is used
RETURN VALUE RETURN VALUE
...@@ -3703,12 +3721,12 @@ int ha_partition::index_end() ...@@ -3703,12 +3721,12 @@ int ha_partition::index_end()
0 Success 0 Success
DESCRIPTION DESCRIPTION
index_read starts a new index scan using a start key. The MySQL Server index_read_map starts a new index scan using a start key. The MySQL Server
will check the end key on its own. Thus to function properly the will check the end key on its own. Thus to function properly the
partitioned handler need to ensure that it delivers records in the sort partitioned handler need to ensure that it delivers records in the sort
order of the MySQL Server. order of the MySQL Server.
index_read can be restarted without calling index_end on the previous index_read_map can be restarted without calling index_end on the previous
index scan and without calling index_init. In this case the index_read index scan and without calling index_init. In this case the index_read_map
is on the same index as the previous index_scan. This is particularly is on the same index as the previous index_scan. This is particularly
used in conjuntion with multi read ranges. used in conjuntion with multi read ranges.
*/ */
...@@ -3765,11 +3783,15 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key) ...@@ -3765,11 +3783,15 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
DBUG_ENTER("ha_partition::common_index_read"); DBUG_ENTER("ha_partition::common_index_read");
LINT_INIT(key_len); /* used if have_start_key==TRUE */ LINT_INIT(key_len); /* used if have_start_key==TRUE */
DBUG_PRINT("info", ("m_ordered %u m_ordered_scan_ong %u have_start_key %u",
m_ordered, m_ordered_scan_ongoing, have_start_key));
if (have_start_key) if (have_start_key)
{ {
m_start_key.length= key_len= calculate_key_len(table, active_index, m_start_key.length= key_len= calculate_key_len(table, active_index,
m_start_key.key, m_start_key.key,
m_start_key.keypart_map); m_start_key.keypart_map);
DBUG_ASSERT(key_len);
} }
if ((error= partition_scan_set_up(buf, have_start_key))) if ((error= partition_scan_set_up(buf, have_start_key)))
{ {
...@@ -3784,9 +3806,12 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key) ...@@ -3784,9 +3806,12 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
reverse_order= TRUE; reverse_order= TRUE;
m_ordered_scan_ongoing= TRUE; m_ordered_scan_ongoing= TRUE;
} }
DBUG_PRINT("info", ("m_ordered %u m_o_scan_ong %u have_start_key %u",
m_ordered, m_ordered_scan_ongoing, have_start_key));
if (!m_ordered_scan_ongoing || if (!m_ordered_scan_ongoing ||
(have_start_key && m_start_key.flag == HA_READ_KEY_EXACT && (have_start_key && m_start_key.flag == HA_READ_KEY_EXACT &&
(key_len >= m_curr_key_info->key_length || key_len == 0))) !m_pkey_is_clustered &&
key_len >= m_curr_key_info[0]->key_length))
{ {
/* /*
We use unordered index scan either when read_range is used and flag We use unordered index scan either when read_range is used and flag
...@@ -3799,6 +3824,8 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key) ...@@ -3799,6 +3824,8 @@ int ha_partition::common_index_read(uchar *buf, bool have_start_key)
Need to set unordered scan ongoing since we can come here even when Need to set unordered scan ongoing since we can come here even when
it isn't set. it isn't set.
*/ */
DBUG_PRINT("info", ("key_len %lu (%lu), doing unordered scan",
key_len, m_curr_key_info[0]->key_length));
m_ordered_scan_ongoing= FALSE; m_ordered_scan_ongoing= FALSE;
error= handle_unordered_scan_next_partition(buf); error= handle_unordered_scan_next_partition(buf);
} }
...@@ -3900,7 +3927,7 @@ int ha_partition::common_first_last(uchar *buf) ...@@ -3900,7 +3927,7 @@ int ha_partition::common_first_last(uchar *buf)
Read last using key Read last using key
SYNOPSIS SYNOPSIS
index_read_last() index_read_last_map()
buf Read row in MySQL Row Format buf Read row in MySQL Row Format
key Key key Key
keypart_map Which part of key is used keypart_map Which part of key is used
...@@ -4057,7 +4084,7 @@ int ha_partition::read_range_first(const key_range *start_key, ...@@ -4057,7 +4084,7 @@ int ha_partition::read_range_first(const key_range *start_key,
(end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0); (end_key->flag == HA_READ_AFTER_KEY) ? -1 : 0);
} }
range_key_part= m_curr_key_info->key_part; range_key_part= m_curr_key_info[0]->key_part;
if (start_key) if (start_key)
m_start_key= *start_key; m_start_key= *start_key;
else else
......
...@@ -75,7 +75,14 @@ class ha_partition :public handler ...@@ -75,7 +75,14 @@ class ha_partition :public handler
partition_info *m_part_info; // local reference to partition partition_info *m_part_info; // local reference to partition
Field **m_part_field_array; // Part field array locally to save acc Field **m_part_field_array; // Part field array locally to save acc
uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan uchar *m_ordered_rec_buffer; // Row and key buffer for ord. idx scan
KEY *m_curr_key_info; // Current index /*
Current index.
When used in key_rec_cmp: If clustered pk, index compare
must compare pk if given index is same for two rows.
So normally m_curr_key_info[0]= current index and m_curr_key[1]= NULL,
and if clustered pk, [0]= current index, [1]= pk, [2]= NULL
*/
KEY *m_curr_key_info[3]; // Current index
uchar *m_rec0; // table->record[0] uchar *m_rec0; // table->record[0]
QUEUE m_queue; // Prio queue used by sorted read QUEUE m_queue; // Prio queue used by sorted read
/* /*
......
...@@ -448,16 +448,9 @@ int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length) ...@@ -448,16 +448,9 @@ int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length)
} }
/* /**
Compare two records in index order Compare two records in index order.
SYNOPSIS
key_rec_cmp()
key Index information
rec0 Pointer to table->record[0]
first_rec Pointer to record compare with
second_rec Pointer to record compare against first_rec
DESCRIPTION
This method is set-up such that it can be called directly from the This method is set-up such that it can be called directly from the
priority queue and it is attempted to be optimised as much as possible priority queue and it is attempted to be optimised as much as possible
since this will be called O(N * log N) times while performing a merge since this will be called O(N * log N) times while performing a merge
...@@ -471,21 +464,43 @@ int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length) ...@@ -471,21 +464,43 @@ int key_cmp(KEY_PART_INFO *key_part, const uchar *key, uint key_length)
We first check for NULL values, if there are no NULL values we use We first check for NULL values, if there are no NULL values we use
a compare method that gets two field pointers and a max length a compare method that gets two field pointers and a max length
and return the result of the comparison. and return the result of the comparison.
key is a null terminated array, since in some cases (clustered
primary key) it must compare more than one index.
@param key Null terminated array of index information
@param first_rec Pointer to record compare with
@param second_rec Pointer to record compare against first_rec
@return Return value is SIGN(first_rec - second_rec)
@retval 0 Keys are equal
@retval -1 second_rec is greater than first_rec
@retval +1 first_rec is greater than second_rec
*/ */
int key_rec_cmp(void *key, uchar *first_rec, uchar *second_rec) int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec)
{ {
KEY *key_info= (KEY*)key; KEY **key= (KEY**) key_p;
uint key_parts= key_info->key_parts, i= 0; KEY *key_info= *(key++); // Start with first key
uint key_parts, key_part_num;
KEY_PART_INFO *key_part= key_info->key_part; KEY_PART_INFO *key_part= key_info->key_part;
uchar *rec0= key_part->field->ptr - key_part->offset; uchar *rec0= key_part->field->ptr - key_part->offset;
my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0; my_ptrdiff_t first_diff= first_rec - rec0, sec_diff= second_rec - rec0;
int result= 0; int result= 0;
Field *field;
DBUG_ENTER("key_rec_cmp"); DBUG_ENTER("key_rec_cmp");
/* loop over all given keys */
do do
{ {
Field *field= key_part->field; key_parts= key_info->key_parts;
key_part= key_info->key_part;
key_part_num= 0;
/* loop over every key part */
do
{
field= key_part->field;
if (key_part->null_bit) if (key_part->null_bit)
{ {
...@@ -522,10 +537,15 @@ int key_rec_cmp(void *key, uchar *first_rec, uchar *second_rec) ...@@ -522,10 +537,15 @@ int key_rec_cmp(void *key, uchar *first_rec, uchar *second_rec)
max length. The exceptions are the BLOB and VARCHAR field types max length. The exceptions are the BLOB and VARCHAR field types
that take the max length into account. that take the max length into account.
*/ */
result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff, if ((result= field->cmp_max(field->ptr+first_diff, field->ptr+sec_diff,
key_part->length); key_part->length)))
DBUG_RETURN(result);
next_loop: next_loop:
key_part++; key_part++;
} while (!result && ++i < key_parts); key_part_num++;
DBUG_RETURN(result); } while (key_part_num < key_parts); /* this key is done */
key_info= *(key++);
} while (key_info); /* no more keys to test */
DBUG_RETURN(0);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment