Commit 6cecf61a authored by Alexander Barkov

MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join

There were erroneous calls to charpos() in key_hashnr() and key_buf_cmp().
These functions are never called with prefix segments.

The charpos() calls were wrong. Before the change BNLH joins
- could return wrong result sets, as reported in MDEV-34417
- were extremely slow for multi-byte character sets, because
  the hash was calculated on string prefixes, which increased
  the number of collisions drastically.

This patch fixes the wrong result set as reported in MDEV-34417,
as well as (partially) the performance problem reported in MDEV-34352.
parent 2f0e7f66
......@@ -15370,3 +15370,39 @@ DROP TABLE t1;
#
# End of MariaDB-10.2 tests
#
#
# Start of 10.5 tests
#
#
# MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join
#
CREATE TABLE t1 (a VARCHAR(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_danish_ci);
INSERT INTO t1 VALUES ('aaaa'),('åå');
SELECT * FROM t1 WHERE a='aaaa';
a
aaaa
åå
SET join_cache_level=1;
SELECT * FROM t1 NATURAL JOIN t1 t2;
a
aaaa
åå
aaaa
åå
# Expect a BNHL join
SET join_cache_level=3;
EXPLAIN SELECT * FROM t1 NATURAL JOIN t1 t2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 2 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 2003 test.t1.a 2 Using where; Using join buffer (flat, BNLH join)
SELECT * FROM t1 NATURAL JOIN t1 t2;
a
aaaa
åå
aaaa
åå
DROP TABLE t1;
SET join_cache_level=DEFAULT;
#
# End of 10.5 tests
#
......@@ -696,3 +696,32 @@ DROP TABLE t1;
--echo #
--echo # End of MariaDB-10.2 tests
--echo #
--echo #
--echo # Start of 10.5 tests
--echo #
--echo #
--echo # MDEV-34417 Wrong result set with utf8mb4_danish_ci and BNLH join
--echo #
CREATE TABLE t1 (a VARCHAR(500) CHARACTER SET utf8mb4 COLLATE utf8mb4_danish_ci);
INSERT INTO t1 VALUES ('aaaa'),('åå');
SELECT * FROM t1 WHERE a='aaaa';
SET join_cache_level=1;
SELECT * FROM t1 NATURAL JOIN t1 t2;
--echo # Expect a BNHL join
SET join_cache_level=3;
EXPLAIN SELECT * FROM t1 NATURAL JOIN t1 t2;
SELECT * FROM t1 NATURAL JOIN t1 t2;
DROP TABLE t1;
SET join_cache_level=DEFAULT;
--echo #
--echo # End of 10.5 tests
--echo #
......@@ -754,13 +754,11 @@ ulong key_hashnr(KEY *key_info, uint used_key_parts, const uchar *key)
if (is_string)
{
if (cs->mbmaxlen > 1)
{
size_t char_length= cs->charpos(pos + pack_length,
pos + pack_length + length,
length / cs->mbmaxlen);
set_if_smaller(length, char_length);
}
/*
Prefix keys are not possible in BNLH joins.
Use the whole string to calculate the hash.
*/
DBUG_ASSERT((key_part->key_part_flag & HA_PART_KEY_SEG) == 0);
cs->hash_sort(pos+pack_length, length, &nr, &nr2);
key+= pack_length;
}
......@@ -864,25 +862,13 @@ bool key_buf_cmp(KEY *key_info, uint used_key_parts,
if (is_string)
{
/*
Compare the strings taking into account length in characters
and collation
Prefix keys are not possible in BNLH joins.
Compare whole strings.
*/
size_t byte_len1= length1, byte_len2= length2;
if (cs->mbmaxlen > 1)
{
size_t char_length1= cs->charpos(pos1 + pack_length,
pos1 + pack_length + length1,
length1 / cs->mbmaxlen);
size_t char_length2= cs->charpos(pos2 + pack_length,
pos2 + pack_length + length2,
length2 / cs->mbmaxlen);
set_if_smaller(length1, char_length1);
set_if_smaller(length2, char_length2);
}
if (length1 != length2 ||
cs->strnncollsp(pos1 + pack_length, byte_len1,
pos2 + pack_length, byte_len2))
return TRUE;
DBUG_ASSERT((key_part->key_part_flag & HA_PART_KEY_SEG) == 0);
if (cs->strnncollsp(pos1 + pack_length, length1,
pos2 + pack_length, length2))
return true;
key1+= pack_length; key2+= pack_length;
}
else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment