Commit df72c57d authored by Alexander Barkov

MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB

Also fixes: MDEV-30050 Inconsistent results of DISTINCT with NOPAD

Problem:

Key segments for CHAR columns were compared using strnncollsp()
for the MyISAM and Aria engines.

This did not work correctly when the engine applied trailing
space compression.

Fix:

Replacing ha_compare_text() calls with calls to the new functions:

- ha_compare_char_varying()
- ha_compare_char_fixed()
- ha_compare_word()
- ha_compare_word_prefix()
- ha_compare_word_or_prefix()

The code branch corresponding to comparison of CHAR column keys
(HA_KEYTYPE_TEXT segment type) now uses ha_compare_char_fixed()
which calls strnncollsp_nchars().

This patch does not change the behavior for the rest of the code:
- comparison of VARCHAR/TEXT column keys
  (HA_KEYTYPE_VARTEXT1, HA_KEYTYPE_VARTEXT2 segments types)
- comparison in the fulltext code
parent 09e23708
......@@ -110,8 +110,135 @@ static inline void set_rec_bits(uint16 bits, uchar *ptr, uchar ofs, uint len)
#define clr_rec_bits(bit_ptr, bit_ofs, bit_len) \
set_rec_bits(0, bit_ptr, bit_ofs, bit_len)
extern int ha_compare_text(CHARSET_INFO *, const uchar *, size_t,
const uchar *, size_t , my_bool);
/*
  Compare two VARCHAR values.

  @param charset_info - The character set and collation
  @param a            - The pointer to the first string
  @param a_length     - The length of the first string
  @param b            - The pointer to the second string
  @param b_length     - The length of the second string
  @param b_is_prefix  - Whether "b" is a prefix of "a",
                        e.g. in a prefix key (partial length key).
  @returns            - The result of comparison

  - If "b_is_prefix" is TRUE, the collation's strnncoll() is called in
    prefix mode, so trailing spaces are compared in NO PAD style.
    This is done e.g. when we compare a column value to its prefix key value
    (the value of "a" to the value of "key_a"):
      CREATE TABLE t1 (a VARCHAR(10), KEY key_a (a(5)));

  - If "b_is_prefix" is FALSE, the collation's strnncollsp() is called,
    i.e. the two strings are compared taking into account
    the PAD SPACE/NO PAD attribute of the collation.
*/
static inline int ha_compare_char_varying(CHARSET_INFO *charset_info,
                                          const uchar *a, size_t a_length,
                                          const uchar *b, size_t b_length,
                                          my_bool b_is_prefix)
{
  if (b_is_prefix)
  {
    /* Partial key: "b" only has to match the beginning of "a" */
    return charset_info->coll->strnncoll(charset_info,
                                         a, a_length,
                                         b, b_length,
                                         TRUE/*prefix*/);
  }
  /* Full value comparison */
  return charset_info->coll->strnncollsp(charset_info,
                                         a, a_length,
                                         b, b_length);
}
/*
  Compare two CHAR values of the same declared character length,
  e.g. CHAR(5) to CHAR(5).

  @param charset_info - The character set and collation
  @param a            - The pointer to the first string
  @param a_length     - The length of the first string
  @param b            - The pointer to the second string
  @param b_length     - The length of the second string
  @param nchars       - The declared length (in characters)
  @param b_is_prefix  - Whether "b" is a prefix of "a",
                        e.g. in a prefix key (partial length key).
  @returns            - The result of comparison

  - If "b_is_prefix" is TRUE, the collation's strnncoll() is called in
    prefix mode, so trailing spaces are compared in NO PAD style.
    "nchars" is not used in this case.
    This is done e.g. when we compare a column value to its prefix key value
    (the value of "a" to the value of "key_a"):
      CREATE TABLE t1 (a CHAR(10), KEY key_a (a(5)));

  - If "b_is_prefix" is FALSE, then the two strings are compared
    taking into account the PAD SPACE/NO PAD attribute of the collation.

    Additionally, this function assumes that the underlying storage could
    optionally apply trailing space compression, so values can come into this
    comparison function in different states:
    - all trailing spaces removed
    - some trailing spaces removed
    - no trailing spaces removed (exactly "nchars" characters on the two sides)
    This function virtually reconstructs trailing spaces up to the defined
    length specified in "nchars".
    If either of the sides has more than "nchars" characters,
    then only the leftmost "nchars" characters are compared.
*/
static inline int ha_compare_char_fixed(CHARSET_INFO *charset_info,
                                        const uchar *a, size_t a_length,
                                        const uchar *b, size_t b_length,
                                        size_t nchars,
                                        my_bool b_is_prefix)
{
  if (b_is_prefix)
  {
    /* Partial key: "b" only has to match the beginning of "a" */
    return charset_info->coll->strnncoll(charset_info,
                                         a, a_length,
                                         b, b_length,
                                         TRUE/*prefix*/);
  }
  /*
    Full value comparison limited to "nchars" characters,
    emulating trailing spaces that the engine may have trimmed.
  */
  return charset_info->coll->strnncollsp_nchars(charset_info,
                        a, a_length,
                        b, b_length,
                        nchars,
                        MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES);
}
/*
  Compare two words of a text using the collation's strnncollsp().

  Comparing whole words is a common operation in full-text search:
    SELECT MATCH (title) AGAINST ('word') FROM t1;

  @param charset_info - The character set and collation
  @param a            - The pointer to the first word
  @param a_length     - The length of the first word
  @param b            - The pointer to the second word
  @param b_length     - The length of the second word
  @returns            - The result of comparison
*/
static inline int ha_compare_word(CHARSET_INFO *charset_info,
                                  const uchar *a, size_t a_length,
                                  const uchar *b, size_t b_length)
{
  int cmp= charset_info->coll->strnncollsp(charset_info,
                                           a, a_length,
                                           b, b_length);
  return cmp;
}
/*
  Compare a word of a text to a word prefix,
  using the collation's strnncoll() in prefix mode.

  This is a common operation in full-text search:
    SELECT MATCH (title) AGAINST ('wor*' IN BOOLEAN MODE) FROM t1;

  @param charset_info - The character set and collation
  @param a            - The pointer to the word
  @param a_length     - The length of the word
  @param b            - The pointer to the word prefix
  @param b_length     - The length of the word prefix
  @returns            - The result of comparison
*/
static inline int ha_compare_word_prefix(CHARSET_INFO *charset_info,
                                         const uchar *a, size_t a_length,
                                         const uchar *b, size_t b_length)
{
  int cmp= charset_info->coll->strnncoll(charset_info,
                                         a, a_length,
                                         b, b_length,
                                         TRUE/*b_is_prefix*/);
  return cmp;
}
/*
  Compare words (full match or prefix match), e.g. for full-text search.

  @param charset_info - The character set and collation
  @param a            - The pointer to the word
  @param a_length     - The length of the word
  @param b            - The pointer to the second word, or to a word prefix
  @param b_length     - The length of "b"
  @param b_is_prefix  - If TRUE, "b" is treated as a word prefix and the
                        comparison is done by ha_compare_word_prefix().
                        If FALSE, the comparison is done by ha_compare_word().
  @returns            - The result of comparison
*/
static inline int ha_compare_word_or_prefix(CHARSET_INFO *charset_info,
                                            const uchar *a, size_t a_length,
                                            const uchar *b, size_t b_length,
                                            my_bool b_is_prefix)
{
  return b_is_prefix ?
         ha_compare_word_prefix(charset_info, a, a_length, b, b_length) :
         ha_compare_word(charset_info, a, a_length, b, b_length);
}
extern int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
const uchar *b, uint key_length, uint nextflag,
uint *diff_pos);
......
......@@ -58,11 +58,6 @@ DROP TABLE t1;
# CHAR
# MyISAM is buggy on CHAR+BTREE+UNIQUE+PREFIX (see MDEV-30048), disable for now
# Other engines work fine
if (`SELECT UPPER(@@storage_engine) != 'MYISAM'`)
{
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
......@@ -72,7 +67,6 @@ SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
}
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
......
--echo #
--echo # MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB
--echo #
SET NAMES utf8mb3;
#
# Engines have different conditions based on the column size
# determining when to use trailing space compressions in key values,
# so let's test different column sizes for better coverage.
#
#
# CHAR(10)
#
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(2)));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
#
# CHAR(120)
#
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(100)));
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
--echo #
--echo # MDEV-30050 Inconsistent results of DISTINCT with NOPAD
--echo #
CREATE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
SET big_tables=1;
SELECT DISTINCT c FROM t1;
DROP TABLE t1;
SET big_tables=DEFAULT;
......@@ -830,6 +830,20 @@ INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
......@@ -906,6 +920,133 @@ INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
SET STORAGE_ENGINE=DEFAULT;
SET default_storage_engine=MyISAM;
#
# MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB
#
SET NAMES utf8mb3;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(2)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(2))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(100)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(100))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
#
# MDEV-30050 Inconsistent results of DISTINCT with NOPAD
#
CREATE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(100) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
c
ss
ß
SET big_tables=1;
SELECT DISTINCT c FROM t1;
c
ss
ß
DROP TABLE t1;
SET big_tables=DEFAULT;
SET default_storage_engine=MEMORY;
#
# MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB
#
SET NAMES utf8mb3;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(2)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(2))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(100)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(100))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
#
# MDEV-30050 Inconsistent results of DISTINCT with NOPAD
#
CREATE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(100) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
c
ss
ß
SET big_tables=1;
SELECT DISTINCT c FROM t1;
c
ss
ß
DROP TABLE t1;
SET big_tables=DEFAULT;
SET default_storage_engine=DEFAULT;
#
# End of 10.4 tests
#
......@@ -65,6 +65,14 @@ SET STORAGE_ENGINE=HEAP;
SET STORAGE_ENGINE=DEFAULT;
SET default_storage_engine=MyISAM;
--source include/ctype_utf8mb3_uca_char.inc
SET default_storage_engine=MEMORY;
--source include/ctype_utf8mb3_uca_char.inc
SET default_storage_engine=DEFAULT;
--echo #
--echo # End of 10.4 tests
--echo #
......@@ -379,5 +379,89 @@ INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
#
# MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB
#
SET NAMES utf8mb3;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(2)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(2))
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(100)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(100))
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
#
# MDEV-30050 Inconsistent results of DISTINCT with NOPAD
#
CREATE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(100) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
c
ss
ß
SET big_tables=1;
SELECT DISTINCT c FROM t1;
c
ss
ß
DROP TABLE t1;
SET big_tables=DEFAULT;
#
# MDEV-30050 Inconsistent results of DISTINCT with NOPAD
#
CREATE OR REPLACE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(100) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
c
ss
ß
SET big_tables=1;
SELECT DISTINCT c FROM t1;
c
ss
ß
DROP TABLE t1;
SET big_tables=DEFAULT;
#
# End 10.4 tests
#
......@@ -32,6 +32,23 @@ let $coll_pad='utf8_bin';
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
--source include/ctype_nopad_prefix_unique.inc
--source include/ctype_utf8mb3_uca_char.inc
--echo #
--echo # MDEV-30050 Inconsistent results of DISTINCT with NOPAD
--echo #
CREATE OR REPLACE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
SET big_tables=1;
SELECT DISTINCT c FROM t1;
DROP TABLE t1;
SET big_tables=DEFAULT;
--echo #
--echo # End 10.4 tests
--echo #
#
# Start of 10.4 tests
#
SET default_storage_engine=Aria;
#
# MDEV-30048 Prefix keys for CHAR work differently for MyISAM vs InnoDB
#
SET NAMES utf8mb3;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(10) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(2)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(10) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(2))
) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`)
) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
CREATE TABLE t1 (a CHAR(120) COLLATE utf8mb3_unicode_nopad_ci, UNIQUE KEY(a(100)));
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(120) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(100))
) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1
INSERT INTO t1 VALUES ('ss'),('ß');
DROP TABLE t1;
#
# MDEV-30050 Inconsistent results of DISTINCT with NOPAD
#
CREATE TABLE t1 (c CHAR(100) COLLATE utf8mb3_unicode_nopad_ci);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c` char(100) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL
) ENGINE=Aria DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci PAGE_CHECKSUM=1
INSERT INTO t1 VALUES ('ss'),('ß');
SET big_tables=0;
SELECT DISTINCT c FROM t1;
c
ss
ß
SET big_tables=1;
SELECT DISTINCT c FROM t1;
c
ss
ß
DROP TABLE t1;
SET big_tables=DEFAULT;
#
# End of 10.4 tests
#
--source include/have_maria.inc
--echo #
--echo # Start of 10.4 tests
--echo #
SET default_storage_engine=Aria;
--source include/ctype_utf8mb3_uca_char.inc
--echo #
--echo # End of 10.4 tests
--echo #
......@@ -20,16 +20,6 @@
#include <my_compare.h>
#include <my_sys.h>
int ha_compare_text(CHARSET_INFO *charset_info, const uchar *a, size_t a_length,
const uchar *b, size_t b_length, my_bool part_key)
{
if (!part_key)
return charset_info->coll->strnncollsp(charset_info, a, a_length,
b, b_length);
return charset_info->coll->strnncoll(charset_info, a, a_length,
b, b_length, part_key);
}
static int compare_bin(const uchar *a, uint a_length,
const uchar *b, uint b_length,
......@@ -183,9 +173,12 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
(flag=ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
(flag= ha_compare_char_fixed(keyseg->charset,
a, a_length,
b, b_length,
keyseg->length / keyseg->charset->mbmaxlen,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a+=a_length;
b+=b_length;
......@@ -195,9 +188,12 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
{
uint length=(uint) (end-a), a_length=length, b_length=length;
if (piks &&
(flag= ha_compare_text(keyseg->charset, a, a_length, b, b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
(flag= ha_compare_char_fixed(keyseg->charset,
a, a_length,
b, b_length,
keyseg->length / keyseg->charset->mbmaxlen,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
a=end;
b+=length;
......@@ -242,7 +238,9 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
(flag= ha_compare_text(keyseg->charset,a,a_length,b,b_length,
(flag= ha_compare_char_varying(keyseg->charset,
a, a_length,
b, b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
......@@ -260,7 +258,7 @@ int ha_key_cmp(HA_KEYSEG *keyseg, const uchar *a,
next_key_length=key_length-b_length-pack_length;
if (piks &&
(flag=compare_bin(a,a_length,b,b_length,
(flag=compare_bin(a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0), 0)))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
......
......@@ -6751,9 +6751,9 @@ innobase_fts_text_cmp(
const fts_string_t* s1 = (const fts_string_t*) p1;
const fts_string_t* s2 = (const fts_string_t*) p2;
return(ha_compare_text(
charset, s1->f_str, static_cast<uint>(s1->f_len),
s2->f_str, static_cast<uint>(s2->f_len), 0));
return(ha_compare_word(charset,
s1->f_str, static_cast<uint>(s1->f_len),
s2->f_str, static_cast<uint>(s2->f_len)));
}
/******************************************************************//**
......@@ -6774,9 +6774,9 @@ innobase_fts_text_case_cmp(
newlen = strlen((const char*) s2->f_str);
return(ha_compare_text(
charset, s1->f_str, static_cast<uint>(s1->f_len),
s2->f_str, static_cast<uint>(newlen), 0));
return(ha_compare_word(charset,
s1->f_str, static_cast<uint>(s1->f_len),
s2->f_str, static_cast<uint>(newlen)));
}
/******************************************************************//**
......@@ -6821,11 +6821,11 @@ innobase_fts_text_cmp_prefix(
const fts_string_t* s2 = (const fts_string_t*) p2;
int result;
result = ha_compare_text(
charset, s2->f_str, static_cast<uint>(s2->f_len),
s1->f_str, static_cast<uint>(s1->f_len), 1);
result = ha_compare_word_prefix(charset,
s2->f_str, static_cast<uint>(s2->f_len),
s1->f_str, static_cast<uint>(s1->f_len));
/* We switched s1, s2 position in ha_compare_text. So we need
/* We switched s1, s2 position in the above call. So we need
to negate the result */
return(-result);
}
......
......@@ -5702,9 +5702,9 @@ static int sort_maria_ft_key_write(MARIA_SORT_PARAM *sort_param,
}
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
if (ha_compare_text(sort_param->seg->charset,
a+1,a_len-1,
ft_buf->lastkey+1,val_off-1, 0)==0)
if (ha_compare_word(sort_param->seg->charset,
a + 1, a_len - 1,
ft_buf->lastkey + 1, val_off - 1) == 0)
{
uchar *p;
if (!ft_buf->buf) /* store in second-level tree */
......
......@@ -162,8 +162,8 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word, ndepth */
int i= ha_compare_text(cs, (uchar*) (*a)->word + 1,(*a)->len - 1,
(uchar*) (*b)->word + 1,(*b)->len - 1, 0);
int i= ha_compare_word(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
(uchar*) (*b)->word + 1, (*b)->len - 1);
if (!i)
i=CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
......@@ -403,13 +403,14 @@ static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (!r && !ftbw->off)
{
r= ha_compare_text(ftb->charset,
info->last_key.data+1,
info->last_key.data_length + info->last_key.ref_length-
extra-1,
(uchar*) ftbw->word+1,
ftbw->len-1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
r= ha_compare_word_or_prefix(ftb->charset,
info->last_key.data + 1,
info->last_key.data_length +
info->last_key.ref_length -
extra - 1,
(uchar*) ftbw->word + 1,
ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
}
if (r) /* not found */
......@@ -899,9 +900,9 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
{
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
(my_bool)(ftbw->flags&FTB_FLAG_TRUNC)) < 0)
if (ha_compare_word_or_prefix(ftb->charset, (uchar*) word, len,
(uchar*) ftbw->word + 1, ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < 0)
b= c;
else
a= c;
......@@ -926,9 +927,9 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (; c >= 0; c--)
{
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
(my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
if (ha_compare_word_or_prefix(ftb->charset, (uchar*) word, len,
(uchar*)ftbw->word + 1, ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)))
{
if (ftb->with_scan & FTB_FLAG_TRUNC)
continue;
......
......@@ -111,11 +111,11 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
while (!r && gweight)
{
if (key.data_length &&
ha_compare_text(aio->charset,
info->last_key.data+1,
ha_compare_word(aio->charset,
info->last_key.data + 1,
info->last_key.data_length +
info->last_key.ref_length - extra - 1,
key.data+1, key.data_length-1, 0))
key.data + 1, key.data_length - 1))
break;
if (subkeys.i < 0)
......
......@@ -33,8 +33,8 @@ typedef struct st_my_maria_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len, 0);
return ha_compare_word(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
......
......@@ -185,8 +185,7 @@ int _ma_ft_cmp(MARIA_HA *info, uint keynr, const uchar *rec1, const uchar *rec2)
{
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
ha_compare_text(cs, ftsi1.pos,ftsi1.len,
ftsi2.pos,ftsi2.len,0)))
ha_compare_word(cs, ftsi1.pos, ftsi1.len, ftsi2.pos, ftsi2.len)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
......@@ -213,8 +212,8 @@ int _ma_ft_update(MARIA_HA *info, uint keynr, uchar *keybuf,
error=0;
while(old_word->pos && new_word->pos)
{
cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
(uchar*) new_word->pos,new_word->len,0);
cmp= ha_compare_word(cs, (uchar*) old_word->pos, old_word->len,
(uchar*) new_word->pos, new_word->len);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
if (cmp < 0 || cmp2)
......
......@@ -236,11 +236,22 @@ my_bool _ma_unique_comp(MARIA_UNIQUEDEF *def, const uchar *a, const uchar *b,
memcpy((void*) &pos_a, pos_a+keyseg->bit_start, sizeof(char*));
memcpy((void*) &pos_b, pos_b+keyseg->bit_start, sizeof(char*));
}
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
if (type == HA_KEYTYPE_TEXT/* the CHAR data type*/)
{
if (ha_compare_char_fixed(keyseg->charset,
pos_a, a_length,
pos_b, b_length,
keyseg->length / keyseg->charset->mbmaxlen,
FALSE/*b_is_prefix*/))
return 1;
}
else if (type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
{
if (ha_compare_text(keyseg->charset, pos_a, a_length,
pos_b, b_length, 0))
if (ha_compare_char_varying(keyseg->charset,
pos_a, a_length,
pos_b, b_length,
FALSE/*b_is_prefix*/))
return 1;
}
else
......
......@@ -890,8 +890,10 @@ ChangeSet@1.2562, 2008-04-09 07:41:40+02:00, serg@janus.mylan +9 -0
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
ha_compare_text(keyinfo->seg->charset, a, alen,
b, blen, 0) == 0)
ha_compare_char_varying(keyinfo->seg->charset,
a, alen,
b, blen,
FALSE/*b_is_prefix*/) == 0)
{
/* Yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
......
......@@ -162,8 +162,8 @@ static int FTB_WORD_cmp(my_off_t *v, FTB_WORD *a, FTB_WORD *b)
static int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b)
{
/* ORDER BY word, ndepth */
int i= ha_compare_text(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
(uchar*) (*b)->word + 1, (*b)->len - 1, 0);
int i= ha_compare_word(cs, (uchar*) (*a)->word + 1, (*a)->len - 1,
(uchar*) (*b)->word + 1, (*b)->len - 1);
if (!i)
i= CMP_NUM((*a)->ndepth, (*b)->ndepth);
return i;
......@@ -407,12 +407,12 @@ static int _ft2_search_no_lock(FTB *ftb, FTB_WORD *ftbw, my_bool init_search)
if (!r && !ftbw->off)
{
r= ha_compare_text(ftb->charset,
info->lastkey+1,
info->lastkey_length-extra-1,
(uchar*) ftbw->word+1,
ftbw->len-1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
r= ha_compare_word_or_prefix(ftb->charset,
info->lastkey + 1,
info->lastkey_length - extra - 1,
(uchar*) ftbw->word + 1,
ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC));
}
if (r) /* not found */
......@@ -907,9 +907,9 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (a= 0, b= ftb->queue.elements, c= (a+b)/2; b-a>1; c= (a+b)/2)
{
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word+1, ftbw->len-1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < 0)
if (ha_compare_word_or_prefix(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1, ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)) < 0)
b= c;
else
a= c;
......@@ -934,9 +934,9 @@ static int ftb_find_relevance_add_word(MYSQL_FTPARSER_PARAM *param,
for (; c >= 0; c--)
{
ftbw= ftb->list[c];
if (ha_compare_text(ftb->charset, (uchar*)word, len,
(uchar*)ftbw->word + 1,ftbw->len - 1,
(my_bool)(ftbw->flags & FTB_FLAG_TRUNC)))
if (ha_compare_word_or_prefix(ftb->charset, (uchar*) word, len,
(uchar*) ftbw->word + 1, ftbw->len - 1,
(my_bool) (ftbw->flags & FTB_FLAG_TRUNC)))
{
if (ftb->with_scan & FTB_FLAG_TRUNC)
continue;
......
......@@ -109,8 +109,10 @@ static int walk_and_match(FT_WORD *word, uint32 count, ALL_IN_ONE *aio)
{
if (keylen &&
ha_compare_text(aio->charset,info->lastkey+1,
info->lastkey_length-extra-1, keybuff+1,keylen-1,0))
ha_compare_word(aio->charset,
info->lastkey + 1,
info->lastkey_length - extra - 1,
keybuff + 1, keylen - 1))
break;
if (subkeys.i < 0)
......
......@@ -31,8 +31,8 @@ typedef struct st_my_ft_parser_param
static int FT_WORD_cmp(CHARSET_INFO* cs, FT_WORD *w1, FT_WORD *w2)
{
return ha_compare_text(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len, 0);
return ha_compare_word(cs, (uchar*) w1->pos, w1->len,
(uchar*) w2->pos, w2->len);
}
static int walk_and_copy(FT_WORD *word,uint32 count,FT_DOCSTAT *docstat)
......
......@@ -33,9 +33,9 @@ static TREE *stopwords3=NULL;
static int FT_STOPWORD_cmp(void* cmp_arg __attribute__((unused)),
FT_STOPWORD *w1, FT_STOPWORD *w2)
{
return ha_compare_text(ft_stopword_cs,
(uchar *)w1->pos,w1->len,
(uchar *)w2->pos,w2->len,0);
return ha_compare_word(ft_stopword_cs,
(uchar *) w1->pos, w1->len,
(uchar *) w2->pos, w2->len);
}
static int FT_STOPWORD_free(FT_STOPWORD *w, TREE_FREE action,
......
......@@ -181,8 +181,8 @@ int _mi_ft_cmp(MI_INFO *info, uint keynr, const uchar *rec1, const uchar *rec2)
{
if ((ftsi1.pos != ftsi2.pos) &&
(!ftsi1.pos || !ftsi2.pos ||
ha_compare_text(cs, (uchar*) ftsi1.pos,ftsi1.len,
(uchar*) ftsi2.pos,ftsi2.len,0)))
ha_compare_word(cs, (uchar*) ftsi1.pos, ftsi1.len,
(uchar*) ftsi2.pos, ftsi2.len)))
DBUG_RETURN(THOSE_TWO_DAMN_KEYS_ARE_REALLY_DIFFERENT);
}
DBUG_RETURN(GEE_THEY_ARE_ABSOLUTELY_IDENTICAL);
......@@ -210,8 +210,8 @@ int _mi_ft_update(MI_INFO *info, uint keynr, uchar *keybuf,
error=0;
while(old_word->pos && new_word->pos)
{
cmp= ha_compare_text(cs, (uchar*) old_word->pos,old_word->len,
(uchar*) new_word->pos,new_word->len,0);
cmp= ha_compare_word(cs, (uchar*) old_word->pos, old_word->len,
(uchar*) new_word->pos, new_word->len);
cmp2= cmp ? 0 : (fabs(old_word->weight - new_word->weight) > 1.e-5);
if (cmp < 0 || cmp2)
......
......@@ -3957,9 +3957,9 @@ static int sort_ft_key_write(MI_SORT_PARAM *sort_param, const void *a)
}
get_key_full_length_rdonly(val_off, ft_buf->lastkey);
if (ha_compare_text(sort_param->seg->charset,
((uchar *)a)+1,a_len-1,
(uchar*) ft_buf->lastkey+1,val_off-1, 0)==0)
if (ha_compare_word(sort_param->seg->charset,
((uchar *)a) + 1, a_len - 1,
(uchar*) ft_buf->lastkey + 1, val_off - 1) == 0)
{
if (!ft_buf->buf) /* store in second-level tree */
{
......
......@@ -211,11 +211,22 @@ int mi_unique_comp(MI_UNIQUEDEF *def, const uchar *a, const uchar *b,
memcpy((char**) &pos_a, pos_a+keyseg->bit_start, sizeof(char*));
memcpy((char**) &pos_b, pos_b+keyseg->bit_start, sizeof(char*));
}
if (type == HA_KEYTYPE_TEXT || type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
if (type == HA_KEYTYPE_TEXT/*The CHAR data type*/)
{
if (ha_compare_char_fixed(keyseg->charset,
(uchar *) pos_a, a_length,
(uchar *) pos_b, b_length,
keyseg->length / keyseg->charset->mbmaxlen,
FALSE/*b_is_prefix*/))
return 1;
}
else if (type == HA_KEYTYPE_VARTEXT1 ||
type == HA_KEYTYPE_VARTEXT2)
{
if (ha_compare_text(keyseg->charset, (uchar *) pos_a, a_length,
(uchar *) pos_b, b_length, 0))
if (ha_compare_char_varying(keyseg->charset,
(uchar *) pos_a, a_length,
(uchar *) pos_b, b_length,
FALSE/*b_is_prefix*/))
return 1;
}
else
......
......@@ -542,7 +542,9 @@ int _mi_insert(register MI_INFO *info, register MI_KEYDEF *keyinfo,
get_key_length(alen,a);
DBUG_ASSERT(info->ft1_to_ft2==0);
if (alen == blen &&
ha_compare_text(keyinfo->seg->charset, a, alen, b, blen, 0)==0)
ha_compare_word(keyinfo->seg->charset,
a, alen,
b, blen) == 0)
{
/* yup. converting */
info->ft1_to_ft2=(DYNAMIC_ARRAY *)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment