Commit 8020b1bd authored by Alexander Barkov

MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations

- Adding a new argument "flag" to MY_COLLATION_HANDLER::strnncollsp_nchars()
  and a flag MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES.
  The flag defines if strnncollsp_nchars() should emulate trailing spaces
  which were possibly trimmed earlier (e.g. in InnoDB CHAR compression).
  This is important for NOPAD collations.

  For example, with this input:
   - str1= 'a '    (Latin letter a followed by one space)
   - str2= 'a  '   (Latin letter a followed by two spaces)
   - nchars= 3
  if the flag is given, strnncollsp_nchars() will virtually restore
  one trailing space to str1 up to nchars (3) characters and compare two
  strings as equal:
  - str1= 'a  '  (one extra trailing space emulated)
  - str2= 'a  '  (as is)

  If the flag is not given, strnncollsp_nchars() does not add trailing
  virtual spaces, so in case of a NOPAD collation, str1 will be compared
  as less than str2 because it is shorter.

- Field_string::cmp_prefix() now passes the new flag.
  Field_varstring::cmp_prefix() and Field_blob::cmp_prefix() do
  not pass the new flag.

- The branch in cmp_whole_field() in storage/innobase/rem/rem0cmp.cc
  (which handles the CHAR data type) now also passes the new flag.

- Fixing UCA collations to respect the new flag.
  Other collations are possibly also affected, however
  I had no success in making an SQL script demonstrating the problem.
  Other collations will be extended to respect this flag in a separate
  patch later.

- Changing the meaning of the last parameter of Field::cmp_prefix()
  from "number of bytes" (internal length)
  to "number of characters" (user visible length).

  The code calling cmp_prefix() from handler.cc was wrong.
  After this change, the call in handler.cc became correct.

  The code calling cmp_prefix() from key_rec_cmp() in key.cc
  was adjusted according to this change.

- Old strnncollsp_nchars() related tests in unittest/strings/strings-t.c
  now pass the new flag.
  A few new tests were also added, without the flag.
parent 0cc1694e
...@@ -248,6 +248,28 @@ extern MY_UNI_CTYPE my_uni_ctype[256]; ...@@ -248,6 +248,28 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */ #define MY_STRXFRM_REVERSE_LEVEL6 0x00200000 /* if reverse order for level6 */
#define MY_STRXFRM_REVERSE_SHIFT 16 #define MY_STRXFRM_REVERSE_SHIFT 16
/* Flags to strnncollsp_nchars */
/*
MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES -
defines if inside strnncollsp_nchars()
short strings should be virtually extended to "nchars"
characters by emulating trimmed trailing spaces.
This flag is needed when comparing packed strings of the CHAR
data type, when trailing spaces are trimmed on storage (like in InnoDB),
however the actual values (after unpacking) will have those trailing
spaces.
If this flag is passed, strnncollsp_nchars() performs both
truncating longer strings and extending shorter strings
to exactly "nchars".
If this flag is not passed, strnncollsp_nchars() only truncates longer
strings to "nchars", but does not extend shorter strings to "nchars".
*/
#define MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES 1
/* /*
Collation IDs for MariaDB that should not conflict with MySQL. Collation IDs for MariaDB that should not conflict with MySQL.
We reserve 256..511, because MySQL will most likely use this range We reserve 256..511, because MySQL will most likely use this range
...@@ -383,7 +405,8 @@ struct my_collation_handler_st ...@@ -383,7 +405,8 @@ struct my_collation_handler_st
int (*strnncollsp_nchars)(CHARSET_INFO *, int (*strnncollsp_nchars)(CHARSET_INFO *,
const uchar *str1, size_t len1, const uchar *str1, size_t len1,
const uchar *str2, size_t len2, const uchar *str2, size_t len2,
size_t nchars); size_t nchars,
uint flags);
size_t (*strnxfrm)(CHARSET_INFO *, size_t (*strnxfrm)(CHARSET_INFO *,
uchar *dst, size_t dstlen, uint nweights, uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags); const uchar *src, size_t srclen, uint flags);
......
# Shared include: prefix UNIQUE key checks for a NOPAD collation (MDEV-30034).
#
# For each of TEXT, VARCHAR and CHAR, this script creates t1 with a 3-character
# prefix key UNIQUE(a(3)), once as a plain UNIQUE key and once as
# UNIQUE ... USING HASH, then inserts 'ss ' followed by 0xC39F20
# (U+00DF LATIN SMALL LETTER SHARP S plus one space).
#
# The column collation is substituted from @@collation_connection and the
# CREATE TABLE statements carry no ENGINE clause, so the including test is
# expected to choose both (SET NAMES ... COLLATE ...; SET STORAGE_ENGINE=...)
# before sourcing this file.
--echo #
--echo # MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
--echo #
# TEXT
# The TEXT cases are skipped when the default engine is MEMORY
# (presumably because MEMORY cannot store TEXT columns - TODO confirm).
# In both TEXT cases the second INSERT must fail with ER_DUP_ENTRY.
if (`SELECT UPPER(@@storage_engine) != 'MEMORY'`)
{
# TEXT, plain UNIQUE prefix key
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
# TEXT, UNIQUE prefix key USING HASH
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
}
# VARCHAR
# Runs for every engine; in both VARCHAR cases the second INSERT must fail
# with ER_DUP_ENTRY.
# VARCHAR, plain UNIQUE prefix key
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
# VARCHAR, UNIQUE prefix key USING HASH
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
--error ER_DUP_ENTRY
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
# CHAR
# Unlike TEXT and VARCHAR, the CHAR cases have no --error directive:
# both INSERTs are expected to succeed, i.e. the two values are not
# treated as duplicates under the CHAR prefix key.
# MyISAM is buggy on CHAR+BTREE+UNIQUE+PREFIX (see MDEV-30048), disable for now
# Other engines work fine
if (`SELECT UPPER(@@storage_engine) != 'MYISAM'`)
{
# CHAR, plain UNIQUE prefix key (not run on MyISAM, see above)
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
}
# CHAR, UNIQUE prefix key USING HASH (runs on every engine)
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
...@@ -761,3 +761,151 @@ DROP TABLE case_folding; ...@@ -761,3 +761,151 @@ DROP TABLE case_folding;
# #
# End of 10.3 tests # End of 10.3 tests
# #
#
# Start of 10.4 tests
#
SET STORAGE_ENGINE=MyISAM;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
#
# MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
#
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
SET STORAGE_ENGINE=HEAP;
#
# MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
#
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=MEMORY DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
SET STORAGE_ENGINE=DEFAULT;
#
# End of 10.4 tests
#
...@@ -50,3 +50,21 @@ SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2; ...@@ -50,3 +50,21 @@ SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2;
--echo # --echo #
--echo # End of 10.3 tests --echo # End of 10.3 tests
--echo # --echo #
--echo #
--echo # Start of 10.4 tests
--echo #
SET STORAGE_ENGINE=MyISAM;
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
--source include/ctype_nopad_prefix_unique.inc
SET STORAGE_ENGINE=HEAP;
--source include/ctype_nopad_prefix_unique.inc
SET STORAGE_ENGINE=DEFAULT;
--echo #
--echo # End of 10.4 tests
--echo #
...@@ -283,3 +283,101 @@ DROP TABLE t1; ...@@ -283,3 +283,101 @@ DROP TABLE t1;
# #
# End of 10.2 tests # End of 10.2 tests
# #
#
# Start of 10.4 tests
#
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
#
# MDEV-30034 UNIQUE USING HASH accepts duplicate entries for tricky collations
#
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a TEXT COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` text CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a VARCHAR(2000) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(2000) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
ERROR 23000: Duplicate entry 'ß ' for key 'a'
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)))',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3))
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
EXECUTE IMMEDIATE REPLACE(
'CREATE TABLE t1 ( '
' a CHAR(20) COLLATE <COLLATION>,'
'UNIQUE(a(3)) USING HASH)',
'<COLLATION>', @@collation_connection);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(20) CHARACTER SET utf8 COLLATE utf8_unicode_nopad_ci DEFAULT NULL,
UNIQUE KEY `a` (`a`(3)) USING HASH
) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
INSERT INTO t1 VALUES ('ss ');
INSERT INTO t1 VALUES (_utf8mb3 0xC39F20)/*SZ+SPACE*/;
DROP TABLE t1;
#
# End 10.4 tests
#
...@@ -23,3 +23,15 @@ let $coll_pad='utf8_bin'; ...@@ -23,3 +23,15 @@ let $coll_pad='utf8_bin';
--echo # --echo #
--echo # End of 10.2 tests --echo # End of 10.2 tests
--echo # --echo #
--echo #
--echo # Start of 10.4 tests
--echo #
SET NAMES utf8mb3 COLLATE utf8mb3_unicode_nopad_ci;
--source include/ctype_nopad_prefix_unique.inc
--echo #
--echo # End 10.4 tests
--echo #
...@@ -7435,7 +7435,8 @@ int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr) ...@@ -7435,7 +7435,8 @@ int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr)
return field_charset->coll->strnncollsp_nchars(field_charset, return field_charset->coll->strnncollsp_nchars(field_charset,
a_ptr, field_length, a_ptr, field_length,
b_ptr, field_length, b_ptr, field_length,
Field_string::char_length()); Field_string::char_length(),
MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES);
} }
...@@ -7835,10 +7836,11 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr) ...@@ -7835,10 +7836,11 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr)
int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
size_t prefix_len) size_t prefix_char_len)
{ {
/* avoid expensive well_formed_char_length if possible */ /* avoid more expensive strnncollsp_nchars() if possible */
if (prefix_len == table->field[field_index]->field_length) if (prefix_char_len * field_charset->mbmaxlen ==
table->field[field_index]->field_length)
return Field_varstring::cmp(a_ptr, b_ptr); return Field_varstring::cmp(a_ptr, b_ptr);
size_t a_length, b_length; size_t a_length, b_length;
...@@ -7858,7 +7860,8 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, ...@@ -7858,7 +7860,8 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
a_length, a_length,
b_ptr + length_bytes, b_ptr + length_bytes,
b_length, b_length,
prefix_len / field_charset->mbmaxlen); prefix_char_len,
0);
} }
...@@ -8635,7 +8638,7 @@ int Field_blob::cmp(const uchar *a_ptr, const uchar *b_ptr) ...@@ -8635,7 +8638,7 @@ int Field_blob::cmp(const uchar *a_ptr, const uchar *b_ptr)
int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
size_t prefix_len) size_t prefix_char_len)
{ {
uchar *blob1,*blob2; uchar *blob1,*blob2;
memcpy(&blob1, a_ptr+packlength, sizeof(char*)); memcpy(&blob1, a_ptr+packlength, sizeof(char*));
...@@ -8644,7 +8647,8 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr, ...@@ -8644,7 +8647,8 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
return field_charset->coll->strnncollsp_nchars(field_charset, return field_charset->coll->strnncollsp_nchars(field_charset,
blob1, a_len, blob1, a_len,
blob2, b_len, blob2, b_len,
prefix_len / field_charset->mbmaxlen); prefix_char_len,
0);
} }
...@@ -10114,7 +10118,8 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value) ...@@ -10114,7 +10118,8 @@ my_decimal *Field_bit::val_decimal(my_decimal *deciaml_value)
The a and b pointer must be pointers to the field in a record The a and b pointer must be pointers to the field in a record
(not the table->record[0] necessarily) (not the table->record[0] necessarily)
*/ */
int Field_bit::cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len) int Field_bit::cmp_prefix(const uchar *a, const uchar *b,
size_t prefix_char_len)
{ {
my_ptrdiff_t a_diff= a - ptr; my_ptrdiff_t a_diff= a - ptr;
my_ptrdiff_t b_diff= b - ptr; my_ptrdiff_t b_diff= b - ptr;
......
...@@ -1109,7 +1109,8 @@ class Field: public Value_source ...@@ -1109,7 +1109,8 @@ class Field: public Value_source
The following method is used for comparing prefix keys. The following method is used for comparing prefix keys.
Currently it's only used in partitioning. Currently it's only used in partitioning.
*/ */
virtual int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len) virtual int cmp_prefix(const uchar *a, const uchar *b,
size_t prefix_char_len)
{ return cmp(a, b); } { return cmp(a, b); }
virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U) virtual int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U)
{ return memcmp(a,b,pack_length()); } { return memcmp(a,b,pack_length()); }
...@@ -3728,7 +3729,7 @@ class Field_varstring :public Field_longstr { ...@@ -3728,7 +3729,7 @@ class Field_varstring :public Field_longstr {
String *val_str(String*,String *); String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *); my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a,const uchar *b); int cmp(const uchar *a,const uchar *b);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len); int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
void sort_string(uchar *buff,uint length); void sort_string(uchar *buff,uint length);
uint get_key_image(uchar *buff,uint length, imagetype type); uint get_key_image(uchar *buff,uint length, imagetype type);
void set_key_image(const uchar *buff,uint length); void set_key_image(const uchar *buff,uint length);
...@@ -3964,7 +3965,7 @@ class Field_blob :public Field_longstr { ...@@ -3964,7 +3965,7 @@ class Field_blob :public Field_longstr {
String *val_str(String*,String *); String *val_str(String*,String *);
my_decimal *val_decimal(my_decimal *); my_decimal *val_decimal(my_decimal *);
int cmp(const uchar *a,const uchar *b); int cmp(const uchar *a,const uchar *b);
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len); int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length); int cmp(const uchar *a, uint32 a_length, const uchar *b, uint32 b_length);
int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U); int cmp_binary(const uchar *a,const uchar *b, uint32 max_length=~0U);
int key_cmp(const uchar *,const uchar*); int key_cmp(const uchar *,const uchar*);
...@@ -4501,7 +4502,7 @@ class Field_bit :public Field { ...@@ -4501,7 +4502,7 @@ class Field_bit :public Field {
} }
int cmp_binary_offset(uint row_offset) int cmp_binary_offset(uint row_offset)
{ return cmp_offset(row_offset); } { return cmp_offset(row_offset); }
int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_len); int cmp_prefix(const uchar *a, const uchar *b, size_t prefix_char_len);
int key_cmp(const uchar *a, const uchar *b) int key_cmp(const uchar *a, const uchar *b)
{ return cmp_binary((uchar *) a, (uchar *) b); } { return cmp_binary((uchar *) a, (uchar *) b); }
int key_cmp(const uchar *str, uint length); int key_cmp(const uchar *str, uint length);
......
...@@ -612,7 +612,8 @@ int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec) ...@@ -612,7 +612,8 @@ int key_rec_cmp(void *key_p, uchar *first_rec, uchar *second_rec)
that take the max length into account. that take the max length into account.
*/ */
if ((result= field->cmp_prefix(field->ptr+first_diff, field->ptr+sec_diff, if ((result= field->cmp_prefix(field->ptr+first_diff, field->ptr+sec_diff,
key_part->length))) key_part->length /
field->charset()->mbmaxlen)))
DBUG_RETURN(result); DBUG_RETURN(result);
next_loop: next_loop:
key_part++; key_part++;
......
...@@ -327,7 +327,8 @@ static int cmp_whole_field(ulint mtype, ulint prtype, ...@@ -327,7 +327,8 @@ static int cmp_whole_field(ulint mtype, ulint prtype,
if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype), if (CHARSET_INFO *cs= get_charset(dtype_get_charset_coll(prtype),
MYF(MY_WME))) MYF(MY_WME)))
return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length, return cs->coll->strnncollsp_nchars(cs, a, a_length, b, b_length,
std::max(a_length, b_length)); std::max(a_length, b_length),
MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES);
} }
ib::fatal() << "Unable to find charset-collation for " << prtype; ib::fatal() << "Unable to find charset-collation for " << prtype;
......
...@@ -128,7 +128,8 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)), ...@@ -128,7 +128,8 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)), static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen, const uchar *s, size_t slen,
const uchar *t, size_t tlen, const uchar *t, size_t tlen,
size_t nchars) size_t nchars,
uint flags)
{ {
set_if_smaller(slen, nchars); set_if_smaller(slen, nchars);
set_if_smaller(tlen, nchars); set_if_smaller(tlen, nchars);
...@@ -213,7 +214,8 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)), ...@@ -213,7 +214,8 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs, static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs,
const uchar *a, size_t a_length, const uchar *a, size_t a_length,
const uchar *b, size_t b_length, const uchar *b, size_t b_length,
size_t nchars) size_t nchars,
uint flags)
{ {
set_if_smaller(a_length, nchars); set_if_smaller(a_length, nchars);
set_if_smaller(b_length, nchars); set_if_smaller(b_length, nchars);
......
...@@ -212,7 +212,8 @@ static int ...@@ -212,7 +212,8 @@ static int
my_strnncollsp_nchars_simple(CHARSET_INFO * cs, my_strnncollsp_nchars_simple(CHARSET_INFO * cs,
const uchar *a, size_t a_length, const uchar *a, size_t a_length,
const uchar *b, size_t b_length, const uchar *b, size_t b_length,
size_t nchars) size_t nchars,
uint flags)
{ {
set_if_smaller(a_length, nchars); set_if_smaller(a_length, nchars);
set_if_smaller(b_length, nchars); set_if_smaller(b_length, nchars);
......
...@@ -589,7 +589,8 @@ static int ...@@ -589,7 +589,8 @@ static int
my_strnncollsp_nchars_tis620(CHARSET_INFO * cs, my_strnncollsp_nchars_tis620(CHARSET_INFO * cs,
const uchar *a, size_t a_length, const uchar *a, size_t a_length,
const uchar *b, size_t b_length, const uchar *b, size_t b_length,
size_t nchars) size_t nchars,
uint flags)
{ {
set_if_smaller(a_length, nchars); set_if_smaller(a_length, nchars);
set_if_smaller(b_length, nchars); set_if_smaller(b_length, nchars);
......
...@@ -317,6 +317,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs, ...@@ -317,6 +317,7 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
static inline weight_and_nchars_t static inline weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
size_t nchars, size_t nchars,
uint flags,
uint *generated) uint *generated)
{ {
weight_and_nchars_t res; weight_and_nchars_t res;
...@@ -330,7 +331,10 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner, ...@@ -330,7 +331,10 @@ MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
We reached the end of the string, but the caller wants more weights. We reached the end of the string, but the caller wants more weights.
Perform space padding. Perform space padding.
*/ */
res.weight= my_space_weight(scanner->level); res.weight=
flags & MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES ?
my_space_weight(scanner->level) : 0;
res.nchars= 1; res.nchars= 1;
(*generated)++; (*generated)++;
} }
...@@ -367,7 +371,8 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs, ...@@ -367,7 +371,8 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
const MY_UCA_WEIGHT_LEVEL *level, const MY_UCA_WEIGHT_LEVEL *level,
const uchar *s, size_t slen, const uchar *s, size_t slen,
const uchar *t, size_t tlen, const uchar *t, size_t tlen,
size_t nchars) size_t nchars,
uint flags)
{ {
my_uca_scanner sscanner; my_uca_scanner sscanner;
my_uca_scanner tscanner; my_uca_scanner tscanner;
...@@ -385,15 +390,17 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs, ...@@ -385,15 +390,17 @@ MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
int diff; int diff;
s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left, s_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&sscanner, s_nchars_left,
&generated); flags, &generated);
t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left, t_res= MY_FUNCTION_NAME(scanner_next_pad_trim)(&tscanner, t_nchars_left,
&generated); flags, &generated);
if ((diff= (s_res.weight - t_res.weight))) if ((diff= (s_res.weight - t_res.weight)))
return diff; return diff;
if (generated == 2) if (generated == 2)
{ {
if (cs->state & MY_CS_NOPAD) if ((cs->state & MY_CS_NOPAD) &&
(flags & MY_STRNNCOLLSP_NCHARS_EMULATE_TRIMMED_TRAILING_SPACES))
{ {
/* /*
Both values are auto-generated. There's no real data any more. Both values are auto-generated. There's no real data any more.
...@@ -445,11 +452,12 @@ static int ...@@ -445,11 +452,12 @@ static int
MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs, MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs,
const uchar *s, size_t slen, const uchar *s, size_t slen,
const uchar *t, size_t tlen, const uchar *t, size_t tlen,
size_t nchars) size_t nchars,
uint flags)
{ {
return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0], return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, &cs->uca->level[0],
s, slen, t, tlen, s, slen, t, tlen,
nchars); nchars, flags);
} }
...@@ -460,7 +468,8 @@ static int ...@@ -460,7 +468,8 @@ static int
MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs, MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
const uchar *s, size_t slen, const uchar *s, size_t slen,
const uchar *t, size_t tlen, const uchar *t, size_t tlen,
size_t nchars) size_t nchars,
uint flags)
{ {
uint num_level= cs->levels_for_order; uint num_level= cs->levels_for_order;
uint i; uint i;
...@@ -470,7 +479,7 @@ MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs, ...@@ -470,7 +479,7 @@ MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
&cs->uca->level[i], &cs->uca->level[i],
s, slen, s, slen,
t, tlen, t, tlen,
nchars); nchars, flags);
if (ret) if (ret)
return ret; return ret;
} }
......
...@@ -1215,7 +1215,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length, ...@@ -1215,7 +1215,8 @@ my_convert_fix(CHARSET_INFO *to_cs, char *to, size_t to_length,
int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
const uchar *str1, size_t len1, const uchar *str1, size_t len1,
const uchar *str2, size_t len2, const uchar *str2, size_t len2,
size_t nchars) size_t nchars,
uint flags)
{ {
int error; int error;
len1= my_well_formed_length(cs, (const char *) str1, len1= my_well_formed_length(cs, (const char *) str1,
...@@ -1232,7 +1233,8 @@ int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, ...@@ -1232,7 +1233,8 @@ int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
const uchar *str1, size_t len1, const uchar *str1, size_t len1,
const uchar *str2, size_t len2, const uchar *str2, size_t len2,
size_t nchars) size_t nchars,
uint flags)
{ {
set_if_smaller(len1, nchars); set_if_smaller(len1, nchars);
set_if_smaller(len2, nchars); set_if_smaller(len2, nchars);
......
...@@ -304,7 +304,8 @@ static int ...@@ -304,7 +304,8 @@ static int
MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)), MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)),
const uchar *a, size_t a_length, const uchar *a, size_t a_length,
const uchar *b, size_t b_length, const uchar *b, size_t b_length,
size_t nchars) size_t nchars,
uint flags)
{ {
const uchar *a_end= a + a_length; const uchar *a_end= a + a_length;
const uchar *b_end= b + b_length; const uchar *b_end= b + b_length;
......
...@@ -108,12 +108,14 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len) ...@@ -108,12 +108,14 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
int my_strnncollsp_nchars_generic(CHARSET_INFO *cs, int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
const uchar *str1, size_t len1, const uchar *str1, size_t len1,
const uchar *str2, size_t len2, const uchar *str2, size_t len2,
size_t nchars); size_t nchars,
uint flags);
int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs, int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
const uchar *str1, size_t len1, const uchar *str1, size_t len1,
const uchar *str2, size_t len2, const uchar *str2, size_t len2,
size_t nchars); size_t nchars,
uint flags);
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs); uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs); uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment