Commit 2da0fdea authored by unknown's avatar unknown

Bug #5324 Bug in UCA collations with LIKE comparisons and INDEX

parent 459d26a5
...@@ -312,6 +312,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs, ...@@ -312,6 +312,13 @@ my_bool my_like_range_simple(CHARSET_INFO *cs,
char *min_str, char *max_str, char *min_str, char *max_str,
uint *min_length, uint *max_length); uint *min_length, uint *max_length);
my_bool my_like_range_mb(CHARSET_INFO *cs,
const char *ptr, uint ptr_length,
pbool escape, pbool w_one, pbool w_many,
uint res_length,
char *min_str, char *max_str,
uint *min_length, uint *max_length);
my_bool my_like_range_ucs2(CHARSET_INFO *cs, my_bool my_like_range_ucs2(CHARSET_INFO *cs,
const char *ptr, uint ptr_length, const char *ptr, uint ptr_length,
pbool escape, pbool w_one, pbool w_many, pbool escape, pbool w_one, pbool w_many,
......
...@@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž ...@@ -1872,3 +1872,42 @@ Z,z,Ź,ź,Ż,ż,Ž,ž
ǁ ǁ
ǂ ǂ
ǃ ǃ
drop table t1;
SET NAMES utf8;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
COLLATE ucs2_unicode_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
c
Μωδαί̈
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_unicode_ci ORDER BY c;
c
Μωδ
Μωδαί̈
DROP TABLE t1;
...@@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci; ...@@ -180,3 +180,40 @@ select group_concat(c1 order by c1) from t1 group by c1 collate utf8_slovak_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci; select group_concat(c1 order by c1) from t1 group by c1 collate utf8_spanish2_ci;
select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci; select group_concat(c1 order by c1) from t1 group by c1 collate utf8_roman_ci;
drop table t1;
#
# Bug#5324
#
SET NAMES utf8;
#test1
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_general_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
#Check one row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_general_ci ORDER BY c;
DROP TABLE t1;
#test2
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE ucs2_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025 COLLATE ucs2_unicode_ci;
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _ucs2 0x039C0025
COLLATE ucs2_unicode_ci ORDER BY c;
DROP TABLE t1;
#test 3
CREATE TABLE t1 (c varchar(255) NOT NULL COLLATE utf8_unicode_ci, INDEX (c));
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B403B11F770308 USING utf8));
#Check one row row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8) COLLATE utf8_unicode_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf8)
COLLATE utf8_unicode_ci ORDER BY c;
DROP TABLE t1;
...@@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)), ...@@ -458,6 +458,92 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
} }
} }
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
** ptr Pointer to LIKE string.
** ptr_length Length of LIKE string.
** escape Escape character in LIKE. (Normally '\').
** All escape characters should be removed from min_str and max_str
** res_length Length of min_str and max_str.
** min_str Smallest case sensitive string that ranges LIKE.
** Should be space padded to res_length.
** max_str Largest case sensitive string that ranges LIKE.
** Normally padded with the biggest character sort value.
**
** The function should return 0 if ok and 1 if the LIKE string can't be
** optimized !
*/
my_bool my_like_range_mb(CHARSET_INFO *cs,
const char *ptr,uint ptr_length,
pbool escape, pbool w_one, pbool w_many,
uint res_length,
char *min_str,char *max_str,
uint *min_length,uint *max_length)
{
const char *end=ptr+ptr_length;
char *min_org=min_str;
char *min_end=min_str+res_length;
char *max_end=max_str+res_length;
for (; ptr != end && min_str != min_end ; ptr++)
{
if (*ptr == escape && ptr+1 != end)
{
ptr++; /* Skip escape */
*min_str++= *max_str++ = *ptr;
continue;
}
if (*ptr == w_one || *ptr == w_many) /* '_' and '%' in SQL */
{
char buf[10];
uint buflen;
/* Write min key */
*min_length= (uint) (min_str - min_org);
*max_length=res_length;
do
{
*min_str++= (char) cs->min_sort_char;
} while (min_str != min_end);
/*
Write max key: create a buffer with multibyte
representation of the max_sort_char character,
and copy it into max_str in a loop.
*/
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, buf, buf + sizeof(buf));
DBUG_ASSERT(buflen > 0);
do
{
if ((max_str + buflen) <= max_end)
{
/* Enough space for max characer */
memcpy(max_str, buf, buflen);
max_str+= buflen;
}
else
{
/*
There is no space for whole multibyte
character, then add trailing spaces.
*/
*max_str++= ' ';
}
} while (max_str != max_end);
return 0;
}
*min_str++= *max_str++ = *ptr;
}
*min_length= *max_length = (uint) (min_str - min_org);
while (min_str != min_end)
*min_str++ = *max_str++ = ' '; /* Because if key compression */
return 0;
}
static int my_wildcmp_mb_bin(CHARSET_INFO *cs, static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end, const char *str,const char *str_end,
const char *wildstr,const char *wildend, const char *wildstr,const char *wildend,
......
...@@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) ...@@ -6876,7 +6876,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
int mblen; int mblen;
if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc, if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
scanner->sbeg, scanner->send)) < 0)) scanner->sbeg,
scanner->send)) <= 0))
return -1; return -1;
scanner->page= wc >> 8; scanner->page= wc >> 8;
...@@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = ...@@ -7918,7 +7919,7 @@ MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
my_strnncoll_ucs2_uca, my_strnncoll_ucs2_uca,
my_strnncollsp_ucs2_uca, my_strnncollsp_ucs2_uca,
my_strnxfrm_ucs2_uca, my_strnxfrm_ucs2_uca,
my_like_range_simple, my_like_range_ucs2,
my_wildcmp_uca, my_wildcmp_uca,
NULL, NULL,
my_instr_mb, my_instr_mb,
...@@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler = ...@@ -8369,7 +8370,7 @@ MY_COLLATION_HANDLER my_collation_any_uca_handler =
my_strnncoll_any_uca, my_strnncoll_any_uca,
my_strnncollsp_any_uca, my_strnncollsp_any_uca,
my_strnxfrm_any_uca, my_strnxfrm_any_uca,
my_like_range_simple, my_like_range_mb,
my_wildcmp_uca, my_wildcmp_uca,
NULL, NULL,
my_instr_mb, my_instr_mb,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment