Commit 8f0ea6f0 authored by unknown's avatar unknown

Bug #6737: REGEXP gives wrong result with case sensitive collation:

- A new flag MY_CS_CSSORT was introduced for case sensitivity.
- Item_func_regexp doesn't substiture ICASE not only
  for binary collations but for case sensitive collations as well. 
parent c6766305
......@@ -63,7 +63,7 @@ typedef struct unicase_info_st
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CHARSET_UNDEFINED 0
......
......@@ -296,3 +296,12 @@ FD C3BD FD 1
FE C3BE FE 1
FF C3BF FF 1
DROP TABLE t1;
select 'a' regexp 'A' collate latin1_general_ci;
'a' regexp 'A' collate latin1_general_ci
1
select 'a' regexp 'A' collate latin1_general_cs;
'a' regexp 'A' collate latin1_general_cs
0
select 'a' regexp 'A' collate latin1_bin;
'a' regexp 'A' collate latin1_bin
0
......@@ -53,3 +53,10 @@ SELECT
hex(@l:=convert(@u using latin1)),
a=@l FROM t1;
DROP TABLE t1;
#
# Bug #6737: REGEXP gives wrong result with case sensitive collation
#
select 'a' regexp 'A' collate latin1_general_ci;
select 'a' regexp 'A' collate latin1_general_cs;
select 'a' regexp 'A' collate latin1_bin;
......@@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
}
else
{
uchar *sort_order= all_charsets[cs->number]->sort_order;
simple_cs_init_functions(all_charsets[cs->number]);
new->mbminlen= 1;
new->mbmaxlen= 1;
......@@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
all_charsets[cs->number]->state |= MY_CS_LOADED;
}
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
/*
Check if case sensitive sort order: A < a < B.
We need MY_CS_FLAG for regex library, and for
case sensitivity flag for 5.0 client protocol,
to support isCaseSensitive() method in JDBC driver
*/
if (sort_order && sort_order['A'] < sort_order['a'] &&
sort_order['a'] < sort_order['B'])
all_charsets[cs->number]->state|= MY_CS_CSSORT;
}
}
else
......
......@@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
return 0;
}
int error;
if ((error=regcomp(&preg,res->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation)))
if ((error= regcomp(&preg,res->c_ptr(),
((cmp_collation.collation->state & MY_CS_BINSORT) ||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation)))
{
(void) regerror(error,&preg,buff,sizeof(buff));
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
......@@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
regex_compiled=0;
}
if (regcomp(&preg,res2->c_ptr(),
(cmp_collation.collation->state & MY_CS_BINSORT) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation))
((cmp_collation.collation->state & MY_CS_BINSORT) ||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE,
cmp_collation.collation))
{
null_value=1;
return 0;
......
......@@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
CHARSET_INFO my_charset_latin2_czech_ci =
{
2,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
"latin2", /* cs name */
"latin2_czech_cs", /* name */
"", /* comment */
NULL, /* tailoring */
2,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"latin2", /* cs name */
"latin2_czech_cs", /* name */
"", /* comment */
NULL, /* tailoring */
ctype_czech,
to_lower_czech,
to_upper_czech,
......
......@@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
CHARSET_INFO my_charset_cp1250_czech_ci =
{
34,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
"cp1250", /* cs name */
"cp1250_czech_cs", /* name */
"", /* comment */
NULL, /* tailoring */
34,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
"cp1250", /* cs name */
"cp1250_czech_cs", /* name */
"", /* comment */
NULL, /* tailoring */
ctype_win1250ch,
to_lower_win1250ch,
to_upper_win1250ch,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment