diff --git a/mysql-test/include/ctype_regex.inc b/mysql-test/include/ctype_regex.inc new file mode 100644 index 0000000000000000000000000000000000000000..0e6b4c416079d8563539b823ddc84179f5cf2195 --- /dev/null +++ b/mysql-test/include/ctype_regex.inc @@ -0,0 +1,42 @@ +# +# To test a desired collation, set session.collation_connection to +# this collation before including this file +# + +--disable_warnings +drop table if exists t1; +--enable_warnings + +# +# Create a table with two varchar(64) null-able column, +# using current values of +# @@character_set_connection and @@collation_connection. +# + +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +delete from t1; + +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); + +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); + +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); + +select HIGH_PRIORITY s1 regexp s2 from t1; + +drop table t1; diff --git a/mysql-test/r/ctype_uca.result b/mysql-test/r/ctype_uca.result index 889702e380c807d91626517bf6e3800b00c64c2c..ada7ace9696c1e1cc7a53a0b5c543262fb4909c5 100644 --- a/mysql-test/r/ctype_uca.result +++ b/mysql-test/r/ctype_uca.result @@ -2754,4 +2754,49 @@ a c ch drop table t1; +set collation_connection=ucs2_unicode_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL, + `s2` varchar(64) character set ucs2 collate ucs2_unicode_ci default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names utf8; End for 5.0 tests diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result index 1097486f185f0bc80906e7e0171470a94468e869..709a097d823836b70ff429970f9d409c66291e30 100644 --- a/mysql-test/r/ctype_ucs.result +++ b/mysql-test/r/ctype_ucs.result @@ -922,4 +922,49 @@ ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_gen select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0); ERROR HY000: Illegal mix of collations (ascii_general_ci,IMPLICIT) and (ucs2_general_ci,COERCIBLE) for operation '=' drop table t1; +set collation_connection=ucs2_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set ucs2 default NULL, + `s2` varchar(64) character set ucs2 default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names latin1; End of 5.0 tests diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 715dfb1b7d8a37bbca4cada1f8ab5b45016a2f69..0981f72b0c15ba76c7d42d508f416685e2cdcf00 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -267,6 +267,51 @@ b select * from t1 where a = 'b' and a != 'b'; a drop table t1; +set collation_connection=utf8_general_ci; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) character set utf8 default NULL, + `s2` varchar(64) character set utf8 default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; +insert into t1 values('aaa','aaa'); +insert into t1 values('aaa|qqq','qqq'); +insert into t1 values('gheis','^[^a-dXYZ]+$'); +insert into t1 values('aab','^aa?b'); +insert into t1 values('Baaan','^Ba*n'); +insert into t1 values('aaa','qqq|aaa'); +insert into t1 values('qqq','qqq|aaa'); +insert into t1 values('bbb','qqq|aaa'); +insert into t1 values('bbb','qqq'); +insert into t1 values('aaa','aba'); +insert into t1 values(null,'abc'); +insert into t1 values('def',null); +insert into t1 values(null,null); +insert into t1 values('ghi','ghi['); +select HIGH_PRIORITY s1 regexp s2 from t1; +s1 regexp s2 +1 +1 +1 +1 +1 +1 +1 +0 +0 +0 +NULL +NULL +NULL +NULL +drop table t1; +set names utf8; set names utf8; select '胁邪褋褟' rlike '[[:<:]]胁邪褋褟[[:>:]]'; '胁邪褋褟' rlike '[[:<:]]胁邪褋褟[[:>:]]' diff --git a/mysql-test/r/func_regexp.result b/mysql-test/r/func_regexp.result index 4e35c2a134800a509821352ded24be708b68ca7f..c3851e51207b16f0d7c8d60dbb12c87a8dc26c53 100644 --- a/mysql-test/r/func_regexp.result +++ b/mysql-test/r/func_regexp.result @@ -1,5 +1,17 @@ drop table if exists t1; -create table t1 (s1 char(64),s2 char(64)); +set names latin1; +drop table if exists t1; +create table t1 as +select repeat(' ', 64) as s1, repeat(' ',64) as s2 +union +select null, null; +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `s1` varchar(64) default NULL, + `s2` varchar(64) default NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +delete from t1; insert into t1 values('aaa','aaa'); insert into t1 values('aaa|qqq','qqq'); insert into t1 values('gheis','^[^a-dXYZ]+$'); diff --git a/mysql-test/t/ctype_uca.test b/mysql-test/t/ctype_uca.test index 695a21adbf5ae05fbb5c705cd9659dbdec1ce046..0d917428efb12ac0eb2c312d0ad54f387a5c27a2 100644 --- a/mysql-test/t/ctype_uca.test +++ b/mysql-test/t/ctype_uca.test @@ -538,4 +538,8 @@ alter table t1 convert to character set ucs2 collate ucs2_czech_ci; select * from t1 where a like 'c%'; drop table t1; +set collation_connection=ucs2_unicode_ci; +--source include/ctype_regex.inc +set names utf8; + -- echo End for 5.0 tests diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test index bca3a9c3a96c8f56a3da3ed6aaf71efcb04528b3..3779dd502607b7e252c8c5e939d9cb04643c5f78 100644 --- a/mysql-test/t/ctype_ucs.test +++ b/mysql-test/t/ctype_ucs.test @@ -651,4 +651,8 @@ select * from t1 where a=if(b<10,_ucs2 0x00C0,_ucs2 0x0062); select * from t1 where a=if(b<10,_ucs2 0x0062,_ucs2 0x00C0); drop table t1; +set collation_connection=ucs2_general_ci; +--source include/ctype_regex.inc +set names latin1; + --echo End of 5.0 tests diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index b61bb1d53bf9a6475fbbb4e49b837bcee670179d..5e3fe7a12c75efec64ff6fdfd1d216888cdfec89 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -185,6 +185,13 @@ select * from t1 where a = 'b' and a = 'b'; select * from t1 where a = 'b' and a != 'b'; drop table t1; +# +# Testing regexp +# +set collation_connection=utf8_general_ci; +--source include/ctype_regex.inc +set names utf8; + # # Bug #3928 regexp [[:>:]] and UTF-8 # diff --git a/mysql-test/t/func_regexp.test b/mysql-test/t/func_regexp.test index 23070c71fe99a76ef099cc6809ce7630367793f3..f34830f610088ba6b5f76f136105c903b7735599 100644 --- a/mysql-test/t/func_regexp.test +++ b/mysql-test/t/func_regexp.test @@ -6,28 +6,9 @@ drop table if exists t1; --enable_warnings -create table t1 (s1 char(64),s2 char(64)); +set names latin1; +--source include/ctype_regex.inc -insert into t1 values('aaa','aaa'); -insert into t1 values('aaa|qqq','qqq'); -insert into t1 values('gheis','^[^a-dXYZ]+$'); -insert into t1 values('aab','^aa?b'); -insert into t1 values('Baaan','^Ba*n'); -insert into t1 values('aaa','qqq|aaa'); -insert into t1 values('qqq','qqq|aaa'); - -insert into t1 values('bbb','qqq|aaa'); -insert into t1 values('bbb','qqq'); -insert into t1 values('aaa','aba'); - -insert into t1 values(null,'abc'); -insert into t1 values('def',null); -insert into t1 values(null,null); -insert into t1 values('ghi','ghi['); - -select HIGH_PRIORITY s1 regexp s2 from t1; - -drop table t1; # # This test a bug in regexp on Alpha diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 8f5ff050dd6d6f37f02dfe1153955a0a329f76dd..df5c356b2fcca068d461aaf5ea1256645ad31b65 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4339,6 +4339,51 @@ void Item_func_like::cleanup() #ifdef USE_REGEX +bool +Item_func_regex::regcomp(bool send_error) +{ + char buff[MAX_FIELD_WIDTH]; + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[1]->val_str(&tmp); + int error; + + if (args[1]->null_value) + return TRUE; + + if (regex_compiled) + { + if (!stringcmp(res, &prev_regexp)) + return FALSE; + prev_regexp.copy(*res); + my_regfree(&preg); + regex_compiled= 0; + } + + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) + return TRUE; + res= &conv; + } + + if ((error= my_regcomp(&preg, res->c_ptr(), + regex_lib_flags, regex_lib_charset))) + { + if (send_error) + { + (void) my_regerror(error, &preg, buff, sizeof(buff)); + my_error(ER_REGEXP_ERROR, MYF(0), buff); + } + return TRUE; + } + regex_compiled= 1; + return FALSE; +} + + bool Item_func_regex::fix_fields(THD *thd, Item **ref) { @@ -4355,34 +4400,33 @@ Item_func_regex::fix_fields(THD *thd, Item **ref) if (agg_arg_charsets(cmp_collation, args, 2, MY_COLL_CMP_CONV, 1)) return TRUE; + regex_lib_flags= (cmp_collation.collation->state & + (MY_CS_BINSORT | MY_CS_CSSORT)) ? + REG_EXTENDED | REG_NOSUB : + REG_EXTENDED | REG_NOSUB | REG_ICASE; + /* + If the case of UCS2 and other non-ASCII character sets, + we will convert patterns and strings to UTF8. + */ + regex_lib_charset= (cmp_collation.collation->mbminlen > 1) ? + &my_charset_utf8_general_ci : + cmp_collation.collation; + used_tables_cache=args[0]->used_tables() | args[1]->used_tables(); not_null_tables_cache= (args[0]->not_null_tables() | args[1]->not_null_tables()); const_item_cache=args[0]->const_item() && args[1]->const_item(); if (!regex_compiled && args[1]->const_item()) { - char buff[MAX_FIELD_WIDTH]; - String tmp(buff,sizeof(buff),&my_charset_bin); - String *res=args[1]->val_str(&tmp); if (args[1]->null_value) { // Will always return NULL maybe_null=1; return FALSE; } - int error; - if ((error= my_regcomp(&preg,res->c_ptr(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation))) - { - (void) my_regerror(error,&preg,buff,sizeof(buff)); - my_error(ER_REGEXP_ERROR, MYF(0), buff); + if (regcomp(TRUE)) return TRUE; - } - regex_compiled=regex_is_const=1; - maybe_null=args[0]->maybe_null; + regex_is_const= 1; + maybe_null= args[0]->maybe_null; } else maybe_null=1; @@ -4395,47 +4439,25 @@ longlong Item_func_regex::val_int() { DBUG_ASSERT(fixed == 1); char buff[MAX_FIELD_WIDTH]; - String *res, tmp(buff,sizeof(buff),&my_charset_bin); + String tmp(buff,sizeof(buff),&my_charset_bin); + String *res= args[0]->val_str(&tmp); - res=args[0]->val_str(&tmp); - if (args[0]->null_value) - { - null_value=1; + if ((null_value= (args[0]->null_value || + (!regex_is_const && regcomp(FALSE))))) return 0; - } - if (!regex_is_const) - { - char buff2[MAX_FIELD_WIDTH]; - String *res2, tmp2(buff2,sizeof(buff2),&my_charset_bin); - res2= args[1]->val_str(&tmp2); - if (args[1]->null_value) + if (cmp_collation.collation != regex_lib_charset) + { + /* Convert UCS2 strings to UTF8 */ + uint dummy_errors; + if (conv.copy(res->ptr(), res->length(), res->charset(), + regex_lib_charset, &dummy_errors)) { - null_value=1; + null_value= 1; return 0; } - if (!regex_compiled || stringcmp(res2,&prev_regexp)) - { - prev_regexp.copy(*res2); - if (regex_compiled) - { - my_regfree(&preg); - regex_compiled=0; - } - if (my_regcomp(&preg,res2->c_ptr_safe(), - ((cmp_collation.collation->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? - REG_EXTENDED | REG_NOSUB : - REG_EXTENDED | REG_NOSUB | REG_ICASE), - cmp_collation.collation)) - { - null_value=1; - return 0; - } - regex_compiled=1; - } + res= &conv; } - null_value=0; return my_regexec(&preg,res->c_ptr_safe(),0,(my_regmatch_t*) 0,0) ? 0 : 1; } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 40d0c2dfe917b321caa67dc57de32e03cc8304e1..9e316d184274847c3226911e394548378a82210f 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -1376,6 +1376,10 @@ class Item_func_regex :public Item_bool_func bool regex_is_const; String prev_regexp; DTCollation cmp_collation; + CHARSET_INFO *regex_lib_charset; + int regex_lib_flags; + String conv; + bool regcomp(bool send_error); public: Item_func_regex(Item *a,Item *b) :Item_bool_func(a,b), regex_compiled(0),regex_is_const(0) {}