Bug 1181 fix. LIKE didn't work with UCS2 character set.

de826fdb · unknown · 96eb8197 · de826fdb · de826fdb · de826fdb
Commit de826fdb authored Sep 22, 2003 by unknown
Showing with 428 additions and 8 deletions

mysql-test/r/ctype_ucs.result mysql-test/r/ctype_ucs.result +127 -0

mysql-test/t/ctype_ucs.test mysql-test/t/ctype_ucs.test +46 -0

strings/ctype-ucs2.c strings/ctype-ucs2.c +255 -8

No files found.
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -13,6 +13,133 @@ hex(word)
 0420
 2004
 DROP TABLE t1;
+SET NAMES koi8r;
+SET character_set_connection=ucs2;
+create table t1 (a varchar(10) character set ucs2, key(a));
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
+explain select * from t1 where a like 'abc%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	21	NULL	1	Using where; Using index
+explain select * from t1 where a like concat('abc','%');
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	21	NULL	1	Using where; Using index
+select * from t1 where a like "abc%";
+a
+abc
+abcd
+select * from t1 where a like concat("abc","%");
+a
+abc
+abcd
+select * from t1 where a like "ABC%";
+a
+abc
+abcd
+select * from t1 where a like "test%";
+a
+test
+select * from t1 where a like "te_t";
+a
+test
+select * from t1 where a like "%a%";
+a
+select * from t1 where a like "%abcd%";
+a
+abcd
+select * from t1 where a like "%abc\d%";
+a
+abcd
+drop table t1;
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
+INSERT INTO t1 VALUES (''),(''),(''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+SELECT * FROM t1 WHERE a LIKE '%%';
+a
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+SELECT * FROM t1 WHERE a LIKE '%%';
+a
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+SELECT * FROM t1 WHERE a LIKE '%';
+a
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+SELECT * FROM t1 WHERE a LIKE '%' COLLATE ucs2_bin;
+a
+
+
+DROP TABLE t1;
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE "ca_";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE "cat";
+word
+cat
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
+word
+cat
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
+word
+cat
+DROP TABLE t1;
+SET NAMES latin1;
 CREATE TABLE t1 (
 word VARCHAR(64),
 bar INT(11) default 0,

--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -29,6 +29,51 @@ INSERT INTO t1 VALUES (X'042000200020'), (X'200400200020');
 SELECT hex(word) FROM t1 ORDER BY word;
 DROP TABLE t1;

+######################################################
+#
+# Test of like
+#
+
+SET NAMES koi8r;
+SET character_set_connection=ucs2;
+
+create table t1 (a varchar(10) character set ucs2, key(a));
+insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
+explain select * from t1 where a like 'abc%';
+explain select * from t1 where a like concat('abc','%');
+select * from t1 where a like "abc%";
+select * from t1 where a like concat("abc","%");
+select * from t1 where a like "ABC%";
+select * from t1 where a like "test%";
+select * from t1 where a like "te_t";
+select * from t1 where a like "%a%";
+select * from t1 where a like "%abcd%";
+select * from t1 where a like "%abc\d%";
+drop table t1;
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ucs2);
+INSERT INTO t1 VALUES (''),(''),(''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+INSERT INTO t1 VALUES (''),(''),(''),('');
+SELECT * FROM t1 WHERE a LIKE '%%';
+SELECT * FROM t1 WHERE a LIKE '%%';
+SELECT * FROM t1 WHERE a LIKE '%';
+SELECT * FROM t1 WHERE a LIKE '%' COLLATE ucs2_bin;
+DROP TABLE t1;
+
+#
+# Bug 1181
+#
+CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
+TYPE=MyISAM CHARACTER SET ucs2 COLLATE ucs2_general_ci;
+INSERT INTO t1 (word) VALUES ("cat");
+SELECT * FROM t1 WHERE word LIKE "c%";
+SELECT * FROM t1 WHERE word LIKE "ca_";
+SELECT * FROM t1 WHERE word LIKE "cat";
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630025';
+SELECT * FROM t1 WHERE word LIKE _ucs2 x'00630061005F';
+DROP TABLE t1;

 ######################################################

@@ -53,6 +98,7 @@ DROP TABLE t1;
 # are not part of the index sorted on, it does a filesort, which fails. 
 # Using a straight index yields correct results.

+SET NAMES latin1;

 #
 # Two fields, index

--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -90,7 +90,7 @@ static uchar to_upper_ucs2[] = {
 };


-static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)) , 
+static int my_ucs2_uni (CHARSET_INFO *cs __attribute__((unused)),
                 my_wc_t * pwc, const uchar *s, const uchar *e)
 {
  if (s+2 > e) /* Need 2 characters */
@@ -1018,21 +1018,268 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
  return (uint) (end-ptr);
 }

+/*
+** Compare string against string with wildcard
+**	0 if matched
+**	-1 if not matched (returned only while handling a w_many wildcard)
+**	 1 if not matched (all other cases)
+*/
+
+static
+int my_wildcmp_ucs2(CHARSET_INFO *cs,
+		    const char *str,const char *str_end,
+		    const char *wildstr,const char *wildend,
+		    int escape, int w_one, int w_many,
+		    MY_UNICASE_INFO **weights)
+{
+  int result= -1;				/* Not found, using wildcards */
+  my_wc_t s_wc, w_wc;
+  int scan, plane;
+  
+  while (wildstr != wildend)
+  {
+    
+    while (1)
+    {
+      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+      if (scan <= 0)
+        return 1;
+      wildstr+= scan;
+      
+      if (w_wc ==  (my_wc_t)escape)
+      {
+        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+        if (scan <= 0)
+          return 1;
+        wildstr+= scan;
+      }
+      
+      if (w_wc == (my_wc_t)w_many)
+      {
+        result= 1;				/* Found an anchor char     */
+        break;
+      }
+      
+      scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
+      if (scan <=0)
+        return 1;
+      str+= scan;
+      
+      if (w_wc == (my_wc_t)w_one)
+      {
+        result= 1;				/* Found an anchor char     */
+      }
+      else
+      {
+        if (weights)
+        {
+          plane=(s_wc>>8) & 0xFF;
+          s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+          plane=(w_wc>>8) & 0xFF;
+          w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+        }
+        if (s_wc != w_wc)
+          return 1;				/* No match */
+      }
+      if (wildstr == wildend)
+	return (str != str_end);		/* Match if both are at end */
+    }
+    
+    
+    if (w_wc == (my_wc_t)w_many)
+    {						/* Found w_many */
+    
+      /* Skip consecutive w_many/w_one wildcards; each w_one consumes one character of str */
+      for ( ; wildstr != wildend ; )
+      {
+        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+        if (scan <= 0)
+          return 1;
+        wildstr+= scan;
        
+	if (w_wc == (my_wc_t)w_many)
+	  continue;
+	
+	if (w_wc == (my_wc_t)w_one)
+	{
+	  scan= my_ucs2_uni(cs, &s_wc, (const uchar*)str, (const uchar*)str_end);
+          if (scan <=0)
+            return 1;
+          str+= scan;
+	  continue;
+	}
+	break;					/* Not a wild character */
+      }
+      
+      if (wildstr == wildend)
+	return 0;				/* Ok if w_many is last */
+      
+      if (str == str_end)
+	return -1;
+      
+      scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+      if (scan <= 0)
+        return 1;
+      wildstr+= scan;
+      
+      if (w_wc ==  (my_wc_t)escape)
+      {
+        scan= my_ucs2_uni(cs,&w_wc, (const uchar*)wildstr, (const uchar*)wildend);
+        if (scan <= 0)
+          return 1;
+        wildstr+= scan;
+      }
+      
+      do
+      {
+        /* Skip until the first character from wildstr is found */
+        while (str != str_end)
+        {
+          scan= my_ucs2_uni(cs,&s_wc, (const uchar*)str, (const uchar*)str_end);
+          if (scan <= 0)
+            return 1;
+          str+= scan;
          
-static MY_COLLATION_HANDLER my_collation_ci_handler =
+          if (weights)
+          {
+            plane=(s_wc>>8) & 0xFF;
+            s_wc = weights[plane] ? weights[plane][s_wc & 0xFF].sort : s_wc;
+            plane=(w_wc>>8) & 0xFF;
+            w_wc = weights[plane] ? weights[plane][w_wc & 0xFF].sort : w_wc;
+          }
+          
+          if (s_wc == w_wc)
+            break;
+        }
+        if (str == str_end)
+          return -1;
+        
+        result= my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,escape,
+                                w_one,w_many,weights);
+        if (result <= 0)
+          return result;
+        
+      } while (str != str_end && w_wc != (my_wc_t)w_many);
+      return -1;
+    }
+  }
+  return (str != str_end ? 1 : 0);
+}
+
+
+static
+int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
+		    const char *str,const char *str_end,
+		    const char *wildstr,const char *wildend,
+		    int escape, int w_one, int w_many)
+{
+  return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
+                         escape,w_one,w_many,uni_plane); 
+}
+
+static
+int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
+		    const char *str,const char *str_end,
+		    const char *wildstr,const char *wildend,
+		    int escape, int w_one, int w_many)
+{
+  return my_wildcmp_ucs2(cs,str,str_end,wildstr,wildend,
+                         escape,w_one,w_many,NULL); 
+}
+
+
+static
+int my_strnncoll_ucs2_bin(CHARSET_INFO *cs, 
+				 const uchar *s, uint slen,
+				 const uchar *t, uint tlen)
+{
+  int s_res,t_res;
+  my_wc_t s_wc,t_wc;
+  const uchar *se=s+slen;
+  const uchar *te=t+tlen;
+
+  while ( s < se && t < te )
+  {
+    s_res=my_ucs2_uni(cs,&s_wc, s, se);
+    t_res=my_ucs2_uni(cs,&t_wc, t, te);
+    
+    if ( s_res <= 0 || t_res <= 0 )
+    {
+      /* Incorrect string, compare by char value */
+      return ((int)s[0]-(int)t[0]); 
+    }
+    if ( s_wc != t_wc )
+    {
+      return  ((int) s_wc) - ((int) t_wc);
+    }
+    
+    s+=s_res;
+    t+=t_res;
+  }
+  return ( (se-s) - (te-t) );
+}
+
+static
+int my_strcasecmp_ucs2_bin(CHARSET_INFO *cs, const char *s, const char *t)
+{
+  uint s_len=strlen(s);
+  uint t_len=strlen(t);
+  uint len = (s_len > t_len) ? s_len : t_len;
+  return  my_strncasecmp_ucs2(cs, s, t, len);
+}
+
+static
+int my_strnxfrm_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
+			 uchar *dst, uint dstlen,
+			 const uchar *src, uint srclen)
+{
+  if (dst != src)
+    memcpy(dst,src,srclen= min(dstlen,srclen));
+  return srclen;
+}
+
+static
+void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
+			   const uchar *key, uint len,ulong *nr1, ulong *nr2)
+{
+  const uchar *pos = key;
+  
+  key+= len;
+  
+  for (; pos < (uchar*) key ; pos++)
+  {
+    nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * 
+	     ((uint)*pos)) + (nr1[0] << 8);
+    nr2[0]+=3;
+  }
+}
+
+
+static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
 {
    my_strnncoll_ucs2,
    my_strnncoll_ucs2,
    my_strnxfrm_ucs2,
    my_like_range_simple,
-    my_wildcmp_mb,
+    my_wildcmp_ucs2_ci,
    my_strcasecmp_ucs2,
    my_instr_mb,
    my_hash_sort_ucs2
 };

-static MY_CHARSET_HANDLER my_charset_handler=
+static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
+{
+    my_strnncoll_ucs2_bin,
+    my_strnncoll_ucs2_bin,
+    my_strnxfrm_ucs2_bin,
+    my_like_range_simple,
+    my_wildcmp_ucs2_bin,
+    my_strcasecmp_ucs2_bin,
+    my_instr_mb,
+    my_hash_sort_ucs2_bin
+};
+
+static MY_CHARSET_HANDLER my_charset_ucs2_handler=
 {
    my_ismbchar_ucs2,	/* ismbchar     */
    my_mbcharlen_ucs2,	/* mbcharlen    */
@@ -1077,8 +1324,8 @@ CHARSET_INFO my_charset_ucs2_general_ci=
    1,			/* strxfrm_multiply */
    2,			/* mbmaxlen     */
    0,
-    &my_charset_handler,
-    &my_collation_ci_handler
+    &my_charset_ucs2_handler,
+    &my_collation_ucs2_general_ci_handler
 };


@@ -1100,8 +1347,8 @@ CHARSET_INFO my_charset_ucs2_bin=
    1,			/* strxfrm_multiply */
    2,			/* mbmaxlen     */
    0,
-    &my_charset_handler,
-    &my_collation_mb_bin_handler
+    &my_charset_ucs2_handler,
+    &my_collation_ucs2_bin_handler
 };