MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx

Synchronizing sources in:
- my_wildcmp_uca_impl(), which handles utf8_unicode_ci
- my_wildcmp_unicode_impl(), which handles utf8_general_ci

The latter already had a fix for a similar MySQL bug in utf8_general_ci:
Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0

That fix is now propagated to utf8_unicode_ci.

MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
Synchronizing sources in:
- my_wildcmp_uca_impl(), which handles utf8_unicode_ci
- my_wildcmp_unicode_impl(), which handles utf8_general_ci

The latter already had a fix for a similar MySQL bug in utf8_general_ci:
Bug#11754 SET NAMES utf8 followed by SELECT "A\\" LIKE "A\\" returns 0

That fix is now propagated to utf8_unicode_ci.
34f8a407 · Alexander Barkov · ae3fe14c · 34f8a407 · 34f8a407 · 34f8a407
Commit 34f8a407 authored Oct 15, 2018 by Alexander Barkov
4 changed files
--- a/mysql-test/r/ctype_uca.result
+++ b/mysql-test/r/ctype_uca.result
@@ -13571,5 +13571,26 @@ Warnings:
 Note	1003	select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'oe') and (`test`.`t1`.`a` = 'oe'))
 DROP TABLE t1;
 #
+# MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+#
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+name
+radio! test
+DROP TABLE t1;
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+c1	c2
+1	1
+DROP TABLE t1;
+#
 # End of MariaDB-10.0 tests
 #
--- a/mysql-test/t/ctype_uca.test
+++ b/mysql-test/t/ctype_uca.test
@@ -617,6 +617,24 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' AND a='oe' COLLATE utf8_german2_c
 EXPLAIN EXTENDED SELECT * FROM t1 WHERE a='oe' COLLATE utf8_german2_ci AND a='oe';
 DROP TABLE t1;

+--echo #
+--echo # MDEV-17064 LIKE function has error behavior on the fields in which the collation is xxx_unicode_xx
+--echo #
+
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+ALTER TABLE t1 CHANGE COLUMN name name VARCHAR(20) CHARACTER SET 'utf8' COLLATE 'utf8_general_ci';
+SELECT * FROM t1 WHERE name LIKE '%!!%' ESCAPE '!';
+DROP TABLE t1;
+
+CREATE TABLE t1 (name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_unicode_ci);
+INSERT INTO t1 VALUES ('radio! test');
+SELECT name LIKE '%!!%' ESCAPE '!' AS c1,
+       name LIKE '%!!%' COLLATE utf8_general_ci ESCAPE '!' AS c2
+FROM t1;
+DROP TABLE t1;
+
 --echo #
 --echo # End of MariaDB-10.0 tests
 --echo #
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -21085,14 +21085,14 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
                       (const uchar*)wildend)) <= 0)
        return 1;

-      if (w_wc == (my_wc_t)w_many)
+      if (w_wc == (my_wc_t) w_many)
      {
        result= 1;                                /* Found an anchor char */
        break;
      }

      wildstr+= scan;
-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape && wildstr < wildend)
      {
        if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                         (const uchar*)wildend)) <= 0)
@@ -21106,23 +21106,21 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
        return 1;
      str+= scan;

-      if (!escaped && w_wc == (my_wc_t)w_one)
+      if (!escaped && w_wc == (my_wc_t) w_one)
      {
        result= 1;                                /* Found an anchor char */
      }
      else
      {
        if (my_uca_charcmp(cs,s_wc,w_wc))
-          return 1;
+          return 1;                               /* No match */
      }
      if (wildstr == wildend)
        return (str != str_end);                  /* Match if both are at end */
    }

-    
-    if (w_wc == (my_wc_t)w_many)
+    if (w_wc == (my_wc_t) w_many)
    {                                             /* Found w_many */
-    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
@@ -21130,13 +21128,13 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
                         (const uchar*)wildend)) <= 0)
          return 1;

-	if (w_wc == (my_wc_t)w_many)
+        if (w_wc == (my_wc_t) w_many)
        {
          wildstr+= scan;
          continue;
        }

-	if (w_wc == (my_wc_t)w_one)
+        if (w_wc == (my_wc_t) w_one)
        {
          wildstr+= scan;
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
@@ -21157,13 +21155,17 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                       (const uchar*)wildend)) <= 0)
        return 1;
+      wildstr+= scan;

-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape)
+      {
+        if (wildstr < wildend)
        {
-        wildstr+= scan;
          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
                           (const uchar*)wildend)) <= 0)
            return 1;
+          wildstr+= scan;
+        }
      }

      while (1)
@@ -21182,19 +21184,19 @@ int my_wildcmp_uca_impl(CHARSET_INFO *cs,
        if (str == str_end)
          return -1;

+        str+= scan;
        result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
-                                    escape, w_one, w_many, recurse_level+1);
-        
+                                    escape, w_one, w_many,
+                                    recurse_level + 1);
        if (result <= 0)
          return result;
-        
-        str+= scan;
      }
    }
  }
  return (str != str_end ? 1 : 0);
 }

+
 int my_wildcmp_uca(CHARSET_INFO *cs,
                   const char *str,const char *str_end,
                   const char *wildstr,const char *wildend,
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -4400,9 +4400,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
  int result= -1;                             /* Not found, using wildcards */
  my_wc_t s_wc, w_wc;
  int scan;
-  int (*mb_wc)(CHARSET_INFO *, my_wc_t *,
-               const uchar *, const uchar *);
-  mb_wc= cs->cset->mb_wc;
+  my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;

  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
    return 1;
@@ -4454,10 +4452,8 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
        return (str != str_end);                  /* Match if both are at end */
    }

-    
    if (w_wc == (my_wc_t) w_many)
    {                                             /* Found w_many */
-    
      /* Remove any '%' and '_' from the wild search string */
      for ( ; wildstr != wildend ; )
      {
@@ -4465,17 +4461,17 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
                         (const uchar*)wildend)) <= 0)
          return 1;

-        if (w_wc == (my_wc_t)w_many)
+        if (w_wc == (my_wc_t) w_many)
        {
          wildstr+= scan;
          continue;
        } 

-        if (w_wc == (my_wc_t)w_one)
+        if (w_wc == (my_wc_t) w_one)
        {
          wildstr+= scan;
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          str+= scan;
          continue;
@@ -4490,16 +4486,16 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
        return -1;

      if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                       (const uchar*)wildend)) <=0)
+                       (const uchar*)wildend)) <= 0)
        return 1;
      wildstr+= scan;

-      if (w_wc ==  (my_wc_t)escape)
+      if (w_wc ==  (my_wc_t) escape)
      {
        if (wildstr < wildend)
        {
          if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
-                           (const uchar*)wildend)) <=0)
+                           (const uchar*)wildend)) <= 0)
            return 1;
          wildstr+= scan;
        }
@@ -4511,7 +4507,7 @@ int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
        while (str != str_end)
        {
          if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
-                           (const uchar*)str_end)) <=0)
+                           (const uchar*)str_end)) <= 0)
            return 1;
          if (weights)
          {