MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query

75931fea · Alexander Barkov · 657f8a8d · 75931fea · 75931fea · 75931fea
Commit 75931fea authored Jul 14, 2015 by Alexander Barkov
Showing 3 changed files with 62 additions and 1 deletion

mysql-test/r/ctype_many.result +21 -0

mysql-test/t/ctype_many.test +19 -0

strings/ctype-simple.c +22 -1

No files found.
--- a/mysql-test/r/ctype_many.result
+++ b/mysql-test/r/ctype_many.result
--- a/mysql-test/t/ctype_many.test
+++ b/mysql-test/t/ctype_many.test
@@ -331,3 +331,22 @@ CREATE TABLE t2 (a BINARY(1));
 SELECT * FROM t2 WHERE a=(SELECT a FROM t1) AND a=_LATIN1'x';
 DROP TABLE t2;
 DROP TABLE t1;
+
+
+--echo #
+--echo # Start of 10.1 tests
+--echo #
+
+--echo #
+--echo # MDEV-8362 dash '-' is not recognized in charset armscii8 on select where query
+--echo #
+CREATE TABLE t1 (a VARCHAR(64) CHARACTER SET armscii8);
+INSERT INTO t1 VALUES ('abc-def');
+SELECT * FROM t1 WHERE a='abc-def';
+SELECT * FROM t1 WHERE a LIKE 'abc%';
+DROP TABLE t1;
+SELECT HEX(CONVERT(_utf8 0x2728292C2D2E USING armscii8));
+
+--echo #
+--echo # End of 10.1 tests
+--echo #
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@@ -1303,7 +1303,28 @@ create_fromuni(struct charset_info_st *cs,
      if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc)
      {
        int ofs= wc - idx[i].uidx.from;
-        tab[ofs]= ch;
+        if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/
+        {
+          /*
+            Some character sets can have double encoding. For example,
+            in ARMSCII8, the following characters are encoded twice:
+
+            Encoding#1 Encoding#2 Unicode Character Name
+            ---------- ---------- ------- --------------
+            0x27       0xFF       U+0027  APOSTROPHE
+            0x28       0xA5       U+0028  LEFT PARENTHESIS
+            0x29       0xA4       U+0029  RIGHT PARENTHESIS
+            0x2C       0xAB       U+002C  COMMA
+            0x2D       0xAC       U+002D  HYPHEN-MINUS
+            0x2E       0xA9       U+002E  FULL STOP
+
+            That is, both 0x27 and 0xFF convert to Unicode U+0027.
+            When converting back from Unicode to ARMSCII,
+            we prefer the ASCII range, that is we want U+0027
+            to convert to 0x27 rather than to 0xFF.
+          */
+          tab[ofs]= ch;
+        }
      }
    }
  }