Bug #12319710: INVALID MEMORY READ AND/OR CRASH IN MY_UCA_CHARCMP

WITH UTF32. The 5.5 version of the UTF32 collation did not enforce the BMP range (the only range it currently supports) when comparing with LIKE. Fixed by backporting the BMP checks from trunk. Also added a named constant for the maximum character that can have a weight in the weight table.

Bug #12319710: INVALID MEMORY READ AND/OR CRASH IN MY_UCA_CHARCMP
WITH UTF32. The 5.5 version of the UTF32 collation did not enforce the BMP range (the only range it currently supports) when comparing with LIKE. Fixed by backporting the BMP checks from trunk. Also added a named constant for the maximum character that can have a weight in the weight table.
50af230e · Georgi Kodinov · 4ba30341 · 50af230e · 50af230e · 50af230e
Commit 50af230e authored Jul 22, 2011 by Georgi Kodinov
Showing with 86 additions and 9 deletions

mysql-test/r/ctype_utf32_uca.result mysql-test/r/ctype_utf32_uca.result +20 -0

mysql-test/t/ctype_utf32_uca.test mysql-test/t/ctype_utf32_uca.test +21 -0

strings/ctype-uca.c strings/ctype-uca.c +45 -9

No files found.
--- a/mysql-test/r/ctype_utf32_uca.result
+++ b/mysql-test/r/ctype_utf32_uca.result
@@ -2415,5 +2415,25 @@ HEX(s1)
 00000061
 DROP TABLE t1;
 #
+# Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN 
+#   MY_UCA_CHARCMP WITH UTF32
+#
+SET collation_connection=utf32_unicode_ci;
+CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
+INSERT INTO t1 VALUES ('a'), ('b');
+CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
+#insert chars outside of BMP
+INSERT INTO t2 VALUEs (0x082837),(0x082837);
+#test for read-out-of-bounds with non-BMP chars as a LIKE pattern
+SELECT * FROM t1,t2 WHERE a LIKE b;
+a	b
+#test the original statement
+SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
+RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
+1
+1
+1
+DROP TABLE t1,t2;
+#
 # End of 5.5 tests
 #
--- a/mysql-test/t/ctype_utf32_uca.test
+++ b/mysql-test/t/ctype_utf32_uca.test
@@ -293,6 +293,27 @@ SET collation_connection=utf32_czech_ci;
 --source include/ctype_czech.inc
 --source include/ctype_like_ignorable.inc
+--echo #
+--echo # Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN 
+--echo #   MY_UCA_CHARCMP WITH UTF32
+--echo #
+SET collation_connection=utf32_unicode_ci;
+CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
+INSERT INTO t1 VALUES ('a'), ('b');
+CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
+--echo #insert chars outside of BMP
+INSERT INTO t2 VALUEs (0x082837),(0x082837);
+--echo #test for read-out-of-bounds with non-BMP chars as a LIKE pattern
+SELECT * FROM t1,t2 WHERE a LIKE b;
+--echo #test the original statement
+SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
+RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
+DROP TABLE t1,t2;
 --echo #
 --echo # End of 5.5 tests

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -42,6 +42,7 @@
 #define MY_UCA_NCHARS 256
 #define MY_UCA_CMASK  255
 #define MY_UCA_PSHIFT 8
+#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF
 uint16 page000data[]= { /* 0000 (4 weights per char) */
 0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
@@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
      return -1;
    scanner->sbeg+= mb_len;
-    if (wc > 0xFFFF)
+    if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
    {
      /* Return 0xFFFD as weight for all characters outside BMP */
      scanner->wbeg= nochar;
@@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
+/**
+  Helper function:
+  Find address of weights of the given character.
+  @param weights  UCA weight array
+  @param lengths  UCA length array
+  @param ch       character Unicode code point
+  @return Weight array
+    @retval  pointer to weight array for the given character,
+             or NULL if this page does not have implicit weights.
+*/
+static inline uint16 *
+my_char_weight_addr(CHARSET_INFO *cs, uint wc)
+{
+  uint page, ofst;
+  uchar *ucal= cs->sort_order;
+  uint16 **ucaw= cs->sort_order_big;
+  return wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ? NULL :
+         (ucaw[page= (wc >> 8)] ?
+          ucaw[page] + (ofst= (wc & 0xFF)) * ucal[page] :
+          NULL);
+}
 /*
  This function compares if two characters are the same.
  The sign +1 or -1 does not matter. The only
@@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
 static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
 {
-  size_t page1= wc1 >> MY_UCA_PSHIFT;
+  size_t length1, length2;
-  size_t page2= wc2 >> MY_UCA_PSHIFT;
+  uint16 *weight1= my_char_weight_addr(cs, wc1);
-  uchar *ucal= cs->sort_order;
+  uint16 *weight2= my_char_weight_addr(cs, wc2);
-  uint16 **ucaw= cs->sort_order_big;
-  size_t length1= ucal[page1];
-  size_t length2= ucal[page2];
-  uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1];
-  uint16 *weight2= ucaw[page2] + (wc2 & MY_UCA_CMASK) * ucal[page2];
  if (!weight1 || !weight2)
    return wc1 != wc2;
+  /* Quickly compare first weights */
+  if (weight1[0] != weight2[0])
+    return 1;
+  /* Thoroughly compare all weights */
+  length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT];
+  length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT];
  if (length1 > length2)
    return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
  */
  for (i=0; i < rc; i++)
  {
+    /* check if the shift or the reset characters are out of range */
+    if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ||
+        rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
+      return 1;
    if (!rule[i].curr[1]) /* If not a contraction */
    {
      uint pageb= (rule[i].base >> 8) & 0xFF;