Commit 50af230e authored by Georgi Kodinov

Bug #12319710: INVALID MEMORY READ AND/OR CRASH IN MY_UCA_CHARCMP

WITH UTF32

The 5.5 version of the UTF32 collation was not enforcing the BMP range that 
it currently supports when comparing with LIKE. 
Fixed by backporting the checks for the BMP from trunk.
Added a named constant for the maximum character that can have a weight
in the weight table.
parent 4ba30341
...@@ -2415,5 +2415,25 @@ HEX(s1) ...@@ -2415,5 +2415,25 @@ HEX(s1)
00000061 00000061
DROP TABLE t1; DROP TABLE t1;
# #
# Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN
# MY_UCA_CHARCMP WITH UTF32
#
SET collation_connection=utf32_unicode_ci;
CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
INSERT INTO t1 VALUES ('a'), ('b');
CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
#insert chars outside of BMP
INSERT INTO t2 VALUEs (0x082837),(0x082837);
#test for read-out-of-bounds with non-BMP chars as a LIKE pattern
SELECT * FROM t1,t2 WHERE a LIKE b;
a b
#test the original statement
SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
1
1
1
DROP TABLE t1,t2;
#
# End of 5.5 tests # End of 5.5 tests
# #
...@@ -293,6 +293,27 @@ SET collation_connection=utf32_czech_ci; ...@@ -293,6 +293,27 @@ SET collation_connection=utf32_czech_ci;
--source include/ctype_czech.inc --source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc --source include/ctype_like_ignorable.inc
--echo #
--echo # Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN
--echo # MY_UCA_CHARCMP WITH UTF32
--echo #
# Regression test: evaluate LIKE with a utf32 column as the subject and a
# binary column as the pattern, and check the statements complete normally.
SET collation_connection=utf32_unicode_ci;
# t1.a is the LIKE subject: utf32 text with the utf32_turkish_ci collation.
CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
INSERT INTO t1 VALUES ('a'), ('b');
# t2.b is the LIKE pattern: raw bytes in a VARBINARY column.
CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
--echo #insert chars outside of BMP
# NOTE(review): 0x082837 is three bytes; presumably it is read as a utf32
# code point above U+FFFF when used as the pattern -- confirm.
INSERT INTO t2 VALUEs (0x082837),(0x082837);
--echo #test for read-out-of-bounds with non-BMP chars as a LIKE pattern
# No 'a'/'b' row is expected to match the pattern (empty result set).
SELECT * FROM t1,t2 WHERE a LIKE b;
--echo #test the original statement
# The statement from the bug report: the same LIKE, used as a join condition.
SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
DROP TABLE t1,t2;
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#define MY_UCA_NCHARS 256 #define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255 #define MY_UCA_CMASK 255
#define MY_UCA_PSHIFT 8 #define MY_UCA_PSHIFT 8
#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF
uint16 page000data[]= { /* 0000 (4 weights per char) */ uint16 page000data[]= { /* 0000 (4 weights per char) */
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
...@@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) ...@@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
return -1; return -1;
scanner->sbeg+= mb_len; scanner->sbeg+= mb_len;
if (wc > 0xFFFF) if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
{ {
/* Return 0xFFFD as weight for all characters outside BMP */ /* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar; scanner->wbeg= nochar;
...@@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs, ...@@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
/**
  Helper function:
  Find the address of the weights of the given character.

  @param cs  Collation descriptor. Its sort_order array gives the number
             of weights stored per character on each page, and its
             sort_order_big array gives the weight table of each page.
  @param wc  Character Unicode code point.

  @return Weight array
    @retval pointer to the first weight of the given character
    @retval NULL if wc is greater than MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT,
            or if the page of wc has no weight table.
*/
static inline uint16 *
my_char_weight_addr(CHARSET_INFO *cs, uint wc)
{
  uint page;
  uint16 *page_weights;

  /* Check the range first so an out-of-range code point never indexes
     the per-page arrays. */
  if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
    return NULL;

  /* The bits above the low byte select the page. */
  page= wc >> 8;
  page_weights= cs->sort_order_big[page];
  if (!page_weights)
    return NULL;

  /* The low byte selects the character within the page; every character
     on the page occupies sort_order[page] consecutive weights. */
  return page_weights + (wc & 0xFF) * cs->sort_order[page];
}
/* /*
This function compares if two characters are the same. This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only The sign +1 or -1 does not matter. The only
...@@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs, ...@@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{ {
size_t page1= wc1 >> MY_UCA_PSHIFT; size_t length1, length2;
size_t page2= wc2 >> MY_UCA_PSHIFT; uint16 *weight1= my_char_weight_addr(cs, wc1);
uchar *ucal= cs->sort_order; uint16 *weight2= my_char_weight_addr(cs, wc2);
uint16 **ucaw= cs->sort_order_big;
size_t length1= ucal[page1];
size_t length2= ucal[page2];
uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1];
uint16 *weight2= ucaw[page2] + (wc2 & MY_UCA_CMASK) * ucal[page2];
if (!weight1 || !weight2) if (!weight1 || !weight2)
return wc1 != wc2; return wc1 != wc2;
/* Quickly compare first weights */
if (weight1[0] != weight2[0])
return 1;
/* Thoroughly compare all weights */
length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT];
length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT];
if (length1 > length2) if (length1 > length2)
return memcmp((const void*)weight1, (const void*)weight2, length2*2) ? return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
...@@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t)) ...@@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
*/ */
for (i=0; i < rc; i++) for (i=0; i < rc; i++)
{ {
/* check if the shift or the reset characters are out of range */
if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ||
rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
return 1;
if (!rule[i].curr[1]) /* If not a contraction */ if (!rule[i].curr[1]) /* If not a contraction */
{ {
uint pageb= (rule[i].base >> 8) & 0xFF; uint pageb= (rule[i].base >> 8) & 0xFF;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment