Commit 2b0ff48b authored by Alexander Barkov

A joint patch for:

Bug#51675 Server crashes on inserting 4 byte char. after ALTER TABLE to 'utf8mb4' 
Bug#51676 Server crashes on SELECT, ORDER BY on 'utf8mb4' column
parent 37c3268c
...@@ -38,6 +38,7 @@ extern "C" { ...@@ -38,6 +38,7 @@ extern "C" {
#define my_wc_t ulong #define my_wc_t ulong
#define MY_CS_REPLACEMENT_CHARACTER 0xFFFD
/* /*
On i386 we store Unicode->CS conversion tables for On i386 we store Unicode->CS conversion tables for
......
...@@ -2412,6 +2412,62 @@ SELECT CONCAT('a', _utf8 ' ...@@ -2412,6 +2412,62 @@ SELECT CONCAT('a', _utf8 '
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,COERCIBLE) and (utf8_general_ci,COERCIBLE) for operation 'concat' ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,COERCIBLE) and (utf8_general_ci,COERCIBLE) for operation 'concat'
DROP TABLE t1; DROP TABLE t1;
# #
# Bug#51675 Server crashes on inserting 4 byte char.
# after ALTER TABLE to 'utf8mb4'
#
SET NAMES utf8;
CREATE TABLE t1 (
subject varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci,
p VARCHAR(15) CHARACTER SET utf8
) DEFAULT CHARSET=latin1;
ALTER TABLE t1 ADD INDEX (subject);
ALTER TABLE t1
DEFAULT CHARACTER SET utf8,
MODIFY subject varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
MODIFY p varchar(255) CHARACTER SET utf8;
Warnings:
Warning 1071 Specified key was too long; max key length is 1000 bytes
Warning 1071 Specified key was too long; max key length is 1000 bytes
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`subject` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`p` varchar(255) DEFAULT NULL,
KEY `subject` (`subject`(250))
) ENGINE=MyISAM DEFAULT CHARSET=utf8
INSERT INTO t1(subject) VALUES ('abcd');
INSERT INTO t1(subject) VALUES(x'f0909080');
DROP TABLE t1;
#
# Bug #51676 Server crashes on SELECT, ORDER BY on 'utf8mb4' column
#
SET NAMES utf8mb4;
CREATE TABLE t1 (
subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci,
p varchar(15) character set utf8mb4
) DEFAULT CHARSET=latin1;
INSERT INTO t1(subject) VALUES(0xF0909080);
INSERT INTO t1(subject) VALUES(0x616263F0909080646566);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`subject` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
`p` varchar(15) CHARACTER SET utf8mb4 DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
SELECT * FROM t1 ORDER BY 1;
subject p
𐐀 NULL
abc𐐀def NULL
SELECT hex(subject), length(subject), char_length(subject), octet_length(subject) FROM t1 ORDER BY 1;
hex(subject) length(subject) char_length(subject) octet_length(subject)
616263F0909080646566 10 7 10
F0909080 4 1 4
SELECT subject FROM t1 ORDER BY 1;
subject
𐐀
abc𐐀def
DROP TABLE t1;
#
# End of 5.5 tests # End of 5.5 tests
# #
# #
......
...@@ -1737,6 +1737,51 @@ SELECT CONCAT(utf8mb4, _utf8 ' ...@@ -1737,6 +1737,51 @@ SELECT CONCAT(utf8mb4, _utf8 '
SELECT CONCAT('a', _utf8 '') FROM t1; SELECT CONCAT('a', _utf8 '') FROM t1;
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # Bug#51675 Server crashes on inserting 4 byte char.
--echo # after ALTER TABLE to 'utf8mb4'
--echo #
SET NAMES utf8;
CREATE TABLE t1 (
subject varchar(255) CHARACTER SET utf8 COLLATE utf8_unicode_ci,
p VARCHAR(15) CHARACTER SET utf8
) DEFAULT CHARSET=latin1;
# Alter old table, add index
ALTER TABLE t1 ADD INDEX (subject);
# Alter old 'utf8' table to new 'utf8mb4'
ALTER TABLE t1
DEFAULT CHARACTER SET utf8,
MODIFY subject varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
MODIFY p varchar(255) CHARACTER SET utf8;
SHOW CREATE TABLE t1;
INSERT INTO t1(subject) VALUES ('abcd');
# Insert 4 byte characters
INSERT INTO t1(subject) VALUES(x'f0909080');
DROP TABLE t1;
--echo #
--echo # Bug #51676 Server crashes on SELECT, ORDER BY on 'utf8mb4' column
--echo #
SET NAMES utf8mb4;
CREATE TABLE t1 (
subject varchar(255) character set utf8mb4 collate utf8mb4_unicode_ci,
p varchar(15) character set utf8mb4
) DEFAULT CHARSET=latin1;
# Insert 4 byte characters
# 4byte character
INSERT INTO t1(subject) VALUES(0xF0909080);
# mix of 1 byte & 4 byte characters ('abc' + U+10400 + 'def')
INSERT INTO t1(subject) VALUES(0x616263F0909080646566);
SHOW CREATE TABLE t1;
SELECT * FROM t1 ORDER BY 1;
SELECT hex(subject), length(subject), char_length(subject), octet_length(subject) FROM t1 ORDER BY 1;
SELECT subject FROM t1 ORDER BY 1;
DROP TABLE t1;
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
--echo # --echo #
......
...@@ -6983,6 +6983,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) ...@@ -6983,6 +6983,8 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
scanner->send)) <= 0)) scanner->send)) <= 0))
return -1; return -1;
if (wc > 0xFFFF)
wc= MY_CS_REPLACEMENT_CHARACTER;
scanner->page= wc >> 8; scanner->page= wc >> 8;
scanner->code= wc & 0xFF; scanner->code= wc & 0xFF;
scanner->sbeg+= mb_len; scanner->sbeg+= mb_len;
......
...@@ -50,8 +50,6 @@ ...@@ -50,8 +50,6 @@
static unsigned long lfactor[9]= static unsigned long lfactor[9]=
{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L }; { 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L };
#define REPLACEMENT_CHAR 0xFFFD;
#ifdef HAVE_CHARSET_mb2_or_mb4 #ifdef HAVE_CHARSET_mb2_or_mb4
...@@ -1145,7 +1143,7 @@ my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) ...@@ -1145,7 +1143,7 @@ my_tosort_utf16(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
} }
else else
{ {
*wc= REPLACEMENT_CHAR; *wc= MY_CS_REPLACEMENT_CHARACTER;
} }
} }
...@@ -1874,7 +1872,7 @@ my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) ...@@ -1874,7 +1872,7 @@ my_tosort_utf32(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
} }
else else
{ {
*wc= REPLACEMENT_CHAR; *wc= MY_CS_REPLACEMENT_CHARACTER;
} }
} }
......
...@@ -1720,9 +1720,6 @@ MY_UNICASE_INFO *my_unicase_turkish[256]= ...@@ -1720,9 +1720,6 @@ MY_UNICASE_INFO *my_unicase_turkish[256]=
}; };
#define REPLACEMENT_CHAR 0xFFFD;
static inline void static inline void
my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
{ {
...@@ -1734,7 +1731,7 @@ my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc) ...@@ -1734,7 +1731,7 @@ my_tosort_unicode(MY_UNICASE_INFO **uni_plane, my_wc_t *wc)
} }
else else
{ {
*wc= REPLACEMENT_CHAR; *wc= MY_CS_REPLACEMENT_CHARACTER;
} }
} }
...@@ -1757,7 +1754,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs, ...@@ -1757,7 +1754,7 @@ my_wildcmp_unicode(CHARSET_INFO *cs,
{ {
int result= -1; /* Not found, using wildcards */ int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc; my_wc_t s_wc, w_wc;
int scan, plane; int scan;
int (*mb_wc)(struct charset_info_st *, my_wc_t *, int (*mb_wc)(struct charset_info_st *, my_wc_t *,
const uchar *, const uchar *); const uchar *, const uchar *);
mb_wc= cs->cset->mb_wc; mb_wc= cs->cset->mb_wc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment