Commit 44d1e85f authored by Alexander Barkov's avatar Alexander Barkov

MDEV-7649 wrong result when comparing utf8 column with an invalid literal

parent f9b2704f
#
# Compare a field to an utf8 string literal with illegal byte sequences
#
--echo #
--echo # Start of ctype_utf8_ilseq.inc
--echo #
--eval CREATE TABLE t1 ENGINE=$ENGINE AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;
ALTER TABLE t1
ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
ADD KEY(ch);
SHOW CREATE TABLE t1;
INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
SELECT ch FROM t1 WHERE ch='admin𝌆';
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin𝌆';
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin𝌆';
DELETE FROM t1;
INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
INSERT INTO t1 (ch) VALUES ('z');
# LATIN SMALL LETTER A + LATIN CAPITAL LETTER E WITH GRAVE
INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
# LATIN SMALL LETTER A + ARMENIAN SMALL LETTER REH
INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ALTER TABLE t1 DROP KEY ch;
--echo # 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
PREPARE stmt FROM @query;
EXECUTE stmt;
#
# Non-equality comparison currently work differently depending on collation:
#
# - utf8_general_ci falls back to memcmp() on bad byte
# - utf8_unicode_ci treats bad bytes greater than any valid character
#
# For example, these two characters:
# _utf8 0xD080 (U+00C8 LATIN CAPITAL LETTER E WITH GRAVE)
# _utf8 0xD680 (U+0580 ARMENIAN SMALL LETTER REH)
#
# will give different results (depending on collation) when compared
# to an incomplete byte sequence 0xD1 (mb2head not followed by mb2tail).
#
# For utf8_general_ci the result depends on the valid side:
# - 0xD080 is smaller than 0xD1, because 0xD0 < 0xD1
# - 0xD680 is greater than 0xD1, because 0xD6 > 0xD1
#
# For utf8_unicode_ci the result does not depend on the valid side:
# - 0xD080 is smaller than 0xD1, because 0xD1 is greater than any valid character
# - 0xD680 is smaller than 0xD1, because 0xD1 is greater than any valid character
#
# utf8_general_ci should be eventually fixed to treat bad bytes greater
# than any valid character, similar to utf8_unicode_ci.
#
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
--echo # 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
--echo # 0x8F is a bad byte sequence (an mb2tail without mb2head)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
--echo # 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
DROP TABLE t1;
--echo #
--echo # End of ctype_utf8_ilseq.inc
--echo #
...@@ -3216,5 +3216,417 @@ a c1 ...@@ -3216,5 +3216,417 @@ a c1
10 => DŽ 10 => DŽ
drop table t1; drop table t1;
# #
# MDEV-7649 wrong result when comparing utf8 column with an invalid literal
#
SET NAMES utf8 COLLATE utf8_unicode_ci;
#
# Start of ctype_utf8_ilseq.inc
#
CREATE TABLE t1 ENGINE=MyISAM AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
ALTER TABLE t1
ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
ADD KEY(ch);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`),
KEY `ch` (`ch`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1
INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
SELECT ch FROM t1 WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin𝌆';
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
DELETE FROM t1;
INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
INSERT INTO t1 (ch) VALUES ('z');
INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
ALTER TABLE t1 DROP KEY ch;
# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
z
# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F is a bad byte sequence (an mb2tail without mb2head)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
DROP TABLE t1;
#
# End of ctype_utf8_ilseq.inc
#
#
# Start of ctype_utf8_ilseq.inc
#
CREATE TABLE t1 ENGINE=HEAP AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
ALTER TABLE t1
ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
ADD KEY(ch);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`),
KEY `ch` (`ch`)
) ENGINE=MEMORY DEFAULT CHARSET=latin1
INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
SELECT ch FROM t1 WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin𝌆';
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
DELETE FROM t1;
INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
INSERT INTO t1 (ch) VALUES ('z');
INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
ALTER TABLE t1 DROP KEY ch;
# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
z
# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F is a bad byte sequence (an mb2tail without mb2head)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
DROP TABLE t1;
#
# End of ctype_utf8_ilseq.inc
#
#
# End of 5.5 tests # End of 5.5 tests
# #
#
# Start of 5.5 tests
#
#
# MDEV-7649 wrong result when comparing utf8 column with an invalid literal
#
SET NAMES utf8 COLLATE utf8_unicode_ci;
#
# Start of ctype_utf8_ilseq.inc
#
CREATE TABLE t1 ENGINE=InnoDB AS SELECT REPEAT(' ', 60) AS ch LIMIT 0;;
ALTER TABLE t1
ADD id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
ADD KEY(ch);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`ch` varchar(60) CHARACTER SET utf8 COLLATE utf8_unicode_ci NOT NULL DEFAULT '',
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`),
KEY `ch` (`ch`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
INSERT INTO t1 (ch) VALUES ('admin'),('admin1');
SELECT ch FROM t1 WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='admin𝌆';
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='admin𝌆';
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
DELETE FROM t1;
INSERT INTO t1 (ch) VALUES ('a'), ('a?'), ('a??'), ('a???'), ('a????');
INSERT INTO t1 (ch) VALUES ('ab'),('a?b'),('a??b'),('a???b'),('a????b');
INSERT INTO t1 (ch) VALUES ('az'),('a?z'),('a??z'),('a???z'),('a????z');
INSERT INTO t1 (ch) VALUES ('z');
INSERT INTO t1 (ch) VALUES (_utf8 0x61D080);
INSERT INTO t1 (ch) VALUES (_utf8 0x61D680);
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch='a𝌆b' ORDER BY ch;
ch
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch<'a𝌆b' ORDER BY ch;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
SELECT ch FROM t1 IGNORE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆' ORDER BY ch;
ch
z
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86' for column 'ch' at row 1
SELECT ch FROM t1 FORCE KEY (ch) WHERE ch>'a𝌆b' ORDER BY ch;
ch
z
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9D\x8C\x86b' for column 'ch' at row 1
ALTER TABLE t1 DROP KEY ch;
# 0xD18F would be a good 2-byte character, 0xD1 is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xD1,'b''');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch<''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
a
a?
a??
a???
a????
a????b
a????z
a???b
a???z
a??b
a??z
a?b
a?z
ab
az
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch>''a', 0xD1,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
z
# 0xEA9A96 would be a good 3-byte character, 0xEA9A is an incomplete sequence
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0xEA9A,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F is a bad byte sequence (an mb2tail without mb2head)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
# 0x8F8F is a bad byte sequence (an mb2tail without mb2head, two times)
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,''' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
SET @query=CONCAT('SELECT ch FROM t1 WHERE ch=''a', 0x8F8F,'b'' ORDER BY ch');
PREPARE stmt FROM @query;
EXECUTE stmt;
ch
DROP TABLE t1;
#
# End of ctype_utf8_ilseq.inc
#
#
# End of 5.5 tests
#
This diff is collapsed.
...@@ -577,6 +577,17 @@ select * from t1 where c1 = '=> dž'; ...@@ -577,6 +577,17 @@ select * from t1 where c1 = '=> dž';
select * from t1 where concat(c1) = '=> dž'; select * from t1 where concat(c1) = '=> dž';
drop table t1; drop table t1;
--echo #
--echo # MDEV-7649 wrong result when comparing utf8 column with an invalid literal
--echo #
SET NAMES utf8 COLLATE utf8_unicode_ci;
--let ENGINE=MyISAM
--source include/ctype_utf8_ilseq.inc
--let ENGINE=HEAP
--source include/ctype_utf8_ilseq.inc
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
--echo # --echo #
#
# Tests for UCA collations with InnoDB
#
let collation=utf8_unicode_ci;
--source include/have_collation.inc
--source include/have_innodb.inc
--echo #
--echo # Start of 5.5 tests
--echo #
--echo #
--echo # MDEV-7649 wrong result when comparing utf8 column with an invalid literal
--echo #
SET NAMES utf8 COLLATE utf8_unicode_ci;
--let ENGINE=InnoDB
--source include/ctype_utf8_ilseq.inc
--echo #
--echo # End of 5.5 tests
--echo #
...@@ -1604,6 +1604,18 @@ CREATE TABLE t2(a CHAR(1) CHARACTER SET utf8 COLLATE utf8_general_ci, b INT NOT ...@@ -1604,6 +1604,18 @@ CREATE TABLE t2(a CHAR(1) CHARACTER SET utf8 COLLATE utf8_general_ci, b INT NOT
SELECT (SELECT t2.a FROM t2 WHERE t2.a=t1.a) AS aa, b, COUNT(b) FROM t1 GROUP BY aa; SELECT (SELECT t2.a FROM t2 WHERE t2.a=t1.a) AS aa, b, COUNT(b) FROM t1 GROUP BY aa;
DROP TABLE t1,t2; DROP TABLE t1,t2;
--echo #
--echo # MDEV-7649 wrong result when comparing utf8 column with an invalid literal
--echo #
SET NAMES utf8 COLLATE utf8_general_ci;
--let ENGINE=InnoDB
--source include/ctype_utf8_ilseq.inc
--let ENGINE=MyISAM
--source include/ctype_utf8_ilseq.inc
--let ENGINE=HEAP
--source include/ctype_utf8_ilseq.inc
--echo # --echo #
--echo # End of 5.5 tests --echo # End of 5.5 tests
--echo # --echo #
...@@ -7075,7 +7075,25 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner) ...@@ -7075,7 +7075,25 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc, if (((mb_len= scanner->cs->cset->mb_wc(scanner->cs, &wc,
scanner->sbeg, scanner->sbeg,
scanner->send)) <= 0)) scanner->send)) <= 0))
return -1; {
if (scanner->sbeg >= scanner->send)
return -1; /* No more bytes, end of line reached */
/*
There are some more bytes left. Non-positive mb_len means that
we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
*/
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
{
/* For safety purposes don't go beyond the string range. */
scanner->sbeg= scanner->send;
}
/*
Treat every complete or incomplete mbminlen unit as a weight which is
greater than weight for any possible normal character.
0xFFFF is greater than any possible weight in the UCA weight table.
*/
return 0xFFFF;
}
scanner->sbeg+= mb_len; scanner->sbeg+= mb_len;
if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT) if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment