BUG#29299 - repeatable myisam fulltext index corruption

Fulltext index may get corrupt by certain gbk characters.

The problem was that when skipping leading non-true-word-characters,
we assumed that these characters are always 1 byte long. This is not
the case with gbk character set, since non-true-word-characters may
be 2 bytes long.

Affects 5.0 only.
parent 717c8fe9
...@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, ...@@ -111,7 +111,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
while (doc<end) while (doc<end)
{ {
for (;doc<end;doc++) for (; doc < end; doc+= mbl)
{ {
if (true_word_char(cs,*doc)) break; if (true_word_char(cs,*doc)) break;
if (*doc == FTB_RQUOT && param->quot) if (*doc == FTB_RQUOT && param->quot)
...@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end, ...@@ -120,6 +120,7 @@ byte ft_get_word(CHARSET_INFO *cs, byte **start, byte *end,
*start=doc+1; *start=doc+1;
return 3; /* FTB_RBR */ return 3; /* FTB_RBR */
} }
mbl= my_mbcharlen(cs, *(uchar *)doc);
if (!param->quot) if (!param->quot)
{ {
if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT) if (*doc == FTB_LBR || *doc == FTB_RBR || *doc == FTB_LQUOT)
...@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end, ...@@ -187,10 +188,11 @@ byte ft_simple_get_word(CHARSET_INFO *cs, byte **start, const byte *end,
do do
{ {
for (;; doc++) for (;; doc+= mbl)
{ {
if (doc >= end) DBUG_RETURN(0); if (doc >= end) DBUG_RETURN(0);
if (true_word_char(cs, *doc)) break; if (true_word_char(cs, *doc)) break;
mbl= my_mbcharlen(cs, *(uchar *)doc);
} }
mwc= length= 0; mwc= length= 0;
......
...@@ -241,3 +241,15 @@ select * from t1 where match a against('ab c' in boolean mode); ...@@ -241,3 +241,15 @@ select * from t1 where match a against('ab c' in boolean mode);
a a
drop table t1; drop table t1;
set names latin1; set names latin1;
CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
SET NAMES utf8;
INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
HEX(a)
BEF361616197C22061616161
DELETE FROM t1 LIMIT 1;
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check status OK
SET NAMES latin1;
DROP TABLE t1;
...@@ -220,4 +220,16 @@ select * from t1 where match a against('ab c' in boolean mode); ...@@ -220,4 +220,16 @@ select * from t1 where match a against('ab c' in boolean mode);
drop table t1; drop table t1;
set names latin1; set names latin1;
#
# BUG#29299 - repeatable myisam fulltext index corruption
#
CREATE TABLE t1(a VARCHAR(255) CHARACTER SET gbk, FULLTEXT(a));
SET NAMES utf8;
INSERT INTO t1 VALUES(0xF043616161),(0xBEF361616197C22061616161);
SELECT HEX(a) FROM t1 WHERE MATCH(a) AGAINST(0x97C22061616161 IN BOOLEAN MODE);
DELETE FROM t1 LIMIT 1;
CHECK TABLE t1;
SET NAMES latin1;
DROP TABLE t1;
# End of 4.1 tests # End of 4.1 tests
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment