Commit 907eb131 authored by unknown's avatar unknown

Bug #3928 regexp [[:>:]] and UTF-8

parent 0839550f
...@@ -218,3 +218,25 @@ b ...@@ -218,3 +218,25 @@ b
select * from t1 where a = 'b' and a != 'b'; select * from t1 where a = 'b' and a != 'b';
a a
drop table t1; drop table t1;
set names utf8;
select 'вася' rlike '[[:<:]]вася[[:>:]]';
'вася' rlike '[[:<:]]вася[[:>:]]'
1
select 'вася ' rlike '[[:<:]]вася[[:>:]]';
'вася ' rlike '[[:<:]]вася[[:>:]]'
1
select ' вася' rlike '[[:<:]]вася[[:>:]]';
' вася' rlike '[[:<:]]вася[[:>:]]'
1
select ' вася ' rlike '[[:<:]]вася[[:>:]]';
' вася ' rlike '[[:<:]]вася[[:>:]]'
1
select 'васяz' rlike '[[:<:]]вася[[:>:]]';
'васяz' rlike '[[:<:]]вася[[:>:]]'
0
select 'zвася' rlike '[[:<:]]вася[[:>:]]';
'zвася' rlike '[[:<:]]вася[[:>:]]'
0
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
'zвасяz' rlike '[[:<:]]вася[[:>:]]'
0
...@@ -141,3 +141,19 @@ select * from t1 where a = 'b'; ...@@ -141,3 +141,19 @@ select * from t1 where a = 'b';
select * from t1 where a = 'b' and a = 'b'; select * from t1 where a = 'b' and a = 'b';
select * from t1 where a = 'b' and a != 'b'; select * from t1 where a = 'b' and a != 'b';
drop table t1; drop table t1;
#
# Bug #3928 regexp [[:>:]] and UTF-8
#
set names utf8;
# This should return TRUE
select 'вася' rlike '[[:<:]]вася[[:>:]]';
select 'вася ' rlike '[[:<:]]вася[[:>:]]';
select ' вася' rlike '[[:<:]]вася[[:>:]]';
select ' вася ' rlike '[[:<:]]вася[[:>:]]';
# This should return FALSE
select 'васяz' rlike '[[:<:]]вася[[:>:]]';
select 'zвася' rlike '[[:<:]]вася[[:>:]]';
select 'zвасяz' rlike '[[:<:]]вася[[:>:]]';
...@@ -1524,8 +1524,12 @@ MY_UNICASE_INFO *uni_plane[256]={ ...@@ -1524,8 +1524,12 @@ MY_UNICASE_INFO *uni_plane[256]={
#ifdef HAVE_CHARSET_utf8 #ifdef HAVE_CHARSET_utf8
/* These arrays are taken from usa7 implementation */ /*
We consider bytes with code more than 127 as a letter.
This garantees that word boundaries work fine with regular
expressions. Note, there is no need to mark byte 255 as a
letter, it is illegal byte in UTF8.
*/
static uchar ctype_utf8[] = { static uchar ctype_utf8[] = {
0, 0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
...@@ -1536,16 +1540,18 @@ static uchar ctype_utf8[] = { ...@@ -1536,16 +1540,18 @@ static uchar ctype_utf8[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
}; };
/* The below are taken from usa7 implementation */
static uchar to_lower_utf8[] = { static uchar to_lower_utf8[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment