Commit 3354e7f8 authored by unknown's avatar unknown

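REGEXP previously worked only with the default character set.

It now works with any character set: bracket character classes such as [[:lower:]] are expanded from the ctype table of the character set in use.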
parent 0a9b64cc
......@@ -52,3 +52,27 @@ select * from t1 where xxx REGEXP '^this is some text: to test - out\\.reg exp [
xxx
this is some text: to test - out.reg exp (22/45)
drop table t1;
select _latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin;
_latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin
1
select _koi8r 0xFF regexp _koi8r '[[:lower:]]' COLLATE koi8r_bin;
_koi8r 0xFF regexp _koi8r '[[:lower:]]' COLLATE koi8r_bin
0
select _latin1 0xFF regexp _latin1 '[[:upper:]]' COLLATE latin1_bin;
_latin1 0xFF regexp _latin1 '[[:upper:]]' COLLATE latin1_bin
0
select _koi8r 0xFF regexp _koi8r '[[:upper:]]' COLLATE koi8r_bin;
_koi8r 0xFF regexp _koi8r '[[:upper:]]' COLLATE koi8r_bin
1
select _latin1 0xF7 regexp _latin1 '[[:alpha:]]';
_latin1 0xF7 regexp _latin1 '[[:alpha:]]'
0
select _koi8r 0xF7 regexp _koi8r '[[:alpha:]]';
_koi8r 0xF7 regexp _koi8r '[[:alpha:]]'
1
select _latin1'a' regexp _latin1'A' collate latin1_general_ci;
_latin1'a' regexp _latin1'A' collate latin1_general_ci
1
select _latin1'a' regexp _latin1'A' collate latin1_bin;
_latin1'a' regexp _latin1'A' collate latin1_bin
0
......@@ -45,3 +45,17 @@ create table t1 (xxx char(128));
insert into t1 (xxx) values('this is some text: to test - out.reg exp (22/45)');
select * from t1 where xxx REGEXP '^this is some text: to test - out\\.reg exp [[(][0-9]+[/\\][0-9]+[])][ ]*$';
drop table t1;
#
# Check with different character sets and collations
#
# The same byte can belong to different character classes depending on the
# character set, so each check is run once with latin1 and once with koi8r.
#
# Byte 0xFF against [[:lower:]]: expected 1 for latin1, 0 for koi8r.
select _latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin;
select _koi8r 0xFF regexp _koi8r '[[:lower:]]' COLLATE koi8r_bin;
# Byte 0xFF against [[:upper:]]: expected 0 for latin1, 1 for koi8r
# (the mirror image of the [[:lower:]] results).
select _latin1 0xFF regexp _latin1 '[[:upper:]]' COLLATE latin1_bin;
select _koi8r 0xFF regexp _koi8r '[[:upper:]]' COLLATE koi8r_bin;
# Byte 0xF7 against [[:alpha:]]: expected 0 for latin1, 1 for koi8r.
select _latin1 0xF7 regexp _latin1 '[[:alpha:]]';
select _koi8r 0xF7 regexp _koi8r '[[:alpha:]]';
# The collation decides case sensitivity of the match: 'a' matches pattern 'A'
# under latin1_general_ci (expected 1) but not under latin1_bin (expected 0).
select _latin1'a' regexp _latin1'A' collate latin1_general_ci;
select _latin1'a' regexp _latin1'A' collate latin1_bin;
......@@ -18,4 +18,5 @@ extern struct cclass {
const char *name;
const char *chars;
const char *multis;
uint mask;
} cclasses[];
......@@ -36,19 +36,19 @@ struct parse {
static char nuls[10]; /* place to point scanner in event of error */
/*
  Table of the bracket-expression character classes ("alnum", "alpha", ...),
  terminated by an entry whose name is NULL.

  NOTE(review): this listing is a diff rendering without +/- markers, so the
  rows appear twice: first in the three-field form (name, chars, multis) and
  then in the four-field form that also initializes the mask member.
*/
struct cclass cclasses[CCLASS_LAST+1]= {
/* Three-field rows: name, chars, multis; chars and multis are empty strings. */
{ "alnum", "","" },
{ "alpha", "","" },
{ "blank", "","" },
{ "cntrl", "","" },
{ "digit", "","" },
{ "graph", "","" },
{ "lower", "","" },
{ "print", "","" },
{ "punct", "","" },
{ "space", "","" },
{ "upper", "","" },
{ "xdigit", "","" },
{ NULL,NULL,NULL }
/*
  Four-field rows: name, chars, multis, mask.
  The class-expansion code tests mask against charset->ctype[i+1] for every
  i in 1..255 and adds each matching i to the character set being built.
  NOTE(review): _U, _L, _NMR, _B, _CTR, _PNT, _SPC and _X are presumably the
  ctype flag bits declared with the charset definitions -- confirm there.
*/
{ "alnum", "","", _U | _L | _NMR},
{ "alpha", "","", _U | _L },
{ "blank", "","", _B },
{ "cntrl", "","", _CTR },
{ "digit", "","", _NMR },
{ "graph", "","", _PNT | _U | _L | _NMR},
{ "lower", "","", _L },
{ "print", "","", _PNT | _U | _L | _NMR | _B },
{ "punct", "","", _PNT },
{ "space", "","", _SPC },
{ "upper", "","", _U },
{ "xdigit", "","", _X },
/* Terminator: NULL name ends the table; mask 0 selects no characters. */
{ NULL,NULL,NULL, 0 }
};
/*
......@@ -747,8 +747,6 @@ register cset *cs;
register char *sp = p->next;
register struct cclass *cp;
register size_t len;
register char *u;
register char c;
while (MORE() && my_isalpha(p->charset,PEEK()))
NEXT();
......@@ -762,11 +760,26 @@ register cset *cs;
return;
}
u = (char*) cp->chars;
#ifndef USE_ORIG_REGEX_CODE
{
register size_t i;
for (i=1 ; i<256 ; i++)
if (p->charset->ctype[i+1] & cp->mask)
CHadd(cs, i);
}
#else
{
register char *u = (char*) cp->chars;
register char c;
while ((c = *u++) != '\0')
CHadd(cs, c);
for (u = (char*) cp->multis; *u != '\0'; u += strlen(u) + 1)
MCadd(p, cs, u);
}
#endif
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment