Commit de1e5113 authored by unknown's avatar unknown

Regex library is switched to use new ctype tools

to allow usage of many character sets at a time.


include/m_ctype.h:
  Added condition to simplify migrating from old ctype
  Added new style toupper, tolower which accepts charset in first argument
regex/debug.c:
  Added charset argument
regex/debug.ih:
  added charset argument
regex/engine.c:
  added charset argument
regex/engine.ih:
  added charset argument
regex/main.c:
  added charset argument
regex/regcomp.c:
  added CHARSET_INFO field
regex/regcomp.ih:
  Added charset argument
regex/regex.h:
  Added #include <m_ctype.h> for CHARSET_INFO
  Added charset argument for regcomp()
regex/regex2.h:
  New charset argument for ISWORD()
regex/regexec.c:
  New charset argument
regex/reginit.c:
  Move to new style ctype. 
  However still needs fixes:
    instead of single static cclass variable,
    each charset must have its own variable.
sql/item_cmpfunc.cc:
  Pass charset field into regcomp()
  This will be fixed tomorrow to use String->charset
    instead of default_charset_info
parent 63db19c8
...@@ -180,6 +180,7 @@ extern const char *compiled_charset_name(uint charset_number); ...@@ -180,6 +180,7 @@ extern const char *compiled_charset_name(uint charset_number);
#define _B 0100 /* Blank */ #define _B 0100 /* Blank */
#define _X 0200 /* heXadecimal digit */ #define _X 0200 /* heXadecimal digit */
#ifndef HIDE_OLD_CTYPE
#define my_ctype (default_charset_info->ctype) #define my_ctype (default_charset_info->ctype)
#define my_to_upper (default_charset_info->to_upper) #define my_to_upper (default_charset_info->to_upper)
#define my_to_lower (default_charset_info->to_lower) #define my_to_lower (default_charset_info->to_lower)
...@@ -201,6 +202,8 @@ extern const char *compiled_charset_name(uint charset_number); ...@@ -201,6 +202,8 @@ extern const char *compiled_charset_name(uint charset_number);
#define isprint(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N | _B)) #define isprint(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N | _B))
#define isgraph(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N)) #define isgraph(c) ((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N))
#define iscntrl(c) ((my_ctype+1)[(uchar) (c)] & _C) #define iscntrl(c) ((my_ctype+1)[(uchar) (c)] & _C)
#endif
#define isascii(c) (!((c) & ~0177)) #define isascii(c) (!((c) & ~0177))
#define toascii(c) ((c) & 0177) #define toascii(c) ((c) & 0177)
...@@ -208,6 +211,8 @@ extern const char *compiled_charset_name(uint charset_number); ...@@ -208,6 +211,8 @@ extern const char *compiled_charset_name(uint charset_number);
#undef ctype #undef ctype
#endif /* ctype */ #endif /* ctype */
#define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)])
#define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)])
#define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_U | _L)) #define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_U | _L))
#define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _U) #define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _U)
#define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _L) #define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _L)
......
...@@ -45,7 +45,7 @@ FILE *d; ...@@ -45,7 +45,7 @@ FILE *d;
if (g->nplus > 0) if (g->nplus > 0)
fprintf(d, ", nplus %ld", (long)g->nplus); fprintf(d, ", nplus %ld", (long)g->nplus);
fprintf(d, "\n"); fprintf(d, "\n");
s_print(g, d); s_print(r->charset, g, d);
for (i = 0; i < g->ncategories; i++) { for (i = 0; i < g->ncategories; i++) {
nincat[i] = 0; nincat[i] = 0;
for (c = CHAR_MIN; c <= CHAR_MAX; c++) for (c = CHAR_MIN; c <= CHAR_MAX; c++)
...@@ -58,7 +58,7 @@ FILE *d; ...@@ -58,7 +58,7 @@ FILE *d;
for (c = CHAR_MIN; c <= CHAR_MAX; c++) for (c = CHAR_MIN; c <= CHAR_MAX; c++)
if (g->categories[c] == i) if (g->categories[c] == i)
break; break;
fprintf(d, ", %d=%s", i, regchar(c,buf)); fprintf(d, ", %d=%s", i, regchar(r->charset,c,buf));
} }
fprintf(d, "\n"); fprintf(d, "\n");
for (i = 1; i < g->ncategories; i++) for (i = 1; i < g->ncategories; i++)
...@@ -68,14 +68,14 @@ FILE *d; ...@@ -68,14 +68,14 @@ FILE *d;
for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */
if (c <= CHAR_MAX && g->categories[c] == i) { if (c <= CHAR_MAX && g->categories[c] == i) {
if (last < 0) { if (last < 0) {
fprintf(d, "%s", regchar(c,buf)); fprintf(d, "%s", regchar(r->charset,c,buf));
last = c; last = c;
} }
} else { } else {
if (last >= 0) { if (last >= 0) {
if (last != c-1) if (last != c-1)
fprintf(d, "-%s", fprintf(d, "-%s",
regchar(c-1,buf)); regchar(r->charset,c-1,buf));
last = -1; last = -1;
} }
} }
...@@ -88,7 +88,8 @@ FILE *d; ...@@ -88,7 +88,8 @@ FILE *d;
== static void s_print(register struct re_guts *g, FILE *d); == static void s_print(register struct re_guts *g, FILE *d);
*/ */
static void static void
s_print(g, d) s_print(charset, g, d)
CHARSET_INFO *charset;
register struct re_guts *g; register struct re_guts *g;
FILE *d; FILE *d;
{ {
...@@ -127,7 +128,7 @@ FILE *d; ...@@ -127,7 +128,7 @@ FILE *d;
if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
fprintf(d, "\\%c", (char)opnd); fprintf(d, "\\%c", (char)opnd);
else else
fprintf(d, "%s", regchar((char)opnd,buf)); fprintf(d, "%s", regchar(charset,(char)opnd,buf));
break; break;
case OBOL: case OBOL:
fprintf(d, "^"); fprintf(d, "^");
...@@ -151,14 +152,14 @@ FILE *d; ...@@ -151,14 +152,14 @@ FILE *d;
for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */
if (CHIN(cs, i) && i < g->csetsize) { if (CHIN(cs, i) && i < g->csetsize) {
if (last < 0) { if (last < 0) {
fprintf(d, "%s", regchar(i,buf)); fprintf(d, "%s", regchar(charset,i,buf));
last = i; last = i;
} }
} else { } else {
if (last >= 0) { if (last >= 0) {
if (last != i-1) if (last != i-1)
fprintf(d, "-%s", fprintf(d, "-%s",
regchar(i-1,buf)); regchar(charset,i-1,buf));
last = -1; last = -1;
} }
} }
...@@ -230,12 +231,13 @@ FILE *d; ...@@ -230,12 +231,13 @@ FILE *d;
== static char *regchar(int ch); == static char *regchar(int ch);
*/ */
static char * /* -> representation */ static char * /* -> representation */
regchar(ch,buf) regchar(charset,ch,buf)
CHARSET_INFO *charset;
int ch; int ch;
char *buf; char *buf;
{ {
if (isprint(ch) || ch == ' ') if (my_isprint(charset,ch) || ch == ' ')
sprintf(buf, "%c", ch); sprintf(buf, "%c", ch);
else else
sprintf(buf, "\\%o", ch); sprintf(buf, "\\%o", ch);
......
...@@ -5,8 +5,8 @@ extern "C" { ...@@ -5,8 +5,8 @@ extern "C" {
/* === debug.c === */ /* === debug.c === */
void regprint(regex_t *r, FILE *d); void regprint(regex_t *r, FILE *d);
static void s_print(register struct re_guts *g, FILE *d); static void s_print(CHARSET_INFO *charset, register struct re_guts *g, FILE *d);
static char *regchar(int ch,char *buf); static char *regchar(CHARSET_INFO *charset, int ch,char *buf);
#ifdef __cplusplus #ifdef __cplusplus
} }
......
...@@ -63,7 +63,8 @@ struct match { ...@@ -63,7 +63,8 @@ struct match {
== size_t nmatch, regmatch_t pmatch[], int eflags); == size_t nmatch, regmatch_t pmatch[], int eflags);
*/ */
static int /* 0 success, REG_NOMATCH failure */ static int /* 0 success, REG_NOMATCH failure */
matcher(g, str, nmatch, pmatch, eflags) matcher(charset,g, str, nmatch, pmatch, eflags)
CHARSET_INFO *charset;
register struct re_guts *g; register struct re_guts *g;
char *str; char *str;
size_t nmatch; size_t nmatch;
...@@ -120,7 +121,7 @@ int eflags; ...@@ -120,7 +121,7 @@ int eflags;
/* this loop does only one repetition except for backrefs */ /* this loop does only one repetition except for backrefs */
for (;;) { for (;;) {
endp = fast(m, start, stop, gf, gl); endp = fast(charset, m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */ if (endp == NULL) { /* a miss */
if (m->pmatch != NULL) if (m->pmatch != NULL)
free((char *)m->pmatch); free((char *)m->pmatch);
...@@ -136,7 +137,7 @@ int eflags; ...@@ -136,7 +137,7 @@ int eflags;
assert(m->coldp != NULL); assert(m->coldp != NULL);
for (;;) { for (;;) {
NOTE("finding start"); NOTE("finding start");
endp = slow(m, m->coldp, stop, gf, gl); endp = slow(charset, m, m->coldp, stop, gf, gl);
if (endp != NULL) if (endp != NULL)
break; break;
assert(m->coldp < m->endp); assert(m->coldp < m->endp);
...@@ -159,7 +160,7 @@ int eflags; ...@@ -159,7 +160,7 @@ int eflags;
m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
if (!g->backrefs && !(m->eflags&REG_BACKR)) { if (!g->backrefs && !(m->eflags&REG_BACKR)) {
NOTE("dissecting"); NOTE("dissecting");
dp = dissect(m, m->coldp, endp, gf, gl); dp = dissect(charset, m, m->coldp, endp, gf, gl);
} else { } else {
if (g->nplus > 0 && m->lastpos == NULL) if (g->nplus > 0 && m->lastpos == NULL)
m->lastpos = (char **)malloc((g->nplus+1) * m->lastpos = (char **)malloc((g->nplus+1) *
...@@ -170,7 +171,7 @@ int eflags; ...@@ -170,7 +171,7 @@ int eflags;
return(REG_ESPACE); return(REG_ESPACE);
} }
NOTE("backref dissect"); NOTE("backref dissect");
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
} }
if (dp != NULL) if (dp != NULL)
break; break;
...@@ -182,7 +183,7 @@ int eflags; ...@@ -182,7 +183,7 @@ int eflags;
if (dp != NULL || endp <= m->coldp) if (dp != NULL || endp <= m->coldp)
break; /* defeat */ break; /* defeat */
NOTE("backoff"); NOTE("backoff");
endp = slow(m, m->coldp, endp-1, gf, gl); endp = slow(charset, m, m->coldp, endp-1, gf, gl);
if (endp == NULL) if (endp == NULL)
break; /* defeat */ break; /* defeat */
/* try it on a shorter possibility */ /* try it on a shorter possibility */
...@@ -193,7 +194,7 @@ int eflags; ...@@ -193,7 +194,7 @@ int eflags;
} }
#endif #endif
NOTE("backoff dissect"); NOTE("backoff dissect");
dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
} }
assert(dp == NULL || dp == endp); assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */ if (dp != NULL) /* found a shorter one */
...@@ -235,7 +236,8 @@ int eflags; ...@@ -235,7 +236,8 @@ int eflags;
== char *stop, sopno startst, sopno stopst); == char *stop, sopno startst, sopno stopst);
*/ */
static char * /* == stop (success) always */ static char * /* == stop (success) always */
dissect(m, start, stop, startst, stopst) dissect(charset, m, start, stop, startst, stopst)
CHARSET_INFO *charset;
register struct match *m; register struct match *m;
char *start; char *start;
char *stop; char *stop;
...@@ -299,10 +301,10 @@ sopno stopst; ...@@ -299,10 +301,10 @@ sopno stopst;
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = slow(charset, m, sp, stp, ss, es);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = slow(charset, m, rest, stop, es, stopst);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
...@@ -312,8 +314,8 @@ sopno stopst; ...@@ -312,8 +314,8 @@ sopno stopst;
ssub = ss + 1; ssub = ss + 1;
esub = es - 1; esub = es - 1;
/* did innards match? */ /* did innards match? */
if (slow(m, sp, rest, ssub, esub) != NULL) { if (slow(charset, m, sp, rest, ssub, esub) != NULL) {
dp = dissect(m, sp, rest, ssub, esub); dp = dissect(charset, m, sp, rest, ssub, esub);
assert(dp == rest); assert(dp == rest);
} else /* no */ } else /* no */
assert(sp == rest); assert(sp == rest);
...@@ -323,10 +325,10 @@ sopno stopst; ...@@ -323,10 +325,10 @@ sopno stopst;
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = slow(charset, m, sp, stp, ss, es);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = slow(charset, m, rest, stop, es, stopst);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
...@@ -338,7 +340,7 @@ sopno stopst; ...@@ -338,7 +340,7 @@ sopno stopst;
ssp = sp; ssp = sp;
oldssp = ssp; oldssp = ssp;
for (;;) { /* find last match of innards */ for (;;) { /* find last match of innards */
sep = slow(m, ssp, rest, ssub, esub); sep = slow(charset, m, ssp, rest, ssub, esub);
if (sep == NULL || sep == ssp) if (sep == NULL || sep == ssp)
break; /* failed or matched null */ break; /* failed or matched null */
oldssp = ssp; /* on to next try */ oldssp = ssp; /* on to next try */
...@@ -350,8 +352,8 @@ sopno stopst; ...@@ -350,8 +352,8 @@ sopno stopst;
ssp = oldssp; ssp = oldssp;
} }
assert(sep == rest); /* must exhaust substring */ assert(sep == rest); /* must exhaust substring */
assert(slow(m, ssp, sep, ssub, esub) == rest); assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
dp = dissect(m, ssp, sep, ssub, esub); dp = dissect(charset, m, ssp, sep, ssub, esub);
assert(dp == sep); assert(dp == sep);
sp = rest; sp = rest;
break; break;
...@@ -359,10 +361,10 @@ sopno stopst; ...@@ -359,10 +361,10 @@ sopno stopst;
stp = stop; stp = stop;
for (;;) { for (;;) {
/* how long could this one be? */ /* how long could this one be? */
rest = slow(m, sp, stp, ss, es); rest = slow(charset, m, sp, stp, ss, es);
assert(rest != NULL); /* it did match */ assert(rest != NULL); /* it did match */
/* could the rest match the rest? */ /* could the rest match the rest? */
tail = slow(m, rest, stop, es, stopst); tail = slow(charset, m, rest, stop, es, stopst);
if (tail == stop) if (tail == stop)
break; /* yes! */ break; /* yes! */
/* no -- try a shorter match for this one */ /* no -- try a shorter match for this one */
...@@ -373,7 +375,7 @@ sopno stopst; ...@@ -373,7 +375,7 @@ sopno stopst;
esub = ss + OPND(m->g->strip[ss]) - 1; esub = ss + OPND(m->g->strip[ss]) - 1;
assert(OP(m->g->strip[esub]) == OOR1); assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */ for (;;) { /* find first matching branch */
if (slow(m, sp, rest, ssub, esub) == rest) if (slow(charset, m, sp, rest, ssub, esub) == rest)
break; /* it matched all of it */ break; /* it matched all of it */
/* that one missed, try next one */ /* that one missed, try next one */
assert(OP(m->g->strip[esub]) == OOR1); assert(OP(m->g->strip[esub]) == OOR1);
...@@ -386,7 +388,7 @@ sopno stopst; ...@@ -386,7 +388,7 @@ sopno stopst;
else else
assert(OP(m->g->strip[esub]) == O_CH); assert(OP(m->g->strip[esub]) == O_CH);
} }
dp = dissect(m, sp, rest, ssub, esub); dp = dissect(charset, m, sp, rest, ssub, esub);
assert(dp == rest); assert(dp == rest);
sp = rest; sp = rest;
break; break;
...@@ -423,7 +425,8 @@ sopno stopst; ...@@ -423,7 +425,8 @@ sopno stopst;
== char *stop, sopno startst, sopno stopst, sopno lev); == char *stop, sopno startst, sopno stopst, sopno lev);
*/ */
static char * /* == stop (success) or NULL (failure) */ static char * /* == stop (success) or NULL (failure) */
backref(m, start, stop, startst, stopst, lev) backref(charset,m, start, stop, startst, stopst, lev)
CHARSET_INFO *charset;
register struct match *m; register struct match *m;
char *start; char *start;
char *stop; char *stop;
...@@ -486,8 +489,8 @@ sopno lev; /* PLUS nesting level */ ...@@ -486,8 +489,8 @@ sopno lev; /* PLUS nesting level */
(sp < m->endp && *(sp-1) == '\n' && (sp < m->endp && *(sp-1) == '\n' &&
(m->g->cflags&REG_NEWLINE)) || (m->g->cflags&REG_NEWLINE)) ||
(sp > m->beginp && (sp > m->beginp &&
!ISWORD(*(sp-1))) ) && !ISWORD(charset,*(sp-1))) ) &&
(sp < m->endp && ISWORD(*sp)) ) (sp < m->endp && ISWORD(charset,*sp)) )
{ /* yes */ } { /* yes */ }
else else
return(NULL); return(NULL);
...@@ -496,8 +499,8 @@ sopno lev; /* PLUS nesting level */ ...@@ -496,8 +499,8 @@ sopno lev; /* PLUS nesting level */
if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
(sp < m->endp && *sp == '\n' && (sp < m->endp && *sp == '\n' &&
(m->g->cflags&REG_NEWLINE)) || (m->g->cflags&REG_NEWLINE)) ||
(sp < m->endp && !ISWORD(*sp)) ) && (sp < m->endp && !ISWORD(charset,*sp)) ) &&
(sp > m->beginp && ISWORD(*(sp-1))) ) (sp > m->beginp && ISWORD(charset,*(sp-1))) )
{ /* yes */ } { /* yes */ }
else else
return(NULL); return(NULL);
...@@ -543,28 +546,28 @@ sopno lev; /* PLUS nesting level */ ...@@ -543,28 +546,28 @@ sopno lev; /* PLUS nesting level */
return(NULL); return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i)) while (m->g->strip[ss] != SOP(O_BACK, i))
ss++; ss++;
return(backref(m, sp+len, stop, ss+1, stopst, lev)); return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
break; break;
case OQUEST_: /* to null or not */ case OQUEST_: /* to null or not */
dp = backref(m, sp, stop, ss+1, stopst, lev); dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
if (dp != NULL) if (dp != NULL)
return(dp); /* not */ return(dp); /* not */
return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
break; break;
case OPLUS_: case OPLUS_:
assert(m->lastpos != NULL); assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus); assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp; m->lastpos[lev+1] = sp;
return(backref(m, sp, stop, ss+1, stopst, lev+1)); return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
break; break;
case O_PLUS: case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */ if (sp == m->lastpos[lev]) /* last pass matched null */
return(backref(m, sp, stop, ss+1, stopst, lev-1)); return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
/* try another pass */ /* try another pass */
m->lastpos[lev] = sp; m->lastpos[lev] = sp;
dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
if (dp == NULL) if (dp == NULL)
return(backref(m, sp, stop, ss+1, stopst, lev-1)); return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
else else
return(dp); return(dp);
break; break;
...@@ -573,7 +576,7 @@ sopno lev; /* PLUS nesting level */ ...@@ -573,7 +576,7 @@ sopno lev; /* PLUS nesting level */
esub = ss + OPND(s) - 1; esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1); assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */ for (;;) { /* find first matching branch */
dp = backref(m, sp, stop, ssub, esub, lev); dp = backref(charset, m, sp, stop, ssub, esub, lev);
if (dp != NULL) if (dp != NULL)
return(dp); return(dp);
/* that one missed, try next one */ /* that one missed, try next one */
...@@ -594,7 +597,7 @@ sopno lev; /* PLUS nesting level */ ...@@ -594,7 +597,7 @@ sopno lev; /* PLUS nesting level */
assert(0 < i && i <= m->g->nsub); assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so; offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp; m->pmatch[i].rm_so = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev); dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
if (dp != NULL) if (dp != NULL)
return(dp); return(dp);
m->pmatch[i].rm_so = offsave; m->pmatch[i].rm_so = offsave;
...@@ -605,7 +608,7 @@ sopno lev; /* PLUS nesting level */ ...@@ -605,7 +608,7 @@ sopno lev; /* PLUS nesting level */
assert(0 < i && i <= m->g->nsub); assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo; offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp; m->pmatch[i].rm_eo = sp - m->offp;
dp = backref(m, sp, stop, ss+1, stopst, lev); dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
if (dp != NULL) if (dp != NULL)
return(dp); return(dp);
m->pmatch[i].rm_eo = offsave; m->pmatch[i].rm_eo = offsave;
...@@ -628,7 +631,8 @@ sopno lev; /* PLUS nesting level */ ...@@ -628,7 +631,8 @@ sopno lev; /* PLUS nesting level */
== char *stop, sopno startst, sopno stopst); == char *stop, sopno startst, sopno stopst);
*/ */
static char * /* where tentative match ended, or NULL */ static char * /* where tentative match ended, or NULL */
fast(m, start, stop, startst, stopst) fast(charset, m, start, stop, startst, stopst)
CHARSET_INFO *charset;
register struct match *m; register struct match *m;
char *start; char *start;
char *stop; char *stop;
...@@ -678,12 +682,12 @@ sopno stopst; ...@@ -678,12 +682,12 @@ sopno stopst;
} }
/* how about a word boundary? */ /* how about a word boundary? */
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
(c != OUT && ISWORD(c)) ) { (c != OUT && ISWORD(charset,c)) ) {
flagch = BOW; flagch = BOW;
} }
if ( (lastc != OUT && ISWORD(lastc)) && if ( (lastc != OUT && ISWORD(charset,lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))) ) { (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
flagch = EOW; flagch = EOW;
} }
if (flagch == BOW || flagch == EOW) { if (flagch == BOW || flagch == EOW) {
...@@ -719,7 +723,8 @@ sopno stopst; ...@@ -719,7 +723,8 @@ sopno stopst;
== char *stop, sopno startst, sopno stopst); == char *stop, sopno startst, sopno stopst);
*/ */
static char * /* where it ended */ static char * /* where it ended */
slow(m, start, stop, startst, stopst) slow(charset, m, start, stop, startst, stopst)
CHARSET_INFO *charset;
register struct match *m; register struct match *m;
char *start; char *start;
char *stop; char *stop;
...@@ -767,12 +772,12 @@ sopno stopst; ...@@ -767,12 +772,12 @@ sopno stopst;
} }
/* how about a word boundary? */ /* how about a word boundary? */
if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
(c != OUT && ISWORD(c)) ) { (c != OUT && ISWORD(charset,c)) ) {
flagch = BOW; flagch = BOW;
} }
if ( (lastc != OUT && ISWORD(lastc)) && if ( (lastc != OUT && ISWORD(charset,lastc)) &&
(flagch == EOL || (c != OUT && !ISWORD(c))) ) { (flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
flagch = EOW; flagch = EOW;
} }
if (flagch == BOW || flagch == EOW) { if (flagch == BOW || flagch == EOW) {
......
...@@ -4,11 +4,11 @@ extern "C" { ...@@ -4,11 +4,11 @@ extern "C" {
#endif #endif
/* === engine.c === */ /* === engine.c === */
static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags); static int matcher(CHARSET_INFO *charset,register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); static char *dissect(CHARSET_INFO *charset,register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev); static char *backref(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); static char *fast(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst); static char *slow(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft); static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
#define BOL (OUT+1) #define BOL (OUT+1)
#define EOL (BOL+1) #define EOL (BOL+1)
......
...@@ -74,7 +74,7 @@ char *argv[]; ...@@ -74,7 +74,7 @@ char *argv[];
exit(status); exit(status);
} }
err = regcomp(&re, argv[optind++], copts); err = regcomp(&re, argv[optind++], copts, default_charset_info);
if (err) { if (err) {
len = regerror(err, &re, erbuf, sizeof(erbuf)); len = regerror(err, &re, erbuf, sizeof(erbuf));
fprintf(stderr, "error %s, %d/%d `%s'\n", fprintf(stderr, "error %s, %d/%d `%s'\n",
...@@ -226,7 +226,7 @@ int opts; /* may not match f1 */ ...@@ -226,7 +226,7 @@ int opts; /* may not match f1 */
strcpy(f0copy, f0); strcpy(f0copy, f0);
re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL; re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
fixstr(f0copy); fixstr(f0copy);
err = regcomp(&re, f0copy, opts); err = regcomp(&re, f0copy, opts, default_charset_info);
if (err != 0 && (!opt('C', f1) || err != efind(f2))) { if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
/* unexpected error or wrong error */ /* unexpected error or wrong error */
len = regerror(err, &re, erbuf, sizeof(erbuf)); len = regerror(err, &re, erbuf, sizeof(erbuf));
......
...@@ -28,6 +28,7 @@ struct parse { ...@@ -28,6 +28,7 @@ struct parse {
# define NPAREN 10 /* we need to remember () 1-9 for back refs */ # define NPAREN 10 /* we need to remember () 1-9 for back refs */
sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
sopno pend[NPAREN]; /* -> ) ([0] unused) */ sopno pend[NPAREN]; /* -> ) ([0] unused) */
CHARSET_INFO *charset; /* for ctype things */
}; };
#include "regcomp.ih" #include "regcomp.ih"
...@@ -99,10 +100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */ ...@@ -99,10 +100,11 @@ static int never = 0; /* for use in asserts; shuts lint up */
= #define REG_DUMP 0200 = #define REG_DUMP 0200
*/ */
int /* 0 success, otherwise REG_something */ int /* 0 success, otherwise REG_something */
regcomp(preg, pattern, cflags) regcomp(preg, pattern, cflags, charset)
regex_t *preg; regex_t *preg;
const char *pattern; const char *pattern;
int cflags; int cflags;
CHARSET_INFO *charset;
{ {
struct parse pa; struct parse pa;
register struct re_guts *g; register struct re_guts *g;
...@@ -116,6 +118,7 @@ int cflags; ...@@ -116,6 +118,7 @@ int cflags;
#endif #endif
regex_init(); /* Init cclass if neaded */ regex_init(); /* Init cclass if neaded */
preg->charset=charset;
cflags = GOODFLAGS(cflags); cflags = GOODFLAGS(cflags);
if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
return(REG_INVARG); return(REG_INVARG);
...@@ -146,6 +149,7 @@ int cflags; ...@@ -146,6 +149,7 @@ int cflags;
p->end = p->next + len; p->end = p->next + len;
p->error = 0; p->error = 0;
p->ncsalloc = 0; p->ncsalloc = 0;
p->charset = preg->charset;
for (i = 0; i < NPAREN; i++) { for (i = 0; i < NPAREN; i++) {
p->pbegin[i] = 0; p->pbegin[i] = 0;
p->pend[i] = 0; p->pend[i] = 0;
...@@ -327,7 +331,7 @@ register struct parse *p; ...@@ -327,7 +331,7 @@ register struct parse *p;
ordinary(p, c); ordinary(p, c);
break; break;
case '{': /* okay as ordinary except if digit follows */ case '{': /* okay as ordinary except if digit follows */
if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {} if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {}
/* FALLTHROUGH */ /* FALLTHROUGH */
default: default:
ordinary(p, c); ordinary(p, c);
...@@ -339,7 +343,8 @@ register struct parse *p; ...@@ -339,7 +343,8 @@ register struct parse *p;
c = PEEK(); c = PEEK();
/* we call { a repetition if followed by a digit */ /* we call { a repetition if followed by a digit */
if (!( c == '*' || c == '+' || c == '?' || if (!( c == '*' || c == '+' || c == '?' ||
(c == '{' && MORE2() && isdigit(PEEK2())) )) (c == '{' && MORE2() &&
my_isdigit(p->charset,PEEK2())) ))
return; /* no repetition, we're done */ return; /* no repetition, we're done */
NEXT(); NEXT();
...@@ -368,7 +373,7 @@ register struct parse *p; ...@@ -368,7 +373,7 @@ register struct parse *p;
case '{': case '{':
count = p_count(p); count = p_count(p);
if (EAT(',')) { if (EAT(',')) {
if (isdigit(PEEK())) { if (my_isdigit(p->charset,PEEK())) {
count2 = p_count(p); count2 = p_count(p);
if(REQUIRE(count <= count2, REG_BADBR)) {} if(REQUIRE(count <= count2, REG_BADBR)) {}
} else /* single number with comma */ } else /* single number with comma */
...@@ -389,7 +394,8 @@ register struct parse *p; ...@@ -389,7 +394,8 @@ register struct parse *p;
return; return;
c = PEEK(); c = PEEK();
if (!( c == '*' || c == '+' || c == '?' || if (!( c == '*' || c == '+' || c == '?' ||
(c == '{' && MORE2() && isdigit(PEEK2())) ) ) (c == '{' && MORE2() &&
my_isdigit(p->charset,PEEK2())) ) )
return; return;
SETERROR(REG_BADRPT); SETERROR(REG_BADRPT);
} }
...@@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? */ ...@@ -546,7 +552,7 @@ int starordinary; /* is a leading * an ordinary character? */
} else if (EATTWO('\\', '{')) { } else if (EATTWO('\\', '{')) {
count = p_count(p); count = p_count(p);
if (EAT(',')) { if (EAT(',')) {
if (MORE() && isdigit(PEEK())) { if (MORE() && my_isdigit(p->charset,PEEK())) {
count2 = p_count(p); count2 = p_count(p);
if(REQUIRE(count <= count2, REG_BADBR)) {} if(REQUIRE(count <= count2, REG_BADBR)) {}
} else /* single number with comma */ } else /* single number with comma */
...@@ -577,7 +583,7 @@ register struct parse *p; ...@@ -577,7 +583,7 @@ register struct parse *p;
register int count = 0; register int count = 0;
register int ndigits = 0; register int ndigits = 0;
while (MORE() && isdigit(PEEK()) && count <= DUPMAX) { while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
count = count*10 + (GETNEXT() - '0'); count = count*10 + (GETNEXT() - '0');
ndigits++; ndigits++;
} }
...@@ -632,8 +638,8 @@ register struct parse *p; ...@@ -632,8 +638,8 @@ register struct parse *p;
register int ci; register int ci;
for (i = p->g->csetsize - 1; i >= 0; i--) for (i = p->g->csetsize - 1; i >= 0; i--)
if (CHIN(cs, i) && isalpha(i)) { if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
ci = othercase(i); ci = othercase(p->charset,i);
if (ci != i) if (ci != i)
CHadd(cs, ci); CHadd(cs, ci);
} }
...@@ -744,7 +750,7 @@ register cset *cs; ...@@ -744,7 +750,7 @@ register cset *cs;
register char *u; register char *u;
register char c; register char c;
while (MORE() && isalpha(PEEK())) while (MORE() && my_isalpha(p->charset,PEEK()))
NEXT(); NEXT();
len = p->next - sp; len = p->next - sp;
for (cp = cclasses; cp->name != NULL; cp++) for (cp = cclasses; cp->name != NULL; cp++)
...@@ -837,14 +843,15 @@ int endc; /* name ended by endc,']' */ ...@@ -837,14 +843,15 @@ int endc; /* name ended by endc,']' */
== static char othercase(int ch); == static char othercase(int ch);
*/ */
static char /* if no counterpart, return ch */ static char /* if no counterpart, return ch */
othercase(ch) othercase(charset,ch)
CHARSET_INFO *charset;
int ch; int ch;
{ {
assert(isalpha(ch)); assert(my_isalpha(charset,ch));
if (isupper(ch)) if (my_isupper(charset,ch))
return(tolower(ch)); return(my_tolower(charset,ch));
else if (islower(ch)) else if (my_islower(charset,ch))
return(toupper(ch)); return(my_toupper(charset,ch));
else /* peculiar, but could happen */ else /* peculiar, but could happen */
return(ch); return(ch);
} }
...@@ -887,7 +894,8 @@ register int ch; ...@@ -887,7 +894,8 @@ register int ch;
{ {
register cat_t *cap = p->g->categories; register cat_t *cap = p->g->categories;
if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) &&
othercase(p->charset,ch) != ch)
bothcases(p, ch); bothcases(p, ch);
else { else {
EMIT(OCHAR, (unsigned char)ch); EMIT(OCHAR, (unsigned char)ch);
......
...@@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs); ...@@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs);
static void p_b_eclass(register struct parse *p, register cset *cs); static void p_b_eclass(register struct parse *p, register cset *cs);
static char p_b_symbol(register struct parse *p); static char p_b_symbol(register struct parse *p);
static char p_b_coll_elem(register struct parse *p, int endc); static char p_b_coll_elem(register struct parse *p, int endc);
static char othercase(int ch); static char othercase(CHARSET_INFO *charset,int ch);
static void bothcases(register struct parse *p, int ch); static void bothcases(register struct parse *p, int ch);
static void ordinary(register struct parse *p, register int ch); static void ordinary(register struct parse *p, register int ch);
static void nonnewline(register struct parse *p); static void nonnewline(register struct parse *p);
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
extern "C" { extern "C" {
#endif #endif
#include "m_ctype.h"
/* === regex2.h === */ /* === regex2.h === */
#ifdef _WIN64 #ifdef _WIN64
typedef __int64 regoff_t; typedef __int64 regoff_t;
...@@ -17,6 +19,7 @@ typedef struct { ...@@ -17,6 +19,7 @@ typedef struct {
size_t re_nsub; /* number of parenthesized subexpressions */ size_t re_nsub; /* number of parenthesized subexpressions */
const char *re_endp; /* end pointer for REG_PEND */ const char *re_endp; /* end pointer for REG_PEND */
struct re_guts *re_g; /* none of your business :-) */ struct re_guts *re_g; /* none of your business :-) */
CHARSET_INFO *charset; /* For ctype things */
} regex_t; } regex_t;
typedef struct { typedef struct {
regoff_t rm_so; /* start of match */ regoff_t rm_so; /* start of match */
...@@ -25,7 +28,7 @@ typedef struct { ...@@ -25,7 +28,7 @@ typedef struct {
/* === regcomp.c === */ /* === regcomp.c === */
extern int regcomp(regex_t *, const char *, int); extern int regcomp(regex_t *, const char *, int, CHARSET_INFO *charset);
#define REG_BASIC 0000 #define REG_BASIC 0000
#define REG_EXTENDED 0001 #define REG_EXTENDED 0001
#define REG_ICASE 0002 #define REG_ICASE 0002
......
...@@ -140,6 +140,6 @@ struct re_guts { ...@@ -140,6 +140,6 @@ struct re_guts {
/* misc utilities */ /* misc utilities */
#undef OUT /* May be defined in windows */ #undef OUT /* May be defined in windows */
#define OUT (CHAR_MAX+1) /* a non-character value */ #define OUT (CHAR_MAX+1) /* a non-character value */
#define ISWORD(c) (isalnum(c) || (c) == '_') #define ISWORD(s,c) (my_isalnum(s,c) || (c) == '_')
#endif /* __regex2_h__ */ #endif /* __regex2_h__ */
...@@ -133,7 +133,7 @@ int eflags; ...@@ -133,7 +133,7 @@ int eflags;
if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) && if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) &&
!(eflags&REG_LARGE)) !(eflags&REG_LARGE))
return(smatcher(g, (char *)str, nmatch, pmatch, eflags)); return(smatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
else else
return(lmatcher(g, (char *)str, nmatch, pmatch, eflags)); return(lmatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
} }
...@@ -12,6 +12,7 @@ void regex_init() ...@@ -12,6 +12,7 @@ void regex_init()
char buff[CCLASS_LAST][256]; char buff[CCLASS_LAST][256];
int count[CCLASS_LAST]; int count[CCLASS_LAST];
uint i; uint i;
CHARSET_INFO *cs=default_charset_info;
if (!regex_inited) if (!regex_inited)
{ {
...@@ -20,27 +21,27 @@ void regex_init() ...@@ -20,27 +21,27 @@ void regex_init()
for (i=1 ; i<= 255; i++) for (i=1 ; i<= 255; i++)
{ {
if (isalnum(i)) if (my_isalnum(cs,i))
buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i; buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i;
if (isalpha(i)) if (my_isalpha(cs,i))
buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i; buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i;
if (iscntrl(i)) if (my_iscntrl(cs,i))
buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i; buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i;
if (isdigit(i)) if (my_isdigit(cs,i))
buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i; buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i;
if (isgraph(i)) if (my_isgraph(cs,i))
buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i; buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i;
if (islower(i)) if (my_islower(cs,i))
buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i; buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i;
if (isprint(i)) if (my_isprint(cs,i))
buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i; buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i;
if (ispunct(i)) if (my_ispunct(cs,i))
buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i; buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i;
if (isspace(i)) if (my_isspace(cs,i))
buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i; buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i;
if (isupper(i)) if (my_isupper(cs,i))
buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i; buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i;
if (isxdigit(i)) if (my_isxdigit(cs,i))
buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i; buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i;
} }
buff[CCLASS_BLANK][0]=' '; buff[CCLASS_BLANK][0]=' ';
......
...@@ -1278,7 +1278,8 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables) ...@@ -1278,7 +1278,8 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
int error; int error;
if ((error=regcomp(&preg,res->c_ptr(), if ((error=regcomp(&preg,res->c_ptr(),
binary ? REG_EXTENDED | REG_NOSUB : binary ? REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE))) REG_EXTENDED | REG_NOSUB | REG_ICASE,
default_charset_info)))
{ {
(void) regerror(error,&preg,buff,sizeof(buff)); (void) regerror(error,&preg,buff,sizeof(buff));
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff); my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
...@@ -1325,7 +1326,8 @@ longlong Item_func_regex::val_int() ...@@ -1325,7 +1326,8 @@ longlong Item_func_regex::val_int()
} }
if (regcomp(&preg,res2->c_ptr(), if (regcomp(&preg,res2->c_ptr(),
binary ? REG_EXTENDED | REG_NOSUB : binary ? REG_EXTENDED | REG_NOSUB :
REG_EXTENDED | REG_NOSUB | REG_ICASE)) REG_EXTENDED | REG_NOSUB | REG_ICASE,
default_charset_info))
{ {
null_value=1; null_value=1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment