The regex library is switched to use the new ctype tools to allow several character sets to be used at the same time.

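The regex library is switched to use the new ctype tools
to allow several character sets to be used at the same time.
regcomp() now takes a CHARSET_INFO * as its fourth argument; it is stored
in regex_t and passed on to the matcher by regexec().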
2eed4065 · bar@gw.udmsearch.izhnet.ru · 576c9b81 · 2eed4065 · 2eed4065 · 2eed4065
Commit 2eed4065 authored Mar 06, 2002 by bar@gw.udmsearch.izhnet.ru
13 changed files
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -180,6 +180,7 @@ extern const char *compiled_charset_name(uint charset_number);
 #define	_B	0100	/* Blank */
 #define	_X	0200	/* heXadecimal digit */

+#ifndef HIDE_OLD_CTYPE
 #define my_ctype	(default_charset_info->ctype)
 #define my_to_upper	(default_charset_info->to_upper)
 #define my_to_lower	(default_charset_info->to_lower)
@@ -201,6 +202,8 @@ extern const char *compiled_charset_name(uint charset_number);
 #define	isprint(c)	((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N | _B))
 #define	isgraph(c)	((my_ctype+1)[(uchar) (c)] & (_P | _U | _L | _N))
 #define	iscntrl(c)	((my_ctype+1)[(uchar) (c)] & _C)
+#endif
+
 #define	isascii(c)	(!((c) & ~0177))
 #define	toascii(c)	((c) & 0177)

@@ -208,6 +211,8 @@ extern const char *compiled_charset_name(uint charset_number);
 #undef ctype
 #endif /* ctype */

+#define my_toupper(s,c)	(char) ((s)->to_upper[(uchar) (c)])
+#define my_tolower(s,c)	(char) ((s)->to_lower[(uchar) (c)])
 #define	my_isalpha(s, c)  (((s)->ctype+1)[(uchar) (c)] & (_U | _L))
 #define	my_isupper(s, c)  (((s)->ctype+1)[(uchar) (c)] & _U)
 #define	my_islower(s, c)  (((s)->ctype+1)[(uchar) (c)] & _L)

--- a/regex/debug.c
+++ b/regex/debug.c
@@ -45,7 +45,7 @@ FILE *d;
 	if (g->nplus > 0)
 		fprintf(d, ", nplus %ld", (long)g->nplus);
 	fprintf(d, "\n");
-	s_print(g, d);
+	s_print(r->charset, g, d);
 	for (i = 0; i < g->ncategories; i++) {
 		nincat[i] = 0;
 		for (c = CHAR_MIN; c <= CHAR_MAX; c++)
@@ -58,7 +58,7 @@ FILE *d;
 			for (c = CHAR_MIN; c <= CHAR_MAX; c++)
 				if (g->categories[c] == i)
 					break;
-			fprintf(d, ", %d=%s", i, regchar(c,buf));
+			fprintf(d, ", %d=%s", i, regchar(r->charset,c,buf));
 		}
 	fprintf(d, "\n");
 	for (i = 1; i < g->ncategories; i++)
@@ -68,14 +68,14 @@ FILE *d;
 			for (c = CHAR_MIN; c <= CHAR_MAX+1; c++)	/* +1 does flush */
 				if (c <= CHAR_MAX && g->categories[c] == i) {
 					if (last < 0) {
-						fprintf(d, "%s", regchar(c,buf));
+						fprintf(d, "%s", regchar(r->charset,c,buf));
 						last = c;
 					}
 				} else {
 					if (last >= 0) {
 						if (last != c-1)
 							fprintf(d, "-%s",
-								regchar(c-1,buf));
+								regchar(r->charset,c-1,buf));
 						last = -1;
 					}
 				}
@@ -88,7 +88,8 @@ FILE *d;
 == static void s_print(register struct re_guts *g, FILE *d);
 */
 static void
-s_print(g, d)
+s_print(charset, g, d)
+CHARSET_INFO *charset;
 register struct re_guts *g;
 FILE *d;
 {
@@ -127,7 +128,7 @@ FILE *d;
 			if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL)
 				fprintf(d, "\\%c", (char)opnd);
 			else
-				fprintf(d, "%s", regchar((char)opnd,buf));
+				fprintf(d, "%s", regchar(charset,(char)opnd,buf));
 			break;
 		case OBOL:
 			fprintf(d, "^");
@@ -151,14 +152,14 @@ FILE *d;
 			for (i = 0; i < g->csetsize+1; i++)	/* +1 flushes */
 				if (CHIN(cs, i) && i < g->csetsize) {
 					if (last < 0) {
-						fprintf(d, "%s", regchar(i,buf));
+						fprintf(d, "%s", regchar(charset,i,buf));
 						last = i;
 					}
 				} else {
 					if (last >= 0) {
 						if (last != i-1)
 							fprintf(d, "-%s",
-								regchar(i-1,buf));
+								regchar(charset,i-1,buf));
 						last = -1;
 					}
 				}
@@ -230,12 +231,13 @@ FILE *d;
 == static char *regchar(int ch);
 */
 static char *			/* -> representation */
-regchar(ch,buf)
+regchar(charset,ch,buf)
+CHARSET_INFO *charset;
 int ch;
 char *buf;
 {

-	if (isprint(ch) || ch == ' ')
+	if (my_isprint(charset,ch) || ch == ' ')
 		sprintf(buf, "%c", ch);
 	else
 		sprintf(buf, "\\%o", ch);

--- a/regex/debug.ih
+++ b/regex/debug.ih
@@ -5,8 +5,8 @@ extern "C" {

 /* === debug.c === */
 void regprint(regex_t *r, FILE *d);
-static void s_print(register struct re_guts *g, FILE *d);
-static char *regchar(int ch,char *buf);
+static void s_print(CHARSET_INFO *charset, register struct re_guts *g, FILE *d);
+static char *regchar(CHARSET_INFO *charset, int ch,char *buf);

 #ifdef __cplusplus
 }

--- a/regex/engine.c
+++ b/regex/engine.c
@@ -63,7 +63,8 @@ struct match {
 ==	size_t nmatch, regmatch_t pmatch[], int eflags);
 */
 static int			/* 0 success, REG_NOMATCH failure */
-matcher(g, str, nmatch, pmatch, eflags)
+matcher(charset,g, str, nmatch, pmatch, eflags)
+CHARSET_INFO *charset;
 register struct re_guts *g;
 char *str;
 size_t nmatch;
@@ -120,7 +121,7 @@ int eflags;

 	/* this loop does only one repetition except for backrefs */
 	for (;;) {
-		endp = fast(m, start, stop, gf, gl);
+		endp = fast(charset, m, start, stop, gf, gl);
 		if (endp == NULL) {		/* a miss */
 		  if (m->pmatch != NULL)
 		    free((char *)m->pmatch);
@@ -136,7 +137,7 @@ int eflags;
 		assert(m->coldp != NULL);
 		for (;;) {
 			NOTE("finding start");
-			endp = slow(m, m->coldp, stop, gf, gl);
+			endp = slow(charset, m, m->coldp, stop, gf, gl);
 			if (endp != NULL)
 				break;
 			assert(m->coldp < m->endp);
@@ -159,7 +160,7 @@ int eflags;
 			m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
 		if (!g->backrefs && !(m->eflags&REG_BACKR)) {
 			NOTE("dissecting");
-			dp = dissect(m, m->coldp, endp, gf, gl);
+			dp = dissect(charset, m, m->coldp, endp, gf, gl);
 		} else {
 			if (g->nplus > 0 && m->lastpos == NULL)
 				m->lastpos = (char **)malloc((g->nplus+1) *
@@ -170,7 +171,7 @@ int eflags;
 				return(REG_ESPACE);
 			}
 			NOTE("backref dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
 		}
 		if (dp != NULL)
 			break;
@@ -182,7 +183,7 @@ int eflags;
 			if (dp != NULL || endp <= m->coldp)
 				break;		/* defeat */
 			NOTE("backoff");
-			endp = slow(m, m->coldp, endp-1, gf, gl);
+			endp = slow(charset, m, m->coldp, endp-1, gf, gl);
 			if (endp == NULL)
 				break;		/* defeat */
 			/* try it on a shorter possibility */
@@ -193,7 +194,7 @@ int eflags;
 			}
 #endif
 			NOTE("backoff dissect");
-			dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+			dp = backref(charset, m, m->coldp, endp, gf, gl, (sopno)0);
 		}
 		assert(dp == NULL || dp == endp);
 		if (dp != NULL)		/* found a shorter one */
@@ -235,7 +236,8 @@ int eflags;
 ==	char *stop, sopno startst, sopno stopst);
 */
 static char *			/* == stop (success) always */
-dissect(m, start, stop, startst, stopst)
+dissect(charset, m, start, stop, startst, stopst)
+CHARSET_INFO *charset;
 register struct match *m;
 char *start;
 char *stop;
@@ -299,10 +301,10 @@ sopno stopst;
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = slow(charset, m, sp, stp, ss, es);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = slow(charset, m, rest, stop, es, stopst);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -312,8 +314,8 @@ sopno stopst;
 			ssub = ss + 1;
 			esub = es - 1;
 			/* did innards match? */
-			if (slow(m, sp, rest, ssub, esub) != NULL) {
-				dp = dissect(m, sp, rest, ssub, esub);
+			if (slow(charset, m, sp, rest, ssub, esub) != NULL) {
+				dp = dissect(charset, m, sp, rest, ssub, esub);
 				assert(dp == rest);
 			} else		/* no */
 				assert(sp == rest);
@@ -323,10 +325,10 @@ sopno stopst;
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = slow(charset, m, sp, stp, ss, es);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = slow(charset, m, rest, stop, es, stopst);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -338,7 +340,7 @@ sopno stopst;
 			ssp = sp;
 			oldssp = ssp;
 			for (;;) {	/* find last match of innards */
-				sep = slow(m, ssp, rest, ssub, esub);
+				sep = slow(charset, m, ssp, rest, ssub, esub);
 				if (sep == NULL || sep == ssp)
 					break;	/* failed or matched null */
 				oldssp = ssp;	/* on to next try */
@@ -350,8 +352,8 @@ sopno stopst;
 				ssp = oldssp;
 			}
 			assert(sep == rest);	/* must exhaust substring */
-			assert(slow(m, ssp, sep, ssub, esub) == rest);
-			dp = dissect(m, ssp, sep, ssub, esub);
+			assert(slow(charset, m, ssp, sep, ssub, esub) == rest);
+			dp = dissect(charset, m, ssp, sep, ssub, esub);
 			assert(dp == sep);
 			sp = rest;
 			break;
@@ -359,10 +361,10 @@ sopno stopst;
 			stp = stop;
 			for (;;) {
 				/* how long could this one be? */
-				rest = slow(m, sp, stp, ss, es);
+				rest = slow(charset, m, sp, stp, ss, es);
 				assert(rest != NULL);	/* it did match */
 				/* could the rest match the rest? */
-				tail = slow(m, rest, stop, es, stopst);
+				tail = slow(charset, m, rest, stop, es, stopst);
 				if (tail == stop)
 					break;		/* yes! */
 				/* no -- try a shorter match for this one */
@@ -373,7 +375,7 @@ sopno stopst;
 			esub = ss + OPND(m->g->strip[ss]) - 1;
 			assert(OP(m->g->strip[esub]) == OOR1);
 			for (;;) {	/* find first matching branch */
-				if (slow(m, sp, rest, ssub, esub) == rest)
+				if (slow(charset, m, sp, rest, ssub, esub) == rest)
 					break;	/* it matched all of it */
 				/* that one missed, try next one */
 				assert(OP(m->g->strip[esub]) == OOR1);
@@ -386,7 +388,7 @@ sopno stopst;
 				else
 					assert(OP(m->g->strip[esub]) == O_CH);
 			}
-			dp = dissect(m, sp, rest, ssub, esub);
+			dp = dissect(charset, m, sp, rest, ssub, esub);
 			assert(dp == rest);
 			sp = rest;
 			break;
@@ -423,7 +425,8 @@ sopno stopst;
 ==	char *stop, sopno startst, sopno stopst, sopno lev);
 */
 static char *			/* == stop (success) or NULL (failure) */
-backref(m, start, stop, startst, stopst, lev)
+backref(charset,m, start, stop, startst, stopst, lev)
+CHARSET_INFO *charset;
 register struct match *m;
 char *start;
 char *stop;
@@ -486,8 +489,8 @@ sopno lev;			/* PLUS nesting level */
 					(sp < m->endp && *(sp-1) == '\n' &&
 						(m->g->cflags&REG_NEWLINE)) ||
 					(sp > m->beginp &&
-							!ISWORD(*(sp-1))) ) &&
-					(sp < m->endp && ISWORD(*sp)) )
+							!ISWORD(charset,*(sp-1))) ) &&
+					(sp < m->endp && ISWORD(charset,*sp)) )
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -496,8 +499,8 @@ sopno lev;			/* PLUS nesting level */
 			if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
 					(sp < m->endp && *sp == '\n' &&
 						(m->g->cflags&REG_NEWLINE)) ||
-					(sp < m->endp && !ISWORD(*sp)) ) &&
-					(sp > m->beginp && ISWORD(*(sp-1))) )
+					(sp < m->endp && !ISWORD(charset,*sp)) ) &&
+					(sp > m->beginp && ISWORD(charset,*(sp-1))) )
 				{ /* yes */ }
 			else
 				return(NULL);
@@ -543,28 +546,28 @@ sopno lev;			/* PLUS nesting level */
 			return(NULL);
 		while (m->g->strip[ss] != SOP(O_BACK, i))
 			ss++;
-		return(backref(m, sp+len, stop, ss+1, stopst, lev));
+		return(backref(charset, m, sp+len, stop, ss+1, stopst, lev));
 		break;
 	case OQUEST_:		/* to null or not */
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
 		if (dp != NULL)
 			return(dp);	/* not */
-		return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
+		return(backref(charset, m, sp, stop, ss+OPND(s)+1, stopst, lev));
 		break;
 	case OPLUS_:
 		assert(m->lastpos != NULL);
 		assert(lev+1 <= m->g->nplus);
 		m->lastpos[lev+1] = sp;
-		return(backref(m, sp, stop, ss+1, stopst, lev+1));
+		return(backref(charset, m, sp, stop, ss+1, stopst, lev+1));
 		break;
 	case O_PLUS:
 		if (sp == m->lastpos[lev])	/* last pass matched null */
-			return(backref(m, sp, stop, ss+1, stopst, lev-1));
+			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
 		/* try another pass */
 		m->lastpos[lev] = sp;
-		dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
+		dp = backref(charset, m, sp, stop, ss-OPND(s)+1, stopst, lev);
 		if (dp == NULL)
-			return(backref(m, sp, stop, ss+1, stopst, lev-1));
+			return(backref(charset, m, sp, stop, ss+1, stopst, lev-1));
 		else
 			return(dp);
 		break;
@@ -573,7 +576,7 @@ sopno lev;			/* PLUS nesting level */
 		esub = ss + OPND(s) - 1;
 		assert(OP(m->g->strip[esub]) == OOR1);
 		for (;;) {	/* find first matching branch */
-			dp = backref(m, sp, stop, ssub, esub, lev);
+			dp = backref(charset, m, sp, stop, ssub, esub, lev);
 			if (dp != NULL)
 				return(dp);
 			/* that one missed, try next one */
@@ -594,7 +597,7 @@ sopno lev;			/* PLUS nesting level */
 		assert(0 < i && i <= m->g->nsub);
 		offsave = m->pmatch[i].rm_so;
 		m->pmatch[i].rm_so = sp - m->offp;
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
 		if (dp != NULL)
 			return(dp);
 		m->pmatch[i].rm_so = offsave;
@@ -605,7 +608,7 @@ sopno lev;			/* PLUS nesting level */
 		assert(0 < i && i <= m->g->nsub);
 		offsave = m->pmatch[i].rm_eo;
 		m->pmatch[i].rm_eo = sp - m->offp;
-		dp = backref(m, sp, stop, ss+1, stopst, lev);
+		dp = backref(charset, m, sp, stop, ss+1, stopst, lev);
 		if (dp != NULL)
 			return(dp);
 		m->pmatch[i].rm_eo = offsave;
@@ -628,7 +631,8 @@ sopno lev;			/* PLUS nesting level */
 ==	char *stop, sopno startst, sopno stopst);
 */
 static char *			/* where tentative match ended, or NULL */
-fast(m, start, stop, startst, stopst)
+fast(charset, m, start, stop, startst, stopst)
+CHARSET_INFO *charset;
 register struct match *m;
 char *start;
 char *stop;
@@ -678,12 +682,12 @@ sopno stopst;
 		}

 		/* how about a word boundary? */
-		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-					(c != OUT && ISWORD(c)) ) {
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
+					(c != OUT && ISWORD(charset,c)) ) {
 			flagch = BOW;
 		}
-		if ( (lastc != OUT && ISWORD(lastc)) &&
-				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
+				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
 			flagch = EOW;
 		}
 		if (flagch == BOW || flagch == EOW) {
@@ -719,7 +723,8 @@ sopno stopst;
 ==	char *stop, sopno startst, sopno stopst);
 */
 static char *			/* where it ended */
-slow(m, start, stop, startst, stopst)
+slow(charset, m, start, stop, startst, stopst)
+CHARSET_INFO *charset;
 register struct match *m;
 char *start;
 char *stop;
@@ -767,12 +772,12 @@ sopno stopst;
 		}

 		/* how about a word boundary? */
-		if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
-					(c != OUT && ISWORD(c)) ) {
+		if ( (flagch == BOL || (lastc != OUT && !ISWORD(charset,lastc))) &&
+					(c != OUT && ISWORD(charset,c)) ) {
 			flagch = BOW;
 		}
-		if ( (lastc != OUT && ISWORD(lastc)) &&
-				(flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+		if ( (lastc != OUT && ISWORD(charset,lastc)) &&
+				(flagch == EOL || (c != OUT && !ISWORD(charset,c))) ) {
 			flagch = EOW;
 		}
 		if (flagch == BOW || flagch == EOW) {

--- a/regex/engine.ih
+++ b/regex/engine.ih
@@ -4,11 +4,11 @@ extern "C" {
 #endif

 /* === engine.c === */
-static int matcher(register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
-static char *dissect(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
-static char *backref(register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
-static char *fast(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
-static char *slow(register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
+static int matcher(CHARSET_INFO *charset,register struct re_guts *g, char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+static char *dissect(CHARSET_INFO *charset,register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
+static char *backref(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
+static char *fast(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
+static char *slow(CHARSET_INFO *charset, register struct match *m, char *start, char *stop, sopno startst, sopno stopst);
 static states step(register struct re_guts *g, sopno start, sopno stop, register states bef, int ch, register states aft);
 #define	BOL	(OUT+1)
 #define	EOL	(BOL+1)

--- a/regex/main.c
+++ b/regex/main.c
@@ -74,7 +74,7 @@ char *argv[];
 		exit(status);
 	}

-	err = regcomp(&re, argv[optind++], copts);
+	err = regcomp(&re, argv[optind++], copts, default_charset_info);
 	if (err) {
 		len = regerror(err, &re, erbuf, sizeof(erbuf));
 		fprintf(stderr, "error %s, %d/%d `%s'\n",
@@ -226,7 +226,7 @@ int opts;			/* may not match f1 */
 	strcpy(f0copy, f0);
 	re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
 	fixstr(f0copy);
-	err = regcomp(&re, f0copy, opts);
+	err = regcomp(&re, f0copy, opts, default_charset_info);
 	if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
 		/* unexpected error or wrong error */
 		len = regerror(err, &re, erbuf, sizeof(erbuf));

--- a/regex/regcomp.c
+++ b/regex/regcomp.c
@@ -28,6 +28,7 @@ struct parse {
 #	define	NPAREN	10	/* we need to remember () 1-9 for back refs */
 	sopno pbegin[NPAREN];	/* -> ( ([0] unused) */
 	sopno pend[NPAREN];	/* -> ) ([0] unused) */
+	CHARSET_INFO *charset;	/* for ctype things  */
 };

 #include "regcomp.ih"
@@ -99,10 +100,11 @@ static int never = 0;		/* for use in asserts; shuts lint up */
 = #define	REG_DUMP	0200
 */
 int				/* 0 success, otherwise REG_something */
-regcomp(preg, pattern, cflags)
+regcomp(preg, pattern, cflags, charset)
 regex_t *preg;
 const char *pattern;
 int cflags;
+CHARSET_INFO *charset;
 {
 	struct parse pa;
 	register struct re_guts *g;
@@ -116,6 +118,7 @@ int cflags;
 #endif

 	regex_init();				/* Init cclass if neaded */
+	preg->charset=charset;
 	cflags = GOODFLAGS(cflags);
 	if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
 		return(REG_INVARG);
@@ -146,6 +149,7 @@ int cflags;
 	p->end = p->next + len;
 	p->error = 0;
 	p->ncsalloc = 0;
+	p->charset = preg->charset;
 	for (i = 0; i < NPAREN; i++) {
 		p->pbegin[i] = 0;
 		p->pend[i] = 0;
@@ -327,7 +331,7 @@ register struct parse *p;
 		ordinary(p, c);
 		break;
 	case '{':		/* okay as ordinary except if digit follows */
-		if(REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT)) {}
+		if(REQUIRE(!MORE() || !my_isdigit(p->charset,PEEK()), REG_BADRPT)) {}
 		/* FALLTHROUGH */
 	default:
 		ordinary(p, c);
@@ -339,7 +343,8 @@ register struct parse *p;
 	c = PEEK();
 	/* we call { a repetition if followed by a digit */
 	if (!( c == '*' || c == '+' || c == '?' ||
-				(c == '{' && MORE2() && isdigit(PEEK2())) ))
+				(c == '{' && MORE2() && 
+				 my_isdigit(p->charset,PEEK2())) ))
 		return;		/* no repetition, we're done */
 	NEXT();

@@ -368,7 +373,7 @@ register struct parse *p;
 	case '{':
 		count = p_count(p);
 		if (EAT(',')) {
-			if (isdigit(PEEK())) {
+			if (my_isdigit(p->charset,PEEK())) {
 				count2 = p_count(p);
 				if(REQUIRE(count <= count2, REG_BADBR)) {}
 			} else		/* single number with comma */
@@ -389,7 +394,8 @@ register struct parse *p;
 		return;
 	c = PEEK();
 	if (!( c == '*' || c == '+' || c == '?' ||
-				(c == '{' && MORE2() && isdigit(PEEK2())) ) )
+				(c == '{' && MORE2() && 
+				 my_isdigit(p->charset,PEEK2())) ) )
 		return;
 	SETERROR(REG_BADRPT);
 }
@@ -546,7 +552,7 @@ int starordinary;		/* is a leading * an ordinary character? */
 	} else if (EATTWO('\\', '{')) {
 		count = p_count(p);
 		if (EAT(',')) {
-			if (MORE() && isdigit(PEEK())) {
+			if (MORE() && my_isdigit(p->charset,PEEK())) {
 				count2 = p_count(p);
 				if(REQUIRE(count <= count2, REG_BADBR)) {}
 			} else		/* single number with comma */
@@ -577,7 +583,7 @@ register struct parse *p;
 	register int count = 0;
 	register int ndigits = 0;

-	while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
+	while (MORE() && my_isdigit(p->charset,PEEK()) && count <= DUPMAX) {
 		count = count*10 + (GETNEXT() - '0');
 		ndigits++;
 	}
@@ -632,8 +638,8 @@ register struct parse *p;
 		register int ci;

 		for (i = p->g->csetsize - 1; i >= 0; i--)
-			if (CHIN(cs, i) && isalpha(i)) {
-				ci = othercase(i);
+			if (CHIN(cs, i) && my_isalpha(p->charset,i)) {
+				ci = othercase(p->charset,i);
 				if (ci != i)
 					CHadd(cs, ci);
 			}
@@ -744,7 +750,7 @@ register cset *cs;
 	register char *u;
 	register char c;

-	while (MORE() && isalpha(PEEK()))
+	while (MORE() && my_isalpha(p->charset,PEEK()))
 		NEXT();
 	len = p->next - sp;
 	for (cp = cclasses; cp->name != NULL; cp++)
@@ -837,14 +843,15 @@ int endc;			/* name ended by endc,']' */
 == static char othercase(int ch);
 */
 static char			/* if no counterpart, return ch */
-othercase(ch)
+othercase(charset,ch)
+CHARSET_INFO *charset;
 int ch;
 {
-	assert(isalpha(ch));
-	if (isupper(ch))
-		return(tolower(ch));
-	else if (islower(ch))
-		return(toupper(ch));
+	assert(my_isalpha(charset,ch));
+	if (my_isupper(charset,ch))
+		return(my_tolower(charset,ch));
+	else if (my_islower(charset,ch))
+		return(my_toupper(charset,ch));
 	else			/* peculiar, but could happen */
 		return(ch);
 }
@@ -887,7 +894,8 @@ register int ch;
 {
 	register cat_t *cap = p->g->categories;

-	if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
+	if ((p->g->cflags&REG_ICASE) && my_isalpha(p->charset,ch) && 
+	     othercase(p->charset,ch) != ch)
 		bothcases(p, ch);
 	else {
 		EMIT(OCHAR, (unsigned char)ch);

--- a/regex/regcomp.ih
+++ b/regex/regcomp.ih
@@ -16,7 +16,7 @@ static void p_b_cclass(register struct parse *p, register cset *cs);
 static void p_b_eclass(register struct parse *p, register cset *cs);
 static char p_b_symbol(register struct parse *p);
 static char p_b_coll_elem(register struct parse *p, int endc);
-static char othercase(int ch);
+static char othercase(CHARSET_INFO *charset,int ch);
 static void bothcases(register struct parse *p, int ch);
 static void ordinary(register struct parse *p, register int ch);
 static void nonnewline(register struct parse *p);

--- a/regex/regex.h
+++ b/regex/regex.h
@@ -5,6 +5,8 @@
 extern "C" {
 #endif

+#include "m_ctype.h"
+
 /* === regex2.h === */
 #ifdef _WIN64
 typedef __int64 regoff_t;
@@ -17,6 +19,7 @@ typedef struct {
 	size_t re_nsub;		/* number of parenthesized subexpressions */
 	const char *re_endp;	/* end pointer for REG_PEND */
 	struct re_guts *re_g;	/* none of your business :-) */
+	CHARSET_INFO *charset;	/* For ctype things */
 } regex_t;
 typedef struct {
 	regoff_t rm_so;		/* start of match */
@@ -25,7 +28,7 @@ typedef struct {


 /* === regcomp.c === */
-extern int regcomp(regex_t *, const char *, int);
+extern int regcomp(regex_t *, const char *, int, CHARSET_INFO *charset);
 #define	REG_BASIC	0000
 #define	REG_EXTENDED	0001
 #define	REG_ICASE	0002

--- a/regex/regex2.h
+++ b/regex/regex2.h
@@ -140,6 +140,6 @@ struct re_guts {
 /* misc utilities */
 #undef OUT			/* May be defined in windows */
 #define	OUT	(CHAR_MAX+1)	/* a non-character value */
-#define	ISWORD(c)	(isalnum(c) || (c) == '_')
+#define	ISWORD(s,c)	(my_isalnum(s,c) || (c) == '_')

 #endif /* __regex2_h__ */
--- a/regex/regexec.c
+++ b/regex/regexec.c
@@ -133,7 +133,7 @@ int eflags;

 	if ((size_t) g->nstates <= CHAR_BIT*sizeof(states1) &&
 	    !(eflags&REG_LARGE))
-		return(smatcher(g, (char *)str, nmatch, pmatch, eflags));
+		return(smatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
 	else
-		return(lmatcher(g, (char *)str, nmatch, pmatch, eflags));
+		return(lmatcher(preg->charset, g, (char *)str, nmatch, pmatch, eflags));
 }
--- a/regex/reginit.c
+++ b/regex/reginit.c
@@ -12,6 +12,7 @@ void regex_init()
  char buff[CCLASS_LAST][256];
  int  count[CCLASS_LAST];
  uint i;
+  CHARSET_INFO *cs=default_charset_info;

  if (!regex_inited)
  {
@@ -20,27 +21,27 @@ void regex_init()

    for (i=1 ; i<= 255; i++)
    {
-      if (isalnum(i))
+      if (my_isalnum(cs,i))
 	buff[CCLASS_ALNUM][count[CCLASS_ALNUM]++]=(char) i;
-      if (isalpha(i))
+      if (my_isalpha(cs,i))
 	buff[CCLASS_ALPHA][count[CCLASS_ALPHA]++]=(char) i;
-      if (iscntrl(i))
+      if (my_iscntrl(cs,i))
 	buff[CCLASS_CNTRL][count[CCLASS_CNTRL]++]=(char) i;
-      if (isdigit(i))
+      if (my_isdigit(cs,i))
 	buff[CCLASS_DIGIT][count[CCLASS_DIGIT]++]=(char) i;
-      if (isgraph(i))
+      if (my_isgraph(cs,i))
 	buff[CCLASS_GRAPH][count[CCLASS_GRAPH]++]=(char) i;
-      if (islower(i))
+      if (my_islower(cs,i))
 	buff[CCLASS_LOWER][count[CCLASS_LOWER]++]=(char) i;
-      if (isprint(i))
+      if (my_isprint(cs,i))
 	buff[CCLASS_PRINT][count[CCLASS_PRINT]++]=(char) i;
-      if (ispunct(i))
+      if (my_ispunct(cs,i))
 	buff[CCLASS_PUNCT][count[CCLASS_PUNCT]++]=(char) i;
-      if (isspace(i))
+      if (my_isspace(cs,i))
 	buff[CCLASS_SPACE][count[CCLASS_SPACE]++]=(char) i;
-      if (isupper(i))
+      if (my_isupper(cs,i))
 	buff[CCLASS_UPPER][count[CCLASS_UPPER]++]=(char) i;
-      if (isxdigit(i))
+      if (my_isxdigit(cs,i))
 	buff[CCLASS_XDIGIT][count[CCLASS_XDIGIT]++]=(char) i;
    }
    buff[CCLASS_BLANK][0]=' ';

--- a/sql/item_cmpfunc.cc
+++ b/sql/item_cmpfunc.cc
@@ -1278,7 +1278,8 @@ Item_func_regex::fix_fields(THD *thd,TABLE_LIST *tables)
    int error;
    if ((error=regcomp(&preg,res->c_ptr(),
 		       binary ? REG_EXTENDED | REG_NOSUB :
-		       REG_EXTENDED | REG_NOSUB | REG_ICASE)))
+		       REG_EXTENDED | REG_NOSUB | REG_ICASE,
+		       default_charset_info)))
    {
      (void) regerror(error,&preg,buff,sizeof(buff));
      my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
@@ -1325,7 +1326,8 @@ longlong Item_func_regex::val_int()
      }
      if (regcomp(&preg,res2->c_ptr(),
 		  binary ? REG_EXTENDED | REG_NOSUB :
-		  REG_EXTENDED | REG_NOSUB | REG_ICASE))
+		  REG_EXTENDED | REG_NOSUB | REG_ICASE,
+		  default_charset_info))

      {
 	null_value=1;