Commit 5dd2881f authored by bar@mysql.com's avatar bar@mysql.com

UTF8 UCA based collations.

parent 2f629a31
...@@ -40,6 +40,24 @@ extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci; ...@@ -40,6 +40,24 @@ extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci; extern CHARSET_INFO my_charset_ucs2_spanish2_uca_ci;
#endif #endif
#ifdef HAVE_CHARSET_utf8
extern CHARSET_INFO my_charset_utf8_general_uca_ci;
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf8_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf8_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf8_polish_uca_ci;
extern CHARSET_INFO my_charset_utf8_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf8_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf8_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf8_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf8_czech_uca_ci;
extern CHARSET_INFO my_charset_utf8_danish_uca_ci;
extern CHARSET_INFO my_charset_utf8_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf8_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf8_spanish2_uca_ci;
#endif
my_bool init_compiled_charsets(myf flags __attribute__((unused))) my_bool init_compiled_charsets(myf flags __attribute__((unused)))
{ {
CHARSET_INFO *cs; CHARSET_INFO *cs;
...@@ -92,7 +110,6 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) ...@@ -92,7 +110,6 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_ucs2_general_ci); add_compiled_collation(&my_charset_ucs2_general_ci);
add_compiled_collation(&my_charset_ucs2_bin); add_compiled_collation(&my_charset_ucs2_bin);
add_compiled_collation(&my_charset_ucs2_general_uca); add_compiled_collation(&my_charset_ucs2_general_uca);
add_compiled_collation(&my_charset_ucs2_general_uca);
add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci); add_compiled_collation(&my_charset_ucs2_icelandic_uca_ci);
add_compiled_collation(&my_charset_ucs2_latvian_uca_ci); add_compiled_collation(&my_charset_ucs2_latvian_uca_ci);
add_compiled_collation(&my_charset_ucs2_romanian_uca_ci); add_compiled_collation(&my_charset_ucs2_romanian_uca_ci);
...@@ -117,6 +134,21 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused))) ...@@ -117,6 +134,21 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8 #ifdef HAVE_CHARSET_utf8
add_compiled_collation(&my_charset_utf8_general_ci); add_compiled_collation(&my_charset_utf8_general_ci);
add_compiled_collation(&my_charset_utf8_bin); add_compiled_collation(&my_charset_utf8_bin);
add_compiled_collation(&my_charset_utf8_general_uca_ci);
add_compiled_collation(&my_charset_utf8_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8_romanian_uca_ci);
add_compiled_collation(&my_charset_utf8_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf8_polish_uca_ci);
add_compiled_collation(&my_charset_utf8_estonian_uca_ci);
add_compiled_collation(&my_charset_utf8_spanish_uca_ci);
add_compiled_collation(&my_charset_utf8_swedish_uca_ci);
add_compiled_collation(&my_charset_utf8_turkish_uca_ci);
add_compiled_collation(&my_charset_utf8_czech_uca_ci);
add_compiled_collation(&my_charset_utf8_danish_uca_ci);
add_compiled_collation(&my_charset_utf8_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf8_slovak_uca_ci);
add_compiled_collation(&my_charset_utf8_spanish2_uca_ci);
#endif #endif
/* Copy compiled charsets */ /* Copy compiled charsets */
......
...@@ -19,14 +19,15 @@ ...@@ -19,14 +19,15 @@
UCA (Unicode Collation Algorithm) support. UCA (Unicode Collation Algorithm) support.
Written by Alexander Barkov <bar@mysql.com> Written by Alexander Barkov <bar@mysql.com>
Currently supports only subset of the full UCA. Currently supports only subset of the full UCA:
- Only Primary level key comparison - Only Primary level key comparison
- Basic Latin letters contraction is implemented
- Variable weighting is done for Non-ignorable option - Variable weighting is done for Non-ignorable option
Features that are not implemented yet:
- No Normalization From D is done - No Normalization From D is done
+ No decomposition is done + No decomposition is done
+ No Thai/Lao orderding is done + No Thai/Lao orderding is done
- No contraction is done
- No combining marks processing is done - No combining marks processing is done
*/ */
...@@ -36,8 +37,6 @@ ...@@ -36,8 +37,6 @@
#include "m_ctype.h" #include "m_ctype.h"
#ifdef HAVE_CHARSET_ucs2
#define MY_UCA_NPAGES 256 #define MY_UCA_NPAGES 256
#define MY_UCA_NCHARS 256 #define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255 #define MY_UCA_CMASK 255
...@@ -6672,9 +6671,24 @@ typedef struct my_uca_scanner_st ...@@ -6672,9 +6671,24 @@ typedef struct my_uca_scanner_st
uint16 implicit[2]; uint16 implicit[2];
int page; int page;
int code; int code;
CHARSET_INFO *cs;
} my_uca_scanner; } my_uca_scanner;
/*
Charset dependent scanner part, to optimize
some character sets.
*/
typedef struct my_uca_scanner_handler_st
{
void (*init)(my_uca_scanner *scanner, CHARSET_INFO *cs,
const uchar *str, uint length);
int (*next)(my_uca_scanner *scanner);
} my_uca_scanner_handler;
static uint16 nochar[]= {0};
#ifdef HAVE_CHARSET_ucs2
/* /*
Initialize collation weight scanner Initialize collation weight scanner
...@@ -6686,15 +6700,15 @@ typedef struct my_uca_scanner_st ...@@ -6686,15 +6700,15 @@ typedef struct my_uca_scanner_st
length Length of the string. length Length of the string.
NOTES: NOTES:
Optimized for UCS2
RETURN RETURN
N/A N/A
*/ */
static uint16 nochar[]= {0}; static void my_uca_scanner_init_ucs2(my_uca_scanner *scanner,
static void my_uca_scanner_init(my_uca_scanner *scanner, CHARSET_INFO *cs __attribute__((unused)),
CHARSET_INFO *cs __attribute__((unused)), const uchar *str, uint length)
const uchar *str, uint length)
{ {
/* Note, no needs to initialize scanner->wbeg */ /* Note, no needs to initialize scanner->wbeg */
scanner->sbeg= str; scanner->sbeg= str;
...@@ -6715,6 +6729,8 @@ static void my_uca_scanner_init(my_uca_scanner *scanner, ...@@ -6715,6 +6729,8 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
scanner Address of a previously initialized scanner strucuture scanner Address of a previously initialized scanner strucuture
NOTES: NOTES:
Optimized for UCS2
Checks if the current character's weight string has been fully scanned, Checks if the current character's weight string has been fully scanned,
if no, then returns the next weight for this character, if no, then returns the next weight for this character,
else scans the next character and returns its first weight. else scans the next character and returns its first weight.
...@@ -6745,7 +6761,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner, ...@@ -6745,7 +6761,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
Or -1 on error (END-OF-STRING or ILLEGAL MULTIBYTE SEQUENCE) Or -1 on error (END-OF-STRING or ILLEGAL MULTIBYTE SEQUENCE)
*/ */
static int my_uca_scanner_next(my_uca_scanner *scanner) static int my_uca_scanner_next_ucs2(my_uca_scanner *scanner)
{ {
/* /*
...@@ -6811,6 +6827,111 @@ implicit: ...@@ -6811,6 +6827,111 @@ implicit:
return scanner->page; return scanner->page;
} }
static my_uca_scanner_handler my_ucs2_uca_scanner_handler=
{
my_uca_scanner_init_ucs2,
my_uca_scanner_next_ucs2
};
#endif
/*
The same two functions for any character set
*/
static void my_uca_scanner_init_any(my_uca_scanner *scanner,
CHARSET_INFO *cs __attribute__((unused)),
const uchar *str, uint length)
{
/* Note, no needs to initialize scanner->wbeg */
scanner->sbeg= str;
scanner->send= str + length;
scanner->wbeg= nochar;
scanner->uca_length= cs->sort_order;
scanner->uca_weight= cs->sort_order_big;
scanner->contractions= cs->contractions;
scanner->cs= cs;
}
static int my_uca_scanner_next_any(my_uca_scanner *scanner)
{
/*
Check if the weights for the previous character have been
already fully scanned. If yes, then get the next character and
initialize wbeg and wlength to its weight string.
*/
if (scanner->wbeg[0])
return *scanner->wbeg++;
do
{
uint16 **ucaw= scanner->uca_weight;
uchar *ucal= scanner->uca_length;
my_wc_t wc;
int mblen;
if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
scanner->sbeg, scanner->send)) < 0))
return -1;
scanner->page= wc >> 8;
scanner->code= wc & 0xFF;
scanner->sbeg+= mblen;
if (scanner->contractions && !scanner->page &&
(scanner->code > 0x40) && (scanner->code < 0x80))
{
uint page1, code1, cweight;
if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
scanner->sbeg,
scanner->send)) >=0) &&
(!(page1= (wc >> 8))) &&
((code1= wc & 0xFF) > 0x40) &&
(code1 < 0x80) &&
(cweight= scanner->contractions[(scanner->code-0x40)*0x40+scanner->sbeg[1]-0x40]))
{
scanner->implicit[0]= 0;
scanner->wbeg= scanner->implicit;
scanner->sbeg+= mblen;
return cweight;
}
}
if (!ucaw[scanner->page])
goto implicit;
scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page];
} while (!scanner->wbeg[0]);
return *scanner->wbeg++;
implicit:
scanner->code= (scanner->page << 8) + scanner->code;
scanner->implicit[0]= (scanner->code & 0x7FFF) | 0x8000;
scanner->implicit[1]= 0;
scanner->wbeg= scanner->implicit;
scanner->page= scanner->page >> 7;
if (scanner->code >= 0x3400 && scanner->code <= 0x4DB5)
scanner->page+= 0xFB80;
else if (scanner->code >= 0x4E00 && scanner->code <= 0x9FA5)
scanner->page+= 0xFB40;
else
scanner->page+= 0xFBC0;
return scanner->page;
}
static my_uca_scanner_handler my_any_uca_scanner_handler=
{
my_uca_scanner_init_any,
my_uca_scanner_next_any
};
/* /*
Compares two strings according to the collation Compares two strings according to the collation
...@@ -6854,6 +6975,7 @@ implicit: ...@@ -6854,6 +6975,7 @@ implicit:
*/ */
static int my_strnncoll_uca(CHARSET_INFO *cs, static int my_strnncoll_uca(CHARSET_INFO *cs,
my_uca_scanner_handler *scanner_handler,
const uchar *s, uint slen, const uchar *s, uint slen,
const uchar *t, uint tlen, const uchar *t, uint tlen,
my_bool t_is_prefix) my_bool t_is_prefix)
...@@ -6863,20 +6985,18 @@ static int my_strnncoll_uca(CHARSET_INFO *cs, ...@@ -6863,20 +6985,18 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
int s_res; int s_res;
int t_res; int t_res;
my_uca_scanner_init(&sscanner, cs, s, slen); scanner_handler->init(&sscanner, cs, s, slen);
my_uca_scanner_init(&tscanner, cs, t, tlen); scanner_handler->init(&tscanner, cs, t, tlen);
do do
{ {
s_res= my_uca_scanner_next(&sscanner); s_res= scanner_handler->next(&sscanner);
t_res= my_uca_scanner_next(&tscanner); t_res= scanner_handler->next(&tscanner);
} while ( s_res == t_res && s_res >0); } while ( s_res == t_res && s_res >0);
return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res); return (t_is_prefix && t_res < 0) ? 0 : (s_res - t_res);
} }
/* /*
Compares two strings according to the collation, Compares two strings according to the collation,
ignoring trailing spaces. ignoring trailing spaces.
...@@ -6901,8 +7021,9 @@ static int my_strnncoll_uca(CHARSET_INFO *cs, ...@@ -6901,8 +7021,9 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
*/ */
static int my_strnncollsp_uca(CHARSET_INFO *cs, static int my_strnncollsp_uca(CHARSET_INFO *cs,
const uchar *s, uint slen, my_uca_scanner_handler *scanner_handler,
const uchar *t, uint tlen) const uchar *s, uint slen,
const uchar *t, uint tlen)
{ {
my_uca_scanner sscanner; my_uca_scanner sscanner;
my_uca_scanner tscanner; my_uca_scanner tscanner;
...@@ -6912,19 +7033,18 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, ...@@ -6912,19 +7033,18 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
slen= cs->cset->lengthsp(cs, (char*) s, slen); slen= cs->cset->lengthsp(cs, (char*) s, slen);
tlen= cs->cset->lengthsp(cs, (char*) t, tlen); tlen= cs->cset->lengthsp(cs, (char*) t, tlen);
my_uca_scanner_init(&sscanner, cs, s, slen); scanner_handler->init(&sscanner, cs, s, slen);
my_uca_scanner_init(&tscanner, cs, t, tlen); scanner_handler->init(&tscanner, cs, t, tlen);
do do
{ {
s_res= my_uca_scanner_next(&sscanner); s_res= scanner_handler->next(&sscanner);
t_res= my_uca_scanner_next(&tscanner); t_res= scanner_handler->next(&tscanner);
} while ( s_res == t_res && s_res >0); } while ( s_res == t_res && s_res >0);
return ( s_res - t_res ); return ( s_res - t_res );
} }
/* /*
Calculates hash value for the given string, Calculates hash value for the given string,
according to the collation, and ignoring trailing spaces. according to the collation, and ignoring trailing spaces.
...@@ -6949,6 +7069,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs, ...@@ -6949,6 +7069,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
*/ */
static void my_hash_sort_uca(CHARSET_INFO *cs, static void my_hash_sort_uca(CHARSET_INFO *cs,
my_uca_scanner_handler *scanner_handler,
const uchar *s, uint slen, const uchar *s, uint slen,
ulong *n1, ulong *n2) ulong *n1, ulong *n2)
{ {
...@@ -6956,9 +7077,9 @@ static void my_hash_sort_uca(CHARSET_INFO *cs, ...@@ -6956,9 +7077,9 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
my_uca_scanner scanner; my_uca_scanner scanner;
slen= cs->cset->lengthsp(cs, (char*) s, slen); slen= cs->cset->lengthsp(cs, (char*) s, slen);
my_uca_scanner_init(&scanner, cs, s, slen); scanner_handler->init(&scanner, cs, s, slen);
while ((s_res= my_uca_scanner_next(&scanner)) >0) while ((s_res= scanner_handler->next(&scanner)) >0)
{ {
n1[0]^= (((n1[0] & 63)+n2[0])*(s_res >> 8))+ (n1[0] << 8); n1[0]^= (((n1[0] & 63)+n2[0])*(s_res >> 8))+ (n1[0] << 8);
n2[0]+=3; n2[0]+=3;
...@@ -7000,16 +7121,17 @@ static void my_hash_sort_uca(CHARSET_INFO *cs, ...@@ -7000,16 +7121,17 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
*/ */
static int my_strnxfrm_uca(CHARSET_INFO *cs, static int my_strnxfrm_uca(CHARSET_INFO *cs,
uchar *dst, uint dstlen, my_uca_scanner_handler *scanner_handler,
const uchar *src, uint srclen) uchar *dst, uint dstlen,
const uchar *src, uint srclen)
{ {
uchar *de = dst + dstlen; uchar *de = dst + dstlen;
const uchar *dst_orig = dst; const uchar *dst_orig = dst;
int s_res; int s_res;
my_uca_scanner scanner; my_uca_scanner scanner;
my_uca_scanner_init(&scanner, cs, src, srclen); scanner_handler->init(&scanner, cs, src, srclen);
while (dst < de && (s_res= my_uca_scanner_next(&scanner)) >0) while (dst < de && (s_res= scanner_handler->next(&scanner)) >0)
{ {
dst[0]= s_res >> 8; dst[0]= s_res >> 8;
dst[1]= s_res & 0xFF; dst[1]= s_res & 0xFF;
...@@ -7018,6 +7140,8 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs, ...@@ -7018,6 +7140,8 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs,
return dst - dst_orig; return dst - dst_orig;
} }
/* /*
This function compares if two characters are the same. This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only The sign +1 or -1 does not matter. The only
...@@ -7572,7 +7696,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) ...@@ -7572,7 +7696,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE, if ((rc= my_coll_rule_parse(rule, MY_MAX_COLL_RULE,
cs->tailoring, cs->tailoring,
cs->tailoring + strlen(cs->tailoring), cs->tailoring + strlen(cs->tailoring),
errstr, sizeof(errstr))) <= 0) errstr, sizeof(errstr))) < 0)
{ {
/* /*
TODO: add error message reporting. TODO: add error message reporting.
...@@ -7694,30 +7818,106 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint)) ...@@ -7694,30 +7818,106 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
return 0; return 0;
} }
/*
Universal CHARSET_INFO compatible wrappers
for the above internal functions.
Should work for any character set.
*/
static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint)) static my_bool my_coll_init_uca(CHARSET_INFO *cs, void *(*alloc)(uint))
{ {
return create_tailoring(cs, alloc); return create_tailoring(cs, alloc);
} }
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
return my_strnncoll_uca(cs, &my_any_uca_scanner_handler,
s, slen, t, tlen, t_is_prefix);
}
static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncollsp_uca(cs, &my_any_uca_scanner_handler,
s, slen, t, tlen);
}
static void my_hash_sort_any_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
ulong *n1, ulong *n2)
{
return my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
}
static int my_strnxfrm_any_uca(CHARSET_INFO *cs,
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
{
return my_strnxfrm_uca(cs, &my_any_uca_scanner_handler,
dst, dstlen, src, srclen);
}
#ifdef HAVE_CHARSET_ucs2
/*
UCS2 optimized CHARSET_INFO compatible wrappers.
*/
static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen,
my_bool t_is_prefix)
{
return my_strnncoll_uca(cs, &my_ucs2_uca_scanner_handler,
s, slen, t, tlen, t_is_prefix);
}
static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
const uchar *t, uint tlen)
{
return my_strnncollsp_uca(cs, &my_ucs2_uca_scanner_handler,
s, slen, t, tlen);
}
static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
const uchar *s, uint slen,
ulong *n1, ulong *n2)
{
return my_hash_sort_uca(cs, &my_ucs2_uca_scanner_handler, s, slen, n1, n2);
}
static int my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
uchar *dst, uint dstlen,
const uchar *src, uint srclen)
{
return my_strnxfrm_uca(cs, &my_ucs2_uca_scanner_handler,
dst, dstlen, src, srclen);
}
MY_COLLATION_HANDLER my_collation_ucs2_uca_handler = MY_COLLATION_HANDLER my_collation_ucs2_uca_handler =
{ {
my_coll_init_uca, /* init */ my_coll_init_uca, /* init */
my_strnncoll_uca, my_strnncoll_ucs2_uca,
my_strnncollsp_uca, my_strnncollsp_ucs2_uca,
my_strnxfrm_uca, my_strnxfrm_ucs2_uca,
my_like_range_simple, my_like_range_simple,
my_wildcmp_uca, my_wildcmp_uca,
NULL, NULL,
my_instr_mb, my_instr_mb,
my_hash_sort_uca my_hash_sort_ucs2_uca
}; };
CHARSET_INFO my_charset_ucs2_general_uca= CHARSET_INFO my_charset_ucs2_general_uca=
{ {
45,0,0, /* number */ 128,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_uca_ci", /* name */ "ucs2_unicode_ci", /* name */
"", /* comment */ "", /* comment */
NULL, /* tailoring */ NULL, /* tailoring */
NULL, /* ctype */ NULL, /* ctype */
...@@ -7739,10 +7939,9 @@ CHARSET_INFO my_charset_ucs2_general_uca= ...@@ -7739,10 +7939,9 @@ CHARSET_INFO my_charset_ucs2_general_uca=
&my_collation_ucs2_uca_handler &my_collation_ucs2_uca_handler
}; };
CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
{ {
128,0,0, /* number */ 129,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_icelandic_ci",/* name */ "ucs2_icelandic_ci",/* name */
...@@ -7769,7 +7968,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci= ...@@ -7769,7 +7968,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
CHARSET_INFO my_charset_ucs2_latvian_uca_ci= CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
{ {
129,0,0, /* number */ 130,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_latvian_ci", /* name */ "ucs2_latvian_ci", /* name */
...@@ -7796,7 +7995,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci= ...@@ -7796,7 +7995,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
CHARSET_INFO my_charset_ucs2_romanian_uca_ci= CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
{ {
130,0,0, /* number */ 131,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_romanian_ci", /* name */ "ucs2_romanian_ci", /* name */
...@@ -7823,7 +8022,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci= ...@@ -7823,7 +8022,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
{ {
131,0,0, /* number */ 132,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_slovenian_ci",/* name */ "ucs2_slovenian_ci",/* name */
...@@ -7850,7 +8049,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci= ...@@ -7850,7 +8049,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
CHARSET_INFO my_charset_ucs2_polish_uca_ci= CHARSET_INFO my_charset_ucs2_polish_uca_ci=
{ {
132,0,0, /* number */ 133,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_polish_ci", /* name */ "ucs2_polish_ci", /* name */
...@@ -7877,7 +8076,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci= ...@@ -7877,7 +8076,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci=
CHARSET_INFO my_charset_ucs2_estonian_uca_ci= CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
{ {
133,0,0, /* number */ 134,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_estonian_ci", /* name */ "ucs2_estonian_ci", /* name */
...@@ -7904,7 +8103,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci= ...@@ -7904,7 +8103,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
CHARSET_INFO my_charset_ucs2_spanish_uca_ci= CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
{ {
134,0,0, /* number */ 135,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_spanish_ci", /* name */ "ucs2_spanish_ci", /* name */
...@@ -7931,7 +8130,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci= ...@@ -7931,7 +8130,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
CHARSET_INFO my_charset_ucs2_swedish_uca_ci= CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
{ {
135,0,0, /* number */ 136,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_swedish_ci", /* name */ "ucs2_swedish_ci", /* name */
...@@ -7958,7 +8157,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci= ...@@ -7958,7 +8157,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
CHARSET_INFO my_charset_ucs2_turkish_uca_ci= CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
{ {
136,0,0, /* number */ 137,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_turkish_ci", /* name */ "ucs2_turkish_ci", /* name */
...@@ -7985,7 +8184,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci= ...@@ -7985,7 +8184,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
CHARSET_INFO my_charset_ucs2_czech_uca_ci= CHARSET_INFO my_charset_ucs2_czech_uca_ci=
{ {
137,0,0, /* number */ 138,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_czech_ci", /* name */ "ucs2_czech_ci", /* name */
...@@ -8013,7 +8212,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci= ...@@ -8013,7 +8212,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci=
CHARSET_INFO my_charset_ucs2_danish_uca_ci= CHARSET_INFO my_charset_ucs2_danish_uca_ci=
{ {
138,0,0, /* number */ 139,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_danish_ci", /* name */ "ucs2_danish_ci", /* name */
...@@ -8040,7 +8239,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci= ...@@ -8040,7 +8239,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci=
CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
{ {
139,0,0, /* number */ 140,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_lithuanian_ci",/* name */ "ucs2_lithuanian_ci",/* name */
...@@ -8067,7 +8266,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci= ...@@ -8067,7 +8266,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
CHARSET_INFO my_charset_ucs2_slovak_uca_ci= CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
{ {
140,0,0, /* number */ 141,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_slovak_ci", /* name */ "ucs2_slovak_ci", /* name */
...@@ -8094,7 +8293,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci= ...@@ -8094,7 +8293,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
{ {
141,0,0, /* number */ 142,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE, MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* cs name */ "ucs2", /* cs name */
"ucs2_spanish2_ci", /* name */ "ucs2_spanish2_ci", /* name */
...@@ -8120,3 +8319,454 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci= ...@@ -8120,3 +8319,454 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
}; };
#endif #endif
#ifdef HAVE_CHARSET_utf8
MY_COLLATION_HANDLER my_collation_any_uca_handler =
{
my_coll_init_uca, /* init */
my_strnncoll_any_uca,
my_strnncollsp_any_uca,
my_strnxfrm_any_uca,
my_like_range_simple,
my_wildcmp_uca,
NULL,
my_instr_mb,
my_hash_sort_any_uca
};
/*
We consider bytes with code more than 127 as a letter.
This garantees that word boundaries work fine with regular
expressions. Note, there is no need to mark byte 255 as a
letter, it is illegal byte in UTF8.
*/
static uchar ctype_utf8[] = {
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0
};
extern MY_CHARSET_HANDLER my_charset_utf8_handler;
CHARSET_INFO my_charset_utf8_general_uca_ci=
{
192,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_unicode_ci", /* name */
"", /* comment */
"", /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
uca_length, /* sort_order */
NULL, /* contractions */
uca_weight, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
1, /* mbminlen */
3, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_icelandic_uca_ci=
{
193,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_icelandic_ci",/* name */
"", /* comment */
icelandic, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_latvian_uca_ci=
{
194,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_latvian_ci", /* name */
"", /* comment */
latvian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_romanian_uca_ci=
{
195,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_romanian_ci", /* name */
"", /* comment */
romanian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_slovenian_uca_ci=
{
196,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_slovenian_ci",/* name */
"", /* comment */
slovenian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_polish_uca_ci=
{
197,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_polish_ci", /* name */
"", /* comment */
polish, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_estonian_uca_ci=
{
198,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_estonian_ci", /* name */
"", /* comment */
estonian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_spanish_uca_ci=
{
199,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_spanish_ci", /* name */
"", /* comment */
spanish, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_swedish_uca_ci=
{
200,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_swedish_ci", /* name */
"", /* comment */
swedish, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_turkish_uca_ci=
{
201,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_turkish_ci", /* name */
"", /* comment */
turkish, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_czech_uca_ci=
{
202,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_czech_ci", /* name */
"", /* comment */
czech, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_danish_uca_ci=
{
203,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_danish_ci", /* name */
"", /* comment */
danish, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_lithuanian_uca_ci=
{
204,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_lithuanian_ci",/* name */
"", /* comment */
lithuanian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_slovak_uca_ci=
{
205,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_slovak_ci", /* name */
"", /* comment */
lithuanian, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_spanish2_uca_ci=
{
206,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_spanish2_ci", /* name */
"", /* comment */
spanish2, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
#endif
...@@ -2057,7 +2057,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler = ...@@ -2057,7 +2057,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_hash_sort_utf8 my_hash_sort_utf8
}; };
static MY_CHARSET_HANDLER my_charset_handler= MY_CHARSET_HANDLER my_charset_utf8_handler=
{ {
NULL, /* init */ NULL, /* init */
my_ismbchar_utf8, my_ismbchar_utf8,
...@@ -2109,7 +2109,7 @@ CHARSET_INFO my_charset_utf8_general_ci= ...@@ -2109,7 +2109,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, /* min_sort_char */ 0, /* min_sort_char */
255, /* max_sort_char */ 255, /* max_sort_char */
&my_charset_handler, &my_charset_utf8_handler,
&my_collation_ci_handler &my_collation_ci_handler
}; };
...@@ -2137,13 +2137,12 @@ CHARSET_INFO my_charset_utf8_bin= ...@@ -2137,13 +2137,12 @@ CHARSET_INFO my_charset_utf8_bin=
3, /* mbmaxlen */ 3, /* mbmaxlen */
0, /* min_sort_char */ 0, /* min_sort_char */
255, /* max_sort_char */ 255, /* max_sort_char */
&my_charset_handler, &my_charset_utf8_handler,
&my_collation_mb_bin_handler &my_collation_mb_bin_handler
}; };
#ifdef MY_TEST_UTF8 #ifdef MY_TEST_UTF8
#include <stdio.h> #include <stdio.h>
static void test_mb(CHARSET_INFO *cs, uchar *s) static void test_mb(CHARSET_INFO *cs, uchar *s)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment