Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
5dd2881f
Commit
5dd2881f
authored
Jun 14, 2004
by
bar@mysql.com
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
UTF8 UCA based collations.
parent
2f629a31
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
737 additions
and
56 deletions
+737
-56
mysys/charset-def.c
mysys/charset-def.c
+33
-1
strings/ctype-uca.c
strings/ctype-uca.c
+701
-51
strings/ctype-utf8.c
strings/ctype-utf8.c
+3
-4
No files found.
mysys/charset-def.c
View file @
5dd2881f
...
@@ -40,6 +40,24 @@ extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
...
@@ -40,6 +40,24 @@ extern CHARSET_INFO my_charset_ucs2_slovak_uca_ci;
extern
CHARSET_INFO
my_charset_ucs2_spanish2_uca_ci
;
extern
CHARSET_INFO
my_charset_ucs2_spanish2_uca_ci
;
#endif
#endif
#ifdef HAVE_CHARSET_utf8
extern
CHARSET_INFO
my_charset_utf8_general_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_icelandic_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_latvian_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_romanian_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_slovenian_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_polish_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_estonian_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_spanish_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_swedish_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_turkish_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_czech_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_danish_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_lithuanian_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_slovak_uca_ci
;
extern
CHARSET_INFO
my_charset_utf8_spanish2_uca_ci
;
#endif
my_bool
init_compiled_charsets
(
myf
flags
__attribute__
((
unused
)))
my_bool
init_compiled_charsets
(
myf
flags
__attribute__
((
unused
)))
{
{
CHARSET_INFO
*
cs
;
CHARSET_INFO
*
cs
;
...
@@ -92,7 +110,6 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
...
@@ -92,7 +110,6 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation
(
&
my_charset_ucs2_general_ci
);
add_compiled_collation
(
&
my_charset_ucs2_general_ci
);
add_compiled_collation
(
&
my_charset_ucs2_bin
);
add_compiled_collation
(
&
my_charset_ucs2_bin
);
add_compiled_collation
(
&
my_charset_ucs2_general_uca
);
add_compiled_collation
(
&
my_charset_ucs2_general_uca
);
add_compiled_collation
(
&
my_charset_ucs2_general_uca
);
add_compiled_collation
(
&
my_charset_ucs2_icelandic_uca_ci
);
add_compiled_collation
(
&
my_charset_ucs2_icelandic_uca_ci
);
add_compiled_collation
(
&
my_charset_ucs2_latvian_uca_ci
);
add_compiled_collation
(
&
my_charset_ucs2_latvian_uca_ci
);
add_compiled_collation
(
&
my_charset_ucs2_romanian_uca_ci
);
add_compiled_collation
(
&
my_charset_ucs2_romanian_uca_ci
);
...
@@ -117,6 +134,21 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
...
@@ -117,6 +134,21 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
#ifdef HAVE_CHARSET_utf8
#ifdef HAVE_CHARSET_utf8
add_compiled_collation
(
&
my_charset_utf8_general_ci
);
add_compiled_collation
(
&
my_charset_utf8_general_ci
);
add_compiled_collation
(
&
my_charset_utf8_bin
);
add_compiled_collation
(
&
my_charset_utf8_bin
);
add_compiled_collation
(
&
my_charset_utf8_general_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_icelandic_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_latvian_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_romanian_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_slovenian_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_polish_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_estonian_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_spanish_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_swedish_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_turkish_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_czech_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_danish_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_lithuanian_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_slovak_uca_ci
);
add_compiled_collation
(
&
my_charset_utf8_spanish2_uca_ci
);
#endif
#endif
/* Copy compiled charsets */
/* Copy compiled charsets */
...
...
strings/ctype-uca.c
View file @
5dd2881f
...
@@ -19,14 +19,15 @@
...
@@ -19,14 +19,15 @@
UCA (Unicode Collation Algorithm) support.
UCA (Unicode Collation Algorithm) support.
Written by Alexander Barkov <bar@mysql.com>
Written by Alexander Barkov <bar@mysql.com>
Currently supports only subset of the full UCA.
Currently supports only subset of the full UCA:
- Only Primary level key comparison
- Only Primary level key comparison
- Basic Latin letters contraction is implemented
- Variable weighting is done for Non-ignorable option
- Variable weighting is done for Non-ignorable option
Features that are not implemented yet:
- No Normalization Form D is done
- No Normalization Form D is done
+ No decomposition is done
+ No decomposition is done
+ No Thai/Lao ordering is done
+ No Thai/Lao ordering is done
- No contraction is done
- No combining marks processing is done
- No combining marks processing is done
*/
*/
...
@@ -36,8 +37,6 @@
...
@@ -36,8 +37,6 @@
#include "m_ctype.h"
#include "m_ctype.h"
#ifdef HAVE_CHARSET_ucs2
#define MY_UCA_NPAGES 256
#define MY_UCA_NPAGES 256
#define MY_UCA_NCHARS 256
#define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255
#define MY_UCA_CMASK 255
...
@@ -6672,9 +6671,24 @@ typedef struct my_uca_scanner_st
...
@@ -6672,9 +6671,24 @@ typedef struct my_uca_scanner_st
uint16
implicit
[
2
];
uint16
implicit
[
2
];
int
page
;
int
page
;
int
code
;
int
code
;
CHARSET_INFO
*
cs
;
}
my_uca_scanner
;
}
my_uca_scanner
;
/*
  Character-set dependent half of the weight scanner.

  Bundles the two entry points that the collation routines call
  through, so that a character set can supply its own optimized pair.
*/
typedef struct my_uca_scanner_handler_st
{
  /* Prepare "sc" for walking "len" bytes that start at "s". */
  void (*init)(my_uca_scanner *sc, CHARSET_INFO *cs,
               const uchar *s, uint len);
  /* Fetch the next value from "sc". */
  int (*next)(my_uca_scanner *sc);
} my_uca_scanner_handler;
/*
  Empty weight string: nothing but the terminating zero.
  A scanner whose wbeg points here has no pending weights.
*/
static uint16 nochar[1]= { 0 };
#ifdef HAVE_CHARSET_ucs2
/*
/*
Initialize collation weight scanner
Initialize collation weight scanner
...
@@ -6686,13 +6700,13 @@ typedef struct my_uca_scanner_st
...
@@ -6686,13 +6700,13 @@ typedef struct my_uca_scanner_st
length Length of the string.
length Length of the string.
NOTES:
NOTES:
Optimized for UCS2
RETURN
RETURN
N/A
N/A
*/
*/
static
uint16
nochar
[]
=
{
0
};
static
void
my_uca_scanner_init_ucs2
(
my_uca_scanner
*
scanner
,
static
void
my_uca_scanner_init
(
my_uca_scanner
*
scanner
,
CHARSET_INFO
*
cs
__attribute__
((
unused
)),
CHARSET_INFO
*
cs
__attribute__
((
unused
)),
const
uchar
*
str
,
uint
length
)
const
uchar
*
str
,
uint
length
)
{
{
...
@@ -6715,6 +6729,8 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
...
@@ -6715,6 +6729,8 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
scanner Address of a previously initialized scanner structure
scanner Address of a previously initialized scanner structure
NOTES:
NOTES:
Optimized for UCS2
Checks if the current character's weight string has been fully scanned,
Checks if the current character's weight string has been fully scanned,
if no, then returns the next weight for this character,
if no, then returns the next weight for this character,
else scans the next character and returns its first weight.
else scans the next character and returns its first weight.
...
@@ -6745,7 +6761,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
...
@@ -6745,7 +6761,7 @@ static void my_uca_scanner_init(my_uca_scanner *scanner,
Or -1 on error (END-OF-STRING or ILLEGAL MULTIBYTE SEQUENCE)
Or -1 on error (END-OF-STRING or ILLEGAL MULTIBYTE SEQUENCE)
*/
*/
static
int
my_uca_scanner_next
(
my_uca_scanner
*
scanner
)
static
int
my_uca_scanner_next
_ucs2
(
my_uca_scanner
*
scanner
)
{
{
/*
/*
...
@@ -6811,6 +6827,111 @@ implicit:
...
@@ -6811,6 +6827,111 @@ implicit:
return
scanner
->
page
;
return
scanner
->
page
;
}
}
/* Scanner entry points specialised for UCS2 input. */
static my_uca_scanner_handler my_ucs2_uca_scanner_handler=
{
  my_uca_scanner_init_ucs2,     /* init */
  my_uca_scanner_next_ucs2      /* next */
};
#endif
/*
The same two functions for any character set
*/
/*
  Initialize a collation weight scanner for an arbitrary character set.

  SYNOPSIS
    my_uca_scanner_init_any()
    scanner   Scanner structure to fill in
    cs        Character set / collation. Supplies the weight length
              table (sort_order), the weight pages (sort_order_big) and
              the contraction table, and is remembered in the scanner.
    str       Beginning of the string to scan
    length    Length of the string, in bytes

  NOTES
    wbeg is pointed at the empty weight string, so the scanner starts
    with no pending weights.

  RETURN
    N/A
*/

static void my_uca_scanner_init_any(my_uca_scanner *scanner,
                                    CHARSET_INFO *cs,
                                    const uchar *str, uint length)
{
  scanner->sbeg= str;
  scanner->send= str + length;
  scanner->wbeg= nochar;
  scanner->uca_length= cs->sort_order;
  scanner->uca_weight= cs->sort_order_big;
  scanner->contractions= cs->contractions;
  scanner->cs= cs;
}
/*
  Read the next collation weight, for an arbitrary character set.

  SYNOPSIS
    my_uca_scanner_next_any()
    scanner   Address of a previously initialized scanner structure

  NOTES
    If the weight string of the current character has not been fully
    consumed, its next weight is returned. Otherwise the next character
    is decoded with the character set's mb_wc() and its first weight is
    returned. Characters whose weight string is empty are skipped.

    When the collation has a contraction table and both the current and
    the following character lie in the range 0x41..0x7F, the pair is
    looked up in that table; a non-zero entry is returned as the weight
    of the pair and both characters are consumed.

    Characters on a page that has no weight table get two implicit
    weights computed from the code point.

  RETURN
    The next weight,
    or -1 when no further character can be decoded.
*/

static int my_uca_scanner_next_any(my_uca_scanner *scanner)
{
  /* Weights of the previous character are still pending. */
  if (scanner->wbeg[0])
    return *scanner->wbeg++;

  do
  {
    uint16 **ucaw= scanner->uca_weight;
    uchar *ucal= scanner->uca_length;
    my_wc_t wc;
    int mblen;

    /*
      A result of 0 is refused as well as negative ones: otherwise "wc"
      would be used without having been set and sbeg would not advance.
      NOTE(review): mb_wc() is defined outside this view; assumes it
      never reports 0 for a successfully decoded character - confirm.
    */
    if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
                                          scanner->sbeg,
                                          scanner->send)) <= 0))
      return -1;

    scanner->page= wc >> 8;
    scanner->code= wc & 0xFF;
    scanner->sbeg+= mblen;

    if (scanner->contractions && !scanner->page &&
        (scanner->code > 0x40) && (scanner->code < 0x80))
    {
      uint page1, code1, cweight;

      /*
        Peek at the following character. The table is indexed with its
        decoded code (code1), not with a raw byte of the input: raw
        bytes only coincide with the code in UCS2, and reading sbeg[1]
        could step past the end of the string.
      */
      if (((mblen= scanner->cs->cset->mb_wc(scanner->cs, &wc,
                                            scanner->sbeg,
                                            scanner->send)) > 0) &&
          (!(page1= (wc >> 8))) &&
          ((code1= wc & 0xFF) > 0x40) &&
          (code1 < 0x80) &&
          (cweight= scanner->contractions[(scanner->code - 0x40) * 0x40 +
                                          code1 - 0x40]))
      {
        /* The pair has a single weight; leave nothing pending. */
        scanner->implicit[0]= 0;
        scanner->wbeg= scanner->implicit;
        scanner->sbeg+= mblen;
        return cweight;
      }
    }

    if (!ucaw[scanner->page])
      goto implicit;
    scanner->wbeg= ucaw[scanner->page] + scanner->code * ucal[scanner->page];
  } while (!scanner->wbeg[0]);

  return *scanner->wbeg++;

implicit:
  /*
    No weight table for this page. Rebuild the full code point, queue
    the second implicit weight in implicit[] and return the first one.
    NOTE(review): the ranges and the 0xFB80/0xFB40/0xFBC0 bases look
    like the implicit weight derivation of the UCA specification -
    confirm against UTS #10.
  */
  scanner->code= (scanner->page << 8) + scanner->code;
  scanner->implicit[0]= (scanner->code & 0x7FFF) | 0x8000;
  scanner->implicit[1]= 0;
  scanner->wbeg= scanner->implicit;

  scanner->page= scanner->page >> 7;

  if (scanner->code >= 0x3400 && scanner->code <= 0x4DB5)
    scanner->page+= 0xFB80;
  else if (scanner->code >= 0x4E00 && scanner->code <= 0x9FA5)
    scanner->page+= 0xFB40;
  else
    scanner->page+= 0xFBC0;

  return scanner->page;
}
static
my_uca_scanner_handler
my_any_uca_scanner_handler
=
{
my_uca_scanner_init_any
,
my_uca_scanner_next_any
};
/*
/*
Compares two strings according to the collation
Compares two strings according to the collation
...
@@ -6854,6 +6975,7 @@ implicit:
...
@@ -6854,6 +6975,7 @@ implicit:
*/
*/
static
int
my_strnncoll_uca
(
CHARSET_INFO
*
cs
,
static
int
my_strnncoll_uca
(
CHARSET_INFO
*
cs
,
my_uca_scanner_handler
*
scanner_handler
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
t
,
uint
tlen
,
const
uchar
*
t
,
uint
tlen
,
my_bool
t_is_prefix
)
my_bool
t_is_prefix
)
...
@@ -6863,20 +6985,18 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
...
@@ -6863,20 +6985,18 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
int
s_res
;
int
s_res
;
int
t_res
;
int
t_res
;
my_uca_scanner_
init
(
&
sscanner
,
cs
,
s
,
slen
);
scanner_handler
->
init
(
&
sscanner
,
cs
,
s
,
slen
);
my_uca_scanner_
init
(
&
tscanner
,
cs
,
t
,
tlen
);
scanner_handler
->
init
(
&
tscanner
,
cs
,
t
,
tlen
);
do
do
{
{
s_res
=
my_uca_scanner_
next
(
&
sscanner
);
s_res
=
scanner_handler
->
next
(
&
sscanner
);
t_res
=
my_uca_scanner_
next
(
&
tscanner
);
t_res
=
scanner_handler
->
next
(
&
tscanner
);
}
while
(
s_res
==
t_res
&&
s_res
>
0
);
}
while
(
s_res
==
t_res
&&
s_res
>
0
);
return
(
t_is_prefix
&&
t_res
<
0
)
?
0
:
(
s_res
-
t_res
);
return
(
t_is_prefix
&&
t_res
<
0
)
?
0
:
(
s_res
-
t_res
);
}
}
/*
/*
Compares two strings according to the collation,
Compares two strings according to the collation,
ignoring trailing spaces.
ignoring trailing spaces.
...
@@ -6901,6 +7021,7 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
...
@@ -6901,6 +7021,7 @@ static int my_strnncoll_uca(CHARSET_INFO *cs,
*/
*/
static
int
my_strnncollsp_uca
(
CHARSET_INFO
*
cs
,
static
int
my_strnncollsp_uca
(
CHARSET_INFO
*
cs
,
my_uca_scanner_handler
*
scanner_handler
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
t
,
uint
tlen
)
const
uchar
*
t
,
uint
tlen
)
{
{
...
@@ -6912,19 +7033,18 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
...
@@ -6912,19 +7033,18 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
slen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
s
,
slen
);
slen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
s
,
slen
);
tlen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
t
,
tlen
);
tlen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
t
,
tlen
);
my_uca_scanner_
init
(
&
sscanner
,
cs
,
s
,
slen
);
scanner_handler
->
init
(
&
sscanner
,
cs
,
s
,
slen
);
my_uca_scanner_
init
(
&
tscanner
,
cs
,
t
,
tlen
);
scanner_handler
->
init
(
&
tscanner
,
cs
,
t
,
tlen
);
do
do
{
{
s_res
=
my_uca_scanner_
next
(
&
sscanner
);
s_res
=
scanner_handler
->
next
(
&
sscanner
);
t_res
=
my_uca_scanner_
next
(
&
tscanner
);
t_res
=
scanner_handler
->
next
(
&
tscanner
);
}
while
(
s_res
==
t_res
&&
s_res
>
0
);
}
while
(
s_res
==
t_res
&&
s_res
>
0
);
return
(
s_res
-
t_res
);
return
(
s_res
-
t_res
);
}
}
/*
/*
Calculates hash value for the given string,
Calculates hash value for the given string,
according to the collation, and ignoring trailing spaces.
according to the collation, and ignoring trailing spaces.
...
@@ -6949,6 +7069,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
...
@@ -6949,6 +7069,7 @@ static int my_strnncollsp_uca(CHARSET_INFO *cs,
*/
*/
static
void
my_hash_sort_uca
(
CHARSET_INFO
*
cs
,
static
void
my_hash_sort_uca
(
CHARSET_INFO
*
cs
,
my_uca_scanner_handler
*
scanner_handler
,
const
uchar
*
s
,
uint
slen
,
const
uchar
*
s
,
uint
slen
,
ulong
*
n1
,
ulong
*
n2
)
ulong
*
n1
,
ulong
*
n2
)
{
{
...
@@ -6956,9 +7077,9 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
...
@@ -6956,9 +7077,9 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
my_uca_scanner
scanner
;
my_uca_scanner
scanner
;
slen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
s
,
slen
);
slen
=
cs
->
cset
->
lengthsp
(
cs
,
(
char
*
)
s
,
slen
);
my_uca_scanner_
init
(
&
scanner
,
cs
,
s
,
slen
);
scanner_handler
->
init
(
&
scanner
,
cs
,
s
,
slen
);
while
((
s_res
=
my_uca_scanner_
next
(
&
scanner
))
>
0
)
while
((
s_res
=
scanner_handler
->
next
(
&
scanner
))
>
0
)
{
{
n1
[
0
]
^=
(((
n1
[
0
]
&
63
)
+
n2
[
0
])
*
(
s_res
>>
8
))
+
(
n1
[
0
]
<<
8
);
n1
[
0
]
^=
(((
n1
[
0
]
&
63
)
+
n2
[
0
])
*
(
s_res
>>
8
))
+
(
n1
[
0
]
<<
8
);
n2
[
0
]
+=
3
;
n2
[
0
]
+=
3
;
...
@@ -7000,6 +7121,7 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
...
@@ -7000,6 +7121,7 @@ static void my_hash_sort_uca(CHARSET_INFO *cs,
*/
*/
static
int
my_strnxfrm_uca
(
CHARSET_INFO
*
cs
,
static
int
my_strnxfrm_uca
(
CHARSET_INFO
*
cs
,
my_uca_scanner_handler
*
scanner_handler
,
uchar
*
dst
,
uint
dstlen
,
uchar
*
dst
,
uint
dstlen
,
const
uchar
*
src
,
uint
srclen
)
const
uchar
*
src
,
uint
srclen
)
{
{
...
@@ -7007,9 +7129,9 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs,
...
@@ -7007,9 +7129,9 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs,
const
uchar
*
dst_orig
=
dst
;
const
uchar
*
dst_orig
=
dst
;
int
s_res
;
int
s_res
;
my_uca_scanner
scanner
;
my_uca_scanner
scanner
;
my_uca_scanner_
init
(
&
scanner
,
cs
,
src
,
srclen
);
scanner_handler
->
init
(
&
scanner
,
cs
,
src
,
srclen
);
while
(
dst
<
de
&&
(
s_res
=
my_uca_scanner_
next
(
&
scanner
))
>
0
)
while
(
dst
<
de
&&
(
s_res
=
scanner_handler
->
next
(
&
scanner
))
>
0
)
{
{
dst
[
0
]
=
s_res
>>
8
;
dst
[
0
]
=
s_res
>>
8
;
dst
[
1
]
=
s_res
&
0xFF
;
dst
[
1
]
=
s_res
&
0xFF
;
...
@@ -7018,6 +7140,8 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs,
...
@@ -7018,6 +7140,8 @@ static int my_strnxfrm_uca(CHARSET_INFO *cs,
return
dst
-
dst_orig
;
return
dst
-
dst_orig
;
}
}
/*
/*
This function compares if two characters are the same.
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
The sign +1 or -1 does not matter. The only
...
@@ -7572,7 +7696,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
...
@@ -7572,7 +7696,7 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
if
((
rc
=
my_coll_rule_parse
(
rule
,
MY_MAX_COLL_RULE
,
if
((
rc
=
my_coll_rule_parse
(
rule
,
MY_MAX_COLL_RULE
,
cs
->
tailoring
,
cs
->
tailoring
,
cs
->
tailoring
+
strlen
(
cs
->
tailoring
),
cs
->
tailoring
+
strlen
(
cs
->
tailoring
),
errstr
,
sizeof
(
errstr
)))
<
=
0
)
errstr
,
sizeof
(
errstr
)))
<
0
)
{
{
/*
/*
TODO: add error message reporting.
TODO: add error message reporting.
...
@@ -7694,30 +7818,106 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
...
@@ -7694,30 +7818,106 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(uint))
return
0
;
return
0
;
}
}
/*
Universal CHARSET_INFO compatible wrappers
for the above internal functions.
Should work for any character set.
*/
static
my_bool
my_coll_init_uca
(
CHARSET_INFO
*
cs
,
void
*
(
*
alloc
)(
uint
))
static
my_bool
my_coll_init_uca
(
CHARSET_INFO
*
cs
,
void
*
(
*
alloc
)(
uint
))
{
{
return
create_tailoring
(
cs
,
alloc
);
return
create_tailoring
(
cs
,
alloc
);
}
}
/*
  CHARSET_INFO compatible comparison entry point:
  my_strnncoll_uca() driven by the generic scanner.
*/
static int my_strnncoll_any_uca(CHARSET_INFO *cs,
                                const uchar *s, uint slen,
                                const uchar *t, uint tlen,
                                my_bool t_is_prefix)
{
  my_uca_scanner_handler *handler= &my_any_uca_scanner_handler;
  int result= my_strnncoll_uca(cs, handler, s, slen, t, tlen, t_is_prefix);

  return result;
}
/*
  CHARSET_INFO compatible comparison entry point:
  my_strnncollsp_uca() driven by the generic scanner.
*/
static int my_strnncollsp_any_uca(CHARSET_INFO *cs,
                                  const uchar *s, uint slen,
                                  const uchar *t, uint tlen)
{
  my_uca_scanner_handler *handler= &my_any_uca_scanner_handler;
  int result= my_strnncollsp_uca(cs, handler, s, slen, t, tlen);

  return result;
}
/*
  CHARSET_INFO compatible hash entry point:
  my_hash_sort_uca() driven by the generic scanner.

  n1 and n2 are passed through unchanged to my_hash_sort_uca().
  Nothing is returned; the function is void, so the call is a plain
  statement rather than the operand of "return".
*/
static void my_hash_sort_any_uca(CHARSET_INFO *cs,
                                 const uchar *s, uint slen,
                                 ulong *n1, ulong *n2)
{
  my_hash_sort_uca(cs, &my_any_uca_scanner_handler, s, slen, n1, n2);
}
/*
  CHARSET_INFO compatible transformation entry point:
  my_strnxfrm_uca() driven by the generic scanner.
*/
static int my_strnxfrm_any_uca(CHARSET_INFO *cs,
                               uchar *dst, uint dstlen,
                               const uchar *src, uint srclen)
{
  my_uca_scanner_handler *handler= &my_any_uca_scanner_handler;
  int written= my_strnxfrm_uca(cs, handler, dst, dstlen, src, srclen);

  return written;
}
#ifdef HAVE_CHARSET_ucs2
/*
UCS2 optimized CHARSET_INFO compatible wrappers.
*/
/*
  CHARSET_INFO compatible comparison entry point:
  my_strnncoll_uca() driven by the UCS2 scanner.
*/
static int my_strnncoll_ucs2_uca(CHARSET_INFO *cs,
                                 const uchar *s, uint slen,
                                 const uchar *t, uint tlen,
                                 my_bool t_is_prefix)
{
  my_uca_scanner_handler *handler= &my_ucs2_uca_scanner_handler;
  int result= my_strnncoll_uca(cs, handler, s, slen, t, tlen, t_is_prefix);

  return result;
}
/*
  CHARSET_INFO compatible comparison entry point:
  my_strnncollsp_uca() driven by the UCS2 scanner.
*/
static int my_strnncollsp_ucs2_uca(CHARSET_INFO *cs,
                                   const uchar *s, uint slen,
                                   const uchar *t, uint tlen)
{
  my_uca_scanner_handler *handler= &my_ucs2_uca_scanner_handler;
  int result= my_strnncollsp_uca(cs, handler, s, slen, t, tlen);

  return result;
}
/*
  CHARSET_INFO compatible hash entry point:
  my_hash_sort_uca() driven by the UCS2 scanner.

  n1 and n2 are passed through unchanged to my_hash_sort_uca().
  Nothing is returned; the function is void, so the call is a plain
  statement rather than the operand of "return".
*/
static void my_hash_sort_ucs2_uca(CHARSET_INFO *cs,
                                  const uchar *s, uint slen,
                                  ulong *n1, ulong *n2)
{
  my_hash_sort_uca(cs, &my_ucs2_uca_scanner_handler, s, slen, n1, n2);
}
/*
  CHARSET_INFO compatible transformation entry point:
  my_strnxfrm_uca() driven by the UCS2 scanner.
*/
static int my_strnxfrm_ucs2_uca(CHARSET_INFO *cs,
                                uchar *dst, uint dstlen,
                                const uchar *src, uint srclen)
{
  my_uca_scanner_handler *handler= &my_ucs2_uca_scanner_handler;
  int written= my_strnxfrm_uca(cs, handler, dst, dstlen, src, srclen);

  return written;
}
MY_COLLATION_HANDLER
my_collation_ucs2_uca_handler
=
MY_COLLATION_HANDLER
my_collation_ucs2_uca_handler
=
{
{
my_coll_init_uca
,
/* init */
my_coll_init_uca
,
/* init */
my_strnncoll_uca
,
my_strnncoll_uc
s2_uc
a
,
my_strnncollsp_uca
,
my_strnncollsp_uc
s2_uc
a
,
my_strnxfrm_uca
,
my_strnxfrm_uc
s2_uc
a
,
my_like_range_simple
,
my_like_range_simple
,
my_wildcmp_uca
,
my_wildcmp_uca
,
NULL
,
NULL
,
my_instr_mb
,
my_instr_mb
,
my_hash_sort_uca
my_hash_sort_uc
s2_uc
a
};
};
CHARSET_INFO
my_charset_ucs2_general_uca
=
CHARSET_INFO
my_charset_ucs2_general_uca
=
{
{
45
,
0
,
0
,
/* number */
128
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_u
ca
_ci"
,
/* name */
"ucs2_u
nicode
_ci"
,
/* name */
""
,
/* comment */
""
,
/* comment */
NULL
,
/* tailoring */
NULL
,
/* tailoring */
NULL
,
/* ctype */
NULL
,
/* ctype */
...
@@ -7739,10 +7939,9 @@ CHARSET_INFO my_charset_ucs2_general_uca=
...
@@ -7739,10 +7939,9 @@ CHARSET_INFO my_charset_ucs2_general_uca=
&
my_collation_ucs2_uca_handler
&
my_collation_ucs2_uca_handler
};
};
CHARSET_INFO
my_charset_ucs2_icelandic_uca_ci
=
CHARSET_INFO
my_charset_ucs2_icelandic_uca_ci
=
{
{
12
8
,
0
,
0
,
/* number */
12
9
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_icelandic_ci"
,
/* name */
"ucs2_icelandic_ci"
,
/* name */
...
@@ -7769,7 +7968,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
...
@@ -7769,7 +7968,7 @@ CHARSET_INFO my_charset_ucs2_icelandic_uca_ci=
CHARSET_INFO
my_charset_ucs2_latvian_uca_ci
=
CHARSET_INFO
my_charset_ucs2_latvian_uca_ci
=
{
{
1
29
,
0
,
0
,
/* number */
1
30
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_latvian_ci"
,
/* name */
"ucs2_latvian_ci"
,
/* name */
...
@@ -7796,7 +7995,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
...
@@ -7796,7 +7995,7 @@ CHARSET_INFO my_charset_ucs2_latvian_uca_ci=
CHARSET_INFO
my_charset_ucs2_romanian_uca_ci
=
CHARSET_INFO
my_charset_ucs2_romanian_uca_ci
=
{
{
13
0
,
0
,
0
,
/* number */
13
1
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_romanian_ci"
,
/* name */
"ucs2_romanian_ci"
,
/* name */
...
@@ -7823,7 +8022,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
...
@@ -7823,7 +8022,7 @@ CHARSET_INFO my_charset_ucs2_romanian_uca_ci=
CHARSET_INFO
my_charset_ucs2_slovenian_uca_ci
=
CHARSET_INFO
my_charset_ucs2_slovenian_uca_ci
=
{
{
13
1
,
0
,
0
,
/* number */
13
2
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_slovenian_ci"
,
/* name */
"ucs2_slovenian_ci"
,
/* name */
...
@@ -7850,7 +8049,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
...
@@ -7850,7 +8049,7 @@ CHARSET_INFO my_charset_ucs2_slovenian_uca_ci=
CHARSET_INFO
my_charset_ucs2_polish_uca_ci
=
CHARSET_INFO
my_charset_ucs2_polish_uca_ci
=
{
{
13
2
,
0
,
0
,
/* number */
13
3
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_polish_ci"
,
/* name */
"ucs2_polish_ci"
,
/* name */
...
@@ -7877,7 +8076,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci=
...
@@ -7877,7 +8076,7 @@ CHARSET_INFO my_charset_ucs2_polish_uca_ci=
CHARSET_INFO
my_charset_ucs2_estonian_uca_ci
=
CHARSET_INFO
my_charset_ucs2_estonian_uca_ci
=
{
{
13
3
,
0
,
0
,
/* number */
13
4
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_estonian_ci"
,
/* name */
"ucs2_estonian_ci"
,
/* name */
...
@@ -7904,7 +8103,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
...
@@ -7904,7 +8103,7 @@ CHARSET_INFO my_charset_ucs2_estonian_uca_ci=
CHARSET_INFO
my_charset_ucs2_spanish_uca_ci
=
CHARSET_INFO
my_charset_ucs2_spanish_uca_ci
=
{
{
13
4
,
0
,
0
,
/* number */
13
5
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_spanish_ci"
,
/* name */
"ucs2_spanish_ci"
,
/* name */
...
@@ -7931,7 +8130,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
...
@@ -7931,7 +8130,7 @@ CHARSET_INFO my_charset_ucs2_spanish_uca_ci=
CHARSET_INFO
my_charset_ucs2_swedish_uca_ci
=
CHARSET_INFO
my_charset_ucs2_swedish_uca_ci
=
{
{
13
5
,
0
,
0
,
/* number */
13
6
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_swedish_ci"
,
/* name */
"ucs2_swedish_ci"
,
/* name */
...
@@ -7958,7 +8157,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
...
@@ -7958,7 +8157,7 @@ CHARSET_INFO my_charset_ucs2_swedish_uca_ci=
CHARSET_INFO
my_charset_ucs2_turkish_uca_ci
=
CHARSET_INFO
my_charset_ucs2_turkish_uca_ci
=
{
{
13
6
,
0
,
0
,
/* number */
13
7
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_turkish_ci"
,
/* name */
"ucs2_turkish_ci"
,
/* name */
...
@@ -7985,7 +8184,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
...
@@ -7985,7 +8184,7 @@ CHARSET_INFO my_charset_ucs2_turkish_uca_ci=
CHARSET_INFO
my_charset_ucs2_czech_uca_ci
=
CHARSET_INFO
my_charset_ucs2_czech_uca_ci
=
{
{
13
7
,
0
,
0
,
/* number */
13
8
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_czech_ci"
,
/* name */
"ucs2_czech_ci"
,
/* name */
...
@@ -8013,7 +8212,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci=
...
@@ -8013,7 +8212,7 @@ CHARSET_INFO my_charset_ucs2_czech_uca_ci=
CHARSET_INFO
my_charset_ucs2_danish_uca_ci
=
CHARSET_INFO
my_charset_ucs2_danish_uca_ci
=
{
{
13
8
,
0
,
0
,
/* number */
13
9
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_danish_ci"
,
/* name */
"ucs2_danish_ci"
,
/* name */
...
@@ -8040,7 +8239,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci=
...
@@ -8040,7 +8239,7 @@ CHARSET_INFO my_charset_ucs2_danish_uca_ci=
CHARSET_INFO
my_charset_ucs2_lithuanian_uca_ci
=
CHARSET_INFO
my_charset_ucs2_lithuanian_uca_ci
=
{
{
1
39
,
0
,
0
,
/* number */
1
40
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_lithuanian_ci"
,
/* name */
"ucs2_lithuanian_ci"
,
/* name */
...
@@ -8067,7 +8266,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
...
@@ -8067,7 +8266,7 @@ CHARSET_INFO my_charset_ucs2_lithuanian_uca_ci=
CHARSET_INFO
my_charset_ucs2_slovak_uca_ci
=
CHARSET_INFO
my_charset_ucs2_slovak_uca_ci
=
{
{
14
0
,
0
,
0
,
/* number */
14
1
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_slovak_ci"
,
/* name */
"ucs2_slovak_ci"
,
/* name */
...
@@ -8094,7 +8293,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
...
@@ -8094,7 +8293,7 @@ CHARSET_INFO my_charset_ucs2_slovak_uca_ci=
CHARSET_INFO
my_charset_ucs2_spanish2_uca_ci
=
CHARSET_INFO
my_charset_ucs2_spanish2_uca_ci
=
{
{
14
1
,
0
,
0
,
/* number */
14
2
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"ucs2"
,
/* cs name */
"ucs2"
,
/* cs name */
"ucs2_spanish2_ci"
,
/* name */
"ucs2_spanish2_ci"
,
/* name */
...
@@ -8120,3 +8319,454 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
...
@@ -8120,3 +8319,454 @@ CHARSET_INFO my_charset_ucs2_spanish2_uca_ci=
};
};
#endif
#endif
#ifdef HAVE_CHARSET_utf8
/*
  Collation handler table for the utf8 UCA collations.
  Every utf8 *_uca_ci CHARSET_INFO in this file stores the address of
  this structure as its last member.

  NOTE(review): the LIKE-range slot uses my_like_range_simple; confirm
  that this routine is appropriate for a multi-byte character set.
*/
MY_COLLATION_HANDLER my_collation_any_uca_handler =
{
  my_coll_init_uca,        /* init */
  my_strnncoll_any_uca,
  my_strnncollsp_any_uca,
  my_strnxfrm_any_uca,
  my_like_range_simple,
  my_wildcmp_uca,
  NULL,                    /* slot left unset */
  my_instr_mb,
  my_hash_sort_any_uca
};
/*
We consider bytes with code more than 127 as a letter.
This guarantees that word boundaries work fine with regular
expressions. Note, there is no need to mark byte 255 as a
letter, it is an illegal byte in UTF8.
*/
/*
  Character-class table referenced by the "ctype" member of every utf8
  UCA CHARSET_INFO below.  It holds 257 entries: one leading 0 followed
  by 256 further values, laid out here 16 per row.
  NOTE(review): presumably the 256 values are indexed by byte value
  0x00..0xFF after the leading slot -- confirm against the ctype macros.
*/
static uchar ctype_utf8[] =
{
    0,
   32,  32,  32,  32,  32,  32,  32,  32,  32,  40,  40,  40,  40,  40,  32,  32,
   32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32,
   72,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,  16,
  132, 132, 132, 132, 132, 132, 132, 132, 132, 132,  16,  16,  16,  16,  16,  16,
   16, 129, 129, 129, 129, 129, 129,   1,   1,   1,   1,   1,   1,   1,   1,   1,
    1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,  16,  16,  16,  16,  16,
   16, 130, 130, 130, 130, 130, 130,   2,   2,   2,   2,   2,   2,   2,   2,   2,
    2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,  16,  16,  16,  16,  32,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
    3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   0
};
/* utf8 character-set handler; the definition lives in strings/ctype-utf8.c. */
extern MY_CHARSET_HANDLER my_charset_utf8_handler;
/*
  utf8_unicode_ci: the untailored utf8 UCA collation (id 192).
  It has an empty tailoring string and points directly at the
  uca_length / uca_weight tables.
*/
CHARSET_INFO my_charset_utf8_general_uca_ci =
{
  192, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_unicode_ci",        /* name */
  "",                       /* comment */
  "",                       /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  uca_length,               /* sort_order */
  NULL,                     /* contractions */
  uca_weight,               /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_icelandic_ci: utf8 UCA collation (id 193) using the "icelandic"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_icelandic_uca_ci =
{
  193, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_icelandic_ci",      /* name */
  "",                       /* comment */
  icelandic,                /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_latvian_ci: utf8 UCA collation (id 194) using the "latvian"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_latvian_uca_ci =
{
  194, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_latvian_ci",        /* name */
  "",                       /* comment */
  latvian,                  /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_romanian_ci: utf8 UCA collation (id 195) using the "romanian"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_romanian_uca_ci =
{
  195, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_romanian_ci",       /* name */
  "",                       /* comment */
  romanian,                 /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_slovenian_ci: utf8 UCA collation (id 196) using the "slovenian"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_slovenian_uca_ci =
{
  196, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_slovenian_ci",      /* name */
  "",                       /* comment */
  slovenian,                /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_polish_ci: utf8 UCA collation (id 197) using the "polish"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_polish_uca_ci =
{
  197, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_polish_ci",         /* name */
  "",                       /* comment */
  polish,                   /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_estonian_ci: utf8 UCA collation (id 198) using the "estonian"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_estonian_uca_ci =
{
  198, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_estonian_ci",       /* name */
  "",                       /* comment */
  estonian,                 /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_spanish_ci: utf8 UCA collation (id 199) using the "spanish"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_spanish_uca_ci =
{
  199, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_spanish_ci",        /* name */
  "",                       /* comment */
  spanish,                  /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_swedish_ci: utf8 UCA collation (id 200) using the "swedish"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_swedish_uca_ci =
{
  200, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_swedish_ci",        /* name */
  "",                       /* comment */
  swedish,                  /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_turkish_ci: utf8 UCA collation (id 201) using the "turkish"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_turkish_uca_ci =
{
  201, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_turkish_ci",        /* name */
  "",                       /* comment */
  turkish,                  /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_czech_ci: utf8 UCA collation (id 202) using the "czech"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_czech_uca_ci =
{
  202, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_czech_ci",          /* name */
  "",                       /* comment */
  czech,                    /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_danish_ci: utf8 UCA collation (id 203) using the "danish"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_danish_uca_ci =
{
  203, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_danish_ci",         /* name */
  "",                       /* comment */
  danish,                   /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
/*
  utf8_lithuanian_ci: utf8 UCA collation (id 204) using the
  "lithuanian" tailoring.  The weight tables are left NULL in this
  initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_lithuanian_uca_ci =
{
  204, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_lithuanian_ci",     /* name */
  "",                       /* comment */
  lithuanian,               /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
CHARSET_INFO
my_charset_utf8_slovak_uca_ci
=
{
205
,
0
,
0
,
/* number */
MY_CS_COMPILED
|
MY_CS_STRNXFRM
|
MY_CS_UNICODE
,
"utf8"
,
/* cs name */
"utf8_slovak_ci"
,
/* name */
""
,
/* comment */
lithuanian
,
/* tailoring */
ctype_utf8
,
/* ctype */
NULL
,
/* to_lower */
NULL
,
/* to_upper */
NULL
,
/* sort_order */
NULL
,
/* contractions */
NULL
,
/* sort_order_big*/
NULL
,
/* tab_to_uni */
NULL
,
/* tab_from_uni */
NULL
,
/* state_map */
NULL
,
/* ident_map */
8
,
/* strxfrm_multiply */
2
,
/* mbminlen */
2
,
/* mbmaxlen */
9
,
/* min_sort_char */
0xFFFF
,
/* max_sort_char */
&
my_charset_utf8_handler
,
&
my_collation_any_uca_handler
};
/*
  utf8_spanish2_ci: utf8 UCA collation (id 206) using the "spanish2"
  tailoring.  The weight tables are left NULL in this initializer.

  mbminlen/mbmaxlen are 1 and 3, the same values used by
  my_charset_utf8_general_uca_ci; the previous 2/2 pair described a
  fixed two-byte encoding, not utf8.
*/
CHARSET_INFO my_charset_utf8_spanish2_uca_ci =
{
  206, 0, 0,                /* number */
  MY_CS_COMPILED | MY_CS_STRNXFRM | MY_CS_UNICODE,
  "utf8",                   /* cs name */
  "utf8_spanish2_ci",       /* name */
  "",                       /* comment */
  spanish2,                 /* tailoring */
  ctype_utf8,               /* ctype */
  NULL,                     /* to_lower */
  NULL,                     /* to_upper */
  NULL,                     /* sort_order */
  NULL,                     /* contractions */
  NULL,                     /* sort_order_big */
  NULL,                     /* tab_to_uni */
  NULL,                     /* tab_from_uni */
  NULL,                     /* state_map */
  NULL,                     /* ident_map */
  8,                        /* strxfrm_multiply */
  1,                        /* mbminlen */
  3,                        /* mbmaxlen */
  9,                        /* min_sort_char */
  0xFFFF,                   /* max_sort_char */
  &my_charset_utf8_handler,
  &my_collation_any_uca_handler
};
#endif
strings/ctype-utf8.c
View file @
5dd2881f
...
@@ -2057,7 +2057,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
...
@@ -2057,7 +2057,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_hash_sort_utf8
my_hash_sort_utf8
};
};
static
MY_CHARSET_HANDLER
my_charset
_handler
=
MY_CHARSET_HANDLER
my_charset_utf8
_handler
=
{
{
NULL
,
/* init */
NULL
,
/* init */
my_ismbchar_utf8
,
my_ismbchar_utf8
,
...
@@ -2109,7 +2109,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
...
@@ -2109,7 +2109,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
3
,
/* mbmaxlen */
3
,
/* mbmaxlen */
0
,
/* min_sort_char */
0
,
/* min_sort_char */
255
,
/* max_sort_char */
255
,
/* max_sort_char */
&
my_charset_handler
,
&
my_charset_
utf8_
handler
,
&
my_collation_ci_handler
&
my_collation_ci_handler
};
};
...
@@ -2137,13 +2137,12 @@ CHARSET_INFO my_charset_utf8_bin=
...
@@ -2137,13 +2137,12 @@ CHARSET_INFO my_charset_utf8_bin=
3
,
/* mbmaxlen */
3
,
/* mbmaxlen */
0
,
/* min_sort_char */
0
,
/* min_sort_char */
255
,
/* max_sort_char */
255
,
/* max_sort_char */
&
my_charset_handler
,
&
my_charset_
utf8_
handler
,
&
my_collation_mb_bin_handler
&
my_collation_mb_bin_handler
};
};
#ifdef MY_TEST_UTF8
#ifdef MY_TEST_UTF8
#include <stdio.h>
#include <stdio.h>
static
void
test_mb
(
CHARSET_INFO
*
cs
,
uchar
*
s
)
static
void
test_mb
(
CHARSET_INFO
*
cs
,
uchar
*
s
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment