Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
mariadb
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
mariadb
Commits
fa23b207
Commit
fa23b207
authored
Sep 11, 2001
by
monty@hundin.mysql.fi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fixes for German sorting order.
parent
c526f5d2
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
328 additions
and
155 deletions
+328
-155
Docs/manual.texi
Docs/manual.texi
+32
-1
configure.in
configure.in
+1
-1
myisam/mi_delete_all.c
myisam/mi_delete_all.c
+3
-1
myisam/mi_search.c
myisam/mi_search.c
+13
-6
mysql-test/r/ctype_latin1_de.result
mysql-test/r/ctype_latin1_de.result
+168
-0
mysql-test/t/ctype_latin1_de-master.opt
mysql-test/t/ctype_latin1_de-master.opt
+1
-0
mysql-test/t/ctype_latin1_de.test
mysql-test/t/ctype_latin1_de.test
+36
-0
sql/item_cmpfunc.cc
sql/item_cmpfunc.cc
+1
-1
strings/ctype-latin1_de.c
strings/ctype-latin1_de.c
+73
-145
No files found.
Docs/manual.texi
View file @
fa23b207
...
@@ -748,7 +748,7 @@ is also available through the SQL interface as well.
...
@@ -748,7 +748,7 @@ is also available through the SQL interface as well.
@item
@item
Full support for several different character sets, including
Full support for several different character sets, including
ISO-8859-1 (Latin1), big5, ujis, and more. For example, the
ISO-8859-1 (Latin1),
german,
big5, ujis, and more. For example, the
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
in table and column names.
in table and column names.
...
@@ -20442,6 +20442,35 @@ default-character-set=character-set-name
...
@@ -20442,6 +20442,35 @@ default-character-set=character-set-name
but normally this is never needed.
but normally this is never needed.
@menu
* German character set::
@end menu
@node German character set, , Character sets, Character sets
@subsubsection German character set
To get German sorting order, you should start @code{mysqld} with
@code{--default-character-set=latin1_de}. This will give you the following
characteristics.
When sorting and comparing strings, the following mapping is done on the
strings before doing the comparison:
@example
ä -> ae
ö -> oe
ü -> ue
ß -> ss
@end example
All accented characters, except @code{é} and @code{É}, are converted to
their unaccented counterparts. All letters are converted to uppercase.
When comparing strings with @code{LIKE} the one -> two character mapping
is not done. All letters are converted to uppercase. Accents are removed
from all letters except: @code{Ü}, @code{ü}, @code{É}, @code{é}, @code{Ö},
@code{ö}, @code{Ä} and @code{ä}.
@node Languages, Adding character set, Character sets, Localization
@node Languages, Adding character set, Character sets, Localization
@subsection Non-English Error Messages
@subsection Non-English Error Messages
...
@@ -46753,6 +46782,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
...
@@ -46753,6 +46782,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet
@itemize @bullet
@item
@item
New character set @code{latin1_de} which provides correct German sorting.
@item
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
functions. One bonus is that @code{DELETE FROM table_name} now returns
functions. One bonus is that @code{DELETE FROM table_name} now returns
the number of deleted rows.
the number of deleted rows.
configure.in
View file @
fa23b207
...
@@ -1826,7 +1826,7 @@ CHARSETS_AVAILABLE="big5 cp1251 cp1257
...
@@ -1826,7 +1826,7 @@ CHARSETS_AVAILABLE="big5 cp1251 cp1257
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
usa7 win1250 win1251ukr"
usa7 win1250 win1251ukr"
CHARSETS_DEPRECATED
=
"win1251"
CHARSETS_DEPRECATED
=
"win1251"
DEFAULT_CHARSET
=
latin1
_de
DEFAULT_CHARSET
=
latin1
AC_DIVERT_POP
AC_DIVERT_POP
AC_ARG_WITH
(
charset,
AC_ARG_WITH
(
charset,
...
...
myisam/mi_delete_all.c
View file @
fa23b207
...
@@ -15,7 +15,7 @@
...
@@ -15,7 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Remove all rows from a MyISAM table */
/* Remove all rows from a MyISAM table */
/* This only clears the status information
; The files are not truncated
*/
/* This only clears the status information
and truncates the data file
*/
#include "myisamdef.h"
#include "myisamdef.h"
...
@@ -50,6 +50,8 @@ int mi_delete_all_rows(MI_INFO *info)
...
@@ -50,6 +50,8 @@ int mi_delete_all_rows(MI_INFO *info)
myisam_log_command
(
MI_LOG_DELETE_ALL
,
info
,(
byte
*
)
0
,
0
,
0
);
myisam_log_command
(
MI_LOG_DELETE_ALL
,
info
,(
byte
*
)
0
,
0
,
0
);
VOID
(
_mi_writeinfo
(
info
,
WRITEINFO_UPDATE_KEYFILE
));
VOID
(
_mi_writeinfo
(
info
,
WRITEINFO_UPDATE_KEYFILE
));
if
(
my_chsize
(
info
->
dfile
,
0
,
MYF
(
MY_WME
)))
goto
err
;
allow_break
();
/* Allow SIGHUP & SIGINT */
allow_break
();
/* Allow SIGHUP & SIGINT */
DBUG_RETURN
(
0
);
DBUG_RETURN
(
0
);
...
...
myisam/mi_search.c
View file @
fa23b207
...
@@ -657,19 +657,19 @@ void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos)
...
@@ -657,19 +657,19 @@ void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos)
int
_mi_compare_text
(
CHARSET_INFO
*
charset_info
,
uchar
*
a
,
uint
a_length
,
int
_mi_compare_text
(
CHARSET_INFO
*
charset_info
,
uchar
*
a
,
uint
a_length
,
uchar
*
b
,
uint
b_length
,
my_bool
part_key
)
uchar
*
b
,
uint
b_length
,
my_bool
part_key
)
{
{
uint
length
=
min
(
a_length
,
b_length
);
uchar
*
end
=
a
+
length
;
int
flag
;
int
flag
;
#ifdef USE_STRCOLL
#ifdef USE_STRCOLL
if
(
use_strcoll
(
charset_info
))
if
(
use_strcoll
(
charset_info
))
{
{
if
((
flag
=
my_strnncoll
(
charset_info
,
a
,
a_length
,
b
,
b_length
)))
/* QQ: This needs to work with part keys at some point */
return
flag
;
return
my_strnncoll
(
charset_info
,
a
,
a_length
,
b
,
b_length
)
;
}
}
else
else
#endif
#endif
{
{
uint
length
=
min
(
a_length
,
b_length
);
uchar
*
end
=
a
+
length
;
uchar
*
sort_order
=
charset_info
->
sort_order
;
uchar
*
sort_order
=
charset_info
->
sort_order
;
while
(
a
<
end
)
while
(
a
<
end
)
if
((
flag
=
(
int
)
sort_order
[
*
a
++
]
-
(
int
)
sort_order
[
*
b
++
]))
if
((
flag
=
(
int
)
sort_order
[
*
a
++
]
-
(
int
)
sort_order
[
*
b
++
]))
...
@@ -768,8 +768,15 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
...
@@ -768,8 +768,15 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
}
}
else
else
{
{
uint
length
=
(
uint
)
(
end
-
a
);
uint
length
=
(
uint
)
(
end
-
a
),
a_length
=
length
,
b_length
=
length
;
if
((
flag
=
_mi_compare_text
(
keyseg
->
charset
,
a
,
length
,
b
,
length
,
if
(
!
(
nextflag
&
SEARCH_PREFIX
))
{
while
(
a_length
&&
a
[
a_length
-
1
]
==
' '
)
a_length
--
;
while
(
b_length
&&
b
[
b_length
-
1
]
==
' '
)
b_length
--
;
}
if
((
flag
=
_mi_compare_text
(
keyseg
->
charset
,
a
,
a_length
,
b
,
b_length
,
(
my_bool
)
((
nextflag
&
SEARCH_PREFIX
)
&&
(
my_bool
)
((
nextflag
&
SEARCH_PREFIX
)
&&
next_key_length
<=
0
))))
next_key_length
<=
0
))))
return
((
keyseg
->
flag
&
HA_REVERSE_SORT
)
?
-
flag
:
flag
);
return
((
keyseg
->
flag
&
HA_REVERSE_SORT
)
?
-
flag
:
flag
);
...
...
mysql-test/r/ctype_latin1_de.result
0 → 100644
View file @
fa23b207
a b
a 35
ac 2
ad 4
1
ae 3
31
aeae 33
a 32
aeb 6
c 5
e 28
o 37
oc 15
od 18
14
oe 17
a 16
oeb 20
c 19
o 30
q 34
s 21
ss 22
23
ssa 25
a 27
b 24
ssc 26
u 36
uc 8
ud 10
ue 9
11
ueb 12
c 7
uf 13
u 29
38
39
a b
a 35
ac 2
ad 4
1
ae 3
31
aeae 33
a 32
aeb 6
c 5
e 28
o 37
oc 15
od 18
14
oe 17
a 16
oeb 20
c 19
o 30
q 34
s 21
ss 22
23
ssa 25
a 27
b 24
ssc 26
u 36
uc 8
ud 10
ue 9
11
ueb 12
c 7
uf 13
u 29
38
39
a
u
uf
c
ueb
ue
ud
uc
u
ssc
b
a
ssa
ss
s
q
o
c
oeb
a
oe
od
oc
o
e
c
aeb
a
aeae
ae
ad
ac
a
Table Op Msg_type Msg_text
test.t1 check status OK
a b
a 16
c 19
o 30
a b
38
39
a b
a 35
ac 2
ad 4
ae 3
aeae 33
a 32
aeb 6
a 16
ssa 25
a 27
a b
u 36
uc 8
ud 10
ue 9
ueb 12
uf 13
u 29
a b
ss 22
ssa 25
ssc 26
strcmp('','ae') strcmp('ae','') strcmp('aeq','q') strcmp('q','aeq')
0 0 0 0
strcmp('ss','') strcmp('','ss') strcmp('s','sss') strcmp('q','ssq')
0 0 0 0
strcmp('','af') strcmp('a','') strcmp('','aeq') strcmp('','aeaeq')
-1 -1 -1 -1
strcmp('ss','a') strcmp('','ssa') strcmp('sa','sssb') strcmp('s','')
-1 -1 -1 -1
strcmp('','o') strcmp('','u') strcmp('','oeb')
-1 -1 -1
strcmp('af','') strcmp('','a') strcmp('aeq','') strcmp('aeaeq','')
1 1 1 1
strcmp('a','ss') strcmp('ssa','') strcmp('sssb','sa') strcmp('','s')
1 1 1 1
strcmp('u','a') strcmp('u','')
1 1
mysql-test/t/ctype_latin1_de-master.opt
0 → 100644
View file @
fa23b207
--default-character-set=latin1_de
mysql-test/t/ctype_latin1_de.test
0 → 100644
View file @
fa23b207
#
# Test latin1_de character set
#
drop
table
if
exists
t1
;
create
table
t1
(
a
char
(
20
)
not
null
,
b
int
not
null
auto_increment
,
index
(
a
,
b
),
index
(
b
));
insert
into
t1
(
a
)
values
(
''
),(
'ac'
),(
'ae'
),(
'ad'
),(
'c'
),(
'aeb'
);
insert
into
t1
(
a
)
values
(
'c'
),(
'uc'
),(
'ue'
),(
'ud'
),(
''
),(
'ueb'
),(
'uf'
);
insert
into
t1
(
a
)
values
(
''
),(
'oc'
),(
'a'
),(
'oe'
),(
'od'
),(
'c'
),(
'oeb'
);
insert
into
t1
(
a
)
values
(
's'
),(
'ss'
),(
''
),(
'b'
),(
'ssa'
),(
'ssc'
),(
'a'
);
insert
into
t1
(
a
)
values
(
'e'
),(
'u'
),(
'o'
),(
''
),(
'a'
),(
'aeae'
);
insert
into
t1
(
a
)
values
(
'q'
),(
'a'
),(
'u'
),(
'o'
),(
''
),(
''
);
select
a
,
b
from
t1
order
by
a
,
b
;
select
a
,
b
from
t1
order
by
upper
(
a
),
b
;
select
a
from
t1
order
by
a
desc
;
check
table
t1
;
select
*
from
t1
where
a
like
"%"
;
select
*
from
t1
where
a
like
"%%"
;
select
*
from
t1
where
a
like
"%%"
;
select
*
from
t1
where
a
like
"%U%"
;
select
*
from
t1
where
a
like
"%ss%"
;
drop
table
t1
;
# The following should all return 0 (the strings compare as equal)
select
strcmp
(
''
,
'ae'
),
strcmp
(
'ae'
,
''
),
strcmp
(
'aeq'
,
'q'
),
strcmp
(
'q'
,
'aeq'
);
select
strcmp
(
'ss'
,
''
),
strcmp
(
''
,
'ss'
),
strcmp
(
's'
,
'sss'
),
strcmp
(
'q'
,
'ssq'
);
# The following should all return -1
select
strcmp
(
''
,
'af'
),
strcmp
(
'a'
,
''
),
strcmp
(
''
,
'aeq'
),
strcmp
(
''
,
'aeaeq'
);
select
strcmp
(
'ss'
,
'a'
),
strcmp
(
''
,
'ssa'
),
strcmp
(
'sa'
,
'sssb'
),
strcmp
(
's'
,
''
);
select
strcmp
(
''
,
'o'
),
strcmp
(
''
,
'u'
),
strcmp
(
''
,
'oeb'
);
# The following should all return 1
select
strcmp
(
'af'
,
''
),
strcmp
(
''
,
'a'
),
strcmp
(
'aeq'
,
''
),
strcmp
(
'aeaeq'
,
''
);
select
strcmp
(
'a'
,
'ss'
),
strcmp
(
'ssa'
,
''
),
strcmp
(
'sssb'
,
'sa'
),
strcmp
(
''
,
's'
);
select
strcmp
(
'u'
,
'a'
),
strcmp
(
'u'
,
''
);
sql/item_cmpfunc.cc
View file @
fa23b207
...
@@ -254,7 +254,7 @@ longlong Item_func_strcmp::val_int()
...
@@ -254,7 +254,7 @@ longlong Item_func_strcmp::val_int()
null_value
=
1
;
null_value
=
1
;
return
0
;
return
0
;
}
}
int
value
=
string
cmp
(
a
,
b
);
int
value
=
binary
?
stringcmp
(
a
,
b
)
:
sort
cmp
(
a
,
b
);
null_value
=
0
;
null_value
=
0
;
return
!
value
?
0
:
(
value
<
0
?
(
longlong
)
-
1
:
(
longlong
)
1
);
return
!
value
?
0
:
(
value
<
0
?
(
longlong
)
-
1
:
(
longlong
)
1
);
}
}
...
...
strings/ctype-latin1_de.c
View file @
fa23b207
...
@@ -99,12 +99,10 @@ uchar to_upper_latin1_de[] = {
...
@@ -99,12 +99,10 @@ uchar to_upper_latin1_de[] = {
* This is a simple latin1 mapping table, which maps all accented
* This is a simple latin1 mapping table, which maps all accented
* characters to their non-accented equivalents. Note: in this
* characters to their non-accented equivalents. Note: in this
* table, 'ä' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
* table, 'ä' is mapped to 'A', 'ÿ' is mapped to 'Y', etc. - all
* accented characters are treated the same way.
* accented characters except the following are treated the same way.
*
* Ü, ü, É, é, Ö, ö, Ä, ä
* SPECIAL NOTE: 'ß' (the sz ligature), which isn't really an
* accented 's', is mapped to 'S', to simplify the sorting
* functions.
*/
*/
uchar
sort_order_latin1_de
[]
=
{
uchar
sort_order_latin1_de
[]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
...
@@ -118,10 +116,10 @@ uchar sort_order_latin1_de[] = {
...
@@ -118,10 +116,10 @@ uchar sort_order_latin1_de[] = {
144
,
145
,
146
,
147
,
148
,
149
,
150
,
151
,
152
,
153
,
154
,
155
,
156
,
157
,
158
,
159
,
144
,
145
,
146
,
147
,
148
,
149
,
150
,
151
,
152
,
153
,
154
,
155
,
156
,
157
,
158
,
159
,
160
,
161
,
162
,
163
,
164
,
165
,
166
,
167
,
168
,
169
,
170
,
171
,
172
,
173
,
174
,
175
,
160
,
161
,
162
,
163
,
164
,
165
,
166
,
167
,
168
,
169
,
170
,
171
,
172
,
173
,
174
,
175
,
176
,
177
,
178
,
179
,
180
,
181
,
182
,
183
,
184
,
185
,
186
,
187
,
188
,
189
,
190
,
191
,
176
,
177
,
178
,
179
,
180
,
181
,
182
,
183
,
184
,
185
,
186
,
187
,
188
,
189
,
190
,
191
,
65
,
65
,
65
,
65
,
65
,
65
,
92
,
67
,
69
,
69
,
69
,
69
,
73
,
73
,
73
,
73
,
65
,
65
,
65
,
65
,
196
,
65
,
92
,
67
,
69
,
201
,
69
,
69
,
73
,
73
,
73
,
73
,
68
,
78
,
79
,
79
,
79
,
79
,
79
,
215
,
216
,
85
,
85
,
85
,
85
,
89
,
222
,
8
3
,
68
,
78
,
79
,
79
,
79
,
79
,
214
,
215
,
216
,
85
,
85
,
85
,
220
,
89
,
222
,
22
3
,
65
,
65
,
65
,
65
,
65
,
65
,
92
,
67
,
69
,
69
,
69
,
69
,
73
,
73
,
73
,
73
,
65
,
65
,
65
,
65
,
196
,
65
,
92
,
67
,
69
,
201
,
69
,
69
,
73
,
73
,
73
,
73
,
68
,
78
,
79
,
79
,
79
,
79
,
79
,
247
,
216
,
85
,
85
,
85
,
85
,
89
,
222
,
89
68
,
78
,
79
,
79
,
79
,
79
,
214
,
247
,
216
,
85
,
85
,
85
,
220
,
89
,
222
,
89
};
};
#define L1_AE 196
#define L1_AE 196
...
@@ -132,6 +130,39 @@ uchar sort_order_latin1_de[] = {
...
@@ -132,6 +130,39 @@ uchar sort_order_latin1_de[] = {
#define L1_ue 252
#define L1_ue 252
#define L1_ss 223
#define L1_ss 223
/*
Some notes about the following comparison rules:
By definition, my_strnncoll_latin1_de() must work exactly as if we had called
my_strnxfrm_latin1_de() on both strings and compared the resulting strings.
This means that:
Ä must also match ÁE and Aè, because my_strnxfrm_latin1_de() will convert
both to AE.
The other option would be to not do any accent removal in
sort_order_latin1_de[] at all.
*/
#define CHECK_S1_COMBO(ch1, ch2, str1, str1_end, res_if_str1_smaller, str2, fst, snd, accent) \
/* Invariant: ch1 == fst == sort_order_latin1_de[accent] && ch1 != ch2 */
\
if (ch2 != accent) \
{ \
ch1= fst; \
goto normal; \
} \
if (str1 == str1_end) \
return res_if_str1_smaller; \
{ \
int diff = (int) sort_order_latin1_de[*str1] - snd; \
if (diff) \
return diff*(-(res_if_str1_smaller)); \
/* They are equal (e.g., "Ae" == 'ä') */
\
str1++; \
}
int
my_strnncoll_latin1_de
(
const
uchar
*
s1
,
int
len1
,
int
my_strnncoll_latin1_de
(
const
uchar
*
s1
,
int
len1
,
const
uchar
*
s2
,
int
len2
)
const
uchar
*
s2
,
int
len2
)
{
{
...
@@ -140,172 +171,71 @@ int my_strnncoll_latin1_de(const uchar * s1, int len1,
...
@@ -140,172 +171,71 @@ int my_strnncoll_latin1_de(const uchar * s1, int len1,
while
(
s1
<
e1
&&
s2
<
e2
)
while
(
s1
<
e1
&&
s2
<
e2
)
{
{
/* to_upper is used instead of sort_order, because we don't want
/*
* 'Ä' to match "ÁE", only "AE". This couples the to_upper and
Because sort_order_latin1_de doesn't convert 'Ä', Ü or ß we
* sort_order tables together, but that is acceptable. */
can use it here.
uchar
c1
=
to_upper_latin1_de
[
*
s1
];
*/
uchar
c2
=
to_upper_latin1_de
[
*
s2
];
uchar
c1
=
sort_order_latin1_de
[
*
s1
++
];
uchar
c2
=
sort_order_latin1_de
[
*
s2
++
];
if
(
c1
!=
c2
)
if
(
c1
!=
c2
)
{
{
switch
(
c1
)
switch
(
c1
)
{
{
#define CHECK_S1_COMBO(fst, snd, accent) \
/* Invariant: c1 == fst == sort_order_latin1_de[accent] && c1 != c2 */
\
if (c2 == accent) \
{ \
if (s1 + 1 < e1) \
{ \
if (to_upper_latin1_de[*(s1 + 1)] == snd) \
{ \
/* They are equal (e.g., "Ae" == 'ä') */
\
s1 += 2; \
s2 += 1; \
} \
else \
{ \
int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
if (diff) \
return diff; \
else \
/* Comparison between, e.g., "AÉ" and 'Ä' */
\
return 1; \
} \
} \
else \
return -1; \
} \
else \
/* The following should work even if c2 is [ÄÖÜß] */
\
return fst - sort_order_latin1_de[c2]
case
'A'
:
case
'A'
:
CHECK_S1_COMBO
(
'A'
,
'E'
,
L1_AE
);
CHECK_S1_COMBO
(
c1
,
c2
,
s1
,
e1
,
-
1
,
s2
,
'A'
,
'E'
,
L1_AE
);
break
;
break
;
case
'O'
:
case
'O'
:
CHECK_S1_COMBO
(
'O'
,
'E'
,
L1_OE
);
CHECK_S1_COMBO
(
c1
,
c2
,
s1
,
e1
,
-
1
,
s2
,
'O'
,
'E'
,
L1_OE
);
break
;
break
;
case
'U'
:
case
'U'
:
CHECK_S1_COMBO
(
'U'
,
'E'
,
L1_UE
);
CHECK_S1_COMBO
(
c1
,
c2
,
s1
,
e1
,
-
1
,
s2
,
'U'
,
'E'
,
L1_UE
);
break
;
break
;
case
'S'
:
case
'S'
:
CHECK_S1_COMBO
(
'S'
,
'S'
,
L1_ss
);
CHECK_S1_COMBO
(
c1
,
c2
,
s1
,
e1
,
-
1
,
s2
,
'S'
,
'S'
,
L1_ss
);
break
;
break
;
#define CHECK_S2_COMBO(fst, snd) \
/* Invariant: sort_order_latin1_de[c1] == fst && c1 != c2 */
\
if (c2 == fst) \
{ \
if (s2 + 1 < e2) \
{ \
if (to_upper_latin1_de[*(s2 + 1)] == snd) \
{ \
/* They are equal (e.g., 'ä' == "Ae") */
\
s1 += 1; \
s2 += 2; \
} \
else \
{ \
int diff = sort_order_latin1_de[*(s1 + 1)] - snd; \
if (diff) \
return diff; \
else \
/* Comparison between, e.g., 'Ä' and "AÉ" */
\
return -1; \
} \
} \
else \
return 1; \
} \
else \
/* The following should work even if c2 is [ÄÖÜß] */
\
return fst - sort_order_latin1_de[c2]
case
L1_AE
:
case
L1_AE
:
CHECK_S
2_COMBO
(
'A'
,
'E
'
);
CHECK_S
1_COMBO
(
c1
,
c2
,
s2
,
e2
,
1
,
s1
,
'A'
,
'E'
,
'A
'
);
break
;
break
;
case
L1_OE
:
case
L1_OE
:
CHECK_S
2_COMBO
(
'O'
,
'E
'
);
CHECK_S
1_COMBO
(
c1
,
c2
,
s2
,
e2
,
1
,
s1
,
'O'
,
'E'
,
'O
'
);
break
;
break
;
case
L1_UE
:
case
L1_UE
:
CHECK_S
2_COMBO
(
'U'
,
'E
'
);
CHECK_S
1_COMBO
(
c1
,
c2
,
s2
,
e2
,
1
,
s1
,
'U'
,
'E'
,
'U
'
);
break
;
break
;
case
L1_ss
:
case
L1_ss
:
CHECK_S
2_COMBO
(
'S'
,
'S'
);
CHECK_S
1_COMBO
(
c1
,
c2
,
s2
,
e2
,
1
,
s1
,
'S'
,
'S'
,
'S'
);
break
;
break
;
default:
default:
/*
Handle the case where 'c2' is a special character
If this is true, we know that c1 can't match this character.
*/
normal:
switch
(
c2
)
{
switch
(
c2
)
{
case
L1_AE
:
case
L1_AE
:
return
(
int
)
c1
-
(
int
)
'A'
;
case
L1_OE
:
case
L1_OE
:
return
(
int
)
c1
-
(
int
)
'O'
;
case
L1_UE
:
case
L1_UE
:
return
(
int
)
c1
-
(
int
)
'U'
;
case
L1_ss
:
case
L1_ss
:
/* Make sure these do not match (e.g., "Ä" != "Á") */
return
(
int
)
c1
-
(
int
)
'S'
;
return
sort_order_latin1_de
[
c1
]
-
sort_order_latin1_de
[
c2
];
break
;
default:
default:
if
(
sort_order_latin1_de
[
*
s1
]
!=
sort_order_latin1_de
[
*
s2
])
{
return
sort_order_latin1_de
[
*
s1
]
-
sort_order_latin1_de
[
*
s2
];
int
diff
=
(
int
)
c1
-
(
int
)
c2
;
++
s1
;
if
(
diff
)
++
s2
;
return
diff
;
break
;
}
}
break
;
break
;
#undef CHECK_S1_COMBO
#undef CHECK_S2_COMBO
}
}
else
{
/* In order to consistently treat "ae" == 'ä', but to NOT allow
* "aé" == 'ä', we must look ahead here to ensure that the second
* letter in a combo really is the unaccented 'e' (or 's' for
* "ss") and is not an accented character with the same sort_order. */
++
s1
;
++
s2
;
if
(
s1
<
e1
&&
s2
<
e2
)
{
switch
(
c1
)
{
case
'A'
:
case
'O'
:
case
'U'
:
if
(
sort_order_latin1_de
[
*
s1
]
==
'E'
&&
to_upper_latin1_de
[
*
s1
]
!=
'E'
&&
to_upper_latin1_de
[
*
s2
]
==
'E'
)
/* Comparison between, e.g., "AÉ" and "AE" */
return
1
;
if
(
sort_order_latin1_de
[
*
s2
]
==
'E'
&&
to_upper_latin1_de
[
*
s2
]
!=
'E'
&&
to_upper_latin1_de
[
*
s1
]
==
'E'
)
/* Comparison between, e.g., "AE" and "AÉ" */
return
-
1
;
break
;
case
'S'
:
if
(
sort_order_latin1_de
[
*
s1
]
==
'S'
&&
to_upper_latin1_de
[
*
s1
]
!=
'S'
&&
to_upper_latin1_de
[
*
s2
]
==
'S'
)
/* Comparison between, e.g., "Sß" and "SS" */
return
1
;
if
(
sort_order_latin1_de
[
*
s2
]
==
'S'
&&
to_upper_latin1_de
[
*
s2
]
!=
'S'
&&
to_upper_latin1_de
[
*
s1
]
==
'S'
)
/* Comparison between, e.g., "SS" and "Sß" */
return
-
1
;
break
;
default:
break
;
}
}
}
}
}
}
}
}
/* A simple test of string lengths won't work -- we test to see
/* A simple test of string lengths won't work -- we test to see
* which string ran out first */
* which string ran out first */
return
s1
<
e1
?
1
:
s2
<
e2
?
-
1
:
0
;
return
s1
<
e1
?
1
:
s2
<
e2
?
-
1
:
0
;
}
}
int
my_strnxfrm_latin1_de
(
uchar
*
dest
,
const
uchar
*
src
,
int
len
,
int
srclen
)
int
my_strnxfrm_latin1_de
(
uchar
*
dest
,
const
uchar
*
src
,
int
len
,
int
srclen
)
{
{
const
uchar
*
dest_orig
=
dest
;
const
uchar
*
dest_orig
=
dest
;
...
@@ -313,22 +243,19 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
...
@@ -313,22 +243,19 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
const
uchar
*
se
=
src
+
srclen
;
const
uchar
*
se
=
src
+
srclen
;
while
(
src
<
se
&&
dest
<
de
)
while
(
src
<
se
&&
dest
<
de
)
{
{
switch
(
*
src
)
uchar
chr
=
sort_order_latin1_de
[
*
src
];
{
switch
(
chr
)
{
case
L1_AE
:
case
L1_AE
:
case
L1_ae
:
*
dest
++
=
'A'
;
*
dest
++
=
'A'
;
if
(
dest
<
de
)
if
(
dest
<
de
)
*
dest
++
=
'E'
;
*
dest
++
=
'E'
;
break
;
break
;
case
L1_OE
:
case
L1_OE
:
case
L1_oe
:
*
dest
++
=
'O'
;
*
dest
++
=
'O'
;
if
(
dest
<
de
)
if
(
dest
<
de
)
*
dest
++
=
'E'
;
*
dest
++
=
'E'
;
break
;
break
;
case
L1_UE
:
case
L1_UE
:
case
L1_ue
:
*
dest
++
=
'U'
;
*
dest
++
=
'U'
;
if
(
dest
<
de
)
if
(
dest
<
de
)
*
dest
++
=
'E'
;
*
dest
++
=
'E'
;
...
@@ -339,7 +266,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
...
@@ -339,7 +266,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
*
dest
++
=
'S'
;
*
dest
++
=
'S'
;
break
;
break
;
default:
default:
*
dest
++
=
sort_order_latin1_de
[
*
src
]
;
*
dest
++
=
chr
;
break
;
break
;
}
}
++
src
;
++
src
;
...
@@ -347,6 +274,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
...
@@ -347,6 +274,7 @@ int my_strnxfrm_latin1_de(uchar * dest, const uchar * src, int len, int srclen)
return
dest
-
dest_orig
;
return
dest
-
dest_orig
;
}
}
int
my_strcoll_latin1_de
(
const
uchar
*
s1
,
const
uchar
*
s2
)
int
my_strcoll_latin1_de
(
const
uchar
*
s1
,
const
uchar
*
s2
)
{
{
/* XXX QQ: This should be fixed to not call strlen */
/* XXX QQ: This should be fixed to not call strlen */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment