Commit 34925f8d authored by unknown's avatar unknown

Fixes for German sorting order.


Docs/manual.texi:
  Update for German sorting
configure.in:
  Don't make the German sort order default
myisam/mi_delete_all.c:
  Truncate files on DELETE FROM table_name to not get warnings when checking files
myisam/mi_search.c:
  Fix for multi-byte character sets.
sql/item_cmpfunc.cc:
  Use current character set when using STRCMP()
strings/ctype-latin1_de.c:
  F
parent 354882c8
......@@ -748,7 +748,7 @@ is also available through the SQL interface as well.
@item
Full support for several different character sets, including
ISO-8859-1 (Latin1), big5, ujis, and more. For example, the
ISO-8859-1 (Latin1), german, big5, ujis, and more. For example, the
Scandinavian characters `@ringaccent{a}', `@"a' and `@"o' are allowed
in table and column names.
......@@ -20442,6 +20442,35 @@ default-character-set=character-set-name
but normally this is never needed.
@menu
* German character set::
@end menu
@node German character set, , Character sets, Character sets
@subsubsection German character set
To get German sorting order, you should start @code{mysqld} with
@code{--default-character-set=latin1_de}. This will give you the following
characteristics.
When sorting and comparing strings, the following mapping is applied to the
strings before the comparison is done:
@example
ä -> ae
ö -> oe
ü -> ue
ß -> ss
@end example
All accented characters, except @code{é} and @code{É}, are converted to
their unaccented counterparts. All letters are converted to uppercase.
When comparing strings with @code{LIKE}, the one -> two character mapping
is not done. All letters are converted to uppercase. Accents are removed
from all letters except @code{Ü}, @code{ü}, @code{É}, @code{é}, @code{Ö},
@code{ö}, @code{Ä} and @code{ä}.
@node Languages, Adding character set, Character sets, Localization
@subsection Non-English Error Messages
......@@ -46753,6 +46782,8 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}.
@itemize @bullet
@item
New character set @code{latin1_de}, which provides correct German sorting.
@item
@code{TRUNCATE TABLE} and @code{DELETE FROM table_name} are now separate
functions. One bonus is that @code{DELETE FROM table_name} now returns
the number of deleted rows.
......@@ -1826,7 +1826,7 @@ CHARSETS_AVAILABLE="big5 cp1251 cp1257
latin1 latin1_de latin2 latin5 sjis swe7 tis620 ujis
usa7 win1250 win1251ukr"
CHARSETS_DEPRECATED="win1251"
DEFAULT_CHARSET=latin1_de
DEFAULT_CHARSET=latin1
AC_DIVERT_POP
AC_ARG_WITH(charset,
......
......@@ -15,7 +15,7 @@
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* Remove all rows from a MyISAM table */
/* This only clears the status information; The files are not truncated */
/* This only clears the status information and truncates the data file */
#include "myisamdef.h"
......@@ -50,6 +50,8 @@ int mi_delete_all_rows(MI_INFO *info)
myisam_log_command(MI_LOG_DELETE_ALL,info,(byte*) 0,0,0);
VOID(_mi_writeinfo(info,WRITEINFO_UPDATE_KEYFILE));
if (my_chsize(info->dfile, 0, MYF(MY_WME)))
goto err;
allow_break(); /* Allow SIGHUP & SIGINT */
DBUG_RETURN(0);
......
......@@ -657,19 +657,19 @@ void _mi_dpointer(MI_INFO *info, uchar *buff, my_off_t pos)
int _mi_compare_text(CHARSET_INFO *charset_info, uchar *a, uint a_length,
uchar *b, uint b_length, my_bool part_key)
{
uint length= min(a_length,b_length);
uchar *end= a+ length;
int flag;
#ifdef USE_STRCOLL
if (use_strcoll(charset_info))
{
if ((flag = my_strnncoll(charset_info, a, a_length, b, b_length)))
return flag;
/* QQ: This needs to work with part keys at some point */
return my_strnncoll(charset_info, a, a_length, b, b_length);
}
else
#endif
{
uint length= min(a_length,b_length);
uchar *end= a+ length;
uchar *sort_order=charset_info->sort_order;
while (a < end)
if ((flag= (int) sort_order[*a++] - (int) sort_order[*b++]))
......@@ -768,8 +768,15 @@ int _mi_key_cmp(register MI_KEYSEG *keyseg, register uchar *a,
}
else
{
uint length=(uint) (end-a);
if ((flag=_mi_compare_text(keyseg->charset,a,length,b,length,
uint length=(uint) (end-a), a_length=length, b_length=length;
if (!(nextflag & SEARCH_PREFIX))
{
while (a_length && a[a_length-1] == ' ')
a_length--;
while (b_length && b[b_length-1] == ' ')
b_length--;
}
if ((flag=_mi_compare_text(keyseg->charset,a,a_length,b,b_length,
(my_bool) ((nextflag & SEARCH_PREFIX) &&
next_key_length <= 0))))
return ((keyseg->flag & HA_REVERSE_SORT) ? -flag : flag);
......
a b
a 35
ac 2
ad 4
1
ae 3
31
aeae 33
a 32
aeb 6
c 5
e 28
o 37
oc 15
od 18
14
oe 17
a 16
oeb 20
c 19
o 30
q 34
s 21
ss 22
23
ssa 25
a 27
b 24
ssc 26
u 36
uc 8
ud 10
ue 9
11
ueb 12
c 7
uf 13
u 29
38
39
a b
a 35
ac 2
ad 4
1
ae 3
31
aeae 33
a 32
aeb 6
c 5
e 28
o 37
oc 15
od 18
14
oe 17
a 16
oeb 20
c 19
o 30
q 34
s 21
ss 22
23
ssa 25
a 27
b 24
ssc 26
u 36
uc 8
ud 10
ue 9
11
ueb 12
c 7
uf 13
u 29
38
39
a
u
uf
c
ueb
ue
ud
uc
u
ssc
b
a
ssa
ss
s
q
o
c
oeb
a
oe
od
oc
o
e
c
aeb
a
aeae
ae
ad
ac
a
Table Op Msg_type Msg_text
test.t1 check status OK
a b
a 16
c 19
o 30
a b
38
39
a b
a 35
ac 2
ad 4
ae 3
aeae 33
a 32
aeb 6
a 16
ssa 25
a 27
a b
u 36
uc 8
ud 10
ue 9
ueb 12
uf 13
u 29
a b
ss 22
ssa 25
ssc 26
strcmp('','ae') strcmp('ae','') strcmp('aeq','q') strcmp('q','aeq')
0 0 0 0
strcmp('ss','') strcmp('','ss') strcmp('s','sss') strcmp('q','ssq')
0 0 0 0
strcmp('','af') strcmp('a','') strcmp('','aeq') strcmp('','aeaeq')
-1 -1 -1 -1
strcmp('ss','a') strcmp('','ssa') strcmp('sa','sssb') strcmp('s','')
-1 -1 -1 -1
strcmp('','o') strcmp('','u') strcmp('','oeb')
-1 -1 -1
strcmp('af','') strcmp('','a') strcmp('aeq','') strcmp('aeaeq','')
1 1 1 1
strcmp('a','ss') strcmp('ssa','') strcmp('sssb','sa') strcmp('','s')
1 1 1 1
strcmp('u','a') strcmp('u','')
1 1
--default-character-set=latin1_de
#
# Test latin1_de character set
#
drop table if exists t1;
# t1 has a composite index on (a,b) and a separate index on the
# auto_increment column b, so every inserted row gets a distinct b value.
create table t1 (a char (20) not null, b int not null auto_increment, index (a,b),index(b));
insert into t1 (a) values (''),('ac'),('ae'),('ad'),('c'),('aeb');
insert into t1 (a) values ('c'),('uc'),('ue'),('ud'),(''),('ueb'),('uf');
insert into t1 (a) values (''),('oc'),('a'),('oe'),('od'),('c'),('oeb');
insert into t1 (a) values ('s'),('ss'),(''),('b'),('ssa'),('ssc'),('a');
insert into t1 (a) values ('e'),('u'),('o'),(''),('a'),('aeae');
insert into t1 (a) values ('q'),('a'),('u'),('o'),(''),('');
# Sort on the column itself, on an expression over the column, and in
# descending order.
select a,b from t1 order by a,b;
select a,b from t1 order by upper(a),b;
select a from t1 order by a desc;
# Check the table after the inserts.
check table t1;
# Pattern matching with LIKE.
select * from t1 where a like "%";
select * from t1 where a like "%%";
select * from t1 where a like "%%";
select * from t1 where a like "%U%";
select * from t1 where a like "%ss%";
drop table t1;
# The following should all return 0 (the strings compare as equal)
select strcmp('','ae'),strcmp('ae',''),strcmp('aeq','q'),strcmp('q','aeq');
select strcmp('ss',''),strcmp('','ss'),strcmp('s','sss'),strcmp('q','ssq');
# The following should all return -1
select strcmp('','af'),strcmp('a',''),strcmp('','aeq'),strcmp('','aeaeq');
select strcmp('ss','a'),strcmp('','ssa'),strcmp('sa','sssb'),strcmp('s','');
select strcmp('','o'),strcmp('','u'),strcmp('','oeb');
# The following should all return 1
select strcmp('af',''),strcmp('','a'),strcmp('aeq',''),strcmp('aeaeq','');
select strcmp('a','ss'),strcmp('ssa',''),strcmp('sssb','sa'),strcmp('','s');
select strcmp('u','a'),strcmp('u','');
......@@ -254,7 +254,7 @@ longlong Item_func_strcmp::val_int()
null_value=1;
return 0;
}
int value=stringcmp(a,b);
int value= binary ? stringcmp(a,b) : sortcmp(a,b);
null_value=0;
return !value ? 0 : (value < 0 ? (longlong) -1 : (longlong) 1);
}
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment