Commit c6de4d1c authored by unknown's avatar unknown

Merge mysql.com:/users/lthalmann/bkroot/mysql-5.0-rpl

into  mysql.com:/users/lthalmann/bk/MERGE/mysql-5.0-merge


BitKeeper/etc/collapsed:
  auto-union
mysql-test/r/ctype_utf8.result:
  Auto merged
sql/field.cc:
  Auto merged
sql/sql_parse.cc:
  Auto merged
parents 024a1781 fff7e919
......@@ -15,3 +15,5 @@
45214442pBGT9KuZEGixBH71jTzbOA
45214a07hVsIGwvwa-WrO-jpeaSwVw
452a92d0-31-8wSzSfZi165fcGcXPA
452c6c6dAjuNghfc1ObZ_UQ5SCl85g
4538a7b0EbDHHkWPbIwxO6ZIDdg6Dg
......@@ -178,8 +178,8 @@ typedef struct my_charset_handler_st
unsigned char *s,unsigned char *e);
/* Functions for case and sort convertion */
void (*caseup_str)(struct charset_info_st *, char *);
void (*casedn_str)(struct charset_info_st *, char *);
uint (*caseup_str)(struct charset_info_st *, char *);
uint (*casedn_str)(struct charset_info_st *, char *);
uint (*caseup)(struct charset_info_st *, char *src, uint srclen,
char *dst, uint dstlen);
uint (*casedn)(struct charset_info_st *, char *src, uint srclen,
......@@ -311,8 +311,8 @@ extern uint my_instr_simple(struct charset_info_st *,
/* Functions for 8bit */
extern void my_caseup_str_8bit(CHARSET_INFO *, char *);
extern void my_casedn_str_8bit(CHARSET_INFO *, char *);
extern uint my_caseup_str_8bit(CHARSET_INFO *, char *);
extern uint my_casedn_str_8bit(CHARSET_INFO *, char *);
extern uint my_caseup_8bit(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen);
extern uint my_casedn_8bit(CHARSET_INFO *, char *src, uint srclen,
......@@ -399,8 +399,8 @@ int my_mbcharlen_8bit(CHARSET_INFO *, uint c);
/* Functions for multibyte charsets */
extern void my_caseup_str_mb(CHARSET_INFO *, char *);
extern void my_casedn_str_mb(CHARSET_INFO *, char *);
extern uint my_caseup_str_mb(CHARSET_INFO *, char *);
extern uint my_casedn_str_mb(CHARSET_INFO *, char *);
extern uint my_caseup_mb(CHARSET_INFO *, char *src, uint srclen,
char *dst, uint dstlen);
extern uint my_casedn_mb(CHARSET_INFO *, char *src, uint srclen,
......
......@@ -52,7 +52,7 @@ static int _mi_put_key_in_record(MI_INFO *info,uint keynr,byte *record);
uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
const byte *record, my_off_t filepos)
{
byte *pos,*end;
byte *pos;
uchar *start;
reg1 HA_KEYSEG *keyseg;
my_bool is_ft= info->s->keyinfo[keynr].flag & HA_FULLTEXT;
......@@ -107,18 +107,17 @@ uint _mi_make_key(register MI_INFO *info, uint keynr, uchar *key,
}
if (keyseg->flag & HA_SPACE_PACK)
{
end= pos + length;
if (type != HA_KEYTYPE_NUM)
{
while (end > pos && end[-1] == ' ')
end--;
length= cs->cset->lengthsp(cs, pos, length);
}
else
{
byte *end= pos + length;
while (pos < end && pos[0] == ' ')
pos++;
length=(uint) (end-pos);
}
length=(uint) (end-pos);
FIX_LENGTH(cs, pos, length, char_length);
store_key_length_inc(key,char_length);
memcpy((byte*) key,(byte*) pos,(size_t) char_length);
......@@ -403,8 +402,10 @@ static int _mi_put_key_in_record(register MI_INFO *info, uint keynr,
pos= record+keyseg->start;
if (keyseg->type != (int) HA_KEYTYPE_NUM)
{
memcpy(pos,key,(size_t) length);
bfill(pos+length,keyseg->length-length,' ');
memcpy(pos,key,(size_t) length);
keyseg->charset->cset->fill(keyseg->charset,
pos + length, keyseg->length - length,
' ');
}
else
{
......
......@@ -336,6 +336,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
goto err;
}
}
else if (pos->type == HA_KEYTYPE_BINARY)
pos->charset= &my_charset_bin;
}
if (share->keyinfo[i].flag & HA_SPATIAL)
{
......
......@@ -2,6 +2,10 @@
# Test for strict-mode autoincrement
#
--disable_warnings
drop table if exists t1;
--enable_warnings
set @org_mode=@@sql_mode;
eval create table t1
(
......
......@@ -171,8 +171,8 @@ create table t1 (a char(10) character set koi8r, b text character set koi8r);
insert into t1 values ('test','test');
insert into t1 values ('','');
Warnings:
Warning 1265 Data truncated for column 'a' at row 1
Warning 1265 Data truncated for column 'b' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xCA\xC3\xD5\xCB' for column 'b' at row 1
drop table t1;
set names koi8r;
create table t1 (a char(10) character set cp1251);
......
......@@ -723,6 +723,28 @@ lily
river
drop table t1;
deallocate prepare stmt;
create table t1 (
a char(10) unicode not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x201f, 10));
insert into t1 values (repeat(0x2020, 10));
insert into t1 values (repeat(0x2021, 10));
explain select hex(a) from t1 order by a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL a 20 NULL 3 Using index
select hex(a) from t1 order by a;
hex(a)
201F201F201F201F201F201F201F201F201F201F
2020202020202020202020202020202020202020
2021202120212021202120212021202120212021
alter table t1 drop index a;
select hex(a) from t1 order by a;
hex(a)
201F201F201F201F201F201F201F201F201F201F
2020202020202020202020202020202020202020
2021202120212021202120212021202120212021
drop table t1;
CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
SELECT id, MIN(s) FROM t1 GROUP BY id;
......
......@@ -197,7 +197,7 @@ drop table t1;
create table t1 (s1 char(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -205,7 +205,7 @@ drop table t1;
create table t1 (s1 varchar(10) character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......@@ -213,7 +213,7 @@ drop table t1;
create table t1 (s1 text character set utf8);
insert into t1 values (0x41FF);
Warnings:
Warning 1265 Data truncated for column 's1' at row 1
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
......
......@@ -372,10 +372,10 @@ t collation(t) FORMAT(MATCH t AGAINST ('Osnabruck'),6)
aus Osnabrck utf8_general_ci 1.591140
alter table t1 modify t varchar(200) collate latin1_german2_ci not null;
Warnings:
Warning 1265 Data truncated for column 't' at row 3
Warning 1265 Data truncated for column 't' at row 4
Warning 1265 Data truncated for column 't' at row 5
Warning 1265 Data truncated for column 't' at row 6
Warning 1366 Incorrect string value: '\xD0\xAD\xD1\x82\xD0\xBE...' for column 't' at row 3
Warning 1366 Incorrect string value: '\xD0\x9E\xD1\x82\xD0\xBB...' for column 't' at row 4
Warning 1366 Incorrect string value: '\xD0\x9D\xD0\xB5 \xD0...' for column 't' at row 5
Warning 1366 Incorrect string value: '\xD0\xB8 \xD0\xB1\xD1...' for column 't' at row 6
SELECT t, collation(t) FROM t1 WHERE MATCH t AGAINST ('Osnabrck');
t collation(t)
aus Osnabrck latin1_german2_ci
......
......@@ -84,3 +84,27 @@ create table t2 like T1;
drop table t1, t2;
show tables;
Tables_in_test
set names utf8;
drop table if exists İ,İİ;
create table İ (s1 int);
show create table İ;
Table Create Table
İ CREATE TABLE `i` (
`s1` int(11) default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
show tables;
Tables_in_test
i
drop table İ;
create table İİ (s1 int);
show create table İİ;
Table Create Table
İİ CREATE TABLE `ii` (
`s1` int(11) default NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
show tables;
Tables_in_test
ii
drop table İİ;
set names latin1;
End of 5.0 tests
......@@ -6,7 +6,7 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
create table t1 (a int not null, key(a)) engine=innodb;
create table t2 (a int not null, key(a)) engine=innodb;
create table t3 (a int) engine=innodb;
create table t3 (a int unique) engine=innodb;
create table t4 (a int) engine=innodb;
show variables like 'slave_transaction_retries';
Variable_name Value
......@@ -35,14 +35,14 @@ begin;
select * from t1 for update;
a
start slave;
insert into t2 values(22);
insert into t2 values(201);
commit;
select * from t1;
a
1
select * from t2;
a
22
201
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......@@ -50,7 +50,7 @@ Master_User root
Master_Port MASTER_MYPORT
Connect_Retry 1
Master_Log_File master-bin.000001
Read_Master_Log_Pos 18911
Read_Master_Log_Pos 18918
Relay_Log_File #
Relay_Log_Pos #
Relay_Master_Log_File master-bin.000001
......@@ -65,7 +65,7 @@ Replicate_Wild_Ignore_Table
Last_Errno 0
Last_Error
Skip_Counter 0
Exec_Master_Log_Pos 18911
Exec_Master_Log_Pos 18918
Relay_Log_Space #
Until_Condition None
Until_Log_File
......@@ -78,12 +78,16 @@ Master_SSL_Cipher
Master_SSL_Key
Seconds_Behind_Master #
stop slave;
change master to master_log_pos=532;
delete from t3;
change master to master_log_pos=539;
begin;
select * from t2 for update;
a
22
201
start slave;
select count(*) from t3 /* must be zero */;
count(*)
0
commit;
select * from t1;
a
......@@ -91,7 +95,7 @@ a
1
select * from t2;
a
22
201
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......@@ -99,7 +103,7 @@ Master_User root
Master_Port MASTER_MYPORT
Connect_Retry 1
Master_Log_File master-bin.000001
Read_Master_Log_Pos 18911
Read_Master_Log_Pos 18918
Relay_Log_File #
Relay_Log_Pos #
Relay_Master_Log_File master-bin.000001
......@@ -114,7 +118,7 @@ Replicate_Wild_Ignore_Table
Last_Errno 0
Last_Error
Skip_Counter 0
Exec_Master_Log_Pos 18911
Exec_Master_Log_Pos 18918
Relay_Log_Space #
Until_Condition None
Until_Log_File
......@@ -128,12 +132,16 @@ Master_SSL_Key
Seconds_Behind_Master #
set global max_relay_log_size=0;
stop slave;
change master to master_log_pos=532;
delete from t3;
change master to master_log_pos=539;
begin;
select * from t2 for update;
a
22
201
start slave;
select count(*) from t3 /* must be zero */;
count(*)
0
commit;
select * from t1;
a
......@@ -142,7 +150,7 @@ a
1
select * from t2;
a
22
201
show slave status;
Slave_IO_State #
Master_Host 127.0.0.1
......@@ -150,7 +158,7 @@ Master_User root
Master_Port MASTER_MYPORT
Connect_Retry 1
Master_Log_File master-bin.000001
Read_Master_Log_Pos 18911
Read_Master_Log_Pos 18918
Relay_Log_File #
Relay_Log_Pos #
Relay_Master_Log_File master-bin.000001
......@@ -165,7 +173,7 @@ Replicate_Wild_Ignore_Table
Last_Errno 0
Last_Error
Skip_Counter 0
Exec_Master_Log_Pos 18911
Exec_Master_Log_Pos 18918
Relay_Log_Space #
Until_Condition None
Until_Log_File
......
......@@ -14,3 +14,19 @@ SELECT * FROM t4;
a
DROP TABLE t1;
DROP TABLE t4;
DROP TABLE IF EXISTS t5;
CREATE TABLE t5 (
word varchar(50) collate utf8_unicode_ci NOT NULL default ''
) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
SET @@session.character_set_client=33,@@session.collation_connection=192;
CREATE TEMPORARY TABLE tmptbl504451f4258$1 (id INT NOT NULL) ENGINE=MEMORY;
INSERT INTO t5 (word) VALUES ('TEST’');
SELECT HEX(word) FROM t5;
HEX(word)
54455354E28099
SELECT HEX(word) FROM t5;
HEX(word)
54455354E28099
SELECT * FROM tmptbl504451f4258$1;
ERROR 42S02: Table 'test.tmptbl504451f4258$1' doesn't exist
DROP TABLE t5;
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
drop table if exists t1;
set @org_mode=@@sql_mode;
create table t1
(
......
......@@ -454,6 +454,23 @@ select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
#
create table t1 (
a char(10) unicode not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x201f, 10));
insert into t1 values (repeat(0x2020, 10));
insert into t1 values (repeat(0x2021, 10));
# make sure "index read" is used
explain select hex(a) from t1 order by a;
select hex(a) from t1 order by a;
alter table t1 drop index a;
select hex(a) from t1 order by a;
drop table t1;
#
# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
# over a 'ucs2' field uses a temporary table
......
......@@ -85,3 +85,23 @@ drop table t1, t2;
show tables;
# End of 4.1 tests
#
# Bug#20404: SHOW CREATE TABLE fails with Turkish I
#
set names utf8;
--disable_warnings
drop table if exists İ,İİ;
--enable_warnings
create table İ (s1 int);
show create table İ;
show tables;
drop table İ;
create table İİ (s1 int);
show create table İİ;
show tables;
drop table İİ;
set names latin1;
--echo End of 5.0 tests
......@@ -16,7 +16,8 @@ source include/master-slave.inc;
connection master;
create table t1 (a int not null, key(a)) engine=innodb;
create table t2 (a int not null, key(a)) engine=innodb;
create table t3 (a int) engine=innodb;
# requiring 'unique' for the timeout part of the test
create table t3 (a int unique) engine=innodb;
create table t4 (a int) engine=innodb;
show variables like 'slave_transaction_retries';
sync_slave_with_master;
......@@ -31,8 +32,7 @@ stop slave;
connection master;
begin;
# Let's keep BEGIN and the locked statement in two different relay logs.
let $1=200;
disable_query_log;
let $1=200;disable_query_log;
while ($1)
{
eval insert into t3 values( $1 );
......@@ -59,7 +59,7 @@ enable_query_log;
select * from t1 for update;
start slave;
--real_sleep 3 # hope that slave is blocked now
insert into t2 values(22); # provoke deadlock, slave should be victim
insert into t2 values(201); # provoke deadlock, slave should be victim
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
......@@ -74,11 +74,13 @@ show slave status;
# 2) Test lock wait timeout
stop slave;
change master to master_log_pos=532; # the BEGIN log event
delete from t3;
change master to master_log_pos=539; # the BEGIN log event
begin;
select * from t2 for update; # hold lock
start slave;
--real_sleep 10 # slave should have blocked, and be retrying
select count(*) from t3 /* must be zero */; # replaying begins after rollback
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
......@@ -97,11 +99,13 @@ set global max_relay_log_size=0;
# This is really copy-paste of 2) of above
stop slave;
change master to master_log_pos=532;
delete from t3;
change master to master_log_pos=539;
begin;
select * from t2 for update;
start slave;
--real_sleep 10
select count(*) from t3 /* must be zero */; # replaying begins after rollback
commit;
sync_with_master;
select * from t1;
......
--replicate-ignore-table=test.t1 --replicate-ignore-table=test.t2 --replicate-ignore-table=test.t3
--replicate-ignore-table=test.t1 --replicate-ignore-table=test.t2 --replicate-ignore-table=test.t3 --replicate-wild-ignore-table=%.tmptbl%
......@@ -26,3 +26,26 @@ SELECT * FROM t4;
connection master;
DROP TABLE t1;
DROP TABLE t4;
#
# bug#22877 replication character sets get out of sync
# using replicate-wild-ignore-table
#
--disable_warnings
DROP TABLE IF EXISTS t5;
--enable_warnings
CREATE TABLE t5 (
word varchar(50) collate utf8_unicode_ci NOT NULL default ''
) DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
SET @@session.character_set_client=33,@@session.collation_connection=192;
CREATE TEMPORARY TABLE tmptbl504451f4258$1 (id INT NOT NULL) ENGINE=MEMORY;
INSERT INTO t5 (word) VALUES ('TEST’');
SELECT HEX(word) FROM t5;
sync_slave_with_master;
connection slave;
SELECT HEX(word) FROM t5;
--error 1146
SELECT * FROM tmptbl504451f4258$1;
connection master;
DROP TABLE t5;
This diff is collapsed.
......@@ -3345,9 +3345,9 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
const char *errmsg;
/*
We were in a transaction which has been rolled back because of a
deadlock (currently, InnoDB deadlock detected by InnoDB) or lock
wait timeout (innodb_lock_wait_timeout exceeded); let's seek back to
BEGIN log event and retry it all again.
Sonera deadlock. if lock wait timeout (innodb_lock_wait_timeout exceeded)
there is no rollback since 5.0.13 (ref: manual).
let's seek back to BEGIN log event and retry it all again.
We have to not only seek but also
a) init_master_info(), to seek back to hot relay log's start for later
(for when we will come back to this hot log after re-processing the
......@@ -3369,6 +3369,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
else
{
exec_res= 0;
end_trans(thd, ROLLBACK);
/* chance for concurrent connection to get more locks */
safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
(CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
......@@ -3386,9 +3387,17 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
"the slave_transaction_retries variable.",
slave_trans_retries);
}
if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
rli->trans_retries= 0; // restart from fresh
}
else if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
{
/*
Only reset the retry counter if the event succeeded or
failed with a non-transient error. On a successful event,
the execution will proceed as usual; in the case of a
non-transient error, the slave will stop with an error.
*/
rli->trans_retries= 0; // restart from fresh
}
}
return exec_res;
}
else
......
......@@ -2503,7 +2503,23 @@ mysql_execute_command(THD *thd)
{
/* we warn the slave SQL thread */
my_message(ER_SLAVE_IGNORED_TABLE, ER(ER_SLAVE_IGNORED_TABLE), MYF(0));
reset_one_shot_variables(thd);
if (thd->one_shot_set)
{
/*
It's ok to check thd->one_shot_set here:
The charsets in a MySQL 5.0 slave can change by both a binlogged
SET ONE_SHOT statement and the event-internal charset setting,
and these two ways to change charsets do not seems to work
together.
At least there seems to be problems in the rli cache for
charsets if we are using ONE_SHOT. Note that this is normally no
problem because either the >= 5.0 slave reads a 4.1 binlog (with
ONE_SHOT) *or* or 5.0 binlog (without ONE_SHOT) but never both."
*/
reset_one_shot_variables(thd);
}
DBUG_RETURN(0);
}
}
......@@ -6121,7 +6137,7 @@ TABLE_LIST *st_select_lex::add_table_to_list(THD *thd,
ptr->alias= alias_str;
if (lower_case_table_names && table->table.length)
my_casedn_str(files_charset_info, table->table.str);
table->table.length= my_casedn_str(files_charset_info, table->table.str);
ptr->table_name=table->table.str;
ptr->table_name_length=table->table.length;
ptr->lock_type= lock_type;
......
......@@ -854,6 +854,162 @@ copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
}
/*
copy a string,
with optional character set conversion,
with optional left padding (for binary -> UCS2 conversion)
SYNOPSIS
well_formed_copy_nhars()
to Store result here
to_length Maxinum length of "to" string
to_cs Character set of "to" string
from Copy from here
from_length Length of from string
from_cs From character set
nchars Copy not more that nchars characters
well_formed_error_pos Return position when "from" is not well formed
or NULL otherwise.
cannot_convert_error_pos Return position where a not convertable
character met, or NULL otherwise.
from_end_pos Return position where scanning of "from"
string stopped.
NOTES
RETURN
length of bytes copied to 'to'
*/
uint32
well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos)
{
uint res;
if ((to_cs == &my_charset_bin) ||
(from_cs == &my_charset_bin) ||
(to_cs == from_cs) ||
my_charset_same(from_cs, to_cs))
{
if (to_length < to_cs->mbminlen || !nchars)
{
*from_end_pos= from;
*cannot_convert_error_pos= NULL;
*well_formed_error_pos= NULL;
return 0;
}
if (to_cs == &my_charset_bin)
{
res= min(min(nchars, to_length), from_length);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
}
else
{
int well_formed_error;
uint from_offset;
if ((from_offset= (from_length % to_cs->mbminlen)) &&
(from_cs == &my_charset_bin))
{
/*
Copying from BINARY to UCS2 needs to prepend zeros sometimes:
INSERT INTO t1 (ucs2_column) VALUES (0x01);
0x01 -> 0x0001
*/
uint pad_length= to_cs->mbminlen - from_offset;
bzero(to, pad_length);
memmove(to + pad_length, from, from_offset);
nchars--;
from+= from_offset;
from_length-= from_offset;
to+= to_cs->mbminlen;
to_length-= to_cs->mbminlen;
}
set_if_smaller(from_length, to_length);
res= to_cs->cset->well_formed_len(to_cs, from, from + from_length,
nchars, &well_formed_error);
memmove(to, from, res);
*from_end_pos= from + res;
*well_formed_error_pos= well_formed_error ? from + res : NULL;
*cannot_convert_error_pos= NULL;
if (from_offset)
res+= to_cs->mbminlen;
}
}
else
{
int cnvres;
my_wc_t wc;
int (*mb_wc)(struct charset_info_st *, my_wc_t *,
const uchar *, const uchar *)= from_cs->cset->mb_wc;
int (*wc_mb)(struct charset_info_st *, my_wc_t,
uchar *s, uchar *e)= to_cs->cset->wc_mb;
const uchar *from_end= (const uchar*) from + from_length;
uchar *to_end= (uchar*) to + to_length;
char *to_start= to;
*well_formed_error_pos= NULL;
*cannot_convert_error_pos= NULL;
for ( ; nchars; nchars--)
{
const char *from_prev= from;
if ((cnvres= (*mb_wc)(from_cs, &wc, (uchar*) from, from_end)) > 0)
from+= cnvres;
else if (cnvres == MY_CS_ILSEQ)
{
if (!*well_formed_error_pos)
*well_formed_error_pos= from;
from++;
wc= '?';
}
else if (cnvres > MY_CS_TOOSMALL)
{
/*
A correct multibyte sequence detected
But it doesn't have Unicode mapping.
*/
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from;
from+= (-cnvres);
wc= '?';
}
else
break; // Not enough characters
outp:
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
to+= cnvres;
else if (cnvres == MY_CS_ILUNI && wc != '?')
{
if (!*cannot_convert_error_pos)
*cannot_convert_error_pos= from_prev;
wc= '?';
goto outp;
}
else
break;
}
*from_end_pos= from;
res= to - to_start;
}
return (uint32) res;
}
void String::print(String *str)
{
char *st= (char*)Ptr, *end= st+str_length;
......
......@@ -32,6 +32,14 @@ String *copy_if_not_alloced(String *a,String *b,uint32 arg_length);
uint32 copy_and_convert(char *to, uint32 to_length, CHARSET_INFO *to_cs,
const char *from, uint32 from_length,
CHARSET_INFO *from_cs, uint *errors);
uint32 well_formed_copy_nchars(CHARSET_INFO *to_cs,
char *to, uint to_length,
CHARSET_INFO *from_cs,
const char *from, uint from_length,
uint nchars,
const char **well_formed_error_pos,
const char **cannot_convert_error_pos,
const char **from_end_pos);
class String
{
......
......@@ -211,9 +211,10 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
/* This function is used for all conversion functions */
static void my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
static uint my_case_str_bin(CHARSET_INFO *cs __attribute__((unused)),
char *str __attribute__((unused)))
{
return 0;
}
static uint my_case_bin(CHARSET_INFO *cs __attribute__((unused)),
......
......@@ -21,40 +21,44 @@
#ifdef USE_MB
void my_caseup_str_mb(CHARSET_INFO * cs, char *str)
uint my_caseup_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
register uchar *map=cs->to_upper;
register uchar *map= cs->to_upper;
char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+=l;
if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+= l;
else
{
*str=(char) map[(uchar)*str];
*str= (char) map[(uchar)*str];
str++;
}
}
return str - str_orig;
}
void my_casedn_str_mb(CHARSET_INFO * cs, char *str)
uint my_casedn_str_mb(CHARSET_INFO * cs, char *str)
{
register uint32 l;
register uchar *map=cs->to_lower;
register uchar *map= cs->to_lower;
char *str_orig= str;
while (*str)
{
/* Pointing after the '\0' is safe here. */
if ((l=my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+=l;
if ((l= my_ismbchar(cs, str, str + cs->mbmaxlen)))
str+= l;
else
{
*str=(char) map[(uchar)*str];
*str= (char) map[(uchar)*str];
str++;
}
}
return str - str_orig;
}
uint my_caseup_mb(CHARSET_INFO * cs, char *src, uint srclen,
......
......@@ -188,20 +188,26 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, uint a_length,
}
void my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
uint my_caseup_str_8bit(CHARSET_INFO * cs,char *str)
{
register uchar *map=cs->to_upper;
while ((*str = (char) map[(uchar) *str]) != 0)
register uchar *map= cs->to_upper;
char *str_orig= str;
while ((*str= (char) map[(uchar) *str]) != 0)
str++;
return str - str_orig;
}
void my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
uint my_casedn_str_8bit(CHARSET_INFO * cs,char *str)
{
register uchar *map=cs->to_lower;
while ((*str = (char) map[(uchar)*str]) != 0)
register uchar *map= cs->to_lower;
char *str_orig= str;
while ((*str= (char) map[(uchar) *str]) != 0)
str++;
return str - str_orig;
}
uint my_caseup_8bit(CHARSET_INFO * cs, char *src, uint srclen,
char *dst __attribute__((unused)),
uint dstlen __attribute__((unused)))
......
......@@ -159,13 +159,13 @@ static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, uint slen,
}
static void my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
static uint my_caseup_str_ucs2(CHARSET_INFO * cs __attribute__((unused)),
char * s __attribute__((unused)))
{
return 0;
}
static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
char *dst __attribute__((unused)),
uint dstlen __attribute__((unused)))
......@@ -188,9 +188,11 @@ static uint my_casedn_ucs2(CHARSET_INFO *cs, char *src, uint srclen,
return srclen;
}
static void my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
static uint my_casedn_str_ucs2(CHARSET_INFO *cs __attribute__((unused)),
char * s __attribute__((unused)))
{
return 0;
}
......
......@@ -2045,6 +2045,52 @@ static int my_utf8_uni(CHARSET_INFO *cs __attribute__((unused)),
return MY_CS_ILSEQ;
}
/*
The same as above, but without range check
for example, for a null-terminated string
*/
static int my_utf8_uni_no_range(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s)
{
unsigned char c;
c= s[0];
if (c < 0x80)
{
*pwc = c;
return 1;
}
if (c < 0xc2)
return MY_CS_ILSEQ;
if (c < 0xe0)
{
if (!((s[1] ^ 0x80) < 0x40))
return MY_CS_ILSEQ;
*pwc = ((my_wc_t) (c & 0x1f) << 6) | (my_wc_t) (s[1] ^ 0x80);
return 2;
}
if (c < 0xf0)
{
if (!((s[1] ^ 0x80) < 0x40 &&
(s[2] ^ 0x80) < 0x40 &&
(c >= 0xe1 || s[1] >= 0xa0)))
return MY_CS_ILSEQ;
*pwc= ((my_wc_t) (c & 0x0f) << 12) |
((my_wc_t) (s[1] ^ 0x80) << 6) |
(my_wc_t) (s[2] ^ 0x80);
return 3;
}
return MY_CS_ILSEQ;
}
static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
my_wc_t wc, uchar *r, uchar *e)
{
......@@ -2091,6 +2137,34 @@ static int my_uni_utf8 (CHARSET_INFO *cs __attribute__((unused)) ,
}
/*
The same as above, but without range check.
*/
static int my_uni_utf8_no_range(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t wc, uchar *r)
{
int count;
if (wc < 0x80)
count= 1;
else if (wc < 0x800)
count= 2;
else if (wc < 0x10000)
count= 3;
else
return MY_CS_ILUNI;
switch (count)
{
/* Fall through all cases!!! */
case 3: r[2]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0x800;
case 2: r[1]= (uchar) (0x80 | (wc & 0x3f)); wc= wc >> 6; wc |= 0xc0;
case 1: r[0]= (uchar) wc;
}
return count;
}
static uint my_caseup_utf8(CHARSET_INFO *cs, char *src, uint srclen,
char *dst, uint dstlen)
{
......@@ -2141,10 +2215,26 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, uint slen,
}
static void my_caseup_str_utf8(CHARSET_INFO * cs, char * s)
static uint my_caseup_str_utf8(CHARSET_INFO *cs, char *src)
{
uint len= (uint) strlen(s);
my_caseup_utf8(cs, s, len, s, len);
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->caseup_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].toupper : wc;
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
dst+= dstres;
}
*dst= '\0';
return (uint) (dst - dst0);
}
......@@ -2170,10 +2260,43 @@ static uint my_casedn_utf8(CHARSET_INFO *cs, char *src, uint srclen,
return (uint) (dst - dst0);
}
static void my_casedn_str_utf8(CHARSET_INFO *cs, char * s)
static uint my_casedn_str_utf8(CHARSET_INFO *cs, char *src)
{
uint len= (uint) strlen(s);
my_casedn_utf8(cs, s, len, s, len);
my_wc_t wc;
int srcres, dstres;
char *dst= src, *dst0= src;
MY_UNICASE_INFO **uni_plane= cs->caseinfo;
DBUG_ASSERT(cs->casedn_multiply == 1);
while (*src &&
(srcres= my_utf8_uni_no_range(cs, &wc, (uchar *) src)) > 0)
{
int plane= (wc>>8) & 0xFF;
wc= uni_plane[plane] ? uni_plane[plane][wc & 0xFF].tolower : wc;
if ((dstres= my_uni_utf8_no_range(cs, wc, (uchar*) dst)) <= 0)
break;
src+= srcres;
dst+= dstres;
}
/*
In rare cases lower string can be shorter than
the original string, for example:
"U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE"
(which is 0xC4B0 in utf8, i.e. two bytes)
is converted into
"U+0069 LATIN SMALL LETTER I"
(which is 0x69 in utf8, i.e. one byte)
So, we need to put '\0' terminator after converting.
*/
*dst= '\0';
return (uint) (dst - dst0);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment