Commit 296d4b2f authored by bar@mysql.com

Bug#15376: Unassigned multibyte codes are converted to U+0000

Merging changes into 5.0
parent 36e92839
...@@ -9819,3 +9819,9 @@ eucjpms_bin 6109 ...@@ -9819,3 +9819,9 @@ eucjpms_bin 6109
eucjpms_bin 61 eucjpms_bin 61
eucjpms_bin 6120 eucjpms_bin 6120
drop table t1; drop table t1;
select hex(convert(_eucjpms 0xA5FE41 using ucs2));
hex(convert(_eucjpms 0xA5FE41 using ucs2))
003F0041
select hex(convert(_eucjpms 0x8FABF841 using ucs2));
hex(convert(_eucjpms 0x8FABF841 using ucs2))
003F0041
...@@ -2307,6 +2307,12 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#'; ...@@ -2307,6 +2307,12 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
c2h c2h
ab_def ab_def
drop table t1; drop table t1;
select hex(convert(_ujis 0xA5FE41 using ucs2));
hex(convert(_ujis 0xA5FE41 using ucs2))
003F0041
select hex(convert(_ujis 0x8FABF841 using ucs2));
hex(convert(_ujis 0x8FABF841 using ucs2))
003F0041
DROP TABLE IF EXISTS t1, t2; DROP TABLE IF EXISTS t1, t2;
DROP PROCEDURE IF EXISTS sp1; DROP PROCEDURE IF EXISTS sp1;
set names ujis; set names ujis;
...@@ -2337,9 +2343,3 @@ DROP TABLE t2; ...@@ -2337,9 +2343,3 @@ DROP TABLE t2;
set names default; set names default;
set character_set_database=default; set character_set_database=default;
set character_set_server=default; set character_set_server=default;
select hex(convert(_ujis 0xA5FE41 using ucs2));
hex(convert(_ujis 0xA5FE41 using ucs2))
003F0041
select hex(convert(_ujis 0x8FABF841 using ucs2));
hex(convert(_ujis 0x8FABF841 using ucs2))
003F0041
...@@ -363,3 +363,20 @@ SET collation_connection='eucjpms_japanese_ci'; ...@@ -363,3 +363,20 @@ SET collation_connection='eucjpms_japanese_ci';
-- source include/ctype_filesort.inc -- source include/ctype_filesort.inc
SET collation_connection='eucjpms_bin'; SET collation_connection='eucjpms_bin';
-- source include/ctype_filesort.inc -- source include/ctype_filesort.inc
#
# Bug#15375: Unassigned multibyte codes are broken
# into parts when converting to Unicode.
# This query should return 0x003F0041. I.e. it should
# scan unassigned double-byte character 0xA5FE, convert
# it as QUESTION MARK 0x003F and then scan the next
# character, which is a single byte character 0x41.
#
select hex(convert(_eucjpms 0xA5FE41 using ucs2));
# This one should return 0x003F0041:
# scan unassigned three-byte character 0x8FABF8,
# convert it as QUESTION MARK 0x003F and then scan
# the next character, which is a single byte character 0x41.
select hex(convert(_eucjpms 0x8FABF841 using ucs2));
...@@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)), ...@@ -5355,7 +5355,7 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
int hi=s[0]; int hi=s[0];
if (s >= e) if (s >= e)
return MY_CS_TOOFEW(0); return MY_CS_TOOSMALL;
if (hi < 0x80) if (hi < 0x80)
{ {
...@@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)), ...@@ -5370,10 +5370,10 @@ my_mb_wc_cp932(CHARSET_INFO *cs __attribute__((unused)),
} }
if (s+2>e) if (s+2>e)
return MY_CS_TOOFEW(0); return MY_CS_TOOSMALL2;
if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1]))) if (!(pwc[0]=func_cp932_uni_onechar((hi<<8)+s[1])))
return MY_CS_ILSEQ; return -2;
return 2; return 2;
} }
......
...@@ -243,7 +243,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)), ...@@ -243,7 +243,7 @@ my_mb_wc_jisx0201(CHARSET_INFO *cs __attribute__((unused)),
const uchar *e __attribute__((unused))) const uchar *e __attribute__((unused)))
{ {
wc[0]=tab_jisx0201_uni[*s]; wc[0]=tab_jisx0201_uni[*s];
return (!wc[0] && s[0]) ? MY_CS_ILSEQ : 1; return (!wc[0] && s[0]) ? -1 : 1;
} }
...@@ -8473,7 +8473,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8473,7 +8473,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
int c1,c2,c3; int c1,c2,c3;
if (s >= e) if (s >= e)
return MY_CS_TOOFEW(0); return MY_CS_TOOSMALL;
c1=s[0]; c1=s[0];
...@@ -8485,7 +8485,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8485,7 +8485,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
} }
if (s+2>e) if (s+2>e)
return MY_CS_TOOFEW(0); return MY_CS_TOOSMALL2;
c2=s[1]; c2=s[1];
...@@ -8500,7 +8500,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8500,7 +8500,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
{ {
pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80)); pwc[0]=my_jisx0208_uni_onechar( ((c1-0x80) << 8) + (c2-0x80));
if (!pwc[0]) if (!pwc[0])
return MY_CS_ILSEQ; return -2;
} }
else else
{ {
...@@ -8520,7 +8520,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8520,7 +8520,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
ret = my_mb_wc_jisx0201(cs,pwc,s+1,e); ret = my_mb_wc_jisx0201(cs,pwc,s+1,e);
if (ret!=1) if (ret!=1)
return ret; return -2;
return 2; return 2;
} }
...@@ -8531,7 +8531,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8531,7 +8531,7 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
return MY_CS_ILSEQ; return MY_CS_ILSEQ;
if (s+3>e) if (s+3>e)
return MY_CS_TOOFEW(0); return MY_CS_TOOSMALL3;
c3=s[2]; c3=s[2];
if (c3 < 0xA1 || c3>=0xFF) if (c3 < 0xA1 || c3>=0xFF)
...@@ -8540,8 +8540,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e) ...@@ -8540,8 +8540,8 @@ my_mb_wc_euc_jp(CHARSET_INFO *cs,my_wc_t *pwc, const uchar *s, const uchar *e)
if (c2<0xF5) if (c2<0xF5)
{ {
pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80)); pwc[0]=my_jisx0212_uni_onechar((c2-0x80)*256 + (c3-0x80));
if (!pwc) if (!pwc[0])
return MY_CS_ILSEQ; return -3;
} }
else else
{ {
...@@ -8572,7 +8572,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) ...@@ -8572,7 +8572,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if ((jp=my_uni_jisx0208_onechar(wc))) if ((jp=my_uni_jisx0208_onechar(wc)))
{ {
if (s+2>e) if (s+2>e)
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL2;
jp+=0x8080; jp+=0x8080;
s[0]=jp>>8; s[0]=jp>>8;
...@@ -8584,7 +8584,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) ...@@ -8584,7 +8584,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (my_wc_mb_jisx0201(c,wc,s,e) == 1) if (my_wc_mb_jisx0201(c,wc,s,e) == 1)
{ {
if (s+2>e) if (s+2>e)
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL2;
s[1]= s[0]; s[1]= s[0];
s[0]= 0x8E; s[0]= 0x8E;
return 2; return 2;
...@@ -8594,7 +8594,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) ...@@ -8594,7 +8594,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if ((jp=my_uni_jisx0212_onechar(wc))) if ((jp=my_uni_jisx0212_onechar(wc)))
{ {
if (s+3>e) if (s+3>e)
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL3;
jp+=0x8080; jp+=0x8080;
s[0]=0x8F; s[0]=0x8F;
...@@ -8608,7 +8608,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) ...@@ -8608,7 +8608,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (wc>=0xE000 && wc<0xE3AC) if (wc>=0xE000 && wc<0xE3AC)
{ {
if (s+2>e) if (s+2>e)
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL2;
c1=((unsigned)(wc-0xE000)/94)+0xF5; c1=((unsigned)(wc-0xE000)/94)+0xF5;
s[0]=c1; s[0]=c1;
...@@ -8622,7 +8622,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e) ...@@ -8622,7 +8622,7 @@ my_wc_mb_euc_jp(CHARSET_INFO *c,my_wc_t wc, unsigned char *s, unsigned char *e)
if (wc>=0xE3AC && wc<0xE758) if (wc>=0xE3AC && wc<0xE758)
{ {
if (s+3>e) if (s+3>e)
return MY_CS_TOOSMALL; return MY_CS_TOOSMALL3;
s[0]=0x8F; s[0]=0x8F;
c1=((unsigned)(wc-0xE3AC)/94)+0xF5; c1=((unsigned)(wc-0xE3AC)/94)+0xF5;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment