Commit 129415d0 authored by Alexander Nozdrin

Bug#55980 Character sets: supplementary character _bin ordering is wrong

Post-fix:
  Reverting the "utf16_bin did not sort supplementary characters
  between U+D7FF and U+E000" part. We'll use code-point order.

Committing on behalf of Alexander Barkov.
parent 1e2e33b6
...@@ -625,16 +625,16 @@ INSERT INTO t1 VALUES (_utf8mb4 0xCE85),(_utf8mb4 0xF4808080); ...@@ -625,16 +625,16 @@ INSERT INTO t1 VALUES (_utf8mb4 0xCE85),(_utf8mb4 0xF4808080);
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
HEX(a) HEX(CONVERT(a USING utf8mb4)) HEX(a) HEX(CONVERT(a USING utf8mb4))
0385 CE85 0385 CE85
FF9D EFBE9D
D800DF84 F0908E84 D800DF84 F0908E84
DBC0DC00 F4808080 DBC0DC00 F4808080
FF9D EFBE9D
ALTER TABLE t1 ADD KEY(a); ALTER TABLE t1 ADD KEY(a);
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
HEX(a) HEX(CONVERT(a USING utf8mb4)) HEX(a) HEX(CONVERT(a USING utf8mb4))
0385 CE85 0385 CE85
FF9D EFBE9D
D800DF84 F0908E84 D800DF84 F0908E84
DBC0DC00 F4808080 DBC0DC00 F4808080
FF9D EFBE9D
DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t1;
select @@collation_connection; select @@collation_connection;
@@collation_connection @@collation_connection
......
...@@ -1463,7 +1463,7 @@ my_strnncoll_utf16_bin(CHARSET_INFO *cs, ...@@ -1463,7 +1463,7 @@ my_strnncoll_utf16_bin(CHARSET_INFO *cs,
} }
if (s_wc != t_wc) if (s_wc != t_wc)
{ {
return my_bincmp(s, s + s_res, t, t + t_res); return s_wc > t_wc ? 1 : -1;
} }
s+= s_res; s+= s_res;
...@@ -1503,7 +1503,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs, ...@@ -1503,7 +1503,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
if (s_wc != t_wc) if (s_wc != t_wc)
{ {
return my_bincmp(s, s + s_res, t, t + t_res); return s_wc > t_wc ? 1 : -1;
} }
s+= s_res; s+= s_res;
......
...@@ -1967,17 +1967,6 @@ my_strnxfrm_unicode_full_bin(CHARSET_INFO *cs, ...@@ -1967,17 +1967,6 @@ my_strnxfrm_unicode_full_bin(CHARSET_INFO *cs,
if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0) if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
break; break;
src+= res; src+= res;
if (cs->mbminlen == 2) /* utf16_bin */
{
/*
Reorder code points to weights as follows:
U+0000..U+D7FF -> [00][00][00]..[00][D7][FF] BMP part #1
U+10000..U+10FFFF -> [01][00][00]..[10][FF][FF] Supplementary
U+E000..U+FFFF -> [20][E0][00]..[20][FF][FF] BMP part #2
*/
if (wc >= 0xE000 && wc <= 0xFFFF)
wc+= 0x200000;
}
*dst++= (uchar) (wc >> 16); *dst++= (uchar) (wc >> 16);
*dst++= (uchar) ((wc >> 8) & 0xFF); *dst++= (uchar) ((wc >> 8) & 0xFF);
*dst++= (uchar) (wc & 0xFF); *dst++= (uchar) (wc & 0xFF);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment