Commit f6280353 authored by Alexander Barkov's avatar Alexander Barkov

Bsckporting WL#3764 Sinhala Collation

modified:
  mysql-test/r/ctype_utf8.result
  mysql-test/t/ctype_utf8.test
  mysys/charset-def.c
  strings/ctype-uca.c
parent 27936529
......@@ -1880,3 +1880,115 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
70000 1092
DROP TABLE t1;
End of 5.0 tests
Start of 5.4 tests
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
predicted_order int NOT NULL,
utf8_encoding VARCHAR(10) NOT NULL
) CHARACTER SET utf8;
INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE utf8_sinhala_ci;
predicted_order hex(utf8_encoding)
1 E0B7B4
2 E0B685
3 E0B686
4 E0B687
5 E0B688
6 E0B689
7 E0B68A
8 E0B68B
9 E0B68C
10 E0B68D
11 E0B68E
12 E0B68F
13 E0B690
14 E0B691
15 E0B692
16 E0B693
17 E0B694
18 E0B695
19 E0B696
20 E0B682
21 E0B683
22 E0B69A
23 E0B69AE0B78F
24 E0B69AE0B790
25 E0B69AE0B791
26 E0B69AE0B792
27 E0B69AE0B793
28 E0B69AE0B794
29 E0B69AE0B796
30 E0B69AE0B798
31 E0B69AE0B7B2
32 E0B69AE0B79F
33 E0B69AE0B7B3
34 E0B69AE0B799
35 E0B69AE0B79A
36 E0B69AE0B79B
37 E0B69AE0B79C
38 E0B69AE0B79D
39 E0B69AE0B79E
40 E0B69AE0B78A
41 E0B69B
42 E0B69C
43 E0B69D
44 E0B69E
45 E0B69F
46 E0B6A0
47 E0B6A1
48 E0B6A2
49 E0B6A3
50 E0B6A5
51 E0B6A4
52 E0B6A6
53 E0B6A7
54 E0B6A8
55 E0B6A9
56 E0B6AA
57 E0B6AB
58 E0B6AC
59 E0B6AD
60 E0B6AE
61 E0B6AF
62 E0B6B0
63 E0B6B1
64 E0B6B3
65 E0B6B4
66 E0B6B5
67 E0B6B6
68 E0B6B7
69 E0B6B8
70 E0B6B9
71 E0B6BA
72 E0B6BB
73 E0B6BBE0B78AE2808D
74 E0B6BD
75 E0B780
76 E0B781
77 E0B782
78 E0B783
79 E0B784
80 E0B785
81 E0B786
82 E0B78F
83 E0B790
84 E0B791
85 E0B792
86 E0B793
87 E0B794
88 E0B796
89 E0B798
90 E0B7B2
91 E0B79F
92 E0B7B3
93 E0B799
94 E0B79A
95 E0B79B
96 E0B79C
97 E0B79D
98 E0B79E
99 E0B78A
100 E0B78AE2808DE0B6BA
101 E0B78AE2808DE0B6BB
DROP TABLE t1;
End of 5.4 tests
......@@ -1456,3 +1456,22 @@ SELECT CONVERT(a, CHAR), CONVERT(b, CHAR) from t1 GROUP BY b;
DROP TABLE t1;
--echo End of 5.0 tests
--echo Start of 5.4 tests
#
# Bug#26474: Add Sinhala script (Sri Lanka) collation to MySQL
#
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (
predicted_order int NOT NULL,
utf8_encoding VARCHAR(10) NOT NULL
) CHARACTER SET utf8;
INSERT INTO t1 VALUES (19, x'E0B696'), (30, x'E0B69AE0B798'), (61, x'E0B6AF'), (93, x'E0B799'), (52, x'E0B6A6'), (73, x'E0B6BBE0B78AE2808D'), (3, x'E0B686'), (56, x'E0B6AA'), (55, x'E0B6A9'), (70, x'E0B6B9'), (94, x'E0B79A'), (80, x'E0B785'), (25, x'E0B69AE0B791'), (48, x'E0B6A2'), (13, x'E0B690'), (86, x'E0B793'), (91, x'E0B79F'), (81, x'E0B786'), (79, x'E0B784'), (14, x'E0B691'), (99, x'E0B78A'), (8, x'E0B68B'), (68, x'E0B6B7'), (22, x'E0B69A'), (16, x'E0B693'), (33, x'E0B69AE0B7B3'), (38, x'E0B69AE0B79D'), (21, x'E0B683'), (11, x'E0B68E'), (77, x'E0B782'), (40, x'E0B69AE0B78A'), (101, x'E0B78AE2808DE0B6BB'), (35, x'E0B69AE0B79A'), (1, x'E0B7B4'), (9, x'E0B68C'), (96, x'E0B79C'), (6, x'E0B689'), (95, x'E0B79B'), (88, x'E0B796'), (64, x'E0B6B3'), (26, x'E0B69AE0B792'), (82, x'E0B78F'), (28, x'E0B69AE0B794'), (39, x'E0B69AE0B79E'), (97, x'E0B79D'), (2, x'E0B685'), (75, x'E0B780'), (34, x'E0B69AE0B799'), (69, x'E0B6B8'), (83, x'E0B790'), (18, x'E0B695'), (90, x'E0B7B2'), (17, x'E0B694'), (72, x'E0B6BB'), (66, x'E0B6B5'), (59, x'E0B6AD'), (44, x'E0B69E'), (15, x'E0B692'), (23, x'E0B69AE0B78F'), (65, x'E0B6B4'), (42, x'E0B69C'), (63, x'E0B6B1'), (85, x'E0B792'), (47, x'E0B6A1'), (49, x'E0B6A3'), (92, x'E0B7B3'), (78, x'E0B783'), (36, x'E0B69AE0B79B'), (4, x'E0B687'), (24, x'E0B69AE0B790'), (87, x'E0B794'), (37, x'E0B69AE0B79C'), (32, x'E0B69AE0B79F'), (29, x'E0B69AE0B796'), (43, x'E0B69D'), (62, x'E0B6B0'), (100, x'E0B78AE2808DE0B6BA'), (60, x'E0B6AE'), (45, x'E0B69F'), (12, x'E0B68F'), (46, x'E0B6A0'), (50, x'E0B6A5'), (51, x'E0B6A4'), (5, x'E0B688'), (76, x'E0B781'), (89, x'E0B798'), (74, x'E0B6BD'), (10, x'E0B68D'), (57, x'E0B6AB'), (71, x'E0B6BA'), (58, x'E0B6AC'), (27, x'E0B69AE0B793'), (54, x'E0B6A8'), (84, x'E0B791'), (31, x'E0B69AE0B7B2'), (98, x'E0B79E'), (53, x'E0B6A7'), (41, x'E0B69B'), (67, x'E0B6B6'), (7, x'E0B68A'), (20, x'E0B682');
SELECT predicted_order, hex(utf8_encoding) FROM t1 ORDER BY utf8_encoding COLLATE utf8_sinhala_ci;
DROP TABLE t1;
--echo End of 5.4 tests
......@@ -42,6 +42,7 @@ extern CHARSET_INFO my_charset_ucs2_roman_uca_ci;
extern CHARSET_INFO my_charset_ucs2_persian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_esperanto_uca_ci;
extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
#endif
#ifdef HAVE_CHARSET_utf8
......@@ -63,6 +64,7 @@ extern CHARSET_INFO my_charset_utf8_roman_uca_ci;
extern CHARSET_INFO my_charset_utf8_persian_uca_ci;
extern CHARSET_INFO my_charset_utf8_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf8_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf8_sinhala_uca_ci;
#ifdef HAVE_UTF8_GENERAL_CS
extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
......@@ -152,6 +154,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_ucs2_persian_uca_ci);
add_compiled_collation(&my_charset_ucs2_esperanto_uca_ci);
add_compiled_collation(&my_charset_ucs2_hungarian_uca_ci);
add_compiled_collation(&my_charset_ucs2_sinhala_uca_ci);
#endif
#endif
......@@ -186,6 +189,7 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8_persian_uca_ci);
add_compiled_collation(&my_charset_utf8_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
#endif
#endif
......
......@@ -6712,6 +6712,34 @@ static const char hungarian[]=
"&O < \\u00F6 <<< \\u00D6 << \\u0151 <<< \\u0150"
"&U < \\u00FC <<< \\u00DC << \\u0171 <<< \\u0170";
/*
SCCII Part 1 : Collation Sequence (SLS1134)
2006/11/24
Harshula Jayasuriya <harshula at gmail dot com>
Language Technology Research Lab, University of Colombo / ICTA
*/
#if 0
static const char sinhala[]=
"& \\u0D96 < \\u0D82 < \\u0D83"
"& \\u0DA5 < \\u0DA4"
"& \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3"
"& \\u0DDE < \\u0DCA";
#else
static const char sinhala[]=
"& \\u0D96 < \\u0D82 < \\u0D83 < \\u0D9A < \\u0D9B < \\u0D9C < \\u0D9D"
"< \\u0D9E < \\u0D9F < \\u0DA0 < \\u0DA1 < \\u0DA2 < \\u0DA3"
"< \\u0DA5 < \\u0DA4 < \\u0DA6"
"< \\u0DA7 < \\u0DA8 < \\u0DA9 < \\u0DAA < \\u0DAB < \\u0DAC"
"< \\u0DAD < \\u0DAE < \\u0DAF < \\u0DB0 < \\u0DB1"
"< \\u0DB3 < \\u0DB4 < \\u0DB5 < \\u0DB6 < \\u0DB7 < \\u0DB8"
"< \\u0DB9 < \\u0DBA < \\u0DBB < \\u0DBD < \\u0DC0 < \\u0DC1"
"< \\u0DC2 < \\u0DC3 < \\u0DC4 < \\u0DC5 < \\u0DC6"
"< \\u0DCF"
"< \\u0DD0 < \\u0DD1 < \\u0DD2 < \\u0DD3 < \\u0DD4 < \\u0DD6"
"< \\u0DD8 < \\u0DF2 < \\u0DDF < \\u0DF3 < \\u0DD9 < \\u0DDA"
"< \\u0DDB < \\u0DDC < \\u0DDD < \\u0DDE < \\u0DCA";
#endif
/*
Unicode Collation Algorithm:
......@@ -8698,6 +8726,39 @@ CHARSET_INFO my_charset_ucs2_hungarian_uca_ci=
};
CHARSET_INFO my_charset_ucs2_sinhala_uca_ci=
{
147,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"ucs2", /* csname */
"ucs2_sinhala_ci", /* name */
"", /* comment */
sinhala, /* tailoring */
NULL, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
2, /* mbminlen */
2, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_ucs2_handler,
&my_collation_ucs2_uca_handler
};
#endif
......@@ -9355,6 +9416,38 @@ CHARSET_INFO my_charset_utf8_hungarian_uca_ci=
&my_collation_any_uca_handler
};
CHARSET_INFO my_charset_utf8_sinhala_uca_ci=
{
211,0,0, /* number */
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE,
"utf8", /* cs name */
"utf8_sinhala_ci", /* name */
"", /* comment */
sinhala, /* tailoring */
ctype_utf8, /* ctype */
NULL, /* to_lower */
NULL, /* to_upper */
NULL, /* sort_order */
NULL, /* contractions */
NULL, /* sort_order_big*/
NULL, /* tab_to_uni */
NULL, /* tab_from_uni */
my_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
8, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
3, /* mbminlen */
3, /* mbmaxlen */
9, /* min_sort_char */
0xFFFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
&my_charset_utf8_handler,
&my_collation_any_uca_handler
};
#endif /* HAVE_CHARSET_utf8 */
#endif /* HAVE_UCA_COLLATIONS */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment