Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
95d07ee4
Commit
95d07ee4
authored
Jul 03, 2015
by
Alexander Barkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
parent
302bf7c4
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
256 additions
and
27 deletions
+256
-27
include/m_ctype.h
include/m_ctype.h
+0
-1
mysql-test/r/ctype_eucjpms.result
mysql-test/r/ctype_eucjpms.result
+42
-0
mysql-test/r/ctype_ujis.result
mysql-test/r/ctype_ujis.result
+42
-0
mysql-test/t/ctype_eucjpms.test
mysql-test/t/ctype_eucjpms.test
+25
-0
mysql-test/t/ctype_ujis.test
mysql-test/t/ctype_ujis.test
+25
-0
strings/ctype-eucjpms.c
strings/ctype-eucjpms.c
+40
-5
strings/ctype-mb.c
strings/ctype-mb.c
+0
-16
strings/ctype-ujis.c
strings/ctype-ujis.c
+40
-5
strings/strcoll.ic
strings/strcoll.ic
+12
-0
unittest/strings/strings-t.c
unittest/strings/strings-t.c
+30
-0
No files found.
include/m_ctype.h
View file @
95d07ee4
...
...
@@ -351,7 +351,6 @@ struct my_collation_handler_st
my_bool
(
*
propagate
)(
CHARSET_INFO
*
cs
,
const
uchar
*
str
,
size_t
len
);
};
extern
MY_COLLATION_HANDLER
my_collation_mb_bin_handler
;
extern
MY_COLLATION_HANDLER
my_collation_8bit_bin_handler
;
extern
MY_COLLATION_HANDLER
my_collation_8bit_simple_ci_handler
;
extern
MY_COLLATION_HANDLER
my_collation_ucs2_uca_handler
;
...
...
mysql-test/r/ctype_eucjpms.result
View file @
95d07ee4
...
...
@@ -33841,3 +33841,45 @@ ERROR HY000: Invalid eucjpms character string: '8EA0'
#
# End of 10.0 tests
#
#
# Start of 10.1 tests
#
#
# MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms, KEY(a));
INSERT INTO t1 VALUES ('a'),(0x7F);
INSERT INTO t1 VALUES (0x8EA1),(0x8EDF);
INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE);
INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE);
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
61
7F
8EA1
8EDF
8FA1A1
8FFEFE
A1A1
DEDE
DFDF
E0E0
FEFE
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET eucjpms COLLATE eucjpms_bin;
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
61
7F
8EA1
8EDF
8FA1A1
8FFEFE
A1A1
DEDE
DFDF
E0E0
FEFE
DROP TABLE t1;
#
# End of 10.1 tests
#
mysql-test/r/ctype_ujis.result
View file @
95d07ee4
...
...
@@ -26144,3 +26144,45 @@ ERROR HY000: Invalid ujis character string: '8EA0'
#
# End of 10.0 tests
#
#
# Start of 10.1 tests
#
#
# MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis, KEY(a));
INSERT INTO t1 VALUES ('a'),(0x7F);
INSERT INTO t1 VALUES (0x8EA1),(0x8EDF);
INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE);
INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE);
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
61
7F
8EA1
8EDF
8FA1A1
8FFEFE
A1A1
DEDE
DFDF
E0E0
FEFE
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ujis COLLATE ujis_bin;
SELECT HEX(a) FROM t1 ORDER BY a;
HEX(a)
61
7F
8EA1
8EDF
8FA1A1
8FFEFE
A1A1
DEDE
DFDF
E0E0
FEFE
DROP TABLE t1;
#
# End of 10.1 tests
#
mysql-test/t/ctype_eucjpms.test
View file @
95d07ee4
...
...
@@ -541,3 +541,28 @@ SELECT _eucjpms 0x8EA0;
--
echo
#
--
echo
# End of 10.0 tests
--
echo
#
--echo #
--echo # Start of 10.1 tests
--echo #

--echo #
--echo # MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
--echo #

CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms, KEY(a));
# Byte ranges exercised by the INSERTs below:
# [x00-x7F]               # ASCII/JIS-Roman
# [x8E][xA1-xDF]          # half-width katakana
# [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990
# [xA1-xFE][xA1-xFE]      # JIS X 0208:1997
INSERT INTO t1 VALUES ('a'),(0x7F);
INSERT INTO t1 VALUES (0x8EA1),(0x8EDF);
INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE);
INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE);
# Ordering under the default collation of the column
SELECT HEX(a) FROM t1 ORDER BY a;
# Same data, ordering under the binary collation
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET eucjpms COLLATE eucjpms_bin;
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;

--echo #
--echo # End of 10.1 tests
--echo #
mysql-test/t/ctype_ujis.test
View file @
95d07ee4
...
...
@@ -1369,3 +1369,28 @@ SELECT _ujis 0x8EA0;
--
echo
#
--
echo
# End of 10.0 tests
--
echo
#
--echo #
--echo # Start of 10.1 tests
--echo #

--echo #
--echo # MDEV-8215 Asian MB3 charsets: compare broken bytes as "greater than any non-broken character"
--echo #

CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis, KEY(a));
# Byte ranges exercised by the INSERTs below:
# [x00-x7F]               # ASCII/JIS-Roman
# [x8E][xA1-xDF]          # half-width katakana
# [x8F][xA1-xFE][xA1-xFE] # JIS X 0212-1990
# [xA1-xFE][xA1-xFE]      # JIS X 0208:1997
INSERT INTO t1 VALUES ('a'),(0x7F);
INSERT INTO t1 VALUES (0x8EA1),(0x8EDF);
INSERT INTO t1 VALUES (0x8FA1A1),(0x8FFEFE);
INSERT INTO t1 VALUES (0xA1A1),(0xDEDE),(0xDFDF),(0xE0E0),(0xFEFE);
# Ordering under the default collation of the column
SELECT HEX(a) FROM t1 ORDER BY a;
# Same data, ordering under the binary collation
ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ujis COLLATE ujis_bin;
SELECT HEX(a) FROM t1 ORDER BY a;
DROP TABLE t1;

--echo #
--echo # End of 10.1 tests
--echo #
strings/ctype-eucjpms.c
View file @
95d07ee4
...
...
@@ -194,6 +194,7 @@ static const uchar sort_order_eucjpms[]=
/* Token-pastes a routine name into my_<x>_eucjpms. */
#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms
/* A byte below 0x80 is a complete one-byte character. */
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
/*
  Two-byte character whose bytes both satisfy iseucjpms().
  NOTE(review): iseucjpms() is defined outside this excerpt; presumably it
  accepts the [xA1-xFE] range -- confirm.
*/
#define IS_MB2_JIS(x,y) (iseucjpms(x) && iseucjpms(y))
/*
  Two-byte character made of a byte accepted by iseucjpms_ss2() followed by a
  byte accepted by iskata().
  NOTE(review): both helpers are defined outside this excerpt; presumably this
  is the [x8E][xA1-xDF] half-width katakana form -- confirm.
*/
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
/* Any valid two-byte character: either of the two forms above. */
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
...
...
@@ -201,6 +202,23 @@ static const uchar sort_order_eucjpms[]=
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_japanese_ci
/*
  Weight macros for the _eucjpms_japanese_ci comparison routines.

  WEIGHT_ILSEQ  weight of a byte that does not start a valid character:
                0xFF0000 plus the byte value.
  WEIGHT_MB1    weight of a one-byte character, looked up in
                sort_order_eucjpms[].
  WEIGHT_MB2    weight of a two-byte character: first byte in bits 16..23,
                second byte in bits 8..15.
  WEIGHT_MB3    weight of a three-byte character: WEIGHT_MB2 of the first two
                bytes with the third byte in bits 0..7.

  Every argument is parenthesized before it is cast, so an expression
  argument is truncated to one byte as a whole.
*/
#define WEIGHT_ILSEQ(x)   (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(x)     ((int) sort_order_eucjpms[(uchar) (x)])
#define WEIGHT_MB2(x,y)   ((((uint) (uchar) (x)) << 16) | \
                           (((uint) (uchar) (y)) << 8))
#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z)))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _eucjpms_bin
/*
  Weight macros for the _eucjpms_bin comparison routines.

  WEIGHT_ILSEQ  weight of a byte that does not start a valid character:
                0xFF0000 plus the byte value.
  WEIGHT_MB1    weight of a one-byte character: the byte value itself.
  WEIGHT_MB2    weight of a two-byte character: first byte in bits 16..23,
                second byte in bits 8..15.
  WEIGHT_MB3    weight of a three-byte character: WEIGHT_MB2 of the first two
                bytes with the third byte in bits 0..7.

  Every argument is parenthesized before it is cast, so an expression
  argument is truncated to one byte as a whole.
*/
#define WEIGHT_ILSEQ(x)   (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(x)     ((int) (uchar) (x))
#define WEIGHT_MB2(x,y)   ((((uint) (uchar) (x)) << 16) | \
                           (((uint) (uchar) (y)) << 8))
#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z)))
#include "strcoll.ic"
static uint ismbchar_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
...
...
@@ -67467,11 +67485,11 @@ size_t my_numcells_eucjpms(CHARSET_INFO *cs __attribute__((unused)),
}
static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_COLLATION_HANDLER my_collation_
eucjpms_japanese_
ci_handler =
{
NULL, /* init */
my_strnncoll_
simple,/* strnncoll */
my_strnncollsp_
simple
,
my_strnncoll_
eucjpms_japanese_ci,
my_strnncollsp_
eucjpms_japanese_ci
,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
...
...
@@ -67482,6 +67500,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
/*
  Collation handler for eucjpms_bin.

  The two comparison slots use the eucjpms-specific routines
  my_strnncoll_eucjpms_bin and my_strnncollsp_eucjpms_bin; all other slots
  use generic "_mb" / "_simple" routines.

  NOTE(review): slot labels other than init, strnncoll, strnxfrm, like_range
  and propagate are taken from the routine names -- confirm against
  struct my_collation_handler_st.
*/
static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
{
NULL, /* init */
my_strnncoll_eucjpms_bin, /* strnncoll */
my_strnncollsp_eucjpms_bin, /* strnncollsp */
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple, /* strnxfrmlen */
my_like_range_mb, /* like_range */
my_wildcmp_mb_bin, /* wildcmp */
my_strcasecmp_mb_bin, /* strcasecmp */
my_instr_mb, /* instr */
my_hash_sort_mb_bin, /* hash_sort */
my_propagate_simple /* propagate */
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
...
...
@@ -67547,7 +67582,7 @@ struct charset_info_st my_charset_eucjpms_japanese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_
eucjpms_japanese_
ci_handler
};
...
...
@@ -67580,7 +67615,7 @@ struct charset_info_st my_charset_eucjpms_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_
mb
_bin_handler
&my_collation_
eucjpms
_bin_handler
};
strings/ctype-mb.c
View file @
95d07ee4
...
...
@@ -1560,20 +1560,4 @@ int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
}
/*
  Binary collation handler built entirely from the generic multi-byte
  ("_mb") and "_simple" routines. It has external linkage (not static).
*/
MY_COLLATION_HANDLER my_collation_mb_bin_handler=
{
  NULL,                   /* init */
  my_strnncoll_mb_bin,
  my_strnncollsp_mb_bin,
  my_strnxfrm_mb,
  my_strnxfrmlen_simple,
  my_like_range_mb,
  my_wildcmp_mb_bin,
  my_strcasecmp_mb_bin,
  my_instr_mb,
  my_hash_sort_mb_bin,
  my_propagate_simple
};
#endif
strings/ctype-ujis.c
View file @
95d07ee4
...
...
@@ -193,6 +193,7 @@ static const uchar sort_order_ujis[]=
/* True when the byte is 0x8f. */
#define isujis_ss3(c) ((uchar) (c) == 0x8f)
/* Token-pastes a routine name into my_<x>_ujis. */
#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis
/* A byte below 0x80 is a complete one-byte character. */
#define IS_MB1_CHAR(x) ((uchar) (x) < 0x80)
/*
  Two-byte character whose bytes both satisfy isujis().
  NOTE(review): isujis() is defined outside this excerpt; presumably it
  accepts the [xA1-xFE] range -- confirm.
*/
#define IS_MB2_JIS(x,y) (isujis(x) && isujis(y))
/*
  Two-byte character made of a byte accepted by isujis_ss2() followed by a
  byte accepted by iskata().
  NOTE(review): both helpers are defined outside this excerpt; presumably this
  is the [x8E][xA1-xDF] half-width katakana form -- confirm.
*/
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
/* Any valid two-byte character: either of the two forms above. */
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
...
...
@@ -200,6 +201,23 @@ static const uchar sort_order_ujis[]=
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_japanese_ci
/*
  Weight macros for the _ujis_japanese_ci comparison routines.

  WEIGHT_ILSEQ  weight of a byte that does not start a valid character:
                0xFF0000 plus the byte value.
  WEIGHT_MB1    weight of a one-byte character, looked up in
                sort_order_ujis[].
  WEIGHT_MB2    weight of a two-byte character: first byte in bits 16..23,
                second byte in bits 8..15.
  WEIGHT_MB3    weight of a three-byte character: WEIGHT_MB2 of the first two
                bytes with the third byte in bits 0..7.

  Every argument is parenthesized before it is cast, so an expression
  argument is truncated to one byte as a whole.
*/
#define WEIGHT_ILSEQ(x)   (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(x)     ((int) sort_order_ujis[(uchar) (x)])
#define WEIGHT_MB2(x,y)   ((((uint) (uchar) (x)) << 16) | \
                           (((uint) (uchar) (y)) << 8))
#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z)))
#include "strcoll.ic"
#define MY_FUNCTION_NAME(x) my_ ## x ## _ujis_bin
/*
  Weight macros for the _ujis_bin comparison routines.

  WEIGHT_ILSEQ  weight of a byte that does not start a valid character:
                0xFF0000 plus the byte value.
  WEIGHT_MB1    weight of a one-byte character: the byte value itself.
  WEIGHT_MB2    weight of a two-byte character: first byte in bits 16..23,
                second byte in bits 8..15.
  WEIGHT_MB3    weight of a three-byte character: WEIGHT_MB2 of the first two
                bytes with the third byte in bits 0..7.

  Every argument is parenthesized before it is cast, so an expression
  argument is truncated to one byte as a whole.
*/
#define WEIGHT_ILSEQ(x)   (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(x)     ((int) (uchar) (x))
#define WEIGHT_MB2(x,y)   ((((uint) (uchar) (x)) << 16) | \
                           (((uint) (uchar) (y)) << 8))
#define WEIGHT_MB3(x,y,z) (WEIGHT_MB2(x,y) | ((uint) (uchar) (z)))
#include "strcoll.ic"
static uint ismbchar_ujis(CHARSET_INFO *cs __attribute__((unused)),
const char* p, const char *e)
...
...
@@ -67211,11 +67229,11 @@ my_caseup_ujis(CHARSET_INFO * cs, char *src, size_t srclen,
#ifdef HAVE_CHARSET_ujis
static MY_COLLATION_HANDLER my_collation_ci_handler =
static MY_COLLATION_HANDLER my_collation_
ujis_japanese_
ci_handler =
{
NULL, /* init */
my_strnncoll_
simple,/* strnncoll */
my_strnncollsp_
simple
,
my_strnncoll_
ujis_japanese_ci,
my_strnncollsp_
ujis_japanese_ci
,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
...
...
@@ -67226,6 +67244,23 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_propagate_simple
};
/*
  Collation handler for ujis_bin.

  The two comparison slots use the ujis-specific routines
  my_strnncoll_ujis_bin and my_strnncollsp_ujis_bin; all other slots use
  generic "_mb" / "_simple" routines.

  NOTE(review): slot labels other than init, strnncoll, strnxfrm, like_range
  and propagate are taken from the routine names -- confirm against
  struct my_collation_handler_st.
*/
static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
{
NULL, /* init */
my_strnncoll_ujis_bin, /* strnncoll */
my_strnncollsp_ujis_bin, /* strnncollsp */
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple, /* strnxfrmlen */
my_like_range_mb, /* like_range */
my_wildcmp_mb_bin, /* wildcmp */
my_strcasecmp_mb_bin, /* strcasecmp */
my_instr_mb, /* instr */
my_hash_sort_mb_bin, /* hash_sort */
my_propagate_simple /* propagate */
};
static MY_CHARSET_HANDLER my_charset_handler=
{
NULL, /* init */
...
...
@@ -67291,7 +67326,7 @@ struct charset_info_st my_charset_ujis_japanese_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_ci_handler
&my_collation_
ujis_japanese_
ci_handler
};
...
...
@@ -67324,7 +67359,7 @@ struct charset_info_st my_charset_ujis_bin=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_handler,
&my_collation_
mb
_bin_handler
&my_collation_
ujis
_bin_handler
};
strings/strcoll.ic
View file @
95d07ee4
...
...
@@ -95,6 +95,17 @@ MY_FUNCTION_NAME(scan_weight)(int *weight, const uchar *str, const uchar *end)
return 2; /* A valid two-byte character */
}
#ifdef IS_MB3_CHAR
if (str + 3 > end) /* Incomplete three-byte character */
goto bad;
if (IS_MB3_CHAR(str[0], str[1], str[2]))
{
*weight= WEIGHT_MB3(str[0], str[1], str[2]);
return 3; /* A valid three-byte character */
}
#endif
bad:
*weight= WEIGHT_ILSEQ(str[0]); /* Bad byte */
return 1;
...
...
@@ -228,4 +239,5 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
#undef WEIGHT_ILSEQ
#undef WEIGHT_MB1
#undef WEIGHT_MB2
#undef WEIGHT_MB3
#undef WEIGHT_PAD_SPACE
unittest/strings/strings-t.c
View file @
95d07ee4
...
...
@@ -333,6 +333,20 @@ STRNNCOLL_PARAM strcoll_8181_A1_E0E0[]=
};
/*
A shared test for eucjpms and ujis.
*/
STRNNCOLL_PARAM
strcoll_ujis
[]
=
{
{
CSTR
(
"
\x8E\xA1
"
),
CSTR
(
"
\x8E
"
),
-
1
},
/* Good MB2 vs incomplete MB2 */
{
CSTR
(
"
\x8E\xA1
"
),
CSTR
(
"
\x8F\xA1
"
),
-
1
},
/* Good MB2 vs incomplete MB3 */
{
CSTR
(
"
\x8E\xA1
"
),
CSTR
(
"
\x8F\xA1\xA1
"
),
-
1
},
/* Good MB2 vs good MB3 */
{
CSTR
(
"
\xA1\xA1
"
),
CSTR
(
"
\x8F\xA1\xA1
"
),
1
},
/* Good MB2 vs good MB3 */
{
CSTR
(
"
\x8E
"
),
CSTR
(
"
\x8F\xA1
"
),
-
1
},
/* Incomplete MB2 vs incomplete MB3 */
{
NULL
,
0
,
NULL
,
0
,
0
}
};
static
void
str2hex
(
char
*
dst
,
size_t
dstlen
,
const
char
*
src
,
size_t
srclen
)
{
...
...
@@ -415,6 +429,14 @@ test_strcollsp()
failed
+=
strcollsp
(
&
my_charset_cp932_japanese_ci
,
strcoll_8181_A1_E0E0
);
failed
+=
strcollsp
(
&
my_charset_cp932_bin
,
strcoll_8181_A1_E0E0
);
#endif
#ifdef HAVE_CHARSET_eucjpms
failed
+=
strcollsp
(
&
my_charset_eucjpms_japanese_ci
,
strcoll_mb2_common
);
failed
+=
strcollsp
(
&
my_charset_eucjpms_bin
,
strcoll_mb2_common
);
failed
+=
strcollsp
(
&
my_charset_eucjpms_japanese_ci
,
strcoll_mb2_A1A1_mb2_F9FE
);
failed
+=
strcollsp
(
&
my_charset_eucjpms_bin
,
strcoll_mb2_A1A1_mb2_F9FE
);
failed
+=
strcollsp
(
&
my_charset_eucjpms_japanese_ci
,
strcoll_ujis
);
failed
+=
strcollsp
(
&
my_charset_eucjpms_bin
,
strcoll_ujis
);
#endif
#ifdef HAVE_CHARSET_euckr
failed
+=
strcollsp
(
&
my_charset_euckr_korean_ci
,
strcoll_mb2_common
);
failed
+=
strcollsp
(
&
my_charset_euckr_korean_ci
,
strcoll_mb2_A1A1_mb2_F9FE
);
...
...
@@ -440,6 +462,14 @@ test_strcollsp()
failed
+=
strcollsp
(
&
my_charset_sjis_bin
,
strcoll_mb1_A1_bad_F9FE
);
failed
+=
strcollsp
(
&
my_charset_sjis_japanese_ci
,
strcoll_8181_A1_E0E0
);
failed
+=
strcollsp
(
&
my_charset_sjis_bin
,
strcoll_8181_A1_E0E0
);
#endif
#ifdef HAVE_CHARSET_ujis
failed
+=
strcollsp
(
&
my_charset_ujis_japanese_ci
,
strcoll_mb2_common
);
failed
+=
strcollsp
(
&
my_charset_ujis_bin
,
strcoll_mb2_common
);
failed
+=
strcollsp
(
&
my_charset_ujis_japanese_ci
,
strcoll_mb2_A1A1_mb2_F9FE
);
failed
+=
strcollsp
(
&
my_charset_ujis_bin
,
strcoll_mb2_A1A1_mb2_F9FE
);
failed
+=
strcollsp
(
&
my_charset_ujis_japanese_ci
,
strcoll_ujis
);
failed
+=
strcollsp
(
&
my_charset_ujis_bin
,
strcoll_ujis
);
#endif
return
failed
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment