Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
5267ec8a
Commit
5267ec8a
authored
Oct 18, 2004
by
unknown
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bug #6040 can't retrieve records with umlaut characters in case insensitive manner
parent
2310f00a
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
193 additions
and
163 deletions
+193
-163
include/m_ctype.h
include/m_ctype.h
+5
-0
mysql-test/r/ctype_utf8.result
mysql-test/r/ctype_utf8.result
+9
-0
mysql-test/t/ctype_utf8.test
mysql-test/t/ctype_utf8.test
+8
-0
strings/ctype-ucs2.c
strings/ctype-ucs2.c
+4
-162
strings/ctype-utf8.c
strings/ctype-utf8.c
+167
-1
No files found.
include/m_ctype.h
View file @
5267ec8a
...
...
@@ -365,6 +365,11 @@ uint my_instr_mb(struct charset_info_st *,
const
char
*
s
,
uint
s_length
,
my_match_t
*
match
,
uint
nmatch
);
/*
  Wildcard (LIKE-style) comparison shared by the UTF8 and UCS2 handlers.

  str .. str_end          subject string (end pointer is exclusive)
  wildstr .. wildend      pattern (end pointer is exclusive)
  escape, w_one, w_many   code points of the escape character, the
                          "exactly one character" wildcard and the
                          "any sequence" wildcard
  weights                 per-plane table whose .sort member is used to
                          fold both sides before comparing; NULL makes
                          the comparison use the raw code points

  Return value, as produced by the definition:
     0  the string matches the pattern
     1  the string does not match
    -1  the string does not match and it ran out while searching for
        the literal that follows w_many
*/
int
my_wildcmp_unicode
(
CHARSET_INFO
*
cs
,
const
char
*
str
,
const
char
*
str_end
,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
,
MY_UNICASE_INFO
**
weights
);
/*
  Takes a character buffer with its length and a callback that receives
  a CHARSET_INFO pointer and returns int.
  NOTE(review): presumably parses character set definitions from XML
  text and calls add() for each one found; only the prototype is
  visible here, so confirm against the definition.
  NOTE(review): the first parameter is named "bug"; this looks like a
  typo for "buf" -- confirm against the definition before renaming.
*/
extern
my_bool
my_parse_charset_xml
(
const
char
*
bug
,
uint
len
,
int
(
*
add
)(
CHARSET_INFO
*
cs
));
...
...
mysql-test/r/ctype_utf8.result
View file @
5267ec8a
...
...
@@ -63,6 +63,15 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
select convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8);
convert(_latin1'Günter André' using utf8) like CONVERT(_latin1'GÜNTER%' USING utf8)
1
select CONVERT(_koi8r'вася' USING utf8) LIKE CONVERT(_koi8r'ВАСЯ' USING utf8);
CONVERT(_koi8r'вася' USING utf8) LIKE CONVERT(_koi8r'ВАСЯ' USING utf8)
1
select CONVERT(_koi8r'ВАСЯ' USING utf8) LIKE CONVERT(_koi8r'вася' USING utf8);
CONVERT(_koi8r'ВАСЯ' USING utf8) LIKE CONVERT(_koi8r'вася' USING utf8)
1
SELECT 'a' = 'a ';
'a' = 'a '
1
...
...
mysql-test/t/ctype_utf8.test
View file @
5267ec8a
...
...
@@ -33,6 +33,14 @@ select 'A' like 'a';
select
'A'
like
'a'
collate
utf8_bin
;
select
_utf8
0xD0B0D0B1D0B2
like
concat
(
_utf8
'%'
,
_utf8
0xD0B1
,
_utf8
'%'
);
# Bug #6040: can't retrieve records with umlaut
# characters in case insensitive manner.
# Case insensitive search LIKE comparison
# was broken for multibyte characters:
select
convert
(
_latin1
'Günter André'
using
utf8
)
like
CONVERT
(
_latin1
'GÜNTER%'
USING
utf8
);
select
CONVERT
(
_koi8r
'вася'
USING
utf8
)
LIKE
CONVERT
(
_koi8r
'ВАСЯ'
USING
utf8
);
select
CONVERT
(
_koi8r
'ВАСЯ'
USING
utf8
)
LIKE
CONVERT
(
_koi8r
'вася'
USING
utf8
);
#
# Check the following:
# "a" == "a "
...
...
strings/ctype-ucs2.c
View file @
5267ec8a
...
...
@@ -1231,171 +1231,13 @@ uint my_lengthsp_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
static
int
my_wildcmp_ucs2
(
CHARSET_INFO
*
cs
,
const
char
*
str
,
const
char
*
str_end
,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
,
MY_UNICASE_INFO
**
weights
)
{
int
result
=
-
1
;
/* Not found, using wildcards */
my_wc_t
s_wc
,
w_wc
;
int
scan
,
plane
;
while
(
wildstr
!=
wildend
)
{
while
(
1
)
{
scan
=
my_ucs2_uni
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
);
if
(
scan
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
scan
=
my_ucs2_uni
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
);
if
(
scan
<=
0
)
return
1
;
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
result
=
1
;
/* Found an anchor char */
break
;
}
wildstr
+=
scan
;
scan
=
my_ucs2_uni
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
);
if
(
scan
<=
0
)
return
1
;
str
+=
scan
;
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
result
=
1
;
/* Found an anchor char */
}
else
{
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
!=
w_wc
)
return
1
;
/* No match */
}
if
(
wildstr
==
wildend
)
return
(
str
!=
str_end
);
/* Match if both are at end */
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
/* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for
(
;
wildstr
!=
wildend
;
)
{
scan
=
my_ucs2_uni
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
);
if
(
scan
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
wildstr
+=
scan
;
continue
;
}
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
wildstr
+=
scan
;
scan
=
my_ucs2_uni
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
);
if
(
scan
<=
0
)
return
1
;
str
+=
scan
;
continue
;
}
break
;
/* Not a wild character */
}
if
(
wildstr
==
wildend
)
return
0
;
/* Ok if w_many is last */
if
(
str
==
str_end
)
return
-
1
;
scan
=
my_ucs2_uni
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
);
if
(
scan
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
scan
=
my_ucs2_uni
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
);
if
(
scan
<=
0
)
return
1
;
}
while
(
1
)
{
/* Skip until the first character from wildstr is found */
while
(
str
!=
str_end
)
{
scan
=
my_ucs2_uni
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
);
if
(
scan
<=
0
)
return
1
;
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
==
w_wc
)
break
;
str
+=
scan
;
}
if
(
str
==
str_end
)
return
-
1
;
result
=
my_wildcmp_ucs2
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
weights
);
if
(
result
<=
0
)
return
result
;
str
+=
scan
;
}
}
}
return
(
str
!=
str_end
?
1
:
0
);
}
/*
  Wildcard comparison for UCS2 that folds both sides through the
  uni_plane sort weights. Delegates to my_wildcmp_unicode() and
  returns its result unchanged.
*/
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                       const char *str, const char *str_end,
                       const char *wildstr, const char *wildend,
                       int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                            escape, w_one, w_many, uni_plane);
}
...
...
@@ -1406,7 +1248,7 @@ int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
)
{
return
my_wildcmp_u
cs2
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
return
my_wildcmp_u
nicode
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
NULL
);
}
...
...
strings/ctype-utf8.c
View file @
5267ec8a
...
...
@@ -1518,6 +1518,161 @@ MY_UNICASE_INFO *uni_plane[256]={
};
/*
** Compare string against string with wildcard
** This function is used in UTF8 and UCS2
**
** 0 if matched
** -1 if not matched with wildcard
** 1 if matched with wildcard
*/
int
my_wildcmp_unicode
(
CHARSET_INFO
*
cs
,
const
char
*
str
,
const
char
*
str_end
,
const
char
*
wildstr
,
const
char
*
wildend
,
int
escape
,
int
w_one
,
int
w_many
,
MY_UNICASE_INFO
**
weights
)
{
int
result
=
-
1
;
/* Not found, using wildcards */
my_wc_t
s_wc
,
w_wc
;
int
scan
,
plane
;
int
(
*
mb_wc
)(
struct
charset_info_st
*
cs
,
my_wc_t
*
wc
,
const
unsigned
char
*
s
,
const
unsigned
char
*
e
);
mb_wc
=
cs
->
cset
->
mb_wc
;
while
(
wildstr
!=
wildend
)
{
while
(
1
)
{
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
result
=
1
;
/* Found an anchor char */
break
;
}
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
str
+=
scan
;
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
result
=
1
;
/* Found an anchor char */
}
else
{
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
!=
w_wc
)
return
1
;
/* No match */
}
if
(
wildstr
==
wildend
)
return
(
str
!=
str_end
);
/* Match if both are at end */
}
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
/* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for
(
;
wildstr
!=
wildend
;
)
{
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
w_many
)
{
wildstr
+=
scan
;
continue
;
}
if
(
w_wc
==
(
my_wc_t
)
w_one
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
str
+=
scan
;
continue
;
}
break
;
/* Not a wild character */
}
if
(
wildstr
==
wildend
)
return
0
;
/* Ok if w_many is last */
if
(
str
==
str_end
)
return
-
1
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
if
(
w_wc
==
(
my_wc_t
)
escape
)
{
wildstr
+=
scan
;
if
((
scan
=
mb_wc
(
cs
,
&
w_wc
,
(
const
uchar
*
)
wildstr
,
(
const
uchar
*
)
wildend
))
<=
0
)
return
1
;
}
while
(
1
)
{
/* Skip until the first character from wildstr is found */
while
(
str
!=
str_end
)
{
if
((
scan
=
mb_wc
(
cs
,
&
s_wc
,
(
const
uchar
*
)
str
,
(
const
uchar
*
)
str_end
))
<=
0
)
return
1
;
if
(
weights
)
{
plane
=
(
s_wc
>>
8
)
&
0xFF
;
s_wc
=
weights
[
plane
]
?
weights
[
plane
][
s_wc
&
0xFF
].
sort
:
s_wc
;
plane
=
(
w_wc
>>
8
)
&
0xFF
;
w_wc
=
weights
[
plane
]
?
weights
[
plane
][
w_wc
&
0xFF
].
sort
:
w_wc
;
}
if
(
s_wc
==
w_wc
)
break
;
str
+=
scan
;
}
if
(
str
==
str_end
)
return
-
1
;
result
=
my_wildcmp_unicode
(
cs
,
str
,
str_end
,
wildstr
,
wildend
,
escape
,
w_one
,
w_many
,
weights
);
if
(
result
<=
0
)
return
result
;
str
+=
scan
;
}
}
}
return
(
str
!=
str_end
?
1
:
0
);
}
#endif
...
...
@@ -1992,6 +2147,17 @@ static int my_strcasecmp_utf8(CHARSET_INFO *cs, const char *s, const char *t)
return
my_strncasecmp_utf8
(
cs
,
s
,
t
,
len
);
}
/*
  Wildcard comparison entry for UTF8: forwards every argument to
  my_wildcmp_unicode() with uni_plane as the weight table and hands
  back that function's result unchanged.
*/
static
int my_wildcmp_utf8(CHARSET_INFO *cs,
                    const char *str, const char *str_end,
                    const char *wildstr, const char *wildend,
                    int escape, int w_one, int w_many)
{
  int cmp;

  cmp= my_wildcmp_unicode(cs, str, str_end, wildstr, wildend,
                          escape, w_one, w_many, uni_plane);
  return cmp;
}
static
int
my_strnxfrm_utf8
(
CHARSET_INFO
*
cs
,
uchar
*
dst
,
uint
dstlen
,
const
uchar
*
src
,
uint
srclen
)
...
...
@@ -2060,7 +2226,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
my_strnncollsp_utf8
,
my_strnxfrm_utf8
,
my_like_range_mb
,
my_wildcmp_
mb
,
my_wildcmp_
utf8
,
my_strcasecmp_utf8
,
my_instr_mb
,
my_hash_sort_utf8
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment