Commit 022ae421 authored by Sergei Golubchik

MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'

use utf8mb4 with PCRE2, not utf8mb3
parent 6538a91e
......@@ -895,3 +895,12 @@ REGEXP_INSTR('a_kollision', 'o([lm])\\1')
4
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
a
#
# MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
#
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
Text
F09F9881
#
# End of 10.6 tests
#
......
......@@ -470,3 +470,11 @@ SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
#
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
--enable_service_connection
--echo #
--echo # MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
--echo #
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
--echo #
--echo # End of 10.6 tests
--echo #
......@@ -6096,7 +6096,7 @@ void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags)
// Convert text data to utf-8.
m_library_charset= data_charset == &my_charset_bin ?
-&my_charset_bin : &my_charset_utf8mb3_general_ci;
+&my_charset_bin : &my_charset_utf8mb4_general_ci;
m_conversion_is_needed= (data_charset != &my_charset_bin) &&
!my_charset_same(data_charset, m_library_charset);
......
......@@ -3057,7 +3057,7 @@ class Regexp_processor_pcre
m_pcre(NULL), m_pcre_match_data(NULL),
m_conversion_is_needed(true), m_is_const(0),
m_library_flags(0),
-m_library_charset(&my_charset_utf8mb3_general_ci)
+m_library_charset(&my_charset_utf8mb4_general_ci)
{}
int default_regex_flags();
void init(CHARSET_INFO *data_charset, int extra_flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment