Commit 022ae421 authored by Sergei Golubchik

MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'

use utf8mb4 with PCRE2, not utf8mb3
parent 6538a91e
...@@ -895,3 +895,12 @@ REGEXP_INSTR('a_kollision', 'o([lm])\\1') ...@@ -895,3 +895,12 @@ REGEXP_INSTR('a_kollision', 'o([lm])\\1')
4 4
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]'; SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
a a
#
# MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
#
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
Text
F09F9881
#
# End of 10.6 tests
#
......
...@@ -470,3 +470,11 @@ SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1'); ...@@ -470,3 +470,11 @@ SELECT REGEXP_INSTR('a_kollision', 'o([lm])\\1');
# #
SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]'; SELECT a FROM (SELECT "aa" a) t WHERE a REGEXP '[0-9]';
--enable_service_connection --enable_service_connection
--echo #
--echo # MDEV-11777 REGEXP_REPLACE converts utf8mb4 supplementary characters to '?'
--echo #
select hex(regexp_replace(cast(x'F09F9881' as char character set 'utf8mb4'), _utf8mb4'a', _utf8mb4'b')) as Text;
--echo #
--echo # End of 10.6 tests
--echo #
...@@ -6096,7 +6096,7 @@ void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags) ...@@ -6096,7 +6096,7 @@ void Regexp_processor_pcre::init(CHARSET_INFO *data_charset, int extra_flags)
// Convert text data to utf-8. // Convert text data to utf-8.
m_library_charset= data_charset == &my_charset_bin ? m_library_charset= data_charset == &my_charset_bin ?
&my_charset_bin : &my_charset_utf8mb3_general_ci; &my_charset_bin : &my_charset_utf8mb4_general_ci;
m_conversion_is_needed= (data_charset != &my_charset_bin) && m_conversion_is_needed= (data_charset != &my_charset_bin) &&
!my_charset_same(data_charset, m_library_charset); !my_charset_same(data_charset, m_library_charset);
......
...@@ -3057,7 +3057,7 @@ class Regexp_processor_pcre ...@@ -3057,7 +3057,7 @@ class Regexp_processor_pcre
m_pcre(NULL), m_pcre_match_data(NULL), m_pcre(NULL), m_pcre_match_data(NULL),
m_conversion_is_needed(true), m_is_const(0), m_conversion_is_needed(true), m_is_const(0),
m_library_flags(0), m_library_flags(0),
m_library_charset(&my_charset_utf8mb3_general_ci) m_library_charset(&my_charset_utf8mb4_general_ci)
{} {}
int default_regex_flags(); int default_regex_flags();
void init(CHARSET_INFO *data_charset, int extra_flags); void init(CHARSET_INFO *data_charset, int extra_flags);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment