Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters

@ mysql-test/r/ctype_latin1.result @ mysql-test/r/ctype_utf8.result @ mysql-test/t/ctype_latin1.test @ mysql-test/t/ctype_utf8.test Adding tests @ sql/mysqld.h @ sql/item.cc @ sql/sql_parse.cc @ sql/sql_view.cc Refactoring (thanks to Guilhem for the idea): Item_string::print() was hard to understand because of the different QT_ constants: in "query_type==QT_x", QT_x is explicitly included but the other two QT_ are implicitly excluded. The combinations with '||' and '&&' make this even harder. - logic is now more "explicit" by changing QT_ constants to a bitmap of flags: QT_ORDINARY: no change, QT_IS -> QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS, QT_EXPLAIN -> QT_TO_SYSTEM_CHARSET (QT_EXPLAIN was introduced in the first version of the Bug#57341 patch) - Item_string::print() is rewritten using those flags Bugfix itself: When QT_TO_SYSTEM_CHARSET is used alone (with no QT_WITHOUT_INTRODUCERS), we print string literals as follows: - display introducers if they were in the original query - print ASCII characters as is - print non-ASCII characters using hex-escape Note: "EXPLAIN" output is only for human readability purposes and does not need to be parsable SQL, so using hex-escape is OK. The ErrConvString class is perfectly suited to hex-escaping purposes.

Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
@ mysql-test/r/ctype_latin1.result @ mysql-test/r/ctype_utf8.result @ mysql-test/t/ctype_latin1.test @ mysql-test/t/ctype_utf8.test Adding tests @ sql/mysqld.h @ sql/item.cc @ sql/sql_parse.cc @ sql/sql_view.cc Refactoring (thanks to Guilhem for the idea): Item_string::print() was hard to understand because of the different QT_ constants: in "query_type==QT_x", QT_x is explicitly included but the other two QT_ are implicitly excluded. The combinations with '||' and '&&' make this even harder. - logic is now more "explicit" by changing QT_ constants to a bitmap of flags: QT_ORDINARY: no change, QT_IS -> QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS, QT_EXPLAIN -> QT_TO_SYSTEM_CHARSET (QT_EXPLAIN was introduced in the first version of the Bug#57341 patch) - Item_string::print() is rewritten using those flags Bugfix itself: When QT_TO_SYSTEM_CHARSET is used alone (with no QT_WITHOUT_INTRODUCERS), we print string literals as follows: - display introducers if they were in the original query - print ASCII characters as is - print non-ASCII characters using hex-escape Note: "EXPLAIN" output is only for human readability purposes and does not need to be parsable SQL, so using hex-escape is OK. The ErrConvString class is perfectly suited to hex-escaping purposes.
b326b9a3 · Alexander Barkov · 7e6d938f · b326b9a3 · b326b9a3 · b326b9a3
Commit b326b9a3 authored Mar 04, 2011 by Alexander Barkov
8 changed files
--- a/mysql-test/r/ctype_latin1.result
+++ b/mysql-test/r/ctype_latin1.result
@@ -3246,5 +3246,20 @@ maketime(`a`,`a`,`a`)
 DROP TABLE t1;
 SET sql_mode=default;
 #
+# Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
+#
+SET NAMES utf8;
+EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
+Warnings:
+Note	1003	select 'abcdó' AS `abcdó`,_latin1'abcd\xC3\xB3' AS `abcdÃ³`,_utf8'abcd\xC3\xB3' AS `abcdó`
+SET NAMES latin1;
+EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
+Warnings:
+Note	1003	select 'abcdó' AS `abcdó`,_latin1'abcd\xC3\xB3' AS `abcdó`,_utf8'abcd\xC3\xB3' AS `abcd`
+#
 # End of 5.5 tests
 #
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -4969,5 +4969,20 @@ GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
 ERROR 23000: Duplicate entry '107374182410737418241' for key 'group_key'
 DROP TABLE t1;
 #
+# Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
+#
+SET NAMES latin1;
+EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ' AS u;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
+Warnings:
+Note	1003	select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `u`
+SET NAMES utf8;
+EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	filtered	Extra
+1	SIMPLE	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	No tables used
+Warnings:
+Note	1003	select 'abcdÁÂÃÄÅ' AS `abcdÁÂÃÄÅ`,_latin1'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÃÃ‚ÃƒÃ„Ã…`,_utf8'abcd\xC3\x81\xC3\x82\xC3\x83\xC3\x84\xC3\x85' AS `abcdÁÂÃÄÅ`
+#
 # End of 5.5 tests
 #
--- a/mysql-test/t/ctype_latin1.test
+++ b/mysql-test/t/ctype_latin1.test
@@ -143,6 +143,21 @@ SELECT '' LIKE '' ESCAPE EXPORT_SET(1, 1, 1, 1, '');

 --source include/ctype_numconv.inc

+--echo #
+--echo # Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
+--echo #
+# Test latin1 client erroneously started with --default-character-set=utf8
+# EXPLAIN output should still be pretty readable.
+# We're using 'ó' (\xC3\xB3) as a magic sequence:
+# - it's "LATIN CAPITAL LETTER A WITH TILDE ABOVE + SUPERSCRIPT 3" in latin1
+# - it's "LATIN SMALL LETTER O WITH ACUTE ABOVE" in utf8.
+SET NAMES utf8;
+EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
+# Test normal latin1
+SET NAMES latin1;
+EXPLAIN EXTENDED SELECT 'abcdó', _latin1'abcdó', _utf8'abcdó';
+
+
 --echo #
 --echo # End of 5.5 tests
 --echo #
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -1550,6 +1550,16 @@ SELECT COUNT(*) FROM t1, t1 t2
 GROUP BY INSERT('', t2.a, t1.a, (@@global.max_binlog_size));
 DROP TABLE t1;

+--echo #
+--echo # Bug#11764503 (Bug#57341) Query in EXPLAIN EXTENDED shows wrong characters
+--echo #
+# Emulate utf8 client erroneously started with --default-character-set=latin1,
+# as in the bug report. EXPLAIN output should still be pretty readable
+SET NAMES latin1;
+EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ' AS u;
+# Test normal utf8
+SET NAMES utf8;
+EXPLAIN EXTENDED SELECT 'abcdÁÂÃÄÅ', _latin1'abcdÁÂÃÄÅ', _utf8'abcdÁÂÃÄÅ';

 --echo #
 --echo # End of 5.5 tests

--- a/sql/item.cc
+++ b/sql/item.cc
@@ -2514,7 +2514,9 @@ my_decimal *Item_float::val_decimal(my_decimal *decimal_value)

 void Item_string::print(String *str, enum_query_type query_type)
 {
-  if (query_type == QT_ORDINARY && is_cs_specified())
+  const bool print_introducer=
+    !(query_type & QT_WITHOUT_INTRODUCERS) && is_cs_specified();
+  if (print_introducer)
  {
    str->append('_');
    str->append(collation.collation->csname);
@@ -2522,12 +2524,30 @@ void Item_string::print(String *str, enum_query_type query_type)

  str->append('\'');

-  if (query_type == QT_ORDINARY ||
-      my_charset_same(str_value.charset(), system_charset_info))
+  if (query_type & QT_TO_SYSTEM_CHARSET)
  {
-    str_value.print(str);
+    if (print_introducer)
+    {
+      /*
+        Because we wrote an introducer, we must print str_value in its
+        charset, and the resulting bytes must not be changed until they
+        reach the end client.
+        But the caller is asking for system_charset_info, and may later
+        convert into character_set_results. That means two conversions: we
+        must ensure that they don't change our printed bytes.
+        So we print str_value in the least common denominator of the three
+        charsets involved: ASCII. Non-ASCII characters are printed as \xFF
+        sequences (which is ASCII too). This way, our bytes will not be
+        changed.
+      */
+      ErrConvString tmp(str_value.ptr(), str_value.length(), &my_charset_bin);
+      str->append(tmp.ptr());
    }
    else
+    {
+      if (my_charset_same(str_value.charset(), system_charset_info))
+        str_value.print(str); // already in system_charset_info
+      else // need to convert
      {
        THD *thd= current_thd;
        LEX_STRING utf8_lex_str;
@@ -2544,6 +2564,13 @@ void Item_string::print(String *str, enum_query_type query_type)

        utf8_str.print(str);
      }
+    }
+  }
+  else
+  {
+    // Caller wants a result in the charset of str_value.
+    str_value.print(str);
+  }

  str->append('\'');
 }

--- a/sql/mysqld.h
+++ b/sql/mysqld.h
@@ -399,16 +399,16 @@ enum options_mysqld


 /**
-  Query type constants.
-
-  QT_ORDINARY -- ordinary SQL query.
-  QT_IS -- SQL query to be shown in INFORMATION_SCHEMA (in utf8 and without
-  character set introducers).
+   Query type constants (usable as bitmap flags).
 */
 enum enum_query_type
 {
-  QT_ORDINARY,
-  QT_IS
+  /// Nothing specific, ordinary SQL query.
+  QT_ORDINARY= 0,
+  /// In utf8.
+  QT_TO_SYSTEM_CHARSET= (1 << 0),
+  /// Without character set introducers.
+  QT_WITHOUT_INTRODUCERS= (1 << 1)
 };

 /* query_id */

--- a/sql/sql_parse.cc
+++ b/sql/sql_parse.cc
@@ -4439,7 +4439,11 @@ static bool execute_sqlcom_select(THD *thd, TABLE_LIST *all_tables)
        char buff[1024];
        String str(buff,(uint32) sizeof(buff), system_charset_info);
        str.length(0);
-        thd->lex->unit.print(&str, QT_ORDINARY);
+        /*
+          The warnings system requires input in utf8, @see
+          mysqld_show_warnings().
+        */
+        thd->lex->unit.print(&str, QT_TO_SYSTEM_CHARSET);
        str.append('\0');
        push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
                     ER_YES, str.ptr());

--- a/sql/sql_view.cc
+++ b/sql/sql_view.cc
@@ -841,7 +841,8 @@ static int mysql_register_view(THD *thd, TABLE_LIST *view,
    thd->variables.sql_mode&= ~MODE_ANSI_QUOTES;

    lex->unit.print(&view_query, QT_ORDINARY);
-    lex->unit.print(&is_query, QT_IS);
+    lex->unit.print(&is_query,
+                    enum_query_type(QT_TO_SYSTEM_CHARSET | QT_WITHOUT_INTRODUCERS));

    thd->variables.sql_mode|= sql_mode;
  }