Removing some duplicate code in THD::convert_string() & friends

1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal and TEXT_STRING_filesystem grammar rules in *.yy.
2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This further reduced the repeated code in the mentioned grammar rules in *.yy.
3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. to avoid redundant copying at the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly.
4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal and TEXT_STRING_filesystem grammar rules into new methods in THD:
   - make_text_string_sys()
   - make_text_string_connection()
   - make_text_string_filesystem()
   and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy.
5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(), thanks to #3 and #4.
6. Making the THD members charset_is_system_charset, charset_is_collation_connection and charset_is_character_set_filesystem private, as they are not needed externally any more.

Removing some duplicate code in THD::convert_string() & friends
1. Adding THD::convert_string(LEX_CSTRING *to,...) as a wrapper for convert_string(LEX_STRING *to,...), as LEX_CSTRING is now frequently used for conversion purposes. This reduced duplicate code in the TEXT_STRING_sys, TEXT_STRING_literal and TEXT_STRING_filesystem grammar rules in *.yy.
2. Adding yet another THD::convert_string() with an extra parameter "bool simple_copy_is_possible". This further reduced the repeated code in the mentioned grammar rules in *.yy.
3. Deriving Lex_ident_cli_st from Lex_string_with_metadata_st, as they have very similar functionality. Moving m_quote from Lex_ident_cli_st to Lex_string_with_metadata_st, as m_quote will be used later to optimize string literals anyway (e.g. to avoid redundant copying at the tokenizer stage). Adjusting Lex_input_stream::get_text() accordingly.
4. Moving the remainder of the code in the TEXT_STRING_sys, TEXT_STRING_literal and TEXT_STRING_filesystem grammar rules into new methods in THD:
   - make_text_string_sys()
   - make_text_string_connection()
   - make_text_string_filesystem()
   and changing *.yy to use these new methods. This reduced the amount of similar code in sql_yacc.yy and sql_yacc_ora.yy.
5. Removing duplicate code in Lex_input_stream::body_utf8_append_ident() by reusing THD::make_text_string_sys(), thanks to #3 and #4.
6. Making the THD members charset_is_system_charset, charset_is_collation_connection and charset_is_character_set_filesystem private, as they are not needed externally any more.
4a126bf3 · Alexander Barkov · af682525 · 4a126bf3 · 4a126bf3 · 4a126bf3
Commit 4a126bf3 authored May 11, 2018 by Alexander Barkov
Showing with 101 additions and 130 deletions

sql/sql_class.h sql/sql_class.h +40 -1

sql/sql_lex.cc sql/sql_lex.cc +15 -28

sql/sql_lex.h sql/sql_lex.h +33 -25

sql/sql_yacc.yy sql/sql_yacc.yy +7 -38

sql/sql_yacc_ora.yy sql/sql_yacc_ora.yy +6 -38

No files found.
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -3130,8 +3130,10 @@ class THD :public Statement,
    is set if a statement accesses a temporary table created through
    CREATE TEMPORARY TABLE. 
  */
-  bool	     charset_is_system_charset, charset_is_collation_connection;
+private:
+  bool       charset_is_system_charset, charset_is_collation_connection;
  bool       charset_is_character_set_filesystem;
+public:
  bool       enable_slow_log;    /* Enable slow log for current statement */
  bool	     abort_on_warning;
  bool 	     got_warning;       /* Set on call to push_warning() */
@@ -3706,6 +3708,25 @@ class THD :public Statement,
  bool convert_string(LEX_STRING *to, CHARSET_INFO *to_cs,
 		      const char *from, size_t from_length,
 		      CHARSET_INFO *from_cs);
+  bool convert_string(LEX_CSTRING *to, CHARSET_INFO *to_cs,
+                      const char *from, size_t from_length,
+                      CHARSET_INFO *from_cs)
+  {
+    LEX_STRING tmp;
+    bool rc= convert_string(&tmp, to_cs, from, from_length, from_cs);
+    to->str= tmp.str;
+    to->length= tmp.length;
+    return rc;
+  }
+  bool convert_string(LEX_CSTRING *to, CHARSET_INFO *tocs,
+                      const LEX_CSTRING *from, CHARSET_INFO *fromcs,
+                      bool simple_copy_is_possible)
+  {
+    if (!simple_copy_is_possible)
+      return unlikely(convert_string(to, tocs, from->str, from->length, fromcs));
+    *to= *from;
+    return false;
+  }
  /*
    Convert a strings between character sets.
    Uses my_convert_fix(), which uses an mb_wc .. mc_mb loop internally.
@@ -3767,6 +3788,24 @@ class THD :public Statement,
  Item_basic_constant *make_string_literal_nchar(const Lex_string_with_metadata_st &str);
  Item_basic_constant *make_string_literal_charset(const Lex_string_with_metadata_st &str,
                                                   CHARSET_INFO *cs);
+  bool make_text_string_sys(LEX_CSTRING *to,
+                            const Lex_string_with_metadata_st *from)
+  {
+    return convert_string(to, system_charset_info,
+                          from, charset(), charset_is_system_charset);
+  }
+  bool make_text_string_connection(LEX_CSTRING *to,
+                                   const Lex_string_with_metadata_st *from)
+  {
+    return convert_string(to, variables.collation_connection,
+                          from, charset(), charset_is_collation_connection);
+  }
+  bool make_text_string_filesystem(LEX_CSTRING *to,
+                                   const Lex_string_with_metadata_st *from)
+  {
+    return convert_string(to, variables.character_set_filesystem,
+                          from, charset(), charset_is_character_set_filesystem);
+  }
  void add_changed_table(TABLE *table);
  void add_changed_table(const char *key, size_t key_length);
  CHANGED_TABLE_LIST * changed_table_dup(const char *key, size_t key_length);

--- a/sql/sql_lex.cc
+++ b/sql/sql_lex.cc
@@ -416,32 +416,18 @@ void Lex_input_stream::body_utf8_append(const char *ptr)
                  operation.
 */

-void Lex_input_stream::body_utf8_append_ident(THD *thd,
-                                              const LEX_CSTRING *txt,
-                                              const char *end_ptr)
+void
+Lex_input_stream::body_utf8_append_ident(THD *thd,
+                                         const Lex_string_with_metadata_st *txt,
+                                         const char *end_ptr)
 {
  if (!m_cpp_utf8_processed_ptr)
    return;

  LEX_CSTRING utf_txt;
-  CHARSET_INFO *txt_cs= thd->charset();
-
-  if (!my_charset_same(txt_cs, &my_charset_utf8_general_ci))
-  {
-    LEX_STRING to;
-    thd->convert_string(&to,
-                        &my_charset_utf8_general_ci,
-                        txt->str, (uint) txt->length,
-                        txt_cs);
-    utf_txt.str=    to.str;
-    utf_txt.length= to.length;
-
-  }
-  else
-    utf_txt= *txt;
+  thd->make_text_string_sys(&utf_txt, txt); // QQ: check return value?

  /* NOTE: utf_txt.length is in bytes, not in symbols. */
-
  memcpy(m_body_utf8_ptr, utf_txt.str, utf_txt.length);
  m_body_utf8_ptr += utf_txt.length;
  *m_body_utf8_ptr= 0;
@@ -1043,13 +1029,13 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
  uchar c;
  uint found_escape=0;
  CHARSET_INFO *cs= m_thd->charset();
+  bool is_8bit= false;

-  dst->set_8bit(false);
  while (! eof())
  {
    c= yyGet();
    if (c & 0x80)
-      dst->set_8bit(true);
+      is_8bit= true;
 #ifdef USE_MB
    {
      int l;
@@ -1093,23 +1079,24 @@ bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,

      if (!(to= (char*) m_thd->alloc((uint) (end - str) + 1)))
      {
-        dst->str= "";        // Sql_alloc has set error flag
-        dst->length= 0;
-        return true;
+        dst->set(&empty_clex_str, 0, '\0');
+        return true;                   // Sql_alloc has set error flag
      }
-      dst->str= to;

      m_cpp_text_start= m_cpp_tok_start + pre_skip;
      m_cpp_text_end= get_cpp_ptr() - post_skip;

      if (!found_escape)
      {
-        memcpy(to, str, dst->length= (end - str));
-        to[dst->length]= 0;
+        size_t len= (end - str);
+        memcpy(to, str, len);
+        to[len]= '\0';
+        dst->set(to, len, is_8bit, '\0');
      }
      else
      {
-        dst->length= unescape(cs, to, str, end, sep);
+        size_t len= unescape(cs, to, str, end, sep);
+        dst->set(to, len, is_8bit, '\0');
      }
      return false;
    }

--- a/sql/sql_lex.h
+++ b/sql/sql_lex.h
@@ -37,12 +37,16 @@


 /**
-  A string with metadata.
+  A string with metadata. Usually points to a string in the client
+  character set, but unlike Lex_ident_cli_st (see below) it does not
+  necessarily point to a query fragment. It can also point to memory
+  of other kinds (e.g. an additional THD allocated memory buffer
+  not overlapping with the current query text).
+
  We'll add more flags here eventually, to know if the string has, e.g.:
  - multi-byte characters
  - bad byte sequences
  - backslash escapes:   'a\nb'
-  - separator escapes:   'a''b'
  and reuse the original query fragments instead of making the string
  copy too early, in Lex_input_stream::get_text().
  This will allow to avoid unnecessary copying, as well as
@@ -50,9 +54,30 @@
 */
 struct Lex_string_with_metadata_st: public LEX_CSTRING
 {
+private:
  bool m_is_8bit; // True if the string has 8bit characters
+  char m_quote;   // Quote character, or 0 if not quoted
 public:
  void set_8bit(bool is_8bit) { m_is_8bit= is_8bit; }
+  void set_metadata(bool is_8bit, char quote)
+  {
+    m_is_8bit= is_8bit;
+    m_quote= quote;
+  }
+  void set(const char *s, size_t len, bool is_8bit, char quote)
+  {
+    str= s;
+    length= len;
+    set_metadata(is_8bit, quote);
+  }
+  void set(const LEX_CSTRING *s, bool is_8bit, char quote)
+  {
+    ((LEX_CSTRING &)*this)= *s;
+    set_metadata(is_8bit, quote);
+  }
+  bool is_8bit() const { return m_is_8bit; }
+  bool is_quoted() const { return m_quote != '\0'; }
+  char quote() const { return m_quote; }
  // Get string repertoire by the 8-bit flag and the character set
  uint repertoire(CHARSET_INFO *cs) const
  {
@@ -71,44 +96,27 @@ struct Lex_string_with_metadata_st: public LEX_CSTRING
  Used to store identifiers in the client character set.
  Points to a query fragment.
 */
-struct Lex_ident_cli_st: public LEX_CSTRING
+struct Lex_ident_cli_st: public Lex_string_with_metadata_st
 {
-private:
-  bool m_is_8bit;
-  char m_quote;
 public:
  void set_keyword(const char *s, size_t len)
  {
-    str= s;
-    length= len;
-    m_is_8bit= false;
-    m_quote= '\0';
+    set(s, len, false, '\0');
  }
  void set_ident(const char *s, size_t len, bool is_8bit)
  {
-    str= s;
-    length= len;
-    m_is_8bit= is_8bit;
-    m_quote= '\0';
+    set(s, len, is_8bit, '\0');
  }
  void set_ident_quoted(const char *s, size_t len, bool is_8bit, char quote)
  {
-    str= s;
-    length= len;
-    m_is_8bit= is_8bit;
-    m_quote= quote;
+    set(s, len, is_8bit, quote);
  }
  void set_unquoted(const LEX_CSTRING *s, bool is_8bit)
  {
-    ((LEX_CSTRING &)*this)= *s;
-    m_is_8bit= is_8bit;
-    m_quote= '\0';
+    set(s, is_8bit, '\0');
  }
  const char *pos() const { return str - is_quoted(); }
  const char *end() const { return str + length + is_quoted(); }
-  bool is_quoted() const { return m_quote != '\0'; }
-  bool is_8bit() const { return m_is_8bit; }
-  char quote() const { return m_quote; }
 };


@@ -2453,7 +2461,7 @@ class Lex_input_stream
  void body_utf8_append(const char *ptr);
  void body_utf8_append(const char *ptr, const char *end_ptr);
  void body_utf8_append_ident(THD *thd,
-                              const LEX_CSTRING *txt,
+                              const Lex_string_with_metadata_st *txt,
                              const char *end_ptr);
  void body_utf8_append_escape(THD *thd,
                               const LEX_CSTRING *txt,

--- a/sql/sql_yacc.yy
+++ b/sql/sql_yacc.yy
@@ -15170,57 +15170,26 @@ IDENT_sys:
 TEXT_STRING_sys:
          TEXT_STRING
          {
-            if (thd->charset_is_system_charset)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to, system_charset_info,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_sys(&$$, &$1))
+              MYSQL_YYABORT;
          }
        ;

 TEXT_STRING_literal:
          TEXT_STRING
          {
-            if (thd->charset_is_collation_connection)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to,
-                                               thd->variables.collation_connection,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_connection(&$$, &$1))
+              MYSQL_YYABORT;
          }
        ;

 TEXT_STRING_filesystem:
          TEXT_STRING
          {
-            if (thd->charset_is_character_set_filesystem)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to,
-                                               thd->variables.character_set_filesystem,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_filesystem(&$$, &$1))
+              MYSQL_YYABORT;
          }
+        ;

 ident_table_alias:
          IDENT_sys

--- a/sql/sql_yacc_ora.yy
+++ b/sql/sql_yacc_ora.yy
@@ -14919,56 +14919,24 @@ IDENT_sys:
 TEXT_STRING_sys:
          TEXT_STRING
          {
-            if (thd->charset_is_system_charset)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to, system_charset_info,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_sys(&$$, &$1))
+              MYSQL_YYABORT;
          }
        ;

 TEXT_STRING_literal:
          TEXT_STRING
          {
-            if (thd->charset_is_collation_connection)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to,
-                                               thd->variables.collation_connection,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_connection(&$$, &$1))
+              MYSQL_YYABORT;
          }
        ;

 TEXT_STRING_filesystem:
          TEXT_STRING
          {
-            if (thd->charset_is_character_set_filesystem)
-              $$= $1;
-            else
-            {
-              LEX_STRING to;
-              if (unlikely(thd->convert_string(&to,
-                                               thd->variables.character_set_filesystem,
-                                               $1.str, $1.length,
-                                               thd->charset())))
-                MYSQL_YYABORT;
-              $$.str=    to.str;
-	      $$.length= to.length;
-            }
+            if (thd->make_text_string_filesystem(&$$, &$1))
+              MYSQL_YYABORT;
          }
        ;