Commit f1b0b046 authored by Alexander Barkov

MDEV-12411 Remove Lex::text_string_is_7bit

parent 015868e7
...@@ -1056,18 +1056,19 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to, ...@@ -1056,18 +1056,19 @@ Lex_input_stream::unescape(CHARSET_INFO *cs, char *to,
Fix sometimes to do only one scan of the string Fix sometimes to do only one scan of the string
*/ */
bool Lex_input_stream::get_text(LEX_STRING *dst, uint sep, bool Lex_input_stream::get_text(Lex_string_with_metadata_st *dst, uint sep,
int pre_skip, int post_skip) int pre_skip, int post_skip)
{ {
reg1 uchar c; reg1 uchar c;
uint found_escape=0; uint found_escape=0;
CHARSET_INFO *cs= m_thd->charset(); CHARSET_INFO *cs= m_thd->charset();
tok_bitmap= 0; dst->set_8bit(false);
while (! eof()) while (! eof())
{ {
c= yyGet(); c= yyGet();
tok_bitmap|= c; if (c & 0x80)
dst->set_8bit(true);
#ifdef USE_MB #ifdef USE_MB
{ {
int l; int l;
...@@ -1433,18 +1434,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) ...@@ -1433,18 +1434,17 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
} }
/* Found N'string' */ /* Found N'string' */
lip->yySkip(); // Skip ' lip->yySkip(); // Skip '
if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 2, 1)) if (lip->get_text(&yylval->lex_string_with_metadata,
(sep= lip->yyGetLast()), 2, 1))
{ {
state= MY_LEX_CHAR; // Read char by char state= MY_LEX_CHAR; // Read char by char
break; break;
} }
lip->body_utf8_append(lip->m_cpp_text_start); lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_escape(thd, &yylval->lex_str, lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata,
national_charset_info, national_charset_info,
lip->m_cpp_text_end, sep); lip->m_cpp_text_end, sep);
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(NCHAR_STRING); return(NCHAR_STRING);
} }
case MY_LEX_IDENT_OR_HEX: case MY_LEX_IDENT_OR_HEX:
...@@ -1798,7 +1798,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) ...@@ -1798,7 +1798,8 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
case MY_LEX_STRING: // Incomplete text string case MY_LEX_STRING: // Incomplete text string
{ {
uint sep; uint sep;
if (lip->get_text(&yylval->lex_str, (sep= lip->yyGetLast()), 1, 1)) if (lip->get_text(&yylval->lex_string_with_metadata,
(sep= lip->yyGetLast()), 1, 1))
{ {
state= MY_LEX_CHAR; // Read char by char state= MY_LEX_CHAR; // Read char by char
break; break;
...@@ -1806,11 +1807,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd) ...@@ -1806,11 +1807,9 @@ static int lex_one_token(YYSTYPE *yylval, THD *thd)
CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs; CHARSET_INFO *strcs= lip->m_underscore_cs ? lip->m_underscore_cs : cs;
lip->body_utf8_append(lip->m_cpp_text_start); lip->body_utf8_append(lip->m_cpp_text_start);
lip->body_utf8_append_escape(thd, &yylval->lex_str, strcs, lip->body_utf8_append_escape(thd, &yylval->lex_string_with_metadata,
lip->m_cpp_text_end, sep); strcs, lip->m_cpp_text_end, sep);
lip->m_underscore_cs= NULL; lip->m_underscore_cs= NULL;
lex->text_string_is_7bit= (lip->tok_bitmap & 0x80) ? 0 : 1;
return(TEXT_STRING); return(TEXT_STRING);
} }
case MY_LEX_COMMENT: // Comment case MY_LEX_COMMENT: // Comment
......
...@@ -33,6 +33,38 @@ ...@@ -33,6 +33,38 @@
/* YACC and LEX Definitions */ /* YACC and LEX Definitions */
/**
A string with metadata.
We'll add more flags here eventually, to know if the string has, e.g.:
- multi-byte characters
- bad byte sequences
- backslash escapes: 'a\nb'
- separator escapes: 'a''b'
and reuse the original query fragments instead of making the string
copy too early, in Lex_input_stream::get_text().
This will allow us to avoid unnecessary copying, as well as
to create more optimal Item types in sql_yacc.yy
*/
struct Lex_string_with_metadata_st: public LEX_STRING
{
  /*
    True if the string has 8bit characters.
    There is no initializer here: the flag only gets a value through
    set_8bit().
  */
  bool m_is_8bit;

  /* Remember whether the string has 8bit characters. */
  void set_8bit(bool is_8bit)
  {
    m_is_8bit= is_8bit;
  }

  /*
    Repertoire derived from the 8-bit flag alone.
    Intended for ASCII-based character sets: the character set itself
    is not examined here.
  */
  uint repertoire() const
  {
    if (m_is_8bit)
      return MY_REPERTOIRE_UNICODE30;
    return MY_REPERTOIRE_ASCII;
  }

  /*
    Repertoire derived from the 8-bit flag and the character set.
    The result is MY_REPERTOIRE_ASCII only when the string has no 8bit
    characters and "cs" is ASCII-based; in every other case it is
    MY_REPERTOIRE_UNICODE30. "cs" is tested only if the flag is not set.
  */
  uint repertoire(CHARSET_INFO *cs) const
  {
    if (m_is_8bit || !my_charset_is_ascii_based(cs))
      return MY_REPERTOIRE_UNICODE30;
    return MY_REPERTOIRE_ASCII;
  }
};
enum sub_select_type enum sub_select_type
{ {
UNSPECIFIED_TYPE, UNSPECIFIED_TYPE,
...@@ -2246,7 +2278,8 @@ class Lex_input_stream ...@@ -2246,7 +2278,8 @@ class Lex_input_stream
/** LALR(2) resolution, value of the look ahead token.*/ /** LALR(2) resolution, value of the look ahead token.*/
LEX_YYSTYPE lookahead_yylval; LEX_YYSTYPE lookahead_yylval;
bool get_text(LEX_STRING *to, uint sep, int pre_skip, int post_skip); bool get_text(Lex_string_with_metadata_st *to,
uint sep, int pre_skip, int post_skip);
void add_digest_token(uint token, LEX_YYSTYPE yylval); void add_digest_token(uint token, LEX_YYSTYPE yylval);
...@@ -2325,9 +2358,6 @@ class Lex_input_stream ...@@ -2325,9 +2358,6 @@ class Lex_input_stream
*/ */
const char *found_semicolon; const char *found_semicolon;
/** Token character bitmaps, to detect 7bit strings. */
uchar tok_bitmap;
/** SQL_MODE = IGNORE_SPACE. */ /** SQL_MODE = IGNORE_SPACE. */
bool ignore_space; bool ignore_space;
...@@ -2565,8 +2595,6 @@ struct LEX: public Query_tables_list ...@@ -2565,8 +2595,6 @@ struct LEX: public Query_tables_list
DYNAMIC_ARRAY plugins; DYNAMIC_ARRAY plugins;
plugin_ref plugins_static_buffer[INITIAL_LEX_PLUGIN_LIST_SIZE]; plugin_ref plugins_static_buffer[INITIAL_LEX_PLUGIN_LIST_SIZE];
bool text_string_is_7bit;
/** SELECT of CREATE VIEW statement */ /** SELECT of CREATE VIEW statement */
LEX_STRING create_view_select; LEX_STRING create_view_select;
......
...@@ -906,6 +906,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr) ...@@ -906,6 +906,7 @@ Virtual_column_info *add_virtual_expression(THD *thd, Item *expr)
/* structs */ /* structs */
LEX_STRING lex_str; LEX_STRING lex_str;
LEX_SYMBOL symbol; LEX_SYMBOL symbol;
Lex_string_with_metadata_st lex_string_with_metadata;
struct sys_var_with_base variable; struct sys_var_with_base variable;
struct { int vars, conds, hndlrs, curs; } spblock; struct { int vars, conds, hndlrs, curs; } spblock;
Lex_length_and_dec_st Lex_length_and_dec; Lex_length_and_dec_st Lex_length_and_dec;
...@@ -1709,14 +1710,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize); ...@@ -1709,14 +1710,18 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%left INTERVAL_SYM %left INTERVAL_SYM
%type <lex_str> %type <lex_str>
IDENT IDENT_QUOTED TEXT_STRING DECIMAL_NUM FLOAT_NUM NUM LONG_NUM IDENT IDENT_QUOTED DECIMAL_NUM FLOAT_NUM NUM LONG_NUM
HEX_NUM HEX_STRING HEX_NUM HEX_STRING
LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text LEX_HOSTNAME ULONGLONG_NUM field_ident select_alias ident ident_or_text
IDENT_sys TEXT_STRING_sys TEXT_STRING_literal IDENT_sys TEXT_STRING_sys TEXT_STRING_literal
NCHAR_STRING opt_component key_cache_name opt_component key_cache_name
sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem ident_or_empty sp_opt_label BIN_NUM label_ident TEXT_STRING_filesystem ident_or_empty
opt_constraint constraint opt_ident opt_constraint constraint opt_ident
%type <lex_string_with_metadata>
TEXT_STRING
NCHAR_STRING
%type <lex_str_ptr> %type <lex_str_ptr>
opt_table_alias opt_table_alias
...@@ -13695,9 +13700,7 @@ text_literal: ...@@ -13695,9 +13700,7 @@ text_literal:
LEX_STRING tmp; LEX_STRING tmp;
CHARSET_INFO *cs_con= thd->variables.collation_connection; CHARSET_INFO *cs_con= thd->variables.collation_connection;
CHARSET_INFO *cs_cli= thd->variables.character_set_client; CHARSET_INFO *cs_cli= thd->variables.character_set_client;
uint repertoire= thd->lex->text_string_is_7bit && uint repertoire= $1.repertoire(cs_cli);
my_charset_is_ascii_based(cs_cli) ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
if (thd->charset_is_collation_connection || if (thd->charset_is_collation_connection ||
(repertoire == MY_REPERTOIRE_ASCII && (repertoire == MY_REPERTOIRE_ASCII &&
my_charset_is_ascii_based(cs_con))) my_charset_is_ascii_based(cs_con)))
...@@ -13716,13 +13719,11 @@ text_literal: ...@@ -13716,13 +13719,11 @@ text_literal:
} }
| NCHAR_STRING | NCHAR_STRING
{ {
uint repertoire= Lex->text_string_is_7bit ?
MY_REPERTOIRE_ASCII : MY_REPERTOIRE_UNICODE30;
DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info)); DBUG_ASSERT(my_charset_is_ascii_based(national_charset_info));
$$= new (thd->mem_root) Item_string(thd, $1.str, $1.length, $$= new (thd->mem_root) Item_string(thd, $1.str, $1.length,
national_charset_info, national_charset_info,
DERIVATION_COERCIBLE, DERIVATION_COERCIBLE,
repertoire); $1.repertoire());
if ($$ == NULL) if ($$ == NULL)
MYSQL_YYABORT; MYSQL_YYABORT;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment