Commit 8ba71c4c authored by Alexander Barkov, committed by Oleg Smirnov

MDEV-33281 Implement optimizer hints

Implementing a recursive descent parser for optimizer hints.
parent 1cb1dad1
......@@ -2716,8 +2716,10 @@ static bool add_line(String &buffer, char *line, size_t line_length,
break;
}
else if (!*in_string && inchar == '/' && *(pos+1) == '*' &&
!(*(pos+2) == '!' || (*(pos+2) == 'M' && *(pos+3) == '!')))
else if (!*in_string && inchar == '/' && pos[1] == '*' &&
!(pos[2] == '!' ||
(pos[2] == 'M' && pos[3] == '!') ||
pos[2] == '+'))
{
if (preserve_comments)
{
......@@ -2754,8 +2756,8 @@ static bool add_line(String &buffer, char *line, size_t line_length,
}
else
{ // Add found char to buffer
if (!*in_string && inchar == '/' && *(pos + 1) == '*' &&
*(pos + 2) == '!')
if (!*in_string && inchar == '/' && pos[1] == '*' &&
(pos[2] == '!' || pos[2] == '+'))
ss_comment= 1;
else if (!*in_string && ss_comment && inchar == '*' && *(pos + 1) == '/')
ss_comment= 0;
......
......@@ -154,6 +154,8 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/json_table.cc
../sql/opt_histogram_json.cc
../sql/sp_instr.cc
../sql/opt_hints_parser.cc ../sql/opt_hints_parser.h
../sql/scan_char.h
${GEN_SOURCES}
${MYSYS_LIBWRAP_SOURCE}
)
......
This diff is collapsed.
This diff is collapsed.
......@@ -187,6 +187,7 @@ SET (SQL_SOURCE
json_table.cc
proxy_protocol.cc backup.cc xa.cc
socketpair.c socketpair.h
opt_hints_parser.cc opt_hints_parser.h scan_char.h
${CMAKE_CURRENT_BINARY_DIR}/lex_hash.h
${CMAKE_CURRENT_BINARY_DIR}/lex_token.h
${GEN_SOURCES}
......
......@@ -18,8 +18,11 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include "my_global.h"
#include "m_ctype.h"
#include "char_buffer.h"
#include "lex_string.h"
#include "my_sys.h"
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *table_alias_charset;
......
/*
Copyright (c) 2024, MariaDB
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
#include "opt_hints_parser.h"
#include "sql_error.h"
#include "mysqld_error.h"
#include "sql_class.h"
/*
  Debug helper: report every token as a warning.

  Starting with the current look-ahead token, each token is formatted as
  "TOKEN: <id> <text>" and pushed as an ER_UNKNOWN_ERROR warning.
  The tNULL or tEOF token is reported too, and then the scan stops.

  @param thd  The connection the warnings are pushed to
  @return     Always true
*/
bool Optimizer_hint_parser::parse_token_list(THD *thd)
{
  for ( ; ; )
  {
    char text[200];
    my_snprintf(text, sizeof(text), "TOKEN: %d %.*s",
                (int) m_look_ahead_token.id(),
                (int) m_look_ahead_token.length,
                m_look_ahead_token.str);
    push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
                 ER_UNKNOWN_ERROR, text);

    // The terminating token has just been reported: nothing more to scan
    const auto id= m_look_ahead_token.id();
    if (id == TokenID::tNULL || id == TokenID::tEOF)
      return true; // Success

    m_look_ahead_token= get_token(m_cs);
  }
}
/*
  Push a warning about a hint syntax error at the look-ahead token.

  The warning uses the ER_PARSE_ERROR format with the
  ER_WARN_OPTIMIZER_HINT_SYNTAX_ERROR text as its message part.
  The quoted input fragment starts at the look-ahead token and its
  length is measured with strlen(), i.e. it runs up to the next NUL byte
  and is not limited to the length of the token itself.
  The last argument passed to the format is the constant 1.

  @param thd  The connection the warning is pushed to
*/
void Optimizer_hint_parser::push_warning_syntax_error(THD *thd)
{
  const char *msg= ER_THD(thd, ER_WARN_OPTIMIZER_HINT_SYNTAX_ERROR);
  const char *near_text= m_look_ahead_token.str;
  ErrConvString txt(near_text, strlen(near_text),
                    thd->variables.character_set_client);
  push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
                      ER_PARSE_ERROR, ER_THD(thd, ER_PARSE_ERROR),
                      msg, txt.ptr(), 1);
}
/*
  Append a table name to the list.

  Storage for the new element is obtained with p->m_thd->alloc(),
  "elem" is moved into it and the element pointer is appended using
  p->m_thd->mem_root.

  @param p     The parser that provides the THD to allocate on
  @param elem  The element to move into the list
  @return      true if the allocation failed,
               otherwise the result of push_back()
*/
bool
Optimizer_hint_parser::
Table_name_list_container::add(Optimizer_hint_parser *p,
                               Table_name &&elem)
{
  void *mem= p->m_thd->alloc(sizeof(Table_name));
  Table_name *slot= (Table_name*) mem;
  if (slot == nullptr)
    return true;
  *slot= std::move(elem);
  return push_back(slot, p->m_thd->mem_root);
}
/*
  Append a table hint parameter to the list.

  Storage for the new element is obtained with p->m_thd->alloc(),
  "elem" is moved into it and the element pointer is appended using
  p->m_thd->mem_root.

  @param p     The parser that provides the THD to allocate on
  @param elem  The element to move into the list
  @return      true if the allocation failed,
               otherwise the result of push_back()
*/
bool
Optimizer_hint_parser::
Hint_param_table_list_container::add(Optimizer_hint_parser *p,
                                     Hint_param_table &&elem)
{
  void *mem= p->m_thd->alloc(sizeof(Hint_param_table));
  Hint_param_table *slot= (Hint_param_table*) mem;
  if (slot == nullptr)
    return true;
  *slot= std::move(elem);
  return push_back(slot, p->m_thd->mem_root);
}
/*
  Append an index hint parameter to the list.

  Storage for the new element is obtained with p->m_thd->alloc(),
  "elem" is moved into it and the element pointer is appended using
  p->m_thd->mem_root.

  @param p     The parser that provides the THD to allocate on
  @param elem  The element to move into the list
  @return      true if the allocation failed,
               otherwise the result of push_back()
*/
bool
Optimizer_hint_parser::
Hint_param_index_list_container::add(Optimizer_hint_parser *p,
                                     Hint_param_index &&elem)
{
  void *mem= p->m_thd->alloc(sizeof(Hint_param_index));
  Hint_param_index *slot= (Hint_param_index*) mem;
  if (slot == nullptr)
    return true;
  *slot= std::move(elem);
  return push_back(slot, p->m_thd->mem_root);
}
/*
  Append a hint to the list.

  Storage for the new element is obtained with p->m_thd->alloc(),
  "elem" is moved into it and the element pointer is appended using
  p->m_thd->mem_root.

  @param p     The parser that provides the THD to allocate on
  @param elem  The element to move into the list
  @return      true if the allocation failed,
               otherwise the result of push_back()
*/
bool
Optimizer_hint_parser::
Hint_list_container::add(Optimizer_hint_parser *p,
                         Hint &&elem)
{
  void *mem= p->m_thd->alloc(sizeof(Hint));
  Hint *slot= (Hint*) mem;
  if (slot == nullptr)
    return true;
  *slot= std::move(elem);
  return push_back(slot, p->m_thd->mem_root);
}
This diff is collapsed.
/* Copyright (c) 2024, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
#ifndef SCAN_CHAR_H
#define SCAN_CHAR_H
/**
  Remembers the head character of the byte range [str, end):
  where it starts and the length reported for it by cs->charlen().
*/
class Scan_char
{
  const char *m_ptr; // Where the character starts
  int m_length;      // What charlen() returned:
                     // >0  - the octet length of the character
                     // <=0 - an error (e.g. end of input, wrong byte sequence)

  // Byte-wise comparison of the first m_length octets of both characters
  bool same_octets(const Scan_char &rhs) const
  {
    return memcmp(m_ptr, rhs.m_ptr, (size_t) m_length) == 0;
  }

public:
  Scan_char(CHARSET_INFO *const cs, const char *str, const char *end)
   :m_ptr(str), m_length(cs->charlen(str, end))
  { }

  /*
    Test two characters for equality.
    Both characters must be non-erroneous (asserted in debug builds).
  */
  bool eq(const Scan_char &rhs) const
  {
    DBUG_ASSERT(m_length > 0);
    DBUG_ASSERT(rhs.m_length > 0);
    if (m_length != rhs.m_length)
      return false;
    return same_octets(rhs);
  }

  /*
    Test two characters for equality when either of them can be erroneous.
    An erroneous character is never equal to anything.
  */
  bool eq_safe(const Scan_char &rhs) const
  {
    if (m_length <= 0 || m_length != rhs.m_length)
      return false;
    return same_octets(rhs);
  }

  const char *ptr() const { return m_ptr; }
  int length() const { return m_length; }
};
#endif // SCAN_CHAR_H
......@@ -12280,3 +12280,11 @@ ER_SEQUENCE_TABLE_ORDER_BY
eng "ORDER BY"
ER_VARIABLE_IGNORED
eng "The variable '%s' is ignored. It only exists for compatibility with old installations and will be removed in a future release"
ER_WARN_OPTIMIZER_HINT_SYNTAX_ERROR
eng "Optimizer hint syntax error"
ER_WARN_CONFLICTING_HINT
eng "Hint %s is ignored as conflicting/duplicated"
ER_WARN_UNKNOWN_QB_NAME
eng "Query block name %s is not found for %s hint"
ER_UNRESOLVED_HINT_NAME
eng "Unresolved name %s for %s hint"
This diff is collapsed.
......@@ -17,11 +17,21 @@
#define SIMPLE_TOKENIZER_INCLUDED
#include "lex_ident.h"
#include "scan_char.h"
/**
A tokenizer for an ASCII7 input
*/
class Simple_tokenizer
{
protected:
const char *m_ptr;
const char *m_end;
public:
// Tokenize the str.length bytes starting at str.str
Simple_tokenizer(const LEX_CSTRING &str)
 :Simple_tokenizer(str.str, str.length)
{ }
// Tokenize the byte range [str, str + length):
// m_ptr is the current position, m_end is the end of the input
Simple_tokenizer(const char *str, size_t length)
:m_ptr(str), m_end(str + length)
{ }
......@@ -33,11 +43,15 @@ class Simple_tokenizer
{
return m_ptr >= m_end;
}
// Check if the byte at the current position is a space,
// a carriage return or a line feed.
// The byte at m_ptr is read without testing for the end of the input.
bool is_space() const
{
  switch (m_ptr[0])
  {
  case ' ':
  case '\r':
  case '\n':
    return true;
  default:
    return false;
  }
}
void get_spaces()
{
for ( ; !eof(); m_ptr++)
{
if (m_ptr[0] != ' ')
if (!is_space())
break;
}
}
......@@ -82,4 +96,184 @@ class Simple_tokenizer
};
/**
  A tokenizer for a character set aware input.

  Adds scanning of non-delimited identifiers and of quoted strings
  (or quoted identifiers) to Simple_tokenizer. Characters are measured
  with the charlen() method of the character set passed to the constructor.
*/
class Extended_string_tokenizer: public Simple_tokenizer
{
protected:

  CHARSET_INFO *m_cs; // The character set of the input

  // Properties collected while scanning a token
  class Token_metadata
  {
  public:
    // A byte with the bit 0x80 set (8-bit character sets) or
    // a character longer than one octet was found in the token
    bool m_extended_chars:1;
    // Two quote characters in a row were found inside a quoted string
    bool m_double_quotes:1;
    Token_metadata()
     :m_extended_chars(false), m_double_quotes(false)
    { }
  };

  // A token (pointer and length) together with its metadata
  class Token_with_metadata: public Lex_cstring,
                             public Token_metadata
  {
  public:
    Token_with_metadata()
    { }
    Token_with_metadata(const char *str, size_t length,
                        const Token_metadata &metadata)
     :Lex_cstring(str, length), Token_metadata(metadata)
    { }
    // An empty token starting at "str", to be extended during scanning
    Token_with_metadata(const char *str)
     :Lex_cstring(str, (size_t) 0), Token_metadata()
    { }
  };

  /*
    Get a non-delimited identifier for a 8-bit character set.
    Scanning stops at the end of the input or at the first byte
    which is not an identifier character according to m_cs->ident_map.
  */
  Token_with_metadata get_ident_8bit(const char *str, const char *end) const
  {
    DBUG_ASSERT(m_cs->mbmaxlen == 1);
    Token_with_metadata res(str);
    for ( ; str < end && m_cs->ident_map[(uchar) *str]; str++, res.length++)
    {
      if (*str & 0x80)
        res.m_extended_chars= true;
    }
    return res;
  }

  /*
    Get a non-delimited identifier for a multi-byte character set.
    Scanning stops at the end of the input, at the first character
    whose head byte is not an identifier character according to
    m_cs->ident_map, or at a character that charlen() cannot measure.
  */
  Token_with_metadata get_ident_mb(const char *str, const char *end) const
  {
    DBUG_ASSERT(m_cs->mbmaxlen > 1);
    Token_with_metadata res(str);
    /*
      Test "str < end" before looking at the head byte,
      so that nothing is read beyond the end of the input
      (the same way get_ident_8bit() does).
    */
    for ( ; str < end && m_cs->ident_map[(uchar) *str]; )
    {
      int char_length= m_cs->charlen(str, end);
      if (char_length <= 0)
        break;
      str+= char_length;
      res.length+= (size_t) char_length;
      res.m_extended_chars|= char_length > 1;
    }
    return res;
  }

  /*
    Get a non-delimited identifier starting at "str".
    The result is an empty token if the head character
    is not an identifier character.
  */
  Token_with_metadata get_ident(const char *str, const char *end)
  {
    return m_cs->mbmaxlen == 1 ? get_ident_8bit(str, end) :
                                 get_ident_mb(str, end);
  }

  /*
    Get a quoted string or a quoted identifier.
    The quote character is determined by the current head character
    pointed by str. The result is returned together with the left
    and the right quotes.
    A null token is returned if the left or the right quote
    could not be found.
  */
  Token_with_metadata get_quoted_string(const char *str, const char *end)
  {
    Token_with_metadata res(str);
    const Scan_char quote(m_cs, str, end);
    if (quote.length() <= 0)
    {
      /*
        Could not get the left quote character:
        - the end of the input reached, or
        - a bad byte sequence found.
        Return a null token to signal the error to the caller.
      */
      return Token_with_metadata();
    }
    str+= quote.length();
    res.length+= (size_t) quote.length();

    for ( ; ; )
    {
      const Scan_char ch(m_cs, str, end);
      if (ch.length() <= 0)
      {
        /*
          Could not find the right quote character:
          - the end of the input reached before the quote was found, or
          - a bad byte sequence found.
          Return a null token to signal the error to the caller.
        */
        return Token_with_metadata();
      }
      str+= ch.length();
      res.length+= (size_t) ch.length();
      if (quote.eq(ch))
      {
        // The next character can be erroneous (e.g. end of input): eq_safe()
        if (quote.eq_safe(Scan_char(m_cs, str, end)))
        {
          /*
            Two quotes in a row found:
            - `a``b`
            - "a""b"
          */
          str+= quote.length();
          res.length+= (size_t) quote.length();
          res.m_extended_chars|= quote.length() > 1;
          res.m_double_quotes= true;
          continue;
        }
        return res; // The right quote found
      }
      res.m_extended_chars|= ch.length() > 1;
    }
    // Not reached: the loop above is left only through "return"
  }

public:
  Extended_string_tokenizer(CHARSET_INFO *cs, const LEX_CSTRING &str)
   :Simple_tokenizer(str),
    m_cs(cs)
  { }

  // Skip all leading spaces, as classified by my_isspace() for m_cs
  void get_spaces()
  {
    for ( ; !eof(); m_ptr++)
    {
      if (!my_isspace(m_cs, *m_ptr))
        break;
    }
  }

  /*
    Get a non-delimited identifier and move the current position past it.
    Can return an empty token if the head character is not an identifier
    character.
  */
  Token_with_metadata get_ident()
  {
    const Token_with_metadata tok= get_ident(m_ptr, m_end);
    m_ptr+= tok.length;
    return tok;
  }

  /*
    Get a quoted string or a quoted identifier
    and move the current position past it.
    Can return a null token if there were errors
    (e.g. unexpected end of the input, bad byte sequence).
  */
  Token_with_metadata get_quoted_string()
  {
    const Token_with_metadata tok= get_quoted_string(m_ptr, m_end);
    m_ptr+= tok.length;
    return tok;
  }
};
#endif // SIMPLE_TOKENIZER_INCLUDED
......@@ -855,6 +855,7 @@ Lex_input_stream::reset(char *buffer, size_t length)
found_semicolon= NULL;
ignore_space= MY_TEST(m_thd->variables.sql_mode & MODE_IGNORE_SPACE);
stmt_prepare_mode= FALSE;
hint_comment= FALSE;
multi_statements= TRUE;
in_comment=NO_COMMENT;
m_underscore_cs= NULL;
......@@ -2492,10 +2493,20 @@ int Lex_input_stream::lex_one_token(YYSTYPE *yylval, THD *thd)
else
{
in_comment= PRESERVE_COMMENT;
yylval->lex_str.str= m_ptr;
yySkip(); // Accept /
yySkip(); // Accept *
comment_closed= ! consume_comment(0);
/* regular comments can have zero comments inside. */
if ((comment_closed= ! consume_comment(0)) && hint_comment)
{
if (yylval->lex_str.str[2]=='+')
{
next_state= MY_LEX_START;
yylval->lex_str.length= m_ptr - yylval->lex_str.str;
restore_in_comment_state();
return HINT_COMMENT;
}
}
}
/*
Discard:
......@@ -12382,3 +12393,53 @@ bool SELECT_LEX_UNIT::is_derived_eliminated() const
return true;
return derived->table->map & outer_select()->join->eliminated_tables;
}
/*
  Parse an optimizer hint comment and return the result as a Hint_list
  allocated on thd->mem_root.

  The caller should check both the return value and thd->is_error()
  to know what happened, as follows:

  Return value   thd->is_error()   Meaning
  ------------   ---------------   -------
  rc != nullptr  false             the hints were parsed without errors
  rc != nullptr  true              not possible
  rc == nullptr  false             no hints, empty hints, hint parse error
  rc == nullptr  true              fatal error, such as EOM
*/
Optimizer_hint_parser::Hint_list *
LEX::parse_optimizer_hints(const LEX_CSTRING &hints_str)
{
  DBUG_ASSERT(!hints_str.str || hints_str.length >= 5);
  if (hints_str.str == nullptr)
    return nullptr; // There was no hint comment

  // The parser gets only the text between the comment delimiters:
  // the leading '/*+' (3 bytes) is skipped and
  // the trailing '*/' (2 bytes) is not included.
  const char *body= hints_str.str + 3;
  const size_t body_length= hints_str.length - 5;
  Optimizer_hint_parser p(thd, thd->charset(),
                          Lex_cstring(body, body_length));

  // Parse hints
  Optimizer_hint_parser::Hints hints(&p);
  DBUG_ASSERT(!p.is_error() || !hints);

  if (p.is_fatal_error())
  {
    /*
      Fatal error (e.g. EOM), have the caller fail.
      The SQL error should be in DA already.
    */
    DBUG_ASSERT(thd->is_error());
    return nullptr; // Continue, the caller will test thd->is_error()
  }

  if (!hints)
  {
    // Hint parsing failed with a syntax error: warn and ignore the hints
    p.push_warning_syntax_error(thd);
    return nullptr;
  }

  // Hints were not empty and were parsed without errors
  return new (thd->mem_root) Optimizer_hint_parser::Hint_list(std::move(hints));
}
......@@ -40,6 +40,7 @@
#include "table.h"
#include "sql_class.h" // enum enum_column_usage
#include "select_handler.h"
#include "opt_hints_parser.h"
/* Used for flags of nesting constructs */
#define SELECT_NESTING_MAP_SIZE 64
......@@ -2809,6 +2810,11 @@ class Lex_input_stream
*/
bool multi_statements:1;
/**
TRUE if hint comments should be returned as a token.
*/
bool hint_comment:1;
/** Current line number. */
uint yylineno;
......@@ -4864,6 +4870,9 @@ struct LEX: public Query_tables_list
{
return nullptr;
}
Optimizer_hint_parser::Hint_list *
parse_optimizer_hints(const LEX_CSTRING &hint);
};
......
......@@ -276,6 +276,7 @@ void _CONCAT_UNDERSCORED(turn_parser_debug_on,yyparse)()
TABLE_LIST *table_list;
Table_ident *table;
Qualified_column_ident *qualified_column_ident;
Optimizer_hint_parser::Hint_list *opt_hints;
char *simple_string;
const char *const_simple_string;
chooser_compare_func_creator boolfunc2creator;
......@@ -381,6 +382,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%token <lex_str> '@'
%token HINT_COMMENT
/*
Special purpose tokens
*/
......@@ -1330,6 +1333,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
opt_constraint constraint opt_ident
sp_block_label sp_control_label opt_place opt_db
udt_name
HINT_COMMENT opt_hint_comment
%type <ident_sys>
IDENT_sys
......@@ -1576,6 +1580,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, size_t *yystacksize);
%type <expr_lex>
expr_lex
%type <opt_hints>
opt_optimizer_hint
%destructor
{
/*
......@@ -8711,8 +8718,23 @@ table_value_constructor:
}
;
/*
  An optional hint comment token.
  The value is the value of the HINT_COMMENT token when it is present,
  and null_clex_str otherwise.
*/
opt_hint_comment:
/*empty */ { $$= null_clex_str; }
| HINT_COMMENT { $$= $1; }
;
/*
  Optional optimizer hints.
  The hint_comment flag of the lexer input stream is set in a mid-rule
  action before opt_hint_comment and is cleared after it.
  The value is the result of LEX::parse_optimizer_hints().
*/
opt_optimizer_hint:
{ YYLIP->hint_comment= true; }
opt_hint_comment
{
YYLIP->hint_comment= false;
/*
  Abort only when no hint list was returned and an error was raised.
  A null result without an error does not stop the statement.
*/
if (!($$= Lex->parse_optimizer_hints($2)) && thd->is_error())
MYSQL_YYABORT;
}
;
query_specification_start:
SELECT_SYM
SELECT_SYM opt_optimizer_hint
{
SELECT_LEX *sel;
LEX *lex= Lex;
......@@ -13296,7 +13318,7 @@ opt_temporary:
*/
insert:
INSERT
INSERT opt_optimizer_hint
{
Lex->sql_command= SQLCOM_INSERT;
Lex->duplicates= DUP_ERROR;
......@@ -13305,7 +13327,7 @@ insert:
}
insert_start insert_lock_option opt_ignore opt_into insert_table
{
Select->set_lock_for_tables($4, true, false);
Select->set_lock_for_tables($5, true, false);
}
insert_field_spec opt_insert_update opt_returning
stmt_end
......@@ -13316,7 +13338,7 @@ insert:
;
replace:
REPLACE
REPLACE opt_optimizer_hint
{
Lex->sql_command = SQLCOM_REPLACE;
Lex->duplicates= DUP_REPLACE;
......@@ -13325,7 +13347,7 @@ replace:
}
insert_start replace_lock_option opt_into insert_table
{
Select->set_lock_for_tables($4, true, false);
Select->set_lock_for_tables($5, true, false);
}
insert_field_spec opt_returning
stmt_end
......@@ -13600,7 +13622,7 @@ update_table_list:
/* Update rows in a table */
update:
UPDATE_SYM
UPDATE_SYM opt_optimizer_hint
{
LEX *lex= Lex;
if (Lex->main_select_push())
......@@ -13635,12 +13657,12 @@ update:
be too pessimistic. We will decrease lock level if possible
later while processing the statement.
*/
slex->set_lock_for_tables($3, slex->table_list.elements == 1, false);
slex->set_lock_for_tables($4, slex->table_list.elements == 1, false);
}
opt_where_clause opt_order_clause delete_limit_clause
{
if ($10)
Select->order_list= *($10);
if ($11)
Select->order_list= *($11);
} stmt_end {}
;
......@@ -13687,7 +13709,7 @@ opt_low_priority:
/* Delete rows from a table */
delete:
DELETE_SYM
DELETE_SYM opt_optimizer_hint
{
LEX *lex= Lex;
YYPS->m_lock_type= TL_WRITE_DEFAULT;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment