Commit b4379df5 authored by Varun Gupta's avatar Varun Gupta

MDEV-21265: IN predicate conversion to IN subquery should be allowed for a...

MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison

Allow materialization strategy when collations on the
inner and outer sides of an IN subquery are the same and the
character set of the inner side is a proper subset of the character
set on the outer side.
This allows conversion from utf8mb3 to utf8mb4
as the former is a subset of the later.
This is only allowed when IN predicate is converted to an IN subquery

Backported part of the patch (d6a00d9b) of MDEV-17905.
parent f3b10354
......@@ -2718,3 +2718,37 @@ Warning 1931 Query execution was interrupted. The query examined at least 3020 r
SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
# End of 10.2 tests
#
# MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
#
CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
set in_predicate_conversion_threshold=2;
set names 'utf8mb4';
#
# IN predicate to IN subquery is not allowed as materialization is not allowed
# The character set on the inner side is not equal to or a proper subset of the outer side
#
EXPLAIN
SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 5 Using where
set names 'utf8';
#
# IN predicate to IN subquery is performed as materialization is llowed
# The character set on the inner side is a proper subset of the outer side
#
EXPLAIN
SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 ALL NULL NULL NULL NULL 5
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func,func 1 Using where
2 MATERIALIZED <derived3> ALL NULL NULL NULL NULL 3
3 DERIVED NULL NULL NULL NULL NULL NULL NULL No tables used
set names default;
set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
DROP TABLE t1,t2;
# End of 10.3 tests
......@@ -2238,3 +2238,40 @@ SET join_cache_level= @save_join_cache_level;
DROP TABLE t1,t2,t3,t4;
--echo # End of 10.2 tests
--echo #
--echo # MDEV-21265: IN predicate conversion to IN subquery should be allowed for a broader set of datatype comparison
--echo #
CREATE TABLE t1(a VARCHAR(50) collate utf8_general_ci, b INT);
INSERT INTO t1 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
CREATE TABLE t2(a VARCHAR(50) collate utf8mb4_general_ci, b INT);
INSERT INTO t2 VALUES ('abc',1), ('def', 2), ('ghi', 3), ('jkl', 4), ('mno', 5);
set @save_in_predicate_conversion_threshold= @@in_predicate_conversion_threshold;
set in_predicate_conversion_threshold=2;
set names 'utf8mb4';
--echo #
--echo # IN predicate to IN subquery is not allowed as materialization is not allowed
--echo # The character set on the inner side is not equal to or a proper subset of the outer side
--echo #
EXPLAIN
SELECT * FROM t1 WHERE (t1.a,t1.b) IN (('abx',1),('def',2), ('abc', 3));
set names 'utf8';
--echo #
--echo # IN predicate to IN subquery is performed as materialization is llowed
--echo # The character set on the inner side is a proper subset of the outer side
--echo #
EXPLAIN
SELECT * FROM t2 WHERE (t2.a,t2.b) IN (('abx',1),('def',2), ('abc', 3));
set names default;
set @@in_predicate_conversion_threshold= @save_in_predicate_conversion_threshold;
DROP TABLE t1,t2;
--echo # End of 10.3 tests
......@@ -1454,7 +1454,8 @@ Item_in_subselect::Item_in_subselect(THD *thd, Item * left_exp,
pushed_cond_guards(NULL), do_not_convert_to_sj(FALSE), is_jtbm_merged(FALSE),
is_jtbm_const_tab(FALSE), is_flattenable_semijoin(FALSE),
is_registered_semijoin(FALSE),
upper_item(0)
upper_item(0),
converted_from_in_predicate(FALSE)
{
DBUG_ENTER("Item_in_subselect::Item_in_subselect");
DBUG_PRINT("info", ("in_strategy: %u", (uint)in_strategy));
......
......@@ -601,12 +601,18 @@ class Item_in_subselect :public Item_exists_subselect
Item_func_not_all *upper_item; // point on NOT/NOP before ALL/SOME subquery
/*
SET to TRUE if IN subquery is converted from an IN predicate
*/
bool converted_from_in_predicate;
Item_in_subselect(THD *thd_arg, Item * left_expr, st_select_lex *select_lex);
Item_in_subselect(THD *thd_arg):
Item_exists_subselect(thd_arg), left_expr_cache(0), first_execution(TRUE),
in_strategy(SUBS_NOT_TRANSFORMED),
pushed_cond_guards(NULL), func(NULL), do_not_convert_to_sj(FALSE),
is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0) {}
is_jtbm_merged(FALSE), is_jtbm_const_tab(FALSE), upper_item(0),
converted_from_in_predicate(FALSE) {}
void cleanup();
subs_type substype() { return IN_SUBS; }
void reset()
......
......@@ -836,6 +836,7 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
bool all_are_fields= TRUE;
uint32 total_key_length = 0;
bool converted_from_in_predicate= in_subs->converted_from_in_predicate;
for (uint i= 0; i < elements; i++)
{
Item *outer= in_subs->left_expr->element_index(i);
......@@ -843,8 +844,12 @@ bool subquery_types_allow_materialization(Item_in_subselect *in_subs)
all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM &&
inner->real_item()->type() == Item::FIELD_ITEM);
total_key_length += inner->max_length;
if (!inner->type_handler()->subquery_type_allows_materialization(inner,
outer))
if (!inner->
type_handler()->
subquery_type_allows_materialization(inner,
outer,
converted_from_in_predicate))
DBUG_RETURN(FALSE);
}
......
......@@ -131,6 +131,24 @@ uint convert_to_printable(char *to, size_t to_len,
const char *from, size_t from_len,
CHARSET_INFO *from_cs, size_t nbytes= 0);
class Charset
{
CHARSET_INFO *m_charset;
public:
Charset() :m_charset(&my_charset_bin) { }
Charset(CHARSET_INFO *cs) :m_charset(cs) { }
CHARSET_INFO *charset() const { return m_charset; }
/*
Collation name without the character set name.
For example, in case of "latin1_swedish_ci",
this method returns "_swedish_ci".
*/
LEX_CSTRING collation_specific_name() const;
bool encoding_allows_reinterpret_as(CHARSET_INFO *cs) const;
bool eq_collation_specific_names(CHARSET_INFO *cs) const;
};
class String : public Sql_alloc
{
char *Ptr;
......
......@@ -829,7 +829,8 @@ static bool cmp_row_types(Item* item1, Item* item2)
Item *inner= item1->element_index(i);
Item *outer= item2->element_index(i);
if (!inner->type_handler()->subquery_type_allows_materialization(inner,
outer))
outer,
true))
return true;
}
return false;
......@@ -895,7 +896,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
for (uint i=1; i < arg_count; i++)
{
if (!args[i]->const_item() || cmp_row_types(args[0], args[i]))
if (!args[i]->const_item() || cmp_row_types(args[i], args[0]))
return this;
}
......@@ -975,6 +976,7 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd,
if (!(in_subs=
new (thd->mem_root) Item_in_subselect(thd, args[0], sq_select)))
goto err;
in_subs->converted_from_in_predicate= TRUE;
sq= in_subs;
if (negated)
sq= negate_expression(thd, in_subs);
......
......@@ -19,6 +19,7 @@
#include "sql_const.h"
#include "sql_class.h"
#include "sql_time.h"
#include "sql_string.h"
#include "item.h"
#include "log.h"
......@@ -5112,7 +5113,8 @@ uint Type_handler_timestamp_common::Item_decimal_precision(const Item *item) con
bool Type_handler_real_result::
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == REAL_RESULT);
return outer->cmp_type() == REAL_RESULT;
......@@ -5121,7 +5123,8 @@ bool Type_handler_real_result::
bool Type_handler_int_result::
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == INT_RESULT);
return outer->cmp_type() == INT_RESULT;
......@@ -5130,7 +5133,8 @@ bool Type_handler_int_result::
bool Type_handler_decimal_result::
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == DECIMAL_RESULT);
return outer->cmp_type() == DECIMAL_RESULT;
......@@ -5139,23 +5143,37 @@ bool Type_handler_decimal_result::
bool Type_handler_string_result::
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == STRING_RESULT);
return outer->cmp_type() == STRING_RESULT &&
outer->collation.collation == inner->collation.collation &&
/*
Materialization also is unable to work when create_tmp_table() will
create a blob column because item->max_length is too big.
The following test is copied from varstring_type_handler().
*/
!inner->too_big_for_varchar();
if (outer->cmp_type() == STRING_RESULT &&
/*
Materialization also is unable to work when create_tmp_table() will
create a blob column because item->max_length is too big.
The following test is copied from varstring_type_handler().
*/
!inner->too_big_for_varchar())
{
if (outer->collation.collation == inner->collation.collation)
return true;
if (is_in_predicate)
{
Charset inner_col(inner->collation.collation);
if (inner_col.encoding_allows_reinterpret_as(outer->
collation.collation) &&
inner_col.eq_collation_specific_names(outer->collation.collation))
return true;
}
}
return false;
}
bool Type_handler_temporal_result::
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(inner->cmp_type() == TIME_RESULT);
return mysql_timestamp_type() ==
......@@ -5973,3 +5991,49 @@ bool Type_handler_general_purpose_string::
}
/***************************************************************************/
LEX_CSTRING Charset::collation_specific_name() const
{
/*
User defined collations can provide arbitrary names
for character sets and collations, so a collation
name not necessarily starts with the character set name.
*/
size_t csname_length= strlen(m_charset->csname);
if (strncmp(m_charset->name, m_charset->csname, csname_length))
return {NULL, 0};
const char *ptr= m_charset->name + csname_length;
return {ptr, strlen(ptr) };
}
bool
Charset::encoding_allows_reinterpret_as(const CHARSET_INFO *cs) const
{
if (!strcmp(m_charset->csname, cs->csname))
return true;
if (!strcmp(m_charset->csname, MY_UTF8MB3) &&
!strcmp(cs->csname, MY_UTF8MB4))
return true;
/*
Originally we allowed here instat ALTER for ASCII-to-LATIN1
and UCS2-to-UTF16, but this was wrong:
- MariaDB's ascii is not a subset for 8-bit character sets
like latin1, because it allows storing bytes 0x80..0xFF as
"unassigned" characters (see MDEV-19285).
- MariaDB's ucs2 (as in Unicode-1.1) is not a subset for UTF16,
because they treat surrogate codes differently (MDEV-19284).
*/
return false;
}
bool
Charset::eq_collation_specific_names(CHARSET_INFO *cs) const
{
LEX_CSTRING name0= collation_specific_name();
LEX_CSTRING name1= Charset(cs).collation_specific_name();
return name0.length && !cmp(&name0, &name1);
}
......@@ -1291,9 +1291,21 @@ class Type_handler
Item *target_expr, Item *target_value,
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const= 0;
/*
@brief
Check if an IN subquery allows materialization or not
@param
inner expression on the inner side of the IN subquery
outer expression on the outer side of the IN subquery
is_in_predicate SET to true if IN subquery was converted from an
IN predicate or we are checking if materialization
strategy can be used for an IN predicate
*/
virtual bool
subquery_type_allows_materialization(const Item *inner,
const Item *outer) const= 0;
const Item *outer,
bool is_in_predicate) const= 0;
/**
Make a simple constant replacement item for a constant "src",
so the new item can futher be used for comparison with "cmp", e.g.:
......@@ -1470,7 +1482,8 @@ class Type_handler_row: public Type_handler
}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
DBUG_ASSERT(0);
return false;
......@@ -1788,7 +1801,8 @@ class Type_handler_real_result: public Type_handler_numeric
virtual ~Type_handler_real_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const;
const Item *outer,
bool is_in_predicate) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
void sortlength(THD *thd,
......@@ -1857,7 +1871,8 @@ class Type_handler_decimal_result: public Type_handler_numeric
virtual ~Type_handler_decimal_result() {};
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const;
const Item *outer,
bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
......@@ -2060,7 +2075,8 @@ class Type_handler_int_result: public Type_handler_numeric
virtual ~Type_handler_int_result() {}
const Type_handler *type_handler_for_comparison() const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const;
const Item *outer,
bool is_in_predicate) const;
Field *make_num_distinct_aggregator_field(MEM_ROOT *, const Item *) const;
void make_sort_key(uchar *to, Item *item, const SORT_FIELD_ATTR *sort_field,
Sort_param *param) const;
......@@ -2156,7 +2172,8 @@ class Type_handler_temporal_result: public Type_handler
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const;
const Item *outer,
bool is_in_predicate) const;
bool Item_func_min_max_fix_attributes(THD *thd, Item_func_min_max *func,
Item **items, uint nitems) const;
bool Item_sum_hybrid_fix_length_and_dec(Item_sum_hybrid *func) const;
......@@ -2266,7 +2283,8 @@ class Type_handler_string_result: public Type_handler
Item_bool_func2 *source,
Item *source_expr, Item *source_const) const;
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const;
const Item *outer,
bool is_in_predicate) const;
Item *make_const_item_for_comparison(THD *, Item *src, const Item *cmp) const;
Item_cache *Item_get_cache(THD *thd, const Item *item) const;
bool set_comparator_func(Arg_comparator *cmp) const;
......@@ -3236,7 +3254,8 @@ class Type_handler_blob_common: public Type_handler_longstr
return blob_type_handler(item);
}
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
return false; // Materialization does not work with BLOB columns
}
......@@ -3341,7 +3360,8 @@ class Type_handler_geometry: public Type_handler_string_result
return true;
}
bool subquery_type_allows_materialization(const Item *inner,
const Item *outer) const
const Item *outer,
bool is_in_predicate) const
{
return false; // Materialization does not work with GEOMETRY columns
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment