Commit 702b6073 authored by Oleg Smirnov's avatar Oleg Smirnov

MDEV-34665 Simplify IN predicate processing for NULL-aware materialization...

MDEV-34665 Simplify IN predicate processing for NULL-aware materialization involving only one column

It was found that unnecessary work of building Ordered_key structures
is being done when processing NULL-aware materialization for IN predicates
having only one column. In fact, the logic for that simplified case can be
expressed as follows.
Say we have predicate left_expr IN (SELECT <subq1>), where left_expr is
scalar(not a tuple).
Then
    if (left_expr is NULL) {
      if (subq1 produced any rows) {
        // note that we don't care if subq1 has produced
        // NULLs or not.
        NULL IN (<some values>) -> UNKNOWN, i.e. NULL.
      } else {
        NULL IN ({empty-set}) -> FALSE.
      }
    } else {
      // left_expr is a non-NULL value
      if (subq1 output has a match for left_expr) {
        left_expr IN (..., left_expr ...) -> TRUE
      } else {
        // no "known" matches.
        if (subq1 output has a NULL) {
          left_expr IN ( ... NULL ...) ->
           (NULL could have been a match or not)
           -> NULL.
        } else {
          // subq1 didn't produce any "UNKNOWNs" so
          // we're positive there weren't any matches
          -> FALSE.
        }
      }
    }

This commit introduces subselect_single_column_partial_engine class
implementing the logic described.
parent 48b256a7
......@@ -3013,3 +3013,154 @@ UNION ALL
1
1
drop table t1, t2;
#
# MDEV-34665: Simplify IN predicate processing for NULL-aware
# materialization involving only one column
set @save_optimizer_switch=@@optimizer_switch;
set @@optimizer_switch = "materialization=on,in_to_exists=off,semijoin=off";
create table t1 (a int);
create table t2 (b int);
insert into t1 values (null), (1), (2), (3);
# Query against empty t2
select * from t1 where a not in (select b from t2);
a
1
2
3
NULL
select a from t1 join t2 on a not in (select b from t2);
a
select a from t1 left join t2 on a not in (select b from t2);
a
1
2
3
NULL
# Insert some not-NULL values
insert into t2 values (3), (4);
select * from t1 where a not in (select b from t2);
a
1
2
select a from t1 join t2 on a not in (select b from t2);
a
1
1
2
2
select a from t1 left join t2 on a not in (select b from t2);
a
1
1
2
2
3
NULL
# Ensure the correct strategy is tested
analyze format=json select * from t1 where a not in (select b from t2);
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t1",
"access_type": "ALL",
"r_loops": 1,
"rows": 4,
"r_rows": 4,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 50,
"attached_condition": "!<in_optimizer>(t1.a,t1.a in (subquery#2))"
},
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
"query_block": {
"select_id": 2,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t2",
"access_type": "ALL",
"r_loops": 1,
"rows": 2,
"r_rows": 2,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100
}
}
}
}
]
}
}
# Insert NULL value
insert into t2 values (null);
select * from t1 where a not in (select b from t2);
a
select a from t1 join t2 on a not in (select b from t2);
a
select a from t1 left join t2 on a not in (select b from t2);
a
1
2
3
NULL
analyze format=json select * from t1 where a not in (select b from t2);
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t1",
"access_type": "ALL",
"r_loops": 1,
"rows": 4,
"r_rows": 4,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 0,
"attached_condition": "!<in_optimizer>(t1.a,t1.a in (subquery#2))"
},
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"query_block": {
"select_id": 2,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t2",
"access_type": "ALL",
"r_loops": 1,
"rows": 3,
"r_rows": 3,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100
}
}
}
}
]
}
}
drop table t1, t2;
set @@optimizer_switch=@save_optimizer_switch;
......@@ -296,3 +296,50 @@ UNION ALL
;
drop table t1, t2;
--echo #
--echo # MDEV-34665: Simplify IN predicate processing for NULL-aware
--echo # materialization involving only one column
set @save_optimizer_switch=@@optimizer_switch;
set @@optimizer_switch = "materialization=on,in_to_exists=off,semijoin=off";
create table t1 (a int);
create table t2 (b int);
insert into t1 values (null), (1), (2), (3);
--echo # Query against empty t2
--sorted_result
select * from t1 where a not in (select b from t2);
--sorted_result
select a from t1 join t2 on a not in (select b from t2);
--sorted_result
select a from t1 left join t2 on a not in (select b from t2);
--echo # Insert some not-NULL values
insert into t2 values (3), (4);
--sorted_result
select * from t1 where a not in (select b from t2);
--sorted_result
select a from t1 join t2 on a not in (select b from t2);
--sorted_result
select a from t1 left join t2 on a not in (select b from t2);
--echo # Ensure the correct strategy is tested
--source include/analyze-format.inc
analyze format=json select * from t1 where a not in (select b from t2);
--echo # Insert NULL value
insert into t2 values (null);
--sorted_result
select * from t1 where a not in (select b from t2);
--sorted_result
select a from t1 join t2 on a not in (select b from t2);
--sorted_result
select a from t1 left join t2 on a not in (select b from t2);
--source include/analyze-format.inc
analyze format=json select * from t1 where a not in (select b from t2);
drop table t1, t2;
set @@optimizer_switch=@save_optimizer_switch;
......@@ -80,8 +80,6 @@ ANALYZE
}
}
# "Partial match" is used due to NOT IN
# Force rowid-merge partial partial matching
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
analyze format=json select * from t1 where a not in (select b from t2);
ANALYZE
{
......@@ -104,58 +102,7 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;array merge for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
"r_partial_match_buffer_size": "REPLACED",
"r_partial_match_array_sizes": ["2"],
"query_block": {
"select_id": 2,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t2",
"access_type": "ALL",
"r_loops": 1,
"rows": 2,
"r_rows": 2,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 100
}
}
}
}
]
}
}
# Force table scan partial matching
set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=on";
analyze format=json select * from t1 where a not in (select b from t2);
ANALYZE
{
"query_block": {
"select_id": 1,
"r_loops": 1,
"r_total_time_ms": "REPLACED",
"table": {
"table_name": "t1",
"access_type": "ALL",
"r_loops": 1,
"rows": 4,
"r_rows": 4,
"r_table_time_ms": "REPLACED",
"r_other_time_ms": "REPLACED",
"filtered": 100,
"r_filtered": 50,
"attached_condition": "!<in_optimizer>(t1.a,t1.a in (subquery#2))"
},
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;full scan for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
......@@ -211,7 +158,7 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;full scan for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
......@@ -238,7 +185,6 @@ ANALYZE
}
}
}
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
analyze format=json select a from t1 group by a not in (select b from t2);
ANALYZE
{
......@@ -269,12 +215,10 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;array merge for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
"r_partial_match_buffer_size": "REPLACED",
"r_partial_match_array_sizes": ["2"],
"query_block": {
"select_id": 2,
"r_loops": 1,
......@@ -298,7 +242,6 @@ ANALYZE
}
}
}
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=on";
# Subselect in ORDER BY
analyze format=json select a from t1 order by a in (select b from t2);
ANALYZE
......@@ -333,7 +276,7 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;full scan for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
......@@ -381,7 +324,7 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;full scan for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"r_partial_matches": 1,
......@@ -521,7 +464,7 @@ ANALYZE
"subqueries": [
{
"materialization": {
"r_strategy": "index_lookup;full scan for partial match",
"r_strategy": "index_lookup;constant for partial match",
"r_loops": 4,
"r_index_lookups": 3,
"query_block": {
......@@ -614,6 +557,7 @@ create table t1 (a1 char(1), a2 char(1));
insert into t1 values (null, 'b');
create table t2 (b1 char(1), b2 char(2));
insert into t2 values ('a','b'), ('c', 'd'), (null, 'e'), ('f', 'g');
# Force rowid-merge partial matching
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
explain format=json select * from t1 where (a1, a2) not in (select b1, b2 from t2);
EXPLAIN
......@@ -690,6 +634,7 @@ ANALYZE
]
}
}
# Force table scan partial matching
set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=on";
analyze format=json select * from t1 where (a1, a2) not in (select b1, b2 from t2);
ANALYZE
......
......@@ -13,13 +13,6 @@ explain format=json select * from t1 where a in (select b from t2);
analyze format=json select * from t1 where a in (select b from t2);
--echo # "Partial match" is used due to NOT IN
--echo # Force rowid-merge partial partial matching
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
--source include/analyze-format.inc
analyze format=json select * from t1 where a not in (select b from t2);
--echo # Force table scan partial matching
set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=on";
--source include/analyze-format.inc
analyze format=json select * from t1 where a not in (select b from t2);
......@@ -27,11 +20,9 @@ analyze format=json select * from t1 where a not in (select b from t2);
--source include/analyze-format.inc
analyze format=json select a from t1 group by a in (select b from t2);
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
--source include/analyze-format.inc
analyze format=json select a from t1 group by a not in (select b from t2);
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=on";
--echo # Subselect in ORDER BY
--source include/analyze-format.inc
analyze format=json select a from t1 order by a in (select b from t2);
......@@ -68,11 +59,13 @@ insert into t1 values (null, 'b');
create table t2 (b1 char(1), b2 char(2));
insert into t2 values ('a','b'), ('c', 'd'), (null, 'e'), ('f', 'g');
--echo # Force rowid-merge partial matching
set @@optimizer_switch="partial_match_rowid_merge=on,partial_match_table_scan=off";
explain format=json select * from t1 where (a1, a2) not in (select b1, b2 from t2);
--source include/analyze-format.inc
analyze format=json select * from t1 where (a1, a2) not in (select b1, b2 from t2);
--echo # Force table scan partial matching
set @@optimizer_switch="partial_match_rowid_merge=off,partial_match_table_scan=on";
--source include/analyze-format.inc
analyze format=json select * from t1 where (a1, a2) not in (select b1, b2 from t2);
......
......@@ -4973,12 +4973,18 @@ subselect_hash_sj_engine::get_strategy_using_data()
void
subselect_hash_sj_engine::choose_partial_match_strategy(
bool has_non_null_key, bool has_covering_null_row,
uint field_count, bool has_non_null_key, bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts_arg)
{
ulonglong pm_buff_size;
DBUG_ASSERT(strategy == PARTIAL_MATCH);
if (field_count == 1)
{
strategy= PARTIAL_MATCH_SINGLE_COLUMN;
return;
}
/*
Choose according to global optimizer switch. If only one of the switches is
'ON', then the remaining strategy is the only possible one. The only cases
......@@ -5414,7 +5420,8 @@ void subselect_hash_sj_engine::cleanup()
related engines are created and chosen for each execution.
*/
item->get_IN_subquery()->engine= materialize_engine;
if (lookup_engine_type == TABLE_SCAN_ENGINE ||
if (lookup_engine_type == SINGLE_COLUMN_ENGINE ||
lookup_engine_type == TABLE_SCAN_ENGINE ||
lookup_engine_type == ROWID_MERGE_ENGINE)
{
subselect_engine *inner_lookup_engine;
......@@ -5755,11 +5762,26 @@ int subselect_hash_sj_engine::exec()
count_pm_keys= count_partial_match_columns - count_null_only_columns +
(nn_key_parts ? 1 : 0);
choose_partial_match_strategy(MY_TEST(nn_key_parts),
choose_partial_match_strategy(field_count, MY_TEST(nn_key_parts),
has_covering_null_row,
&partial_match_key_parts);
DBUG_ASSERT(strategy == PARTIAL_MATCH_MERGE ||
DBUG_ASSERT(strategy == PARTIAL_MATCH_SINGLE_COLUMN ||
strategy == PARTIAL_MATCH_MERGE ||
strategy == PARTIAL_MATCH_SCAN);
if (strategy == PARTIAL_MATCH_SINGLE_COLUMN)
{
if (!(pm_engine= new subselect_single_column_partial_engine(
(subselect_uniquesubquery_engine*) lookup_engine, tmp_table,
item, result, semi_join_conds->argument_list(),
has_covering_null_row, has_covering_null_columns,
count_columns_with_nulls)) ||
pm_engine->prepare(thd))
{
/* This is an irrecoverable error. */
res= 1;
goto err;
}
}
if (strategy == PARTIAL_MATCH_MERGE)
{
pm_engine=
......@@ -5786,7 +5808,6 @@ int subselect_hash_sj_engine::exec()
strategy= PARTIAL_MATCH_SCAN;
}
}
if (strategy == PARTIAL_MATCH_SCAN)
{
if (!(pm_engine=
......@@ -6978,6 +6999,44 @@ void subselect_table_scan_engine::cleanup()
}
subselect_single_column_partial_engine::subselect_single_column_partial_engine(
subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg,
Item_subselect *item_arg,
select_result_interceptor *result_arg,
List<Item> *equi_join_conds_arg,
bool has_covering_null_row_arg,
bool has_covering_null_columns_arg,
uint count_columns_with_nulls_arg)
:subselect_partial_match_engine(engine_arg, tmp_table_arg, item_arg,
result_arg, equi_join_conds_arg,
has_covering_null_row_arg,
has_covering_null_columns_arg,
count_columns_with_nulls_arg)
{}
bool subselect_single_column_partial_engine::partial_match()
{
/*
We get here if:
- there is only one column in the materialized table;
- its current value of left_expr is NULL (otherwise we would have hit
the earlier "index lookup" branch at subselect_partial_match::exec());
- the materialized table does not have NULL values (for a similar reason);
- the materialized table is not empty.
The case when materialization produced no rows (empty table) is handled at
subselect_hash_sj_engine::exec(), the result of IN predicate is always
FALSE in that case.
After all those preconditions met, the result of the partial match is TRUE.
*/
DBUG_ASSERT(item->get_IN_subquery()->left_expr_has_null() &&
!has_covering_null_row &&
tmp_table->file->stats.records > 0);
return true;
}
void Item_subselect::register_as_with_rec_ref(With_element *with_elem)
{
with_elem->sq_with_rec_ref.link_in_list(this, &this->next_with_rec_ref);
......
......@@ -835,7 +835,8 @@ class subselect_engine: public Sql_alloc,
enum enum_engine_type {ABSTRACT_ENGINE, SINGLE_SELECT_ENGINE,
UNION_ENGINE, UNIQUESUBQUERY_ENGINE,
INDEXSUBQUERY_ENGINE, HASH_SJ_ENGINE,
ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE};
ROWID_MERGE_ENGINE, TABLE_SCAN_ENGINE,
SINGLE_COLUMN_ENGINE};
subselect_engine(Item_subselect *si,
select_result_interceptor *res):
......@@ -1175,6 +1176,8 @@ class subselect_hash_sj_engine : public subselect_engine
PARTIAL_MATCH, /* Use some partial matching strategy. */
PARTIAL_MATCH_MERGE, /* Use partial matching through index merging. */
PARTIAL_MATCH_SCAN, /* Use partial matching through table scan. */
PARTIAL_MATCH_SINGLE_COLUMN, /* Use simplifeid partial matching when
there is only one field involved. */
IMPOSSIBLE /* Subquery materialization is not applicable. */
};
......@@ -1195,7 +1198,8 @@ class subselect_hash_sj_engine : public subselect_engine
ulonglong rowid_merge_buff_size(bool has_non_null_key,
bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts);
void choose_partial_match_strategy(bool has_non_null_key,
void choose_partial_match_strategy(uint field_count,
bool has_non_null_key,
bool has_covering_null_row,
MY_BITMAP *partial_match_key_parts);
bool make_semi_join_conds();
......@@ -1559,6 +1563,25 @@ class subselect_table_scan_engine: public subselect_partial_match_engine
enum_engine_type engine_type() override { return TABLE_SCAN_ENGINE; }
};
class subselect_single_column_partial_engine:
public subselect_partial_match_engine
{
protected:
bool partial_match() override;
public:
subselect_single_column_partial_engine(
subselect_uniquesubquery_engine *engine_arg,
TABLE *tmp_table_arg, Item_subselect *item_arg,
select_result_interceptor *result_arg,
List<Item> *equi_join_conds_arg,
bool has_covering_null_row_arg,
bool has_covering_null_columns_arg,
uint count_columns_with_nulls_arg);
void cleanup() override {}
enum_engine_type engine_type() override { return SINGLE_COLUMN_ENGINE; }
};
/**
@brief Subquery materialization tracker
......@@ -1641,6 +1664,8 @@ class Subq_materialization_tracker
return "index_lookup;array merge for partial match";
case Strategy::PARTIAL_MATCH_SCAN:
return "index_lookup;full scan for partial match";
case Strategy::PARTIAL_MATCH_SINGLE_COLUMN:
return "index_lookup;constant for partial match";
default:
return "unsupported";
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment