Commit 69af0632 authored by Yuchen Pei's avatar Yuchen Pei

MDEV-22534 allowing non-semijoin materialization

parent d3915c51
......@@ -3429,11 +3429,11 @@ WHERE t1.c1 NOT IN (SELECT t2.c1 FROM t2, t1 AS a1
WHERE t2.i1 = t1.pk AND t2.i1 BETWEEN 3 AND 5);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 60 100.00 Using where
2 DEPENDENT SUBQUERY t2 ref|filter c1,i1 c1|i1 3|5 func 38 (25%) 25.00 Using where; Full scan on NULL key; Using rowid filter
2 DEPENDENT SUBQUERY a1 ALL NULL NULL NULL NULL 60 100.00 Using join buffer (flat, BNL join)
2 MATERIALIZED t2 ALL c1,i1 NULL NULL NULL 192 25.00 Using where
2 MATERIALIZED a1 ALL NULL NULL NULL NULL 60 100.00 Using join buffer (flat, BNL join)
Warnings:
Note 1276 Field or reference 'test.t1.pk' of SELECT #2 was resolved in SELECT #1
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>((`test`.`t1`.`c1`,`test`.`t1`.`pk`),<exists>(/* select#2 */ select `test`.`t2`.`c1`,`test`.`t2`.`i1` from `test`.`t2` join `test`.`t1` `a1` where `test`.`t2`.`i1` between 3 and 5 and trigcond(<cache>(`test`.`t1`.`c1`) = `test`.`t2`.`c1`) and (<cache>(`test`.`t1`.`pk`) = `test`.`t2`.`i1` or `test`.`t2`.`i1` is null) having `test`.`t2`.`i1` is null)))
Note 1003 /* select#1 */ select `test`.`t1`.`pk` AS `pk`,`test`.`t1`.`c1` AS `c1` from `test`.`t1` where !<expr_cache><`test`.`t1`.`c1`,`test`.`t1`.`pk`>(<in_optimizer>((`test`.`t1`.`c1`,`test`.`t1`.`pk`),(`test`.`t1`.`c1`,`test`.`t1`.`pk`) in ( <materialize> (/* select#2 */ select `test`.`t2`.`c1`,`test`.`t2`.`i1` from `test`.`t2` join `test`.`t1` `a1` where `test`.`t2`.`i1` is not null and `test`.`t2`.`i1` between 3 and 5 ), <primary_index_lookup>(`test`.`t1`.`c1` in <temporary table> on distinct_key where `test`.`t1`.`c1` = `<subquery2>`.`c1` and `test`.`t1`.`pk` = `<subquery2>`.`i1`))))
SELECT * FROM t1
WHERE t1.c1 NOT IN (SELECT t2.c1 FROM t2, t1 AS a1
WHERE t2.i1 = t1.pk AND t2.i1 BETWEEN 3 AND 5);
......
......@@ -201,7 +201,7 @@ a1 b1
0 2 - 00
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation"]
["decorrelation", "materialization"]
drop table t1, t2;
# check for aggregate function in the select items in the subquery; test adapted from main.delete_use_source_engines
create table t1 (c1 integer, c2 integer, c3 integer);
......@@ -304,7 +304,7 @@ a1 b1
3 2
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation"]
["decorrelation", "materialization"]
# no transformation applied when no synonyms applied to any / all
select * from t1 where a1 >= all (select a2 from t2 where b1 = b2);
a1 b1
......@@ -333,7 +333,7 @@ a1 b1
1 1
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation"]
["decorrelation", "materialization"]
# returns empty
select * from t1 where not (a1, b1) in (select a2, b2 from t2);
a1 b1
......@@ -422,7 +422,7 @@ pk i
3 40
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation"]
["decorrelation", "materialization"]
drop table t1, t2a;
# misc cases
CREATE TABLE t2 (pk INT PRIMARY KEY, b INT);
......@@ -550,3 +550,22 @@ json_detailed(json_extract(trace, '$**.join_optimization.steps[*].transformation
}
]
drop table t1, t2;
# non-semijoin materialization
create table ten(a int);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table one_h(a int);
insert into one_h select A.a + B.a* 10 from ten A, ten B;
create table ten_k(a int primary key);
insert into ten_k select A.a + B.a* 100 from one_h A, one_h B;
create table t1 (a int, b int, c int );
insert into t1 select a, a, a from one_h;
create table t2 (a int, b int, c int );
insert into t2 select a,a,a from ten_k;
explain select * from t1 where 1 in (select 1 from t2 where t2.a=t1.a and t2.b=t1.b) or t1.c<333;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 10000
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation", "materialization"]
drop table ten, one_h, ten_k, t1, t2;
......@@ -408,3 +408,25 @@ eval $query;
select json_detailed(json_extract(trace, '$**.join_optimization.steps[*].transformation')) from information_schema.OPTIMIZER_TRACE;
drop table t1, t2;
--echo # non-semijoin materialization
create table ten(a int);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table one_h(a int);
insert into one_h select A.a + B.a* 10 from ten A, ten B;
create table ten_k(a int primary key);
insert into ten_k select A.a + B.a* 100 from one_h A, one_h B;
create table t1 (a int, b int, c int );
insert into t1 select a, a, a from one_h;
create table t2 (a int, b int, c int );
insert into t2 select a,a,a from ten_k;
explain select * from t1 where 1 in (select 1 from t2 where t2.a=t1.a and t2.b=t1.b) or t1.c<333;
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
drop table ten, one_h, ten_k, t1, t2;
......@@ -387,10 +387,10 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
6 MATERIALIZED t2i index it2i2 it2i3 18 NULL 5 100.00 Using where; Using index
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 5 100.00 Using where
4 MATERIALIZED t3b ALL NULL NULL NULL NULL 4 100.00 Using where
3 DEPENDENT SUBQUERY t3a ALL NULL NULL NULL NULL 4 100.00 Using where
3 MATERIALIZED t3a ALL NULL NULL NULL NULL 4 100.00
Warnings:
Note 1276 Field or reference 'test.t1.a1' of SELECT #3 was resolved in SELECT #1
Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` where (<expr_cache><`test`.`t2`.`b2`,`test`.`t1`.`a1`>(<in_optimizer>((`test`.`t2`.`b2`,`test`.`t1`.`a1`),<exists>(/* select#3 */ select `test`.`t3a`.`c2`,`test`.`t3a`.`c1` from `test`.`t3` `t3a` where <cache>(`test`.`t2`.`b2`) = `test`.`t3a`.`c2` and <cache>(`test`.`t1`.`a1`) = `test`.`t3a`.`c1`))) or <expr_cache><`test`.`t2`.`b2`>(<in_optimizer>(`test`.`t2`.`b2`,`test`.`t2`.`b2` in ( <materialize> (/* select#4 */ select `test`.`t3b`.`c2` from `test`.`t3` `t3b` where `test`.`t3b`.`c2` like '%03' ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery4>`.`c2`))))) and <cache>(`test`.`t1`.`a1`) = `test`.`t2`.`b1` and <cache>(`test`.`t1`.`a2`) = `test`.`t2`.`b2`))) and <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),(`test`.`t1`.`a1`,`test`.`t1`.`a2`) in ( <materialize> (/* select#5 */ select `test`.`t3c`.`c1`,`test`.`t3c`.`c2` from `test`.`t3` `t3c` where <expr_cache><`test`.`t3c`.`c1`,`test`.`t3c`.`c2`>(<in_optimizer>((`test`.`t3c`.`c1`,`test`.`t3c`.`c2`),(`test`.`t3c`.`c1`,`test`.`t3c`.`c2`) in ( <materialize> (/* select#6 */ select `test`.`t2i`.`b1`,`test`.`t2i`.`b2` from `test`.`t2i` where `test`.`t2i`.`b2` > '0' ), <primary_index_lookup>(`test`.`t3c`.`c1` in <temporary table> on distinct_key where `test`.`t3c`.`c1` = `<subquery6>`.`b1` and `test`.`t3c`.`c2` = `<subquery6>`.`b2`)))) ), <primary_index_lookup>(`test`.`t1`.`a1` in <temporary table> on distinct_key where `test`.`t1`.`a1` = `<subquery5>`.`c1` and `test`.`t1`.`a2` = `<subquery5>`.`c2`))))
Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` where (<expr_cache><`test`.`t2`.`b2`,`test`.`t1`.`a1`>(<in_optimizer>((`test`.`t2`.`b2`,`test`.`t1`.`a1`),(`test`.`t2`.`b2`,`test`.`t1`.`a1`) in ( <materialize> (/* select#3 */ select `test`.`t3a`.`c2`,`test`.`t3a`.`c1` from `test`.`t3` `t3a` where 1 ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery3>`.`c2` and `test`.`t1`.`a1` = `<subquery3>`.`c1`)))) or <expr_cache><`test`.`t2`.`b2`>(<in_optimizer>(`test`.`t2`.`b2`,`test`.`t2`.`b2` in ( <materialize> (/* select#4 */ select `test`.`t3b`.`c2` from `test`.`t3` `t3b` where `test`.`t3b`.`c2` like '%03' ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery4>`.`c2`))))) and <cache>(`test`.`t1`.`a1`) = `test`.`t2`.`b1` and <cache>(`test`.`t1`.`a2`) = `test`.`t2`.`b2`))) and <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),(`test`.`t1`.`a1`,`test`.`t1`.`a2`) in ( <materialize> (/* select#5 */ select `test`.`t3c`.`c1`,`test`.`t3c`.`c2` from `test`.`t3` `t3c` where <expr_cache><`test`.`t3c`.`c1`,`test`.`t3c`.`c2`>(<in_optimizer>((`test`.`t3c`.`c1`,`test`.`t3c`.`c2`),(`test`.`t3c`.`c1`,`test`.`t3c`.`c2`) in ( <materialize> (/* select#6 */ select `test`.`t2i`.`b1`,`test`.`t2i`.`b2` from `test`.`t2i` where `test`.`t2i`.`b2` > '0' ), <primary_index_lookup>(`test`.`t3c`.`c1` in <temporary table> on distinct_key where `test`.`t3c`.`c1` = `<subquery6>`.`b1` and `test`.`t3c`.`c2` = `<subquery6>`.`b2`)))) ), <primary_index_lookup>(`test`.`t1`.`a1` in <temporary table> on distinct_key where `test`.`t1`.`a1` = `<subquery5>`.`c1` and `test`.`t1`.`a2` = `<subquery5>`.`c2`))))
select * from t1
where (a1, a2) in (select b1, b2 from t2
where b2 in (select c2 from t3 t3a where c1 = a1) or
......@@ -520,11 +520,11 @@ id select_type table type possible_keys key key_len ref rows filtered Extra
6 DEPENDENT SUBQUERY t2i index_subquery it2i1,it2i2,it2i3 it2i3 18 func,func 1 100.00 Using index; Using where
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 5 100.00 Using where
4 MATERIALIZED t3b ALL NULL NULL NULL NULL 4 100.00 Using where
3 DEPENDENT SUBQUERY t3a ALL NULL NULL NULL NULL 4 100.00 Using where
3 MATERIALIZED t3a ALL NULL NULL NULL NULL 4 100.00
Warnings:
Note 1276 Field or reference 'test.t1.a1' of SELECT #3 was resolved in SELECT #1
Note 1276 Field or reference 'test.t1.a2' of SELECT #6 was resolved in SELECT #1
Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` where (<expr_cache><`test`.`t2`.`b2`,`test`.`t1`.`a1`>(<in_optimizer>((`test`.`t2`.`b2`,`test`.`t1`.`a1`),<exists>(/* select#3 */ select `test`.`t3a`.`c2`,`test`.`t3a`.`c1` from `test`.`t3` `t3a` where <cache>(`test`.`t2`.`b2`) = `test`.`t3a`.`c2` and <cache>(`test`.`t1`.`a1`) = `test`.`t3a`.`c1`))) or <expr_cache><`test`.`t2`.`b2`>(<in_optimizer>(`test`.`t2`.`b2`,`test`.`t2`.`b2` in ( <materialize> (/* select#4 */ select `test`.`t3b`.`c2` from `test`.`t3` `t3b` where `test`.`t3b`.`c2` like '%03' ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery4>`.`c2`))))) and <cache>(`test`.`t1`.`a1`) = `test`.`t2`.`b1` and <cache>(`test`.`t1`.`a2`) = `test`.`t2`.`b2`))) and <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#5 */ select `test`.`t3c`.`c1`,`test`.`t3c`.`c2` from `test`.`t3` `t3c` where <expr_cache><`test`.`t3c`.`c1`,`test`.`t3c`.`c2`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t3c`.`c1`,`test`.`t3c`.`c2`),<exists>(<index_lookup>(<cache>(`test`.`t3c`.`c1`) in t2i on it2i3 where (`test`.`t2i`.`b2` > '0' or `test`.`t2i`.`b2` = `test`.`t1`.`a2`) and <cache>(`test`.`t3c`.`c1`) = `test`.`t2i`.`b1` and <cache>(`test`.`t3c`.`c2`) = `test`.`t2i`.`b2`)))) and <cache>(`test`.`t1`.`a1`) = `test`.`t3c`.`c1` and <cache>(`test`.`t1`.`a2`) = `test`.`t3c`.`c2`)))
Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` where <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` where (<expr_cache><`test`.`t2`.`b2`,`test`.`t1`.`a1`>(<in_optimizer>((`test`.`t2`.`b2`,`test`.`t1`.`a1`),(`test`.`t2`.`b2`,`test`.`t1`.`a1`) in ( <materialize> (/* select#3 */ select `test`.`t3a`.`c2`,`test`.`t3a`.`c1` from `test`.`t3` `t3a` where 1 ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery3>`.`c2` and `test`.`t1`.`a1` = `<subquery3>`.`c1`)))) or <expr_cache><`test`.`t2`.`b2`>(<in_optimizer>(`test`.`t2`.`b2`,`test`.`t2`.`b2` in ( <materialize> (/* select#4 */ select `test`.`t3b`.`c2` from `test`.`t3` `t3b` where `test`.`t3b`.`c2` like '%03' ), <primary_index_lookup>(`test`.`t2`.`b2` in <temporary table> on distinct_key where `test`.`t2`.`b2` = `<subquery4>`.`c2`))))) and <cache>(`test`.`t1`.`a1`) = `test`.`t2`.`b1` and <cache>(`test`.`t1`.`a2`) = `test`.`t2`.`b2`))) and <expr_cache><`test`.`t1`.`a1`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t1`.`a1`,`test`.`t1`.`a2`),<exists>(/* select#5 */ select `test`.`t3c`.`c1`,`test`.`t3c`.`c2` from `test`.`t3` `t3c` where <expr_cache><`test`.`t3c`.`c1`,`test`.`t3c`.`c2`,`test`.`t1`.`a2`>(<in_optimizer>((`test`.`t3c`.`c1`,`test`.`t3c`.`c2`),<exists>(<index_lookup>(<cache>(`test`.`t3c`.`c1`) in t2i on it2i3 where (`test`.`t2i`.`b2` > '0' or `test`.`t2i`.`b2` = `test`.`t1`.`a2`) and <cache>(`test`.`t3c`.`c1`) = `test`.`t2i`.`b1` and <cache>(`test`.`t3c`.`c2`) = `test`.`t2i`.`b2`)))) and <cache>(`test`.`t1`.`a1`) = `test`.`t3c`.`c1` and <cache>(`test`.`t1`.`a2`) = `test`.`t3c`.`c2`)))
explain extended
select * from t1 where (a1, a2) in (select '1 - 01', '2 - 01');
id select_type table type possible_keys key key_len ref rows filtered Extra
......
......@@ -43,6 +43,7 @@
#include "sql_cte.h"
#include "sql_test.h"
#include "opt_trace.h"
#include "opt_subselect.h"
double get_post_group_estimate(JOIN* join, double join_op_rows);
......@@ -3291,6 +3292,14 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
get_select_lex()->select_number, "IN (SELECT)",
"decorrelation");
}
/** If not having materialization as a strategy and it is not a
semijoin thus will not have a chance of adding materialization later
in `check_and_do_in_subquery_rewrites()`, try to add materialization
again */
if (!test_strategy(SUBS_MATERIALIZATION) && !is_registered_semijoin)
try_add_materialization(thd, this);
out:
/* Switch back to the runtime arena */
if (arena)
......
......@@ -1482,6 +1482,11 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
DBUG_RETURN(TRUE);
}
void try_add_materialization(THD *thd, Item_in_subselect *in_sub)
{
if (is_materialization_applicable(thd, in_sub, in_sub->unit->first_select()))
in_sub->add_strategy(SUBS_MATERIALIZATION);
}
/*
Get #output_rows and scan_time estimates for a "delayed" table.
......
......@@ -32,6 +32,7 @@ bool setup_degenerate_jtbm_semi_joins(JOIN *join,
bool setup_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list,
List<Item> &eq_list);
void cleanup_empty_jtbm_semi_joins(JOIN *join, List<TABLE_LIST> *join_list);
void try_add_materialization(THD *thd, Item_in_subselect *in_sub);
// used by Loose_scan_opt
ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment