Commit b1f828a8 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-19134: EXISTS() slower if ORDER BY is defined

Step 1: Removal of ORDER BY [LIMIT] from the subquery should be done
earlier and for broader class of subqueries.

The rewrite was done in Item_in_subselect::select_in_like_transformer(),
but this had problems:
- It didn't cover EXISTS subqueries
- It covered IN-subqueries, but was done after the semi-join transformation
  was considered inapplicable, because ORDER BY was present.

Remaining issue:
- EXISTS->IN transformation happens before
  check_and_do_in_subquery_rewrites() is called, so it is still prevented
  by the present ORDER BY.
parent 366bf104
......@@ -2256,7 +2256,7 @@ SELECT a3 FROM t3 WHERE b2 = b1 AND b2 <= b1 ORDER BY b3
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 system NULL NULL NULL NULL 1
1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Using where
3 DEPENDENT SUBQUERY t3 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t3 ALL NULL NULL NULL NULL 2 Using where; FirstMatch(t1); Using join buffer (flat, BNL join)
SELECT * FROM t1 WHERE a1 IN (
SELECT a2 FROM t2 WHERE a2 IN (
SELECT a3 FROM t3 WHERE b2 = b1 AND b2 <= b1 ORDER BY b3
......@@ -2534,3 +2534,26 @@ x
c1
1
drop table t1;
#
# MDEV-19134: EXISTS() slower if ORDER BY is defined
#
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int, b int);
insert into t1 select
A.a + B.a*10, A.a + B.a*10 from t0 A, t0 B;
create table t2 as select * from t1;
# This will not be able to convert to semi-join but will not require filesort:
explain
select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL # Using where
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL # Using where
# This will be merged and converted into a semi-join:
explain
select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100
drop table t0, t1, t2;
......@@ -2075,3 +2075,27 @@ insert into t1 values(2,1),(1,2);
select (select c1 from t1 group by c1,c2 order by c1 limit 1) as x;
(select c1 from t1 group by c1,c2 order by c1 limit 1);
drop table t1;
--echo #
--echo # MDEV-19134: EXISTS() slower if ORDER BY is defined
--echo #
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int, b int);
insert into t1 select
A.a + B.a*10, A.a + B.a*10 from t0 A, t0 B;
create table t2 as select * from t1;
--echo # This will not be able to convert to semi-join but will not require filesort:
--replace_column 9 #
explain
select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
--echo # This will be merged and converted into a semi-join:
explain
select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
drop table t0, t1, t2;
......@@ -458,7 +458,7 @@ EXPLAIN
SELECT * FROM t1 WHERE EXISTS ( SELECT b FROM t2, t3 GROUP BY b HAVING b != 3 );
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE
2 SUBQUERY t2 index PRIMARY PRIMARY 4 NULL 1 Using where; Using index; Using temporary; Using filesort
2 SUBQUERY t2 index PRIMARY PRIMARY 4 NULL 1 Using where; Using index; Using temporary
2 SUBQUERY t3 ALL NULL NULL NULL NULL 1 Using join buffer (flat, BNL join)
SELECT * FROM t1 WHERE EXISTS ( SELECT b FROM t2, t3 GROUP BY b HAVING b != 3 );
a
......@@ -480,7 +480,7 @@ HAVING SQ2_alias1 . col_int_key >= 7
);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE
2 SUBQUERY SQ2_alias2 index NULL col_int_key 5 NULL 1 Using index; Using temporary; Using filesort
2 SUBQUERY SQ2_alias2 index NULL col_int_key 5 NULL 1 Using index; Using temporary
2 SUBQUERY SQ2_alias1 ref col_int_key col_int_key 5 test.SQ2_alias2.col_int_key 1 Using where; Using index
SELECT 1 FROM t1 AS alias1
WHERE EXISTS ( SELECT SQ2_alias1 . col_int_key AS SQ2_field1
......
......@@ -148,7 +148,7 @@ FROM t2 GROUP BY f1
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
2 SUBQUERY t1 system NULL NULL NULL NULL 1
3 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using temporary; Using filesort
3 SUBQUERY t2 ALL NULL NULL NULL NULL 2 Using temporary
drop table t1, t2, t3;
#
# LP BUG#715034 Item_sum_distinct::clear(): Assertion `tree != 0' failed
......
......@@ -265,11 +265,11 @@ set @@optimizer_switch=@save_optimizer_switch;
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 3 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 test.t1.a1,test.t1.a2 1 100.00
1 PRIMARY t1 ALL NULL NULL NULL NULL 3 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 func,func 1 100.00
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 100.00
Warnings:
Note 1003 /* select#1 */ select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from <materialize> (/* select#2 */ select `test`.`t2`.`b1`,`test`.`t2`.`b2` from `test`.`t2` order by `test`.`t2`.`b1`,`test`.`t2`.`b2`) join `test`.`t1` where `<subquery2>`.`b1` = `test`.`t1`.`a1` and `<subquery2>`.`b2` = `test`.`t1`.`a2`
Note 1003 select `test`.`t1`.`a1` AS `a1`,`test`.`t1`.`a2` AS `a2` from `test`.`t1` semi join (`test`.`t2`) where 1
select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
a1 a2
1 - 01 2 - 01
......@@ -277,11 +277,10 @@ a1 a2
explain extended
select * from t1i where (a1, a2) in (select b1, b2 from t2i order by b1, b2);
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1i index it1i1,it1i2,it1i3 it1i3 18 NULL 3 100.00 Using where; Using index
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 16 test.t1i.a1,test.t1i.a2 1 100.00
2 MATERIALIZED t2i index NULL it2i3 18 NULL 5 100.00 Using index
1 PRIMARY t2i index it2i1,it2i2,it2i3 it2i3 18 NULL 5 50.00 Using where; Using index; LooseScan
1 PRIMARY t1i ref it1i1,it1i2,it1i3 it1i3 18 test.t2i.b1,test.t2i.b2 1 100.00 Using index
Warnings:
Note 1003 /* select#1 */ select `test`.`t1i`.`a1` AS `a1`,`test`.`t1i`.`a2` AS `a2` from <materialize> (/* select#2 */ select `test`.`t2i`.`b1`,`test`.`t2i`.`b2` from `test`.`t2i` order by `test`.`t2i`.`b1`,`test`.`t2i`.`b2`) join `test`.`t1i` where `<subquery2>`.`b1` = `test`.`t1i`.`a1` and `<subquery2>`.`b2` = `test`.`t1i`.`a2`
Note 1003 select `test`.`t1i`.`a1` AS `a1`,`test`.`t1i`.`a2` AS `a2` from `test`.`t1i` semi join (`test`.`t2i`) where `test`.`t1i`.`a1` = `test`.`t2i`.`b1` and `test`.`t1i`.`a2` = `test`.`t2i`.`b2`
select * from t1i where (a1, a2) in (select b1, b2 from t2i order by b1, b2);
a1 a2
1 - 01 2 - 01
......@@ -1472,10 +1471,7 @@ INSERT INTO t2 VALUES (10,0),(11,0);
explain SELECT * FROM t1 JOIN t2 USING (f1)
WHERE t1.f1 IN (SELECT t1.pk FROM t1 ORDER BY t1.f1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 system NULL NULL NULL NULL 1
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 const 1
1 PRIMARY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
2 MATERIALIZED t1 system NULL NULL NULL NULL 1
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Impossible WHERE noticed after reading const tables
SELECT * FROM t1 JOIN t2 USING (f1)
WHERE t1.f1 IN (SELECT t1.pk FROM t1 ORDER BY t1.f1);
f1 pk pk
......
......@@ -161,6 +161,7 @@ execute st1;
set @@optimizer_switch=@save_optimizer_switch;
# materialize the result of ORDER BY
# No longer really happens as the optimizer is now smart enough not to sort in this case
# non-indexed fields
explain extended
select * from t1 where (a1, a2) in (select b1, b2 from t2 order by b1, b2);
......
......@@ -2049,7 +2049,7 @@ insert t1 values (1),(2),(3),(1);
explain select 1 from dual where exists (select max(a) from t1 group by a union select a+2 from t1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL No tables used
2 SUBQUERY t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort
2 SUBQUERY t1 ALL NULL NULL NULL NULL 4 Using temporary
3 UNION t1 ALL NULL NULL NULL NULL 4
NULL UNION RESULT <union2,3> ALL NULL NULL NULL NULL NULL
drop table t1;
......
......@@ -3204,21 +3204,6 @@ Item_in_subselect::select_in_like_transformer(JOIN *join)
DBUG_ENTER("Item_in_subselect::select_in_like_transformer");
DBUG_ASSERT(thd == join->thd);
/*
IN/SOME/ALL/ANY subqueries aren't support LIMIT clause. Without it
ORDER BY clause becomes meaningless thus we drop it here.
*/
for (SELECT_LEX *sl= current->master_unit()->first_select();
sl; sl= sl->next_select())
{
if (sl->join)
{
sl->join->order= 0;
sl->join->skip_sort_order= 1;
}
}
thd->where= "IN/ALL/ANY subquery";
/*
......
......@@ -594,7 +594,8 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
{
Item_in_subselect *in_subs= NULL;
Item_allany_subselect *allany_subs= NULL;
switch (subselect->substype()) {
Item_subselect::subs_type substype= subselect->substype();
switch (substype) {
case Item_subselect::IN_SUBS:
in_subs= (Item_in_subselect *)subselect;
break;
......@@ -606,6 +607,26 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
break;
}
/*
Try removing "ORDER BY" or even "ORDER BY ... LIMIT" from certain kinds
of subqueries. The removal might enable further transformations.
*/
if (substype == Item_subselect::IN_SUBS ||
substype == Item_subselect::EXISTS_SUBS ||
substype == Item_subselect::ANY_SUBS ||
substype == Item_subselect::ALL_SUBS)
{
// (1) - ORDER BY without LIMIT can be removed from IN/EXISTS subqueries
// (2) - for EXISTS, can also remove "ORDER BY ... LIMIT n",
// but cannot remove "ORDER BY ... LIMIT n OFFSET m"
if (!select_lex->select_limit || // (1)
(substype == Item_subselect::EXISTS_SUBS && // (2)
!select_lex->offset_limit)) // (2)
{
select_lex->join->order= 0;
select_lex->join->skip_sort_order= 1;
}
}
/* Resolve expressions and perform semantic analysis for IN query */
if (in_subs != NULL)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment