Commit d3915c51 authored by Yuchen Pei's avatar Yuchen Pei

MDEV-22534 WIP addressing review comments

- styles and formatting
- fixing nulls
parent ba9cf8ef
......@@ -321,6 +321,28 @@ select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') f
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
NULL
drop table t1, t2;
# fix NOT IN with null values
create table t1 (a1 int, b1 int);
create table t2 (a2 int, b2 int);
insert into t1 values (0, null), (1, 1);
insert into t2 values (0, 1), (1, null);
# returns (0, null), (1, 1)
select * from t1 where not a1 in (select a2 from t2 where b1 = b2);
a1 b1
0 NULL
1 1
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
["decorrelation"]
# returns empty
select * from t1 where not (a1, b1) in (select a2, b2 from t2);
a1 b1
# returns (0, null), (1, 1)
select * from t1 where not (b1 is not null and (a1, b1) in (select a2, b2 from t2 where b2 is not null));
a1 b1
0 NULL
1 1
drop table t1, t2;
# skip transformation when neither toplevel IN nor toplevel NOT IN; testcase adapated from main.subslect4
CREATE TABLE t1 ( a INT, b INT );
INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
......@@ -336,16 +358,6 @@ a b
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
NULL
SELECT * FROM t1 WHERE a NOT IN ( SELECT c FROM t2 where b = d) IS unknown;
a b
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
NULL
SELECT * FROM t1 WHERE a IN ( SELECT c FROM t2 where b = d ) IS unknown;
a b
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
json_extract(trace, '$**.join_optimization.steps[*].transformation.to')
NULL
drop table t1, t2;
# 2nd ps execution / why we need to backup and restore the query arena; test also in main.subselect_sj2_mat
CREATE TABLE t1 ( c varchar(1));
......
......@@ -217,6 +217,23 @@ select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') f
drop table t1, t2;
--echo # fix NOT IN with null values
create table t1 (a1 int, b1 int);
create table t2 (a2 int, b2 int);
insert into t1 values (0, null), (1, 1);
insert into t2 values (0, 1), (1, null);
--echo # returns (0, null), (1, 1)
select * from t1 where not a1 in (select a2 from t2 where b1 = b2);
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
--echo # returns empty
select * from t1 where not (a1, b1) in (select a2, b2 from t2);
--echo # returns (0, null), (1, 1)
select * from t1 where not (b1 is not null and (a1, b1) in (select a2, b2 from t2 where b2 is not null));
drop table t1, t2;
--echo # skip transformation when neither toplevel IN nor toplevel NOT IN; testcase adapated from main.subslect4
CREATE TABLE t1 ( a INT, b INT );
INSERT INTO t1 VALUES ( 1, NULL ), ( 2, NULL );
......@@ -228,12 +245,6 @@ select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') f
# not top level, upper_not is null
SELECT * FROM t1 WHERE a IN ( SELECT c FROM t2 where b = d ) IS NULL;
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
# not top level, upper_not is not top level
SELECT * FROM t1 WHERE a NOT IN ( SELECT c FROM t2 where b = d) IS unknown;
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
# not top level, upper_not is null
SELECT * FROM t1 WHERE a IN ( SELECT c FROM t2 where b = d ) IS unknown;
select json_extract(trace, '$**.join_optimization.steps[*].transformation.to') from information_schema.OPTIMIZER_TRACE;
drop table t1, t2;
......
......@@ -47,6 +47,7 @@
double get_post_group_estimate(JOIN* join, double join_op_rows);
LEX_CSTRING exists_outer_expr_name= { STRING_WITH_LEN("<exists outer expr>") };
LEX_CSTRING in_outer_expr_name= { STRING_WITH_LEN("<in outer expr>") };
LEX_CSTRING no_matter_name= {STRING_WITH_LEN("<no matter>") };
......@@ -3108,10 +3109,22 @@ static bool intersects_free_list(Item *item, THD *thd)
return false;
}
/* De-correlate where conditions in an IN subquery */
/**
De-correlate where conditions in an IN subquery
Similar to `Item_exists_subselect::exists2in_processor()`, it checks
for equalities in the form of outer_exp = inner_field in the where
condition, and move the outer_exp to left_expr, and inner_field to
the select item list in the IN subquery
@param opt_arg a cast to a THD*, the connection
@retval FALSE ok
@retval TRUE error
*/
bool Item_in_subselect::exists2in_processor(void *opt_arg)
{
THD *thd= (THD *)opt_arg;
THD *thd= (THD *) opt_arg;
SELECT_LEX *first_select= unit->first_select();
JOIN *join= first_select->join;
bool will_be_correlated;
......@@ -3121,33 +3134,36 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
int res= FALSE;
DBUG_ENTER("Item_in_subselect::exists2in_processor");
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_EXISTS_TO_IN) ||
/* proceed only if I'm a toplevel IN or a toplevel NOT IN */
/*
Skip the transformation if it is not needed or would not help
(1). optimizer flag exists_to_in is off
(2). skip if the subquery is not toplevel IN nor toplevel NOT IN
(3). skip if the subquery is a part of a union
(4). skip if the subquery has group by
(5). skip if the subquery has aggregation in the select
(6). skip if the subquery has having
(7). skip if the subquery does not have where conditions
(8). skip if the left expr is a single nonscalar subselect
*/
if (!optimizer_flag(thd, OPTIMIZER_SWITCH_EXISTS_TO_IN) || /* (1) */
!(is_top_level_item() ||
(upper_not && upper_not->is_top_level_item())) ||
first_select->is_part_of_union() || /* skip if part of a union */
first_select->group_list.elements || /* skip if with group by */
first_select->with_sum_func || /* skip if aggregation */
join->having || /* skip if with having */
!join->conds || /* skip if no conds */
/* skip if left expr is a single nonscalar subselect */
(upper_not && upper_not->is_top_level_item())) || /* (2) */
first_select->is_part_of_union() || /* (3) */
first_select->group_list.elements || /* (4) */
first_select->with_sum_func || /* (5) */
join->having || /* (6) */
!join->conds || /* (7) */
(left_expr->type() == Item::SUBSELECT_ITEM &&
!left_expr->type_handler()->is_scalar_type()))
!left_expr->type_handler()->is_scalar_type())) /* (8) */
DBUG_RETURN(FALSE);
/* iterate over conditions, and check whether they can be moved out. */
if (find_inner_outer_equalities(&join->conds, eqs))
DBUG_RETURN(FALSE);
/* If we are in the execution of a prepared statement, check for
intersection with the free list to avoid segfault. Note that the
check for prepared statement execution is necessary, otherwise it
will likely always find intersection thus abort the
transformation.
fixme: this only works when HAVE_PSI_STATEMENT_INTERFACE is
defined. It causes CI's like amd64-debian-10-debug-embedded to
fail. Are there other ways to find out we are in the execution of a
prepared statement? */
if (thd->m_statement_state.m_parent_prepared_stmt)
/* If we are in a ps/sp execution, check for and skip on
intersection with the temporary free list to avoid 2nd ps execution
segfault */
if (!thd->stmt_arena->is_conventional())
{
for (uint i= 0; i < (uint) eqs.elements(); i++)
{
......@@ -3155,6 +3171,7 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
DBUG_RETURN(FALSE);
}
}
/* Determines whether the result will be correlated */
{
List<Item> unused;
......@@ -3169,7 +3186,7 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
DBUG_ASSERT(prm.count >= (uint)eqs.elements());
will_be_correlated= prm.count > (uint)eqs.elements();
}
/* Back up the free list */
/* Switch to the permanent arena if we are in a ps/sp execution */
arena= thd->activate_stmt_arena_if_needed(&backup);
/* Construct the items for left_expr */
if (left_expr->type() == Item::ROW_ITEM)
......@@ -3177,7 +3194,9 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
outer.push_back(left_expr->element_index(i));
else
outer.push_back(left_expr);
const uint outer_offset= outer.elements;
const uint offset= first_select->item_list.elements;
DBUG_ASSERT(outer_offset == offset);
/* Move items to outer and select item list */
for (uint i= 0; i < (uint)eqs.elements(); i++)
{
......@@ -3186,8 +3205,18 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
Item *outer_exp= eqs.at(i).outer_exp;
first_select->item_list.push_back(local_field, thd->mem_root);
first_select->ref_pointer_array[offset + i]= (Item *)local_field;
outer.push_back(outer_exp);
*eq_ref= new (thd->mem_root) Item_int(thd, 1);
outer.push_back(outer_exp, thd->mem_root);
if (!upper_not || !local_field->maybe_null())
*eq_ref= new (thd->mem_root) Item_int(thd, 1);
else
*eq_ref= new (thd->mem_root)
Item_func_isnotnull(thd,
new (thd->mem_root)
Item_field(thd,
((Item_field*)(local_field->
real_item()))->context,
((Item_field*)(local_field->
real_item()))->field));
if((*eq_ref)->fix_fields(thd, (Item **)eq_ref))
{
res= TRUE;
......@@ -3203,6 +3232,7 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
}
left_expr_orig= left_expr;
is_correlated= will_be_correlated;
/* Update any Item_in_optimizer wrapper if exists */
if (optimizer)
{
......@@ -3213,13 +3243,56 @@ bool Item_in_subselect::exists2in_processor(void *opt_arg)
goto out;
}
}
/* AND the IN subquery with IS NOT NULLs if upper_not */
if (upper_not)
{
Item *exp;
List<Item> *and_list= new (thd->mem_root) List<Item>;
if (!and_list)
{
res= TRUE;
goto out;
}
for (size_t i= outer_offset; i < left_expr->cols(); i++)
{
if (left_expr->element_index(i)->maybe_null())
{
and_list->
push_front(new (thd->mem_root)
Item_func_isnotnull(thd,
new (thd->mem_root)
Item_direct_ref(thd,
&unit->outer_select()->context,
left_expr->addr(i),
no_matter_name,
in_outer_expr_name)),
thd->mem_root);
}
}
if (and_list->elements > 0)
{
if (optimizer)
and_list->push_front(optimizer, thd->mem_root);
else
and_list->push_front(this, thd->mem_root);
exp= new (thd->mem_root) Item_cond_and(thd, *and_list);
if (exp->fix_fields_if_needed(thd, upper_not->arguments()))
{
res= TRUE;
goto out;
}
upper_not->arguments()[0]= exp;
}
}
{
OPT_TRACE_TRANSFORM(thd, trace_wrapper, trace_transform,
get_select_lex()->select_number, "IN (SELECT)",
"decorrelation");
}
out:
/* Restores the free list */
/* Switch back to the runtime arena */
if (arena)
thd->restore_active_arena(arena, &backup);
DBUG_RETURN(res);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment