Commit 6adddca8 authored by Sergey Petrunya

Make semi-joins work with outer joins part #1:

- Make make_outerjoin_info() correctly process semi-join nests
- Make make_join_select() attach conditions to the right places.
parent e1d734f3
......@@ -1326,4 +1326,25 @@ x
m
c
drop table t1,t2,t3,t4;
#
# BUG#795530 Wrong result with subquery semijoin materialization and outer join
# Simplified testcase that uses DuplicateElimination
#
create table t1 (a int);
create table t2 (a int, b char(10));
insert into t1 values (1),(2);
insert into t2 values (1, 'one'), (3, 'three');
create table t3 (b char(10));
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
explain select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Start temporary
1 PRIMARY t2 ALL NULL NULL NULL NULL 2 Using where
1 PRIMARY t3 ALL NULL NULL NULL NULL 8 Using where; End temporary; Using join buffer (flat, BNL join)
select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
b
drop table t1, t2, t3;
set @@optimizer_switch=@save_optimizer_switch;
......@@ -1334,6 +1334,27 @@ x
m
c
drop table t1,t2,t3,t4;
#
# BUG#795530 Wrong result with subquery semijoin materialization and outer join
# Simplified testcase that uses DuplicateElimination
#
create table t1 (a int);
create table t2 (a int, b char(10));
insert into t1 values (1),(2);
insert into t2 values (1, 'one'), (3, 'three');
create table t3 (b char(10));
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
explain select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2 Start temporary
1 PRIMARY t2 ALL NULL NULL NULL NULL 2 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t3 ALL NULL NULL NULL NULL 8 Using where; End temporary; Using join buffer (incremental, BNL join)
select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
b
drop table t1, t2, t3;
set @@optimizer_switch=@save_optimizer_switch;
#
# BUG#49129: Wrong result with IN-subquery with join_cache_level=6 and firstmatch=off
......
......@@ -636,7 +636,7 @@ from t1_16
where a1 in (select substring(b1,1,16) from t2_16 where b1 > '0');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1_16 ALL NULL NULL NULL NULL 3 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 20 func 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 20 func 1 100.00
2 SUBQUERY t2_16 ALL NULL NULL NULL NULL 3 100.00 Using where
Warnings:
Note 1003 select left(`test`.`t1_16`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1_16`.`a2`,7) AS `left(a2,7)` from `test`.`t1_16` semi join (`test`.`t2_16`) where ((`test`.`t2_16`.`b1` > '0') and (`test`.`t1_16`.`a1` = substr(`test`.`t2_16`.`b1`,1,16)))
......@@ -751,7 +751,7 @@ from t1_512
where a1 in (select substring(b1,1,512) from t2_512 where b1 > '0');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1_512 ALL NULL NULL NULL NULL 3 100.00
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 517 func 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 517 func 1 100.00
2 SUBQUERY t2_512 ALL NULL NULL NULL NULL 3 100.00 Using where
Warnings:
Note 1003 select left(`test`.`t1_512`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1_512`.`a2`,7) AS `left(a2,7)` from `test`.`t1_512` semi join (`test`.`t2_512`) where ((`test`.`t2_512`.`b1` > '0') and (`test`.`t1_512`.`a1` = substr(`test`.`t2_512`.`b1`,1,512)))
......@@ -847,7 +847,7 @@ from t1_1024
where a1 in (select substring(b1,1,1024) from t2_1024 where b1 > '0');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1_1024 ALL NULL NULL NULL NULL 3 100.00
1 PRIMARY <subquery2> eq_ref NULL distinct_key 15 func,func 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref NULL distinct_key 15 func,func 1 100.00
2 SUBQUERY t2_1024 ALL NULL NULL NULL NULL 3 100.00 Using where
Warnings:
Note 1003 select left(`test`.`t1_1024`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1_1024`.`a2`,7) AS `left(a2,7)` from `test`.`t1_1024` semi join (`test`.`t2_1024`) where ((`test`.`t2_1024`.`b1` > '0') and (`test`.`t1_1024`.`a1` = substr(`test`.`t2_1024`.`b1`,1,1024)))
......@@ -941,7 +941,7 @@ from t1_1025
where a1 in (select substring(b1,1,1025) from t2_1025 where b1 > '0');
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1_1025 ALL NULL NULL NULL NULL 3 100.00
1 PRIMARY <subquery2> eq_ref NULL distinct_key 15 func,func 1 100.00 Using where
1 PRIMARY <subquery2> eq_ref NULL distinct_key 15 func,func 1 100.00
2 SUBQUERY t2_1025 ALL NULL NULL NULL NULL 3 100.00 Using where
Warnings:
Note 1003 select left(`test`.`t1_1025`.`a1`,7) AS `left(a1,7)`,left(`test`.`t1_1025`.`a2`,7) AS `left(a2,7)` from `test`.`t1_1025` semi join (`test`.`t2_1025`) where ((`test`.`t2_1025`.`b1` > '0') and (`test`.`t1_1025`.`a1` = substr(`test`.`t2_1025`.`b1`,1,1025)))
......@@ -1219,7 +1219,7 @@ insert into t1 values ('aa', 'aaaa');
explain select a,b from t1 where b in (select a from t1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 2
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 3 func 1 Using where
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 3 func 1
2 SUBQUERY t1 ALL NULL NULL NULL NULL 2
select a,b from t1 where b in (select a from t1);
a b
......
......@@ -1212,5 +1212,25 @@ LEFT JOIN t2 JOIN t3 ON t3.f10 = t2.f10 ON t3.f11 != 0 );
drop table t1,t2,t3,t4;
--echo #
--echo # BUG#795530 Wrong result with subquery semijoin materialization and outer join
--echo # Simplified testcase that uses DuplicateElimination
--echo #
create table t1 (a int);
create table t2 (a int, b char(10));
insert into t1 values (1),(2);
insert into t2 values (1, 'one'), (3, 'three');
create table t3 (b char(10));
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
insert into t3 values('three'),( 'four');
explain select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
select * from t3 where t3.b in (select t2.b from t1 left join t2 on t1.a=t2.a);
drop table t1, t2, t3;
# The following command must be the last one in the file
set @@optimizer_switch=@save_optimizer_switch;
......@@ -1688,7 +1688,8 @@ class Item_equal: public Item_bool_func
friend class Item_equal_fields_iterator;
friend Item *eliminate_item_equal(COND *cond, COND_EQUAL *upper_levels,
Item_equal *item_equal);
friend bool setup_sj_materialization(struct st_join_table *tab);
friend bool setup_sj_materialization_part1(struct st_join_table *tab);
friend bool setup_sj_materialization_part2(struct st_join_table *tab);
};
class COND_EQUAL: public Sql_alloc
......
......@@ -2840,9 +2840,8 @@ void fix_semijoin_strategies_for_picked_join_order(JOIN *join)
TRUE Error
*/
bool setup_sj_materialization(JOIN_TAB *sjm_tab)
bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab)
{
uint i;
DBUG_ENTER("setup_sj_materialization");
JOIN_TAB *tab= sjm_tab->bush_children->start;
TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
......@@ -2851,6 +2850,7 @@ bool setup_sj_materialization(JOIN_TAB *sjm_tab)
/* First the calls come to the materialization function */
List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
DBUG_ASSERT(sjm->is_used);
/*
Set up the table to write to, do as select_union::create_result_table does
*/
......@@ -2878,6 +2878,22 @@ bool setup_sj_materialization(JOIN_TAB *sjm_tab)
sjm->materialized= FALSE;
sjm_tab->table= sjm->table;
sjm->table->pos_in_table_list= emb_sj_nest;//???? psergey ???
DBUG_RETURN(FALSE);
}
bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab)
{
DBUG_ENTER("setup_sj_materialization_part2");
JOIN_TAB *tab= sjm_tab->bush_children->start;
TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding;
SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info;
THD *thd= tab->join->thd;
uint i;
List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list;
List_iterator<Item> it(item_list);
if (!sjm->is_sj_scan)
{
......@@ -2992,7 +3008,7 @@ bool setup_sj_materialization(JOIN_TAB *sjm_tab)
in the record buffers for the source tables.
*/
sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
it.rewind();
//it.rewind();
for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
{
bool dummy;
......
......@@ -285,7 +285,9 @@ void restore_prev_sj_state(const table_map remaining_tables,
const JOIN_TAB *tab, uint idx);
void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
bool setup_sj_materialization(JOIN_TAB *tab);
bool setup_sj_materialization_part1(JOIN_TAB *sjm_tab);
bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab);
TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg,
SJ_TMP_TABLE *sjtbl);
......
......@@ -90,7 +90,7 @@ static store_key *get_store_key(THD *thd,
KEYUSE *keyuse, table_map used_tables,
KEY_PART_INFO *key_part, uchar *key_buff,
uint maybe_null);
static void make_outerjoin_info(JOIN *join);
static bool make_outerjoin_info(JOIN *join);
static Item*
make_cond_after_sjm(Item *root_cond, Item *cond, table_map tables, table_map sjm_tables);
static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
......@@ -1164,7 +1164,10 @@ JOIN::optimize()
}
reset_nj_counters(this, join_list);
make_outerjoin_info(this);
if (make_outerjoin_info(this))
{
DBUG_RETURN(1);
}
/*
Among the equal fields belonging to the same multiple equality
......@@ -7620,6 +7623,12 @@ static void add_not_null_conds(JOIN *join)
nested outer join and so on until it reaches root_tab
(root_tab can be 0).
In other words:
add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should
wrap parts of WHERE. The idea is that the part of WHERE should only be
evaluated after we've finished figuring out whether the outer joins have
found a match.
^^^ is the above correct?
@param tab the first inner table for most nested outer join
@param cond the predicate to be guarded (must be set)
@param root_tab the first inner table to stop
......@@ -7647,6 +7656,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
}
/*
  Tell whether this table list element has semi-join materialization info
  attached (sj_mat_info) and that info is marked as used (is_used).
*/
bool TABLE_LIST::is_active_sjm()
{
  if (!sj_mat_info)
    return false;
  return sj_mat_info->is_used;
}
/**
Fill in outer join related info for the execution plan structure.
......@@ -7664,6 +7679,12 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
corresponding first inner table through the field t0->on_expr_ref.
Here ti are structures of the JOIN_TAB type.
In other words, for each join tab, set
- first_inner
- last_inner
- first_upper
- on_expr_ref, cond_equal
EXAMPLE. For the query:
@code
SELECT * FROM t1
......@@ -7689,20 +7710,33 @@ add_found_match_trig_cond(JOIN_TAB *tab, COND *cond, JOIN_TAB *root_tab)
has been chosen.
*/
static void
static bool
make_outerjoin_info(JOIN *join)
{
DBUG_ENTER("make_outerjoin_info");
for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab;
tab= next_linear_tab(join, tab, WITHOUT_BUSH_ROOTS))
{
TABLE *table=tab->table;
/*
psergey: The following is probably incorrect, fix it when we get
semi+outer joins processing to work:
Create temp. tables for merged SJ-Materialization nests. We need to do
this now, because further code relies on tab->table and
tab->table->pos_in_table_list being set.
*/
if (!table)
continue;
JOIN_TAB *tab;
for (tab= first_linear_tab(join, WITHOUT_CONST_TABLES);
tab;
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
{
if (tab->bush_children)
{
if (setup_sj_materialization_part1(tab))
DBUG_RETURN(TRUE);
tab->table->reginfo.join_tab= tab;
}
}
for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES); tab;
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
{
TABLE *table= tab->table;
TABLE_LIST *tbl= table->pos_in_table_list;
TABLE_LIST *embedding= tbl->embedding;
......@@ -7716,11 +7750,16 @@ make_outerjoin_info(JOIN *join)
tab->last_inner= tab->first_inner= tab;
tab->on_expr_ref= &tbl->on_expr;
tab->cond_equal= tbl->cond_equal;
if (embedding)
if (embedding && !embedding->is_active_sjm())
tab->first_upper= embedding->nested_join->first_nested;
}
for ( ; embedding ; embedding= embedding->embedding)
{
if (embedding->is_active_sjm())
{
/* We're trying to walk out of an SJ-Materialization nest. Don't do this. */
break;
}
/* Ignore sj-nests: */
if (!(embedding->on_expr && embedding->outer_join))
continue;
......@@ -7752,7 +7791,7 @@ make_outerjoin_info(JOIN *join)
}
}
}
DBUG_VOID_RETURN;
DBUG_RETURN(FALSE);
}
......@@ -8127,10 +8166,25 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
the null complemented row.
*/
/* First push down constant conditions from on expressions */
for (JOIN_TAB *join_tab= first_linear_tab(join, WITHOUT_CONST_TABLES);
join_tab;
join_tab= next_linear_tab(join, join_tab, WITH_BUSH_ROOTS))
/*
First push down constant conditions from ON expressions.
- Each pushed-down condition is wrapped into a trigger which is
enabled only for non-NULL-complemented records
- The condition is attached to the first_inner_table.
With regards to join nests:
- if we start at top level, don't walk into nests
- if we start inside a nest, stay within that nest.
*/
JOIN_TAB *start_from= tab->bush_root_tab?
tab->bush_root_tab->bush_children->start :
join->join_tab + join->const_tables;
JOIN_TAB *end_with= tab->bush_root_tab?
tab->bush_root_tab->bush_children->end :
join->join_tab + join->top_join_tab_count;
for (JOIN_TAB *join_tab= start_from;
join_tab != end_with;
join_tab++)
{
if (*join_tab->on_expr_ref)
{
......@@ -8155,9 +8209,14 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
}
}
/* Push down non-constant conditions from ON expressions */
//JOIN_TAB *first_tab= join->join_tab+join->const_tables;
JOIN_TAB *last_tab= tab;
/*
while we're inside of an outer join and last_tab is
the last of its tables ...
*/
while (first_inner_tab && first_inner_tab->last_inner == last_tab)
{
/*
......@@ -8168,19 +8227,13 @@ make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
table_map used_tables2= (join->const_table_map |
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
for (JOIN_TAB *tab= first_linear_tab(join, WITHOUT_CONST_TABLES);
tab;
tab= (tab == last_tab)? NULL: next_linear_tab(join, tab,
WITH_BUSH_ROOTS))
{
if (!tab->table)
start_from= tab->bush_root_tab?
tab->bush_root_tab->bush_children->start :
join->join_tab + join->const_tables;
for (JOIN_TAB *tab= start_from; tab <= last_tab; tab++)
{
/*
psergey-todo: this is probably incorrect, fix this when we get
correct processing for outer joins + semi joins
*/
continue;
}
DBUG_ASSERT(tab->table);
current_map= tab->table->map;
used_tables2|= current_map;
/*
......@@ -9131,7 +9184,7 @@ make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
{
if (tab->bush_children)
{
if (setup_sj_materialization(tab))
if (setup_sj_materialization_part2(tab))
return TRUE;
}
......@@ -20538,8 +20591,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
examined_rows= tab->limit;
else
{
if (!tab->table->pos_in_table_list ||
tab->table->is_filled_at_execution()) // temporary, is_filled_at_execution
if (tab->table->is_filled_at_execution())
{
examined_rows= tab->records;
}
......
......@@ -204,7 +204,13 @@ typedef struct st_join_table {
NULL means no index condition pushdown was performed.
*/
Item *pre_idx_push_select_cond;
Item **on_expr_ref; /**< pointer to the associated on expression */
/*
Pointer to the associated ON expression. on_expr_ref!=NULL except for
degenerate joins.
*on_expr_ref!=NULL for tables that are first inner tables within an outer
join.
*/
Item **on_expr_ref;
COND_EQUAL *cond_equal; /**< multiple equalities for the on expression */
st_join_table *first_inner; /**< first inner table for including outerjoin */
bool found; /**< true after all matches or null complement */
......@@ -478,6 +484,8 @@ typedef struct st_join_table {
}
double scan_time();
bool preread_init();
bool is_sjm_nest() { return test(bush_children); }
} JOIN_TAB;
......
......@@ -5408,9 +5408,11 @@ bool st_table::is_children_attached(void)
bool st_table::is_filled_at_execution()
{
return test(pos_in_table_list->jtbm_subselect);
return test(pos_in_table_list->jtbm_subselect ||
pos_in_table_list->is_active_sjm());
}
/*
Cleanup this table for re-execution.
......
......@@ -1765,6 +1765,7 @@ struct TABLE_LIST
respectively.
*/
char *get_table_name() { return view != NULL ? view_name.str : table_name; }
bool is_active_sjm();
st_select_lex_unit *get_unit();
st_select_lex *get_single_select();
void wrap_into_nested_join(List<TABLE_LIST> &join_list);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment