Commit 275a4b35 authored by Sergey Petrunia's avatar Sergey Petrunia

MWL#17: Table elimination

- First code. Elimination works for simple cases, passes the testsuite.
- Known issues:
  = No elimination is done for aggregate functions.
  = EXPLAIN EXTENDED shows eliminated tables (I think it better not)
  = No benchmark yet
  = The code needs some polishing.

mysql-test/r/table_elim.result:
  MWL#17: Table elimination
  - Testcases
mysql-test/t/table_elim.test:
  MWL#17: Table elimination
  - Testcases
sql/sql_select.cc:
  MWL#17: Table elimination
sql/sql_select.h:
  MWL#17: Table elimination
  - Added JOIN_TAB::eliminated (is JOIN_TAB the best place to store this flag?)
sql/table.h:
  MWL#17: Table elimination
  - ADded NESTED_JOIN::n_tables. We need to have the number of real tables remaining in an outer join nest.
parent d7ae55e7
drop table if exists t0, t1, t2, t3;
create table t1 (a int);
insert into t1 values (0),(1),(2),(3);
create table t0 as select * from t1;
create table t2 (a int primary key, b int)
as select a, a as b from t1 where a in (1,2);
create table t3 (a int primary key, b int)
as select a, a as b from t1 where a in (1,3);
# This will be eliminated:
explain select t1.a from t1 left join t2 on t2.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
select t1.a from t1 left join t2 on t2.a=t1.a;
a
0
1
2
3
# This will not be eliminated as t2.b is in in select list:
explain select * from t1 left join t2 on t2.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1
# This will not be eliminated as t2.b is in in order list:
explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort
1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1
# This will not be eliminated as t2.b is in group list:
explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort
1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1
# This will not be eliminated as t2.b is in the WHERE
explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
1 SIMPLE t2 eq_ref PRIMARY PRIMARY 4 test.t1.a 1 Using where
# Elimination of multiple tables:
explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
# Elimination of multiple tables (2):
explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
# Elimination when done within an outer join nest:
explain
select t0.*
from
t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and
t3.a=t1.a) on t0.a=t1.a;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t0 ALL NULL NULL NULL NULL 4
1 SIMPLE t1 ALL NULL NULL NULL NULL 4
drop table t0, t1, t2, t3;
#
# Table elimination (MWL#17) tests
#
--disable_warnings
drop table if exists t0, t1, t2, t3;
--enable_warnings
create table t1 (a int);
insert into t1 values (0),(1),(2),(3);
create table t0 as select * from t1;
create table t2 (a int primary key, b int)
as select a, a as b from t1 where a in (1,2);
create table t3 (a int primary key, b int)
as select a, a as b from t1 where a in (1,3);
--echo # This will be eliminated:
explain select t1.a from t1 left join t2 on t2.a=t1.a;
select t1.a from t1 left join t2 on t2.a=t1.a;
--echo # This will not be eliminated as t2.b is in in select list:
explain select * from t1 left join t2 on t2.a=t1.a;
--echo # This will not be eliminated as t2.b is in in order list:
explain select t1.a from t1 left join t2 on t2.a=t1.a order by t2.b;
--echo # This will not be eliminated as t2.b is in group list:
explain select t1.a from t1 left join t2 on t2.a=t1.a group by t2.b;
## TODO: Aggregate functions prevent table elimination ATM.
--echo # This will not be eliminated as t2.b is in the WHERE
explain select t1.a from t1 left join t2 on t2.a=t1.a where t2.b < 3 or t2.b is null;
--echo # Elimination of multiple tables:
explain select t1.a from t1 left join (t2 join t3) on t2.a=t1.a and t3.a=t1.a;
--echo # Elimination of multiple tables (2):
explain select t1.a from t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and t3.a=t1.a;
--echo # Elimination when done within an outer join nest:
explain
select t0.*
from
t0 left join (t1 left join (t2 join t3 on t2.b=t3.b) on t2.a=t1.a and
t3.a=t1.a) on t0.a=t1.a;
drop table t0, t1, t2, t3;
......@@ -42,6 +42,11 @@
#define TMP_ENGINE_HTON myisam_hton
#endif
#define FT_KEYPART (MAX_REF_PARTS+10)
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref",
"MAYBE_REF","ALL","range","index","fulltext",
"ref_or_null","unique_subquery","index_subquery",
......@@ -2468,6 +2473,304 @@ static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select,
DBUG_RETURN(HA_POS_ERROR); /* This shouldn't happend */
}
bool has_eq_ref_access_candidate(TABLE *table, table_map can_refer_to_these)
{
KEYUSE *keyuse= table->reginfo.join_tab->keyuse;
if (keyuse)
{
/*
walk through all of the KEYUSE elements and
- locate unique keys
- check if we have eq_ref access for them
TODO any other reqs?
loops are constructed like in best_access_path
*/
while (keyuse->table == table)
{
uint key= keyuse->key;
key_part_map bound_parts=0;
bool ft_key= test(keyuse->keypart == FT_KEYPART);
do /* For each keypart and each way to read it */
{
if (!(keyuse->used_tables & ~can_refer_to_these) &&
!(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL))
{
bound_parts |= keyuse->keypart_map;
}
keyuse++;
} while (keyuse->table && keyuse->key == key);
KEY *keyinfo= table->key_info + key;
if (!ft_key &&
((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) &&
bound_parts == PREV_BITS(key_part_map, keyinfo->key_parts))
{
return TRUE;
}
}
}
return FALSE;
}
static void mark_table_as_eliminated(JOIN *join, TABLE *table, uint *const_tbl_count,
table_map *const_tables)
{
JOIN_TAB *tab= table->reginfo.join_tab;
if (!(*const_tables & tab->table->map))
{
DBUG_PRINT("info", ("Eliminated table %s", table->alias));
tab->type= JT_CONST;
tab->eliminated= TRUE;
*const_tables |= table->map;
join->const_table_map|= table->map;
set_position(join, (*const_tbl_count)++, tab, (KEYUSE*)0);
}
}
/*
Now on to traversal. There can be a situation like this:
FROM t1
LEFT JOIN t2 ON cond(t1,t2)
LEFT JOIN t3 ON cond(..., possibly-t2) // <--(*)
LEFT JOIN t4 ON cond(..., possibly-t2)
Besides that, simplify_joins() may have created back references, so when
we're e.g. looking at outer join (*) we need to look both forward and
backward to check if there are any references in preceding/following
outer joins'
TODO would it create only following-sibling references or
preceding-sibling as well?
And if not, should we rely on that?
*/
int
eliminate_tables_for_join_list(JOIN *join, List<TABLE_LIST> *join_list,
table_map used_tables_elsewhere,
uint *const_tbl_count, table_map *const_tables)
{
List_iterator<TABLE_LIST> it(*join_list);
table_map used_tables_on_right[MAX_TABLES]; // todo change to alloca
table_map used_tables_on_left;
TABLE_LIST *tbl;
int i, n_tables;
int eliminated=0;
/* Collect the reverse-bitmap-array */
for (i=0; (tbl= it++); i++)
{
used_tables_on_right[i]= 0;
if (tbl->on_expr)
used_tables_on_right[i]= tbl->on_expr->used_tables();
if (tbl->nested_join)
used_tables_on_right[i]= tbl->nested_join->used_tables;
}
n_tables= i;
for (i= n_tables - 2; i > 0; i--)
used_tables_on_right[i] |= used_tables_on_right[i+1];
it.rewind();
/* Walk through tables and join nests and see if we can eliminate them */
used_tables_on_left= 0;
i= 1;
while ((tbl= it++))
{
table_map tables_used_outside= used_tables_on_left |
used_tables_on_right[i] |
used_tables_elsewhere;
table_map cur_tables;
if (tbl->nested_join)
{
DBUG_ASSERT(tbl->on_expr);
/*
There can be cases where table removal is applicable for tables
within the outer join but not for the outer join itself. Ask to
remove the children first.
TODO: NoHopelessEliminationAttempts: the below call can return
information about whether it would make any sense to try removing
this entire outer join nest.
*/
int eliminated_in_children=
eliminate_tables_for_join_list(join, &tbl->nested_join->join_list,
tables_used_outside,
const_tbl_count, const_tables);
tbl->nested_join->n_tables -=eliminated_in_children;
cur_tables= tbl->nested_join->used_tables;
if (!(cur_tables & tables_used_outside))
{
/*
Check if all embedded tables together can produce at most one
record combination. This is true when
- each of them has one_match(outer-tables) property
(this is a stronger condition than all of them together having
this property but that's irrelevant here)
- there are no outer joins among them
(except for the case of outer join which has all inner tables
to be constant and is guaranteed to produce only one record.
that record will be null-complemented)
*/
bool one_match= TRUE;
List_iterator<TABLE_LIST> it2(tbl->nested_join->join_list);
TABLE_LIST *inner;
while ((inner= it2++))
{
/*
Bail out if we see an outer join (TODO: handle the above
null-complemntated-rows-only case)
*/
if (inner->on_expr)
{
one_match= FALSE;
break;
}
if (inner->table && // <-- to be removed after NoHopelessEliminationAttempts
!has_eq_ref_access_candidate(inner->table,
~tbl->nested_join->used_tables))
{
one_match= FALSE;
break;
}
}
if (one_match)
{
it2.rewind();
while ((inner= it2++))
{
mark_table_as_eliminated(join, inner->table, const_tbl_count,
const_tables);
}
eliminated += tbl->nested_join->join_list.elements;
//psergey-todo: do we need to do anything about removing the join
//nest?
}
else
{
eliminated += eliminated_in_children;
}
}
}
else if (tbl->on_expr)
{
cur_tables= tbl->on_expr->used_tables();
/* Check and remove */
if (!(tbl->table->map & tables_used_outside) &&
has_eq_ref_access_candidate(tbl->table, (table_map)-1))
{
mark_table_as_eliminated(join, tbl->table, const_tbl_count,
const_tables);
eliminated += 1;
}
}
/* Update bitmap of tables we've seen on the left */
i++;
used_tables_on_left |= cur_tables;
}
return eliminated;
}
/*
Perform table elimination based on outer join
SELECT * FROM t1 LEFT JOIN
(t2 JOIN t3) ON t3.primary_key=t1.col AND
t4.primary_key= t2.col
CRITERIA FOR REMOVING ONE OJ NEST
we can't rely on sole presense of eq_refs. Because if we do, we'll miss
things like this:
SELECT * FROM flights LEFT JOIN
(pax as S1 JOIN pax as S2 ON S2.id=S1.spouse AND s1.id=s2.spouse)
(no-polygamy schema/query but there can be many couples on the flight)
..
REMOVAL PROCESS
We can remove an inner side of an outer join if it there is a warranty
that it will produce not more than one record:
... t1 LEFT JOIN t2 ON (t2.unique_key = expr) ...
For nested outer joins:
- The process naturally occurs bottom-up (in order to remove an
outer-join we need to analyze its contents)
- If we failed to remove an outer join nest, it makes no sense to
try removing its ancestors, as the
ot LEFT JOIN it ON cond
pair may possibly produce two records (one record via match and
another one as access-method record).
Q: If we haven't removed an OUTER JOIN, does it make sense to attempt
removing its ancestors?
A: No as the innermost outer join will produce two records => no ancestor
outer join nest will be able to provide the max_fanout==1 guarantee.
psergey-todo: .
*/
static void eliminate_tables(JOIN *join, uint *const_tbl_count, table_map *const_tables)
{
Item *item;
table_map used_tables;
DBUG_ENTER("eliminate_tables");
if (!join->outer_join)
DBUG_VOID_RETURN;
/* Find the tables that are referred to from WHERE/HAVING */
used_tables= (join->conds? join->conds->used_tables() : 0) |
(join->having? join->having->used_tables() : 0);
/* Add tables referred to from the select list */
List_iterator<Item> it(join->fields_list);
while ((item= it++))
used_tables |= item->used_tables();
/* Add tables referred to from ORDER BY and GROUP BY lists */
ORDER *all_lists[]= { join->order, join->group_list};
for (int i=0; i < 2; i++)
{
for (ORDER *cur_list= all_lists[i]; cur_list; cur_list= cur_list->next)
used_tables |= (*(cur_list->item))->used_tables();
}
THD* thd= join->thd;
if (join->select_lex == &thd->lex->select_lex)
{
/* Multi-table UPDATE and DELETE: don't eliminate the tables we modify: */
used_tables |= thd->table_map_for_update;
/* Multi-table UPDATE: don't eliminate tables referred from SET statement */
if (thd->lex->sql_command == SQLCOM_UPDATE_MULTI)
{
List_iterator<Item> it2(thd->lex->value_list);
while ((item= it2++))
used_tables |= item->used_tables();
}
}
if (((1 << join->tables) - 1) & ~used_tables)
{
/* There are some time tables that we probably could eliminate */
eliminate_tables_for_join_list(join, join->join_list, used_tables,
const_tbl_count, const_tables);
}
DBUG_VOID_RETURN;
}
/*
This structure is used to collect info on potentially sargable
predicates in order to check whether they become sargable after
......@@ -2823,6 +3126,10 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
}
}
//psergey-todo: table elimination
eliminate_tables(join, &const_count, &found_const_table_map);
//:psergey-todo
/* Calc how many (possible) matched records in each table */
for (s=stat ; s < stat_end ; s++)
......@@ -2956,9 +3263,6 @@ typedef struct key_field_t {
bool *cond_guard; /* See KEYUSE::cond_guard */
} KEY_FIELD;
/* Values in optimize */
#define KEY_OPTIMIZE_EXISTS 1
#define KEY_OPTIMIZE_REF_OR_NULL 2
/**
Merge new key definitions to old ones, remove those not used in both.
......@@ -3563,7 +3867,6 @@ add_key_part(DYNAMIC_ARRAY *keyuse_array,KEY_FIELD *key_field)
}
#define FT_KEYPART (MAX_REF_PARTS+10)
static void
add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
......@@ -6021,7 +6324,7 @@ make_outerjoin_info(JOIN *join)
}
if (!tab->first_inner)
tab->first_inner= nested_join->first_nested;
if (++nested_join->counter < nested_join->join_list.elements)
if (++nested_join->counter < nested_join->n_tables)
break;
/* Table tab is the last inner table for nested join. */
nested_join->first_nested->last_inner= tab;
......@@ -8575,6 +8878,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top)
conds= simplify_joins(join, &nested_join->join_list, conds, top);
used_tables= nested_join->used_tables;
not_null_tables= nested_join->not_null_tables;
/* The following two might become unequal after table elimination: */
nested_join->n_tables= nested_join->join_list.elements;
}
else
{
......@@ -8733,7 +9038,7 @@ static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
with anything)
2. we could run out bits in nested_join_map otherwise.
*/
if (nested_join->join_list.elements != 1)
if (nested_join->n_tables != 1)
{
nested_join->nj_map= (nested_join_map) 1 << first_unused++;
first_unused= build_bitmap_for_nested_joins(&nested_join->join_list,
......@@ -8894,7 +9199,7 @@ static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
join->cur_embedding_map |= next_emb->nested_join->nj_map;
}
if (next_emb->nested_join->join_list.elements !=
if (next_emb->nested_join->n_tables !=
next_emb->nested_join->counter)
break;
......@@ -8928,7 +9233,7 @@ static void restore_prev_nj_state(JOIN_TAB *last)
{
if (!(--last_emb->nested_join->counter))
join->cur_embedding_map&= ~last_emb->nested_join->nj_map;
else if (last_emb->nested_join->join_list.elements-1 ==
else if (last_emb->nested_join->n_tables-1 ==
last_emb->nested_join->counter)
join->cur_embedding_map|= last_emb->nested_join->nj_map;
else
......@@ -16202,6 +16507,14 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
tmp3.length(0);
quick_type= -1;
//psergey-todo:
if (tab->eliminated)
{
used_tables|=table->map;
continue;
}
item_list.empty();
/* id */
item_list.push_back(new Item_uint((uint32)
......
......@@ -210,6 +210,9 @@ typedef struct st_join_table {
JOIN *join;
/** Bitmap of nested joins this table is part of */
nested_join_map embedding_map;
//psergey-todo: more justified place
bool eliminated;
void cleanup();
inline bool is_using_loose_index_scan()
......
......@@ -1626,6 +1626,8 @@ typedef struct st_nested_join
Before each use the counters are zeroed by reset_nj_counters.
*/
uint counter;
/* Tables left after elimination */
uint n_tables;
nested_join_map nj_map; /* Bit used to identify this nested join*/
} NESTED_JOIN;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment