From bcf15f6e0c272f13585aabf01ef64dbb8c01e406 Mon Sep 17 00:00:00 2001 From: Sergey Petrunya <psergey@askmonty.org> Date: Tue, 16 Feb 2010 00:53:06 +0300 Subject: [PATCH] Subquery optimization backport: - Factor out subquery code into sql/opt_subselect.{h,cc} - Stop using the term "confluent" (was used due to misreading the dictionary) --- .bzrignore | 4 + libmysqld/Makefile.am | 1 + sql/Makefile.am | 4 +- sql/item_subselect.cc | 6 +- sql/opt_subselect.cc | 3455 +++++++++++++++++++++++++++ sql/opt_subselect.h | 368 +++ sql/sql_join_cache.cc | 1 + sql/sql_lex.h | 2 +- sql/sql_select.cc | 5308 +++++++---------------------------------- sql/sql_select.h | 131 +- 10 files changed, 4715 insertions(+), 4565 deletions(-) create mode 100644 sql/opt_subselect.cc create mode 100644 sql/opt_subselect.h diff --git a/.bzrignore b/.bzrignore index d2a1ce94f70..9625cb43c69 100644 --- a/.bzrignore +++ b/.bzrignore @@ -1922,3 +1922,7 @@ libmysqld/examples/mysqltest.cc extra/libevent/event-config.h libmysqld/opt_table_elimination.cc libmysqld/ha_federatedx.cc +libmysqld/multi_range_read.cc +libmysqld/opt_index_cond_pushdown.cc +libmysqld/opt_subselect.cc +libmysqld/sql_join_cache.cc diff --git a/libmysqld/Makefile.am b/libmysqld/Makefile.am index d2f1113041c..e9a1438182c 100644 --- a/libmysqld/Makefile.am +++ b/libmysqld/Makefile.am @@ -58,6 +58,7 @@ sqlsources = derror.cc field.cc field_conv.cc strfunc.cc filesort.cc \ log_event.cc rpl_record.cc \ log_event_old.cc rpl_record_old.cc \ protocol.cc net_serv.cc opt_range.cc \ + opt_subselect.cc \ opt_sum.cc procedure.cc records.cc sql_acl.cc \ sql_load.cc discover.cc sql_locale.cc \ sql_profile.cc \ diff --git a/sql/Makefile.am b/sql/Makefile.am index 06b5bcd9b50..2833fb487c9 100644 --- a/sql/Makefile.am +++ b/sql/Makefile.am @@ -60,6 +60,7 @@ noinst_HEADERS = item.h item_func.h item_sum.h item_cmpfunc.h \ ha_ndbcluster_binlog.h ha_ndbcluster_tables.h \ ha_partition.h rpl_constants.h \ opt_range.h protocol.h rpl_tblmap.h rpl_utility.h \ + opt_subselect.h \ rpl_reporting.h \ log.h log_slow.h sql_show.h rpl_rli.h rpl_mi.h \ sql_select.h structs.h table.h sql_udf.h hash_filo.h \ @@ -102,7 +103,8 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc sql_partition.cc \ unireg.cc des_key_file.cc \ log_event.cc rpl_record.cc \ log_event_old.cc rpl_record_old.cc \ - discover.cc time.cc opt_range.cc opt_sum.cc \ + discover.cc time.cc opt_range.cc opt_subselect.cc \ + opt_sum.cc \ records.cc filesort.cc handler.cc \ ha_partition.cc \ sql_db.cc sql_table.cc sql_rename.cc sql_crypt.cc \ diff --git a/sql/item_subselect.cc b/sql/item_subselect.cc index fd73590d3f5..1b2a4d66c06 100644 --- a/sql/item_subselect.cc +++ b/sql/item_subselect.cc @@ -31,10 +31,6 @@ #include "mysql_priv.h" #include "sql_select.h" -inline Item * and_items(Item* cond, Item *item) -{ - return (cond? (new Item_cond_and(cond, item)) : item); -} Item_subselect::Item_subselect(): Item_result_field(), value_assigned(0), thd(0), substitution(0), @@ -1899,7 +1895,7 @@ Item_in_subselect::select_in_like_transformer(JOIN *join, Comp_creator *func) object, but we can't know it here, but here we need address correct reference on left expresion. - //psergey: he means confluent cases like "... IN (SELECT 1)" + //psergey: he means degenerate cases like "... IN (SELECT 1)" */ if (!optimizer) { diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc new file mode 100644 index 00000000000..49b6d9caa31 --- /dev/null +++ b/sql/opt_subselect.cc @@ -0,0 +1,3455 @@ +/** + @file + + @brief + Subquery optimization code here. 
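+  (Semi-join transformations and other IN-subquery rewrites, factored out
+  of sql_select.cc by the subquery optimization backport; see also
+  sql/opt_subselect.h.)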
+ +*/ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#include "mysql_priv.h" +#include "sql_select.h" +#include "opt_subselect.h" + +#include <my_bit.h> + +// Our own: +static +bool subquery_types_allow_materialization(Item_in_subselect *in_subs); +static bool replace_where_subcondition(JOIN *join, Item **expr, + Item *old_cond, Item *new_cond, + bool do_fix_fields); +static int subq_sj_candidate_cmp(Item_in_subselect* const *el1, + Item_in_subselect* const *el2); +static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred); +static TABLE_LIST *alloc_join_nest(THD *thd); +static +void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist); +static uint get_tmp_table_rec_length(List<Item> &items); +bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables); +static SJ_MATERIALIZATION_INFO * +at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, + uint idx, bool *loose_scan); +void best_access_path(JOIN *join, JOIN_TAB *s, + table_map remaining_tables, uint idx, + bool disable_jbuf, double record_count, + POSITION *pos, POSITION *loose_scan_pos); + +static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, + Item_in_subselect *subq_pred); +static void remove_sj_conds(Item **tree); +static bool is_cond_sj_in_equality(Item *item); +static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab); +static Item *remove_additional_cond(Item* conds); +static void remove_subq_pushed_predicates(JOIN *join, Item **where); + + +/* + Check if we need JOIN::prepare()-phase subquery rewrites and if yes, do them + + DESCRIPTION + Check if we need to do + - subquery->semi-join rewrite + - if the subquery can be handled with materialization + - 'substitution' rewrite for table-less subqueries like "(select 1)" + + and mark appropriately + + RETURN + 0 - OK + -1 - Some sort of query error +*/ + +int check_and_do_in_subquery_rewrites(JOIN *join) +{ + THD *thd=join->thd; + st_select_lex *select_lex= join->select_lex; + DBUG_ENTER("check_and_do_in_subquery_rewrites"); + /* + If + 1) this join is inside a subquery (of any type except FROM-clause + subquery) and + 2) we aren't just normalizing a VIEW + + Then perform early unconditional subquery transformations: + - Convert subquery predicate into semi-join, or + - Mark the subquery for execution using materialization, or + - Perform IN->EXISTS transformation, or + - Perform more/less ALL/ANY -> MIN/MAX rewrite + - Substitute trivial scalar-context subquery with its value + + TODO: for PS, make the whole block execute only on the first execution + */ + Item_subselect *subselect; + if (!thd->lex->view_prepare_mode && // (1) + (subselect= select_lex->master_unit()->item)) // (2) + { + Item_in_subselect *in_subs= NULL; + if (subselect->substype() == Item_subselect::IN_SUBS) + in_subs= (Item_in_subselect*)subselect; + + /* Resolve expressions and perform semantic analysis for IN query */ + if (in_subs != NULL) + /* + TODO: Add the condition below to this if statement when we have proper + support for is_correlated handling for materialized semijoins. + If we were to add this condition now, the fix_fields() call in + convert_subq_to_sj() would force the flag is_correlated to be set + erroneously for prepared queries. + + thd->stmt_arena->state != Query_arena::PREPARED) + */ + { + /* + Check if the left and right expressions have the same # of + columns, i.e. 
we don't have a case like + (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...) + + TODO why do we have this duplicated in IN->EXISTS transformers? + psergey-todo: fix these: grep for duplicated_subselect_card_check + */ + if (select_lex->item_list.elements != in_subs->left_expr->cols()) + { + my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols()); + DBUG_RETURN(-1); + } + + SELECT_LEX *current= thd->lex->current_select; + thd->lex->current_select= current->return_after_parsing(); + char const *save_where= thd->where; + thd->where= " IN/ALL/ANY subquery"; + + bool failure= !in_subs->left_expr->fixed && + in_subs->left_expr->fix_fields(thd, &in_subs->left_expr); + thd->lex->current_select= current; + thd->where= save_where; + if (failure) + DBUG_RETURN(-1); /* purecov: deadcode */ + } + DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); + /* + Check if we're in subquery that is a candidate for flattening into a + semi-join (which is done in flatten_subqueries()). The + requirements are: + 1. Subquery predicate is an IN/=ANY subq predicate + 2. Subquery is a single SELECT (not a UNION) + 3. Subquery does not have GROUP BY or ORDER BY + 4. Subquery does not use aggregate functions or HAVING + 5. Subquery predicate is at the AND-top-level of ON/WHERE clause + 6. We are not in a subquery of a single table UPDATE/DELETE that + doesn't have a JOIN (TODO: We should handle this at some + point by switching to multi-table UPDATE/DELETE) + 7. We're not in a table-less subquery like "SELECT 1" + 8. No execution method was already chosen (by a prepared statement) + 9. Parent select is not a table-less select + 10. Neither parent nor child select have STRAIGHT_JOIN option. + */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && + in_subs && // 1 + !select_lex->is_part_of_union() && // 2 + !select_lex->group_list.elements && !join->order && // 3 + !join->having && !select_lex->with_sum_func && // 4 + thd->thd_marker.emb_on_expr_nest && // 5 + select_lex->outer_select()->join && // 6 + select_lex->master_unit()->first_select()->leaf_tables && // 7 + in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8 + select_lex->outer_select()->leaf_tables && // 9 + !((join->select_options | // 10 + select_lex->outer_select()->join->select_options) // 10 + & SELECT_STRAIGHT_JOIN)) // 10 + { + DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); + + (void)subquery_types_allow_materialization(in_subs); + + in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; + + /* Register the subquery for further processing in flatten_subqueries() */ + select_lex-> + outer_select()->join->sj_subselects.append(thd->mem_root, in_subs); + in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest; + } + else + { + DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); + /* + Check if the subquery predicate can be executed via materialization. + The required conditions are: + 1. Subquery predicate is an IN/=ANY subq predicate + 2. Subquery is a single SELECT (not a UNION) + 3. Subquery is not a table-less query. In this case there is no + point in materializing. + 3A The upper query is not a table-less SELECT ... FROM DUAL. We + can't do materialization for SELECT .. FROM DUAL because it + does not call setup_subquery_materialization(). We could make + SELECT ... FROM DUAL call that function but that doesn't seem + to be the case that is worth handling. + 4. 
Subquery predicate is a top-level predicate + (this implies it is not negated) + TODO: this is a limitation that should be lifted once we + implement correct NULL semantics (WL#3830) + 5. Subquery is non-correlated + TODO: + This is an overly restrictive condition. It can be extended to: + (Subquery is non-correlated || + Subquery is correlated to any query outer to IN predicate || + (Subquery is correlated to the immediate outer query && + Subquery !contains {GROUP BY, ORDER BY [LIMIT], + aggregate functions}) && subquery predicate is not under "NOT IN")) + 6. No execution method was already chosen (by a prepared statement). + + (*) The subquery must be part of a SELECT statement. The current + condition also excludes multi-table update statements. + + We have to determine whether we will perform subquery materialization + before calling the IN=>EXISTS transformation, so that we know whether to + perform the whole transformation or only that part of it which wraps + Item_in_subselect in an Item_in_optimizer. + */ + if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && + in_subs && // 1 + !select_lex->is_part_of_union() && // 2 + select_lex->master_unit()->first_select()->leaf_tables && // 3 + thd->lex->sql_command == SQLCOM_SELECT && // * + select_lex->outer_select()->leaf_tables && // 3A + subquery_types_allow_materialization(in_subs)) + { + // psergey-todo: duplicated_subselect_card_check: where it's done? + if (in_subs->is_top_level_item() && // 4 + !in_subs->is_correlated && // 5 + in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 + in_subs->exec_method= Item_in_subselect::MATERIALIZATION; + } + + Item_subselect::trans_res trans_res; + if ((trans_res= subselect->select_transformer(join)) != + Item_subselect::RES_OK) + { + DBUG_RETURN((trans_res == Item_subselect::RES_ERROR)); + } + } + } + DBUG_RETURN(0); +} + + +/** + @brief Check if subquery's compared types allow materialization. + + @param in_subs Subquery predicate, updated as follows: + types_allow_materialization TRUE if subquery materialization is allowed. + sjm_scan_allowed If types_allow_materialization is TRUE, + indicates whether it is possible to use subquery + materialization and scan the materialized table. + + @retval TRUE If subquery types allow materialization. + @retval FALSE Otherwise. + + @details + This is a temporary fix for BUG#36752. + + There are two subquery materialization strategies: + + 1. Materialize and do index lookups in the materialized table. See + BUG#36752 for description of restrictions we need to put on the + compared expressions. + + 2. Materialize and then do a full scan of the materialized table. At the + moment, this strategy's applicability criteria are even stricter than + in #1. + + This is so because of the following: consider an uncorrelated subquery + + ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...) + + and a join order that could be used to do sjm-materialization: + + SJM-Scan(it1, it1), ot1, ot2 + + IN-equalities will be parts of conditions attached to the outer tables: + + ot1: ot1.col1 = ie1 AND ... (C1) + ot2: ot1.col2 = ie2 AND ... (C2) + + besides those there may be additional references to ie1 and ie2 + generated by equality propagation. The problem with evaluating C1 and + C2 is that ie{1,2} refer to subquery tables' columns, while we only have + current value of materialization temptable. Our solution is to + * require that all ie{N} are table column references. 
This allows + to copy the values of materialization temptable columns to the + original table's columns (see setup_sj_materialization for more + details) + * require that compared columns have exactly the same type. This is + a temporary measure to avoid BUG#36752-type problems. +*/ + +static +bool subquery_types_allow_materialization(Item_in_subselect *in_subs) +{ + DBUG_ENTER("subquery_types_allow_materialization"); + + DBUG_ASSERT(in_subs->left_expr->fixed); + + List_iterator<Item> it(in_subs->unit->first_select()->item_list); + uint elements= in_subs->unit->first_select()->item_list.elements; + + in_subs->types_allow_materialization= FALSE; // Assign default values + in_subs->sjm_scan_allowed= FALSE; + + bool all_are_fields= TRUE; + for (uint i= 0; i < elements; i++) + { + Item *outer= in_subs->left_expr->element_index(i); + Item *inner= it++; + all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && + inner->real_item()->type() == Item::FIELD_ITEM); + if (outer->result_type() != inner->result_type()) + DBUG_RETURN(FALSE); + switch (outer->result_type()) { + case STRING_RESULT: + if (outer->is_datetime() != inner->is_datetime()) + DBUG_RETURN(FALSE); + + if (!(outer->collation.collation == inner->collation.collation + /*&& outer->max_length <= inner->max_length */)) + DBUG_RETURN(FALSE); + /*case INT_RESULT: + if (!(outer->unsigned_flag ^ inner->unsigned_flag)) + DBUG_RETURN(FALSE); */ + default: + ;/* suitable for materialization */ + } + } + in_subs->types_allow_materialization= TRUE; + in_subs->sjm_scan_allowed= all_are_fields; + DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed")); + DBUG_RETURN(TRUE); +} + + +/* + Convert semi-join subquery predicates into semi-join join nests + + SYNOPSIS + convert_join_subqueries_to_semijoins() + + DESCRIPTION + + Convert candidate subquery predicates into semi-join join nests. This + transformation is performed once in query lifetime and is irreversible. + + Conversion of one subquery predicate + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + We start with a join that has a semi-join subquery: + + SELECT ... + FROM ot, ... + WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where + + and convert it into a semi-join nest: + + SELECT ... + FROM ot SEMI JOIN (it1 ... itN), ... + WHERE outer_where AND subq_where AND oe=ie + + that is, in order to do the conversion, we need to + + * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent + query's FROM structure. + * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if + the subquery predicate was in an ON expression) + * Remove the subquery predicate from the parent query's WHERE + + Considerations when converting many predicates + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A join may have at most MAX_TABLES tables. This may prevent us from + flattening all subqueries when the total number of tables in parent and + child selects exceeds MAX_TABLES. + We deal with this problem by flattening children's subqueries first and + then using a heuristic rule to determine each subquery predicate's + "priority". + + RETURN + FALSE OK + TRUE Error +*/ + +bool convert_join_subqueries_to_semijoins(JOIN *join) +{ + Query_arena *arena, backup; + Item_in_subselect **in_subq; + Item_in_subselect **in_subq_end; + THD *thd= join->thd; + DBUG_ENTER("convert_join_subqueries_to_semijoins"); + + if (join->sj_subselects.elements() == 0) + DBUG_RETURN(FALSE); + + /* First, convert child join's subqueries. 
We proceed bottom-up here */ + for (in_subq= join->sj_subselects.front(), + in_subq_end= join->sj_subselects.back(); + in_subq != in_subq_end; + in_subq++) + { + st_select_lex *child_select= (*in_subq)->get_select_lex(); + JOIN *child_join= child_select->join; + child_join->outer_tables = child_join->tables; + + /* + child_select->where contains only the WHERE predicate of the + subquery itself here. We may be selecting from a VIEW, which has its + own predicate. The combined predicates are available in child_join->conds, + which was built by setup_conds() doing prepare_where() for all views. + */ + child_select->where= child_join->conds; + + if (convert_join_subqueries_to_semijoins(child_join)) + DBUG_RETURN(TRUE); + (*in_subq)->sj_convert_priority= + (*in_subq)->is_correlated * MAX_TABLES + child_join->outer_tables; + } + + // Temporary measure: disable semi-joins when they are together with outer + // joins. + for (TABLE_LIST *tbl= join->select_lex->leaf_tables; tbl; tbl=tbl->next_leaf) + { + TABLE_LIST *embedding= tbl->embedding; + if (tbl->on_expr || (tbl->embedding && !(embedding->sj_on_expr && + !embedding->embedding))) + { + in_subq= join->sj_subselects.front(); + arena= thd->activate_stmt_arena_if_needed(&backup); + goto skip_conversion; + } + } + + //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables); + /* + 2. Pick which subqueries to convert: + sort the subquery array + - prefer correlated subqueries over uncorrelated; + - prefer subqueries that have greater number of outer tables; + */ + join->sj_subselects.sort(subq_sj_candidate_cmp); + // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES + /* Replace all subqueries to be flattened with Item_int(1) */ + arena= thd->activate_stmt_arena_if_needed(&backup); + for (in_subq= join->sj_subselects.front(); + in_subq != in_subq_end && + join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; + in_subq++) + { + Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, *in_subq, new Item_int(1), + FALSE)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + } + + for (in_subq= join->sj_subselects.front(); + in_subq != in_subq_end && + join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; + in_subq++) + { + if (convert_subq_to_sj(join, *in_subq)) + DBUG_RETURN(TRUE); + } +skip_conversion: + /* + 3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't + convert: + */ + for (; in_subq!= in_subq_end; in_subq++) + { + JOIN *child_join= (*in_subq)->unit->first_select()->join; + Item_subselect::trans_res res; + (*in_subq)->changed= 0; + (*in_subq)->fixed= 0; + + SELECT_LEX *save_select_lex= thd->lex->current_select; + thd->lex->current_select= (*in_subq)->unit->first_select(); + + res= (*in_subq)->select_transformer(child_join); + + thd->lex->current_select= save_select_lex; + + if (res == Item_subselect::RES_ERROR) + DBUG_RETURN(TRUE); + + (*in_subq)->changed= 1; + (*in_subq)->fixed= 1; + + Item *substitute= (*in_subq)->substitution; + bool do_fix_fields= !(*in_subq)->substitution->fixed; + Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? + &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr); + if (replace_where_subcondition(join, tree, *in_subq, substitute, + do_fix_fields)) + DBUG_RETURN(TRUE); + (*in_subq)->substitution= NULL; + + if (!thd->stmt_arena->is_conventional()) + { + tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? 
+              &join->select_lex->prep_where :
+              &((*in_subq)->emb_on_expr_nest->prep_on_expr);
+
+      if (replace_where_subcondition(join, tree, *in_subq, substitute,
+                                     FALSE))
+        DBUG_RETURN(TRUE);
+    }
+  }
+
+  if (arena)
+    thd->restore_active_arena(arena, &backup);
+  join->sj_subselects.clear();
+  DBUG_RETURN(FALSE);
+}
+
+/**
+  @brief Replaces an expression destructively inside the expression tree of
+  the WHERE clause.
+
+  @note Because of current requirements for semijoin flattening, we do not
+  need to recurse here, hence this function will only examine the top-level
+  AND conditions. (see JOIN::prepare, comment starting with "Check if the
+  subquery predicate can be executed via materialization".)
+
+  @param join The top-level query.
+  @param old_cond The expression to be replaced.
+  @param new_cond The expression to be substituted.
+  @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for
+  the new expression.
+  @return <code>true</code> if there was an error, <code>false</code> if
+  successful.
+*/
+static bool replace_where_subcondition(JOIN *join, Item **expr,
+                                       Item *old_cond, Item *new_cond,
+                                       bool do_fix_fields)
+{
+  //Item **expr= (emb_nest == (TABLE_LIST*)1)? &join->conds : &emb_nest->on_expr;
+  if (*expr == old_cond)
+  {
+    *expr= new_cond;
+    if (do_fix_fields)
+      new_cond->fix_fields(join->thd, expr);
+    join->select_lex->where= *expr;
+    return FALSE;
+  }
+
+  if ((*expr)->type() == Item::COND_ITEM)
+  {
+    List_iterator<Item> li(*((Item_cond*)(*expr))->argument_list());
+    Item *item;
+    while ((item= li++))
+    {
+      if (item == old_cond)
+      {
+        li.replace(new_cond);
+        if (do_fix_fields)
+          new_cond->fix_fields(join->thd, li.ref());
+        return FALSE;
+      }
+    }
+  }
+  // If we came here it means there was an error during prerequisites check.
+  DBUG_ASSERT(0);
+  return TRUE;
+}
+
+static int subq_sj_candidate_cmp(Item_in_subselect* const *el1,
+                                 Item_in_subselect* const *el2)
+{
+  return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 :
+         ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1);
+}
+
+
+/*
+  Convert a subquery predicate into a TABLE_LIST semi-join nest
+
+  SYNOPSIS
+    convert_subq_to_sj()
+       parent_join  Parent join, the one that has subq_pred in its WHERE/ON
+                    clause
+       subq_pred    Subquery predicate to be converted
+
+  DESCRIPTION
+    Convert a subquery predicate into a TABLE_LIST semi-join nest. All the
+    prerequisites are already checked, so the conversion is always successful.
+
+    Prepared Statements: the transformation is permanent:
+     - Changes in TABLE_LIST structures are naturally permanent
+     - Item tree changes are performed on statement MEM_ROOT:
+        = we activate statement MEM_ROOT
+        = this function is called before the first fix_prepare_information
+          call.
+
+    This is intended because the criteria for subquery-to-sj conversion remain
+    constant for the lifetime of the Prepared Statement.
+
+  RETURN
+    FALSE  OK
+    TRUE   Out of memory error
+*/
+
+static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
+{
+  SELECT_LEX *parent_lex= parent_join->select_lex;
+  TABLE_LIST *emb_tbl_nest= NULL;
+  List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
+  THD *thd= parent_join->thd;
+  DBUG_ENTER("convert_subq_to_sj");
+
+  /*
+    1. Find out where to put the predicate.
+       Note: for "t1 LEFT JOIN t2" this will be t2, a leaf.
+  */
+  if ((void*)subq_pred->expr_join_nest != (void*)1)
+  {
+    if (subq_pred->expr_join_nest->nested_join)
+    {
+      /*
+        We're dealing with
+
+          ... [LEFT] JOIN  ( ... ) ON (subquery AND whatever) ...
+ + The sj-nest will be inserted into the brackets nest. + */ + emb_tbl_nest= subq_pred->expr_join_nest; + emb_join_list= &emb_tbl_nest->nested_join->join_list; + } + else if (!subq_pred->expr_join_nest->outer_join) + { + /* + We're dealing with + + ... INNER JOIN tblX ON (subquery AND whatever) ... + + The sj-nest will be tblX's "sibling", i.e. another child of its + parent. This is ok because tblX is joined as an inner join. + */ + emb_tbl_nest= subq_pred->expr_join_nest->embedding; + if (emb_tbl_nest) + emb_join_list= &emb_tbl_nest->nested_join->join_list; + } + else if (!subq_pred->expr_join_nest->nested_join) + { + TABLE_LIST *outer_tbl= subq_pred->expr_join_nest; + TABLE_LIST *wrap_nest; + /* + We're dealing with + + ... LEFT JOIN tbl ON (on_expr AND subq_pred) ... + + we'll need to convert it into: + + ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ... + | | + |<----- wrap_nest ---->| + + Q: other subqueries may be pointing to this element. What to do? + A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest. + But we'll need to fix other pointers. + A2: Another way: have TABLE_LIST::next_ptr so the following + subqueries know the table has been nested. + A3: changes in the TABLE_LIST::outer_join will make everything work + automatically. + */ + if (!(wrap_nest= alloc_join_nest(parent_join->thd))) + { + DBUG_RETURN(TRUE); + } + wrap_nest->embedding= outer_tbl->embedding; + wrap_nest->join_list= outer_tbl->join_list; + wrap_nest->alias= (char*) "(sj-wrap)"; + + wrap_nest->nested_join->join_list.empty(); + wrap_nest->nested_join->join_list.push_back(outer_tbl); + + outer_tbl->embedding= wrap_nest; + outer_tbl->join_list= &wrap_nest->nested_join->join_list; + + /* + wrap_nest will take place of outer_tbl, so move the outer join flag + and on_expr + */ + wrap_nest->outer_join= outer_tbl->outer_join; + outer_tbl->outer_join= 0; + + wrap_nest->on_expr= outer_tbl->on_expr; + outer_tbl->on_expr= NULL; + + List_iterator<TABLE_LIST> li(*wrap_nest->join_list); + TABLE_LIST *tbl; + while ((tbl= li++)) + { + if (tbl == outer_tbl) + { + li.replace(wrap_nest); + break; + } + } + /* + Ok now wrap_nest 'contains' outer_tbl and we're ready to add the + semi-join nest into it + */ + emb_join_list= &wrap_nest->nested_join->join_list; + emb_tbl_nest= wrap_nest; + } + } + + TABLE_LIST *sj_nest; + NESTED_JOIN *nested_join; + if (!(sj_nest= alloc_join_nest(parent_join->thd))) + { + DBUG_RETURN(TRUE); + } + nested_join= sj_nest->nested_join; + + sj_nest->join_list= emb_join_list; + sj_nest->embedding= emb_tbl_nest; + sj_nest->alias= (char*) "(sj-nest)"; + sj_nest->sj_subq_pred= subq_pred; + /* Nests do not participate in those 'chains', so: */ + /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/ + emb_join_list->push_back(sj_nest); + + /* + nested_join->used_tables and nested_join->not_null_tables are + initialized in simplify_joins(). + */ + + /* + 2. Walk through subquery's top list and set 'embedding' to point to the + sj-nest. + */ + st_select_lex *subq_lex= subq_pred->unit->first_select(); + nested_join->join_list.empty(); + List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list); + TABLE_LIST *tl, *last_leaf; + while ((tl= li++)) + { + tl->embedding= sj_nest; + tl->join_list= &nested_join->join_list; + nested_join->join_list.push_back(tl); + } + + /* + Reconnect the next_leaf chain. + TODO: Do we have to put subquery's tables at the end of the chain? + Inserting them at the beginning would be a bit faster. 
+ NOTE: We actually insert them at the front! That's because the order is + reversed in this list. + */ + for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ; + tl->next_leaf= subq_lex->leaf_tables; + last_leaf= tl; + + /* + Same as above for next_local chain + (a theory: a next_local chain always starts with ::leaf_tables + because view's tables are inserted after the view) + */ + for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ; + tl->next_local= subq_lex->leaf_tables; + + /* A theory: no need to re-connect the next_global chain */ + + /* 3. Remove the original subquery predicate from the WHERE/ON */ + + // The subqueries were replaced for Item_int(1) earlier + subq_pred->exec_method= + Item_in_subselect::SEMI_JOIN; // for subsequent executions + /*TODO: also reset the 'with_subselect' there. */ + + /* n. Adjust the parent_join->tables counter */ + uint table_no= parent_join->tables; + /* n. Walk through child's tables and adjust table->map */ + for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++) + { + tl->table->tablenr= table_no; + tl->table->map= ((table_map)1) << table_no; + SELECT_LEX *old_sl= tl->select_lex; + tl->select_lex= parent_join->select_lex; + for (TABLE_LIST *emb= tl->embedding; + emb && emb->select_lex == old_sl; + emb= emb->embedding) + emb->select_lex= parent_join->select_lex; + } + parent_join->tables += subq_lex->join->tables; + + /* + Put the subquery's WHERE into semi-join's sj_on_expr + Add the subquery-induced equalities too. + */ + SELECT_LEX *save_lex= thd->lex->current_select; + thd->lex->current_select=subq_lex; + if (!subq_pred->left_expr->fixed && + subq_pred->left_expr->fix_fields(thd, &subq_pred->left_expr)) + DBUG_RETURN(TRUE); + thd->lex->current_select=save_lex; + + sj_nest->nested_join->sj_corr_tables= subq_pred->used_tables(); + sj_nest->nested_join->sj_depends_on= subq_pred->used_tables() | + subq_pred->left_expr->used_tables(); + sj_nest->sj_on_expr= subq_lex->where; + + /* + Create the IN-equalities and inject them into semi-join's ON expression. + Additionally, for LooseScan strategy + - Record the number of IN-equalities. + - Create list of pointers to (oe1, ..., ieN). We'll need the list to + see which of the expressions are bound and which are not (for those + we'll produce a distinct stream of (ie_i1,...ie_ik). + + (TODO: can we just create a list of pointers and hope the expressions + will not substitute themselves on fix_fields()? or we need to wrap + them into Item_direct_view_refs and store pointers to those. The + pointers to Item_direct_view_refs are guaranteed to be stable as + Item_direct_view_refs doesn't substitute itself with anything in + Item_direct_view_ref::fix_fields. 
+ */ + sj_nest->sj_in_exprs= subq_pred->left_expr->cols(); + sj_nest->nested_join->sj_outer_expr_list.empty(); + + if (subq_pred->left_expr->cols() == 1) + { + nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr); + Item_func_eq *item_eq= + new Item_func_eq(subq_pred->left_expr, subq_lex->ref_pointer_array[0]); + item_eq->in_equality_no= 0; + sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq); + } + else + { + for (uint i= 0; i < subq_pred->left_expr->cols(); i++) + { + nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr-> + element_index(i)); + Item_func_eq *item_eq= + new Item_func_eq(subq_pred->left_expr->element_index(i), + subq_lex->ref_pointer_array[i]); + item_eq->in_equality_no= i; + sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq); + } + } + /* Fix the created equality and AND */ + sj_nest->sj_on_expr->fix_fields(parent_join->thd, &sj_nest->sj_on_expr); + + /* + Walk through sj nest's WHERE and ON expressions and call + item->fix_table_changes() for all items. + */ + sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr); + fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list); + + + /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */ + subq_lex->master_unit()->exclude_level(); + + DBUG_EXECUTE("where", + print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY);); + + /* Inject sj_on_expr into the parent's WHERE or ON */ + if (emb_tbl_nest) + { + emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, + sj_nest->sj_on_expr); + emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); + } + else + { + /* Inject into the WHERE */ + parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr); + parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds); + parent_join->select_lex->where= parent_join->conds; + } + + if (subq_lex->ftfunc_list->elements) + { + Item_func_match *ifm; + List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list)); + while ((ifm= li++)) + parent_lex->ftfunc_list->push_front(ifm); + } + + DBUG_RETURN(FALSE); +} + +static TABLE_LIST *alloc_join_nest(THD *thd) +{ + TABLE_LIST *tbl; + if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ + sizeof(NESTED_JOIN)))) + return NULL; + tbl->nested_join= (NESTED_JOIN*) ((uchar*)tbl + + ALIGN_SIZE(sizeof(TABLE_LIST))); + return tbl; +} + + +static +void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist) +{ + List_iterator<TABLE_LIST> it(*tlist); + TABLE_LIST *table; + while ((table= it++)) + { + if (table->on_expr) + table->on_expr->fix_after_pullout(new_parent, &table->on_expr); + if (table->nested_join) + fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list); + } +} + + +/* + Pull tables out of semi-join nests, if possible + + SYNOPSIS + pull_out_semijoin_tables() + join The join where to do the semi-join flattening + + DESCRIPTION + Try to pull tables out of semi-join nests. + + PRECONDITIONS + When this function is called, the join may have several semi-join nests + but it is guaranteed that one semi-join nest does not contain another. + + ACTION + A table can be pulled out of the semi-join nest if + - It is a constant table, or + - It is accessed via eq_ref(outer_tables) + + POSTCONDITIONS + * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL + * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing + to semi-join nest they are in. 
+ * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly + + This operation is (and should be) performed at each PS execution since + tables may become/cease to be constant across PS reexecutions. + + NOTE + Table pullout may make uncorrelated subquery correlated. Consider this + example: + + ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) + + here table it1 can be pulled out (we have it1.primary_key=oe which gives + us functional dependency). Once it1 is pulled out, all references to it1 + from p(it1, it2) become references to outside of the subquery and thus + make the subquery (i.e. its semi-join nest) correlated. + Making the subquery (i.e. its semi-join nest) correlated prevents us from + using Materialization or LooseScan to execute it. + + RETURN + 0 - OK + 1 - Out of memory error +*/ + +int pull_out_semijoin_tables(JOIN *join) +{ + TABLE_LIST *sj_nest; + DBUG_ENTER("pull_out_semijoin_tables"); + List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests); + + /* Try pulling out of the each of the semi-joins */ + while ((sj_nest= sj_list_it++)) + { + /* Action #1: Mark the constant tables to be pulled out */ + table_map pulled_tables= 0; + + List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list); + TABLE_LIST *tbl; + while ((tbl= child_li++)) + { + if (tbl->table) + { + tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; + if (tbl->table->map & join->const_table_map) + { + pulled_tables |= tbl->table->map; + DBUG_PRINT("info", ("Table %s pulled out (reason: constant)", + tbl->table->alias)); + } + } + } + + /* + Action #2: Find which tables we can pull out based on + update_ref_and_keys() data. Note that pulling one table out can allow + us to pull out some other tables too. + */ + bool pulled_a_table; + do + { + pulled_a_table= FALSE; + child_li.rewind(); + while ((tbl= child_li++)) + { + if (tbl->table && !(pulled_tables & tbl->table->map)) + { + if (find_eq_ref_candidate(tbl->table, + sj_nest->nested_join->used_tables & + ~pulled_tables)) + { + pulled_a_table= TRUE; + pulled_tables |= tbl->table->map; + DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)", + tbl->table->alias)); + /* + Pulling a table out of uncorrelated subquery in general makes + makes it correlated. See the NOTE to this funtion. + */ + sj_nest->sj_subq_pred->is_correlated= TRUE; + sj_nest->nested_join->sj_corr_tables|= tbl->table->map; + sj_nest->nested_join->sj_depends_on|= tbl->table->map; + } + } + } + } while (pulled_a_table); + + child_li.rewind(); + /* + Action #3: Move the pulled out TABLE_LIST elements to the parents. + */ + table_map inner_tables= sj_nest->nested_join->used_tables & + ~pulled_tables; + /* Record the bitmap of inner tables */ + sj_nest->sj_inner_tables= inner_tables; + if (pulled_tables) + { + List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)? + (&sj_nest->embedding->nested_join->join_list): + (&join->select_lex->top_join_list); + Query_arena *arena, backup; + arena= join->thd->activate_stmt_arena_if_needed(&backup); + while ((tbl= child_li++)) + { + if (tbl->table) + { + if (inner_tables & tbl->table->map) + { + /* This table is not pulled out */ + tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; + } + else + { + /* This table has been pulled out of the semi-join nest */ + tbl->table->reginfo.join_tab->emb_sj_nest= NULL; + /* + Pull the table up in the same way as simplify_joins() does: + update join_list and embedding pointers but keep next[_local] + pointers. 
+ */ + child_li.remove(); + upper_join_list->push_back(tbl); + tbl->join_list= upper_join_list; + tbl->embedding= sj_nest->embedding; + } + } + } + + /* Remove the sj-nest itself if we've removed everything from it */ + if (!inner_tables) + { + List_iterator<TABLE_LIST> li(*upper_join_list); + /* Find the sj_nest in the list. */ + while (sj_nest != li++) ; + li.remove(); + /* Also remove it from the list of SJ-nests: */ + sj_list_it.remove(); + } + + if (arena) + join->thd->restore_active_arena(arena, &backup); + } + } + DBUG_RETURN(0); +} + + +/* + Optimize semi-join nests that could be run with sj-materialization + + SYNOPSIS + optimize_semijoin_nests() + join The join to optimize semi-join nests for + all_table_map Bitmap of all tables in the join + + DESCRIPTION + Optimize each of the semi-join nests that can be run with + materialization. For each of the nests, we + - Generate the best join order for this "sub-join" and remember it; + - Remember the sub-join execution cost (it's part of materialization + cost); + - Calculate other costs that will be incurred if we decide + to use materialization strategy for this semi-join nest. + + All obtained information is saved and will be used by the main join + optimization pass. + + RETURN + FALSE Ok + TRUE Out of memory error +*/ + +bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) +{ + DBUG_ENTER("optimize_semijoin_nests"); + List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests); + TABLE_LIST *sj_nest; + /* + The statement may have been executed with 'semijoin=on' earlier. + We need to verify that 'semijoin=on' still holds. + */ + if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION)) + { + while ((sj_nest= sj_list_it++)) + { + /* semi-join nests with only constant tables are not valid */ + DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map); + + sj_nest->sj_mat_info= NULL; + if (sj_nest->sj_inner_tables && /* not everything was pulled out */ + !sj_nest->sj_subq_pred->is_correlated && + sj_nest->sj_subq_pred->types_allow_materialization) + { + join->emb_sjm_nest= sj_nest; + if (choose_plan(join, all_table_map)) + DBUG_RETURN(TRUE); /* purecov: inspected */ + /* + The best plan to run the subquery is now in join->best_positions, + save it. + */ + uint n_tables= my_count_bits(sj_nest->sj_inner_tables); + SJ_MATERIALIZATION_INFO* sjm; + if (!(sjm= new SJ_MATERIALIZATION_INFO) || + !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)* + n_tables))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm->tables= n_tables; + sjm->is_used= FALSE; + double subjoin_out_rows, subjoin_read_time; + get_partial_join_cost(join, n_tables, + &subjoin_read_time, &subjoin_out_rows); + + sjm->materialization_cost.convert_from_cost(subjoin_read_time); + sjm->rows= subjoin_out_rows; + + List<Item> &right_expr_list= + sj_nest->sj_subq_pred->unit->first_select()->item_list; + /* + Adjust output cardinality estimates. If the subquery has form + + ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) ) + + then the number of distinct output record combinations has an + upper bound of product of number of records matching the tables + that are used by the SELECT clause. + TODO: + We can get a more precise estimate if we + - use rec_per_key cardinality estimates. For simple cases like + "oe IN (SELECT t.key ...)" it is trivial. + - Functional dependencies between the tables in the semi-join + nest (the payoff is probably less here?) 
+ */ + { + for (uint i=0 ; i < join->const_tables + sjm->tables ; i++) + { + JOIN_TAB *tab= join->best_positions[i].table; + join->map2table[tab->table->tablenr]= tab; + } + List_iterator<Item> it(right_expr_list); + Item *item; + table_map map= 0; + while ((item= it++)) + map |= item->used_tables(); + map= map & ~PSEUDO_TABLE_BITS; + Table_map_iterator tm_it(map); + int tableno; + double rows= 1.0; + while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) + rows *= join->map2table[tableno]->table->quick_condition_rows; + sjm->rows= min(sjm->rows, rows); + } + memcpy(sjm->positions, join->best_positions + join->const_tables, + sizeof(POSITION) * n_tables); + + /* + Calculate temporary table parameters and usage costs + */ + uint rowlen= get_tmp_table_rec_length(right_expr_list); + double lookup_cost; + if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size) + lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST; + else + lookup_cost= DISK_TEMPTABLE_LOOKUP_COST; + + /* + Let materialization cost include the cost to write the data into the + temporary table: + */ + sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost); + + /* + Set the cost to do a full scan of the temptable (will need this to + consider doing sjm-scan): + */ + sjm->scan_cost.zero(); + sjm->scan_cost.add_io(sjm->rows, lookup_cost); + + sjm->lookup_cost.convert_from_cost(lookup_cost); + sj_nest->sj_mat_info= sjm; + DBUG_EXECUTE("opt", print_sjm(sjm);); + } + } + } + join->emb_sjm_nest= NULL; + DBUG_RETURN(FALSE); +} + +/* + Get estimated record length for semi-join materialization temptable + + SYNOPSIS + get_tmp_table_rec_length() + items IN subquery's select list. + + DESCRIPTION + Calculate estimated record length for semi-join materialization + temptable. It's an estimate because we don't follow every bit of + create_tmp_table()'s logic. This isn't necessary as the return value of + this function is used only for cost calculations. + + RETURN + Length of the temptable record, in bytes +*/ + +static uint get_tmp_table_rec_length(List<Item> &items) +{ + uint len= 0; + Item *item; + List_iterator<Item> it(items); + while ((item= it++)) + { + switch (item->result_type()) { + case REAL_RESULT: + len += sizeof(double); + break; + case INT_RESULT: + if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1)) + len += 8; + else + len += 4; + break; + case STRING_RESULT: + enum enum_field_types type; + /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. */ + if ((type= item->field_type()) == MYSQL_TYPE_DATETIME || + type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE || + type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY) + len += 8; + else + len += item->max_length; + break; + case DECIMAL_RESULT: + len += 10; + break; + case ROW_RESULT: + default: + DBUG_ASSERT(0); /* purecov: deadcode */ + break; + } + } + return len; +} + +//psergey-todo: is the below a kind of table elimination?? +/* + Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate + + SYNOPSIS + find_eq_ref_candidate() + table Table to be checked + sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't + count. 
+ + DESCRIPTION + Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate + + TODO + Check again if it is feasible to factor common parts with constant table + search + + RETURN + TRUE - There exists an eq_ref(outer-tables) candidate + FALSE - Otherwise +*/ + +bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) +{ + KEYUSE *keyuse= table->reginfo.join_tab->keyuse; + uint key; + + if (keyuse) + { + while (1) /* For each key */ + { + key= keyuse->key; + KEY *keyinfo= table->key_info + key; + key_part_map bound_parts= 0; + if ((keyinfo->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME) + { + do /* For all equalities on all key parts */ + { + /* Check if this is "t.keypart = expr(outer_tables) */ + if (!(keyuse->used_tables & sj_inner_tables) && + !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) + { + bound_parts |= 1 << keyuse->keypart; + } + keyuse++; + } while (keyuse->key == key && keyuse->table == table); + + if (bound_parts == PREV_BITS(uint, keyinfo->key_parts)) + return TRUE; + if (keyuse->table != table) + return FALSE; + } + else + { + do + { + keyuse++; + if (keyuse->table != table) + return FALSE; + } + while (keyuse->key == key); + } + } + } + return FALSE; +} + +/* + Do semi-join optimization step after we've added a new tab to join prefix + + SYNOPSIS + advance_sj_state() + join The join we're optimizing + remaining_tables Tables not in the join prefix + new_join_tab Join tab we've just added to the join prefix + idx Index of this join tab (i.e. number of tables + in the prefix minus one) + current_record_count INOUT Estimate of #records in join prefix's output + current_read_time INOUT Cost to execute the join prefix + loose_scan_pos IN A POSITION with LooseScan plan to access + table new_join_tab + (produced by the last best_access_path call) + + DESCRIPTION + Update semi-join optimization state after we've added another tab (table + and access method) to the join prefix. + + The state is maintained in join->positions[#prefix_size]. Each of the + available strategies has its own state variables. + + for each semi-join strategy + { + update strategy's state variables; + + if (join prefix has all the tables that are needed to consider + using this strategy for the semi-join(s)) + { + calculate cost of using the strategy + if ((this is the first strategy to handle the semi-join nest(s) || + the cost is less than other strategies)) + { + // Pick this strategy + pos->sj_strategy= .. + .. + } + } + + Most of the new state is saved join->positions[idx] (and hence no undo + is necessary). Several members of class JOIN are updated also, these + changes can be rolled back with restore_prev_sj_state(). + + See setup_semijoin_dups_elimination() for a description of what kinds of + join prefixes each strategy can handle. +*/ + +void advance_sj_state(JOIN *join, table_map remaining_tables, + const JOIN_TAB *new_join_tab, uint idx, + double *current_record_count, double *current_read_time, + POSITION *loose_scan_pos) +{ + TABLE_LIST *emb_sj_nest; + POSITION *pos= join->positions + idx; + remaining_tables &= ~new_join_tab->table->map; + + pos->prefix_cost.convert_from_cost(*current_read_time); + pos->prefix_record_count= *current_record_count; + pos->sj_strategy= SJ_OPT_NONE; + + /* Initialize the state or copy it from prev. 
tables */ + if (idx == join->const_tables) + { + pos->first_firstmatch_table= MAX_TABLES; + pos->first_loosescan_table= MAX_TABLES; + pos->dupsweedout_tables= 0; + pos->sjm_scan_need_tables= 0; + LINT_INIT(pos->sjm_scan_last_inner); + } + else + { + // FirstMatch + pos->first_firstmatch_table= + (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ? + MAX_TABLES : pos[-1].first_firstmatch_table; + pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl; + pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables; + + // LooseScan + pos->first_loosescan_table= + (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ? + MAX_TABLES : pos[-1].first_loosescan_table; + pos->loosescan_need_tables= pos[-1].loosescan_need_tables; + + // SJ-Materialization Scan + pos->sjm_scan_need_tables= + (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ? + 0 : pos[-1].sjm_scan_need_tables; + pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner; + + // Duplicate Weedout + pos->dupsweedout_tables= pos[-1].dupsweedout_tables; + pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table; + } + + table_map handled_by_fm_or_ls= 0; + /* FirstMatch Strategy */ + if (new_join_tab->emb_sj_nest && + optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH)) + { + const table_map outer_corr_tables= + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on; + const table_map sj_inner_tables= + new_join_tab->emb_sj_nest->sj_inner_tables; + + /* + Enter condition: + 1. The next join tab belongs to semi-join nest + (verified for the encompassing code block above). + 2. We're not in a duplicate producer range yet + 3. All outer tables that + - the subquery is correlated with, or + - referred to from the outer_expr + are in the join prefix + 4. All inner tables are still part of remaining_tables. + */ + if (!join->cur_sj_inner_tables && // (2) + !(remaining_tables & outer_corr_tables) && // (3) + (sj_inner_tables == // (4) + ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) + { + /* Start tracking potential FirstMatch range */ + pos->first_firstmatch_table= idx; + pos->firstmatch_need_tables= sj_inner_tables; + pos->first_firstmatch_rtbl= remaining_tables; + } + + if (pos->first_firstmatch_table != MAX_TABLES) + { + if (outer_corr_tables & pos->first_firstmatch_rtbl) + { + /* + Trying to add an sj-inner table whose sj-nest has an outer correlated + table that was not in the prefix. This means FirstMatch can't be used. + */ + pos->first_firstmatch_table= MAX_TABLES; + } + else + { + /* Record that we need all of this semi-join's inner tables, too */ + pos->firstmatch_need_tables|= sj_inner_tables; + } + + if (!(pos->firstmatch_need_tables & remaining_tables)) + { + /* + Got a complete FirstMatch range. + Calculate correct costs and fanout + */ + double reopt_cost, reopt_rec_count, sj_inner_fanout; + optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx, + remaining_tables, FALSE, idx, + &reopt_rec_count, &reopt_cost, + &sj_inner_fanout); + /* + We don't yet know what are the other strategies, so pick the + FirstMatch. + + We ought to save the alternate POSITIONs produced by + optimize_wo_join_buffering but the problem is that providing save + space uses too much space. Instead, we will re-calculate the + alternate POSITIONs after we've picked the best QEP. 
+ */ + pos->sj_strategy= SJ_OPT_FIRST_MATCH; + *current_read_time= reopt_cost; + *current_record_count= reopt_rec_count / sj_inner_fanout; + handled_by_fm_or_ls= pos->firstmatch_need_tables; + } + } + } + + /* LooseScan Strategy */ + { + POSITION *first=join->positions+pos->first_loosescan_table; + /* + LooseScan strategy can't handle interleaving between tables from the + semi-join that LooseScan is handling and any other tables. + + If we were considering LooseScan for the join prefix (1) + and the table we're adding creates an interleaving (2) + then + stop considering loose scan + */ + if ((pos->first_loosescan_table != MAX_TABLES) && // (1) + (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) + new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) + { + pos->first_loosescan_table= MAX_TABLES; + } + + /* + If we got an option to use LooseScan for the current table, start + considering using LooseScan strategy + */ + if (loose_scan_pos->read_time != DBL_MAX) + { + pos->first_loosescan_table= idx; + pos->loosescan_need_tables= + new_join_tab->emb_sj_nest->sj_inner_tables | + new_join_tab->emb_sj_nest->nested_join->sj_depends_on | + new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; + } + + if ((pos->first_loosescan_table != MAX_TABLES) && + !(remaining_tables & pos->loosescan_need_tables)) + { + /* + Ok we have LooseScan plan and also have all LooseScan sj-nest's + inner tables and outer correlated tables into the prefix. + */ + + first=join->positions + pos->first_loosescan_table; + uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); + /* Got a complete LooseScan range. Calculate its cost */ + double reopt_cost, reopt_rec_count, sj_inner_fanout; + /* + The same problem as with FirstMatch - we need to save POSITIONs + somewhere but reserving space for all cases would require too + much space. We will re-calculate POSITION structures later on. + */ + optimize_wo_join_buffering(join, pos->first_loosescan_table, idx, + remaining_tables, + TRUE, //first_alt + pos->first_loosescan_table + n_tables, + &reopt_rec_count, + &reopt_cost, &sj_inner_fanout); + /* + We don't yet have any other strategies that could handle this + semi-join nest (the other options are Duplicate Elimination or + Materialization, which need at least the same set of tables in + the join prefix to be considered) so unconditionally pick the + LooseScan. + */ + pos->sj_strategy= SJ_OPT_LOOSE_SCAN; + *current_read_time= reopt_cost; + *current_record_count= reopt_rec_count / sj_inner_fanout; + handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables; + } + } + + /* + Update join->cur_sj_inner_tables (Used by FirstMatch in this function and + LooseScan detector in best_access_path) + */ + if ((emb_sj_nest= new_join_tab->emb_sj_nest)) + { + join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; + join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; + + /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ + if (!(remaining_tables & + emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) + join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; + } + join->cur_dups_producing_tables &= ~handled_by_fm_or_ls; + + /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */ + bool sjm_scan; + SJ_MATERIALIZATION_INFO *mat_info; + if ((mat_info= at_sjmat_pos(join, remaining_tables, + new_join_tab, idx, &sjm_scan))) + { + if (sjm_scan) + { + /* + We can't yet evaluate this option yet. 
+        This is because we can't account for fanout of sj-inner tables yet:
+
+          ntX  SJM-SCAN(it1 ... itN) | ot1 ... otN  |
+                                     ^(1)           ^(2)
+
+        we're now at position (1). SJM temptable in general has multiple
+        records, so at point (1) we'll get the fanout from sj-inner tables
+        (i.e. there will be multiple record combinations).
+
+        The final join result will not contain any semi-join produced
+        fanout, i.e. tables within SJM-SCAN(...) will not contribute to
+        the cardinality of the join output. Extra fanout produced by
+        SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN.
+
+        The simple way to model this is to remove SJM-SCAN(...) fanout once
+        we reach the point #2.
+      */
+      pos->sjm_scan_need_tables=
+        new_join_tab->emb_sj_nest->sj_inner_tables |
+        new_join_tab->emb_sj_nest->nested_join->sj_depends_on |
+        new_join_tab->emb_sj_nest->nested_join->sj_corr_tables;
+      pos->sjm_scan_last_inner= idx;
+    }
+    else
+    {
+      /* This is SJ-Materialization with lookups */
+      COST_VECT prefix_cost;
+      signed int first_tab= (int)idx - mat_info->tables;
+      double prefix_rec_count;
+      if (first_tab < (int)join->const_tables)
+      {
+        prefix_cost.zero();
+        prefix_rec_count= 1.0;
+      }
+      else
+      {
+        prefix_cost= join->positions[first_tab].prefix_cost;
+        prefix_rec_count= join->positions[first_tab].prefix_record_count;
+      }
+
+      double mat_read_time= prefix_cost.total_cost();
+      mat_read_time += mat_info->materialization_cost.total_cost() +
+                       prefix_rec_count * mat_info->lookup_cost.total_cost();
+
+      if (mat_read_time < *current_read_time || join->cur_dups_producing_tables)
+      {
+        /*
+          NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION
+          elements to join->positions as that makes it hard to return things
+          back when making one step back in join optimization. That's done
+          after the QEP has been chosen.
+ */ + pos->sj_strategy= SJ_OPT_MATERIALIZE; + *current_read_time= mat_read_time; + *current_record_count= prefix_rec_count; + join->cur_dups_producing_tables&= + ~new_join_tab->emb_sj_nest->sj_inner_tables; + } + } + } + + /* 4.A SJM-Scan second phase check */ + if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */ + !(pos->sjm_scan_need_tables & remaining_tables)) + { + TABLE_LIST *mat_nest= + join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest; + SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; + + double prefix_cost; + double prefix_rec_count; + int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables; + /* Get the prefix cost */ + if (first_tab == (int)join->const_tables) + { + prefix_rec_count= 1.0; + prefix_cost= 0.0; + } + else + { + prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); + prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; + } + + /* Add materialization cost */ + prefix_cost += mat_info->materialization_cost.total_cost() + + prefix_rec_count * mat_info->scan_cost.total_cost(); + prefix_rec_count *= mat_info->rows; + + uint i; + table_map rem_tables= remaining_tables; + for (i= idx; i != (first_tab + mat_info->tables - 1); i--) + rem_tables |= join->positions[i].table->table->map; + + POSITION curpos, dummy; + /* Need to re-run best-access-path as we prefix_rec_count has changed */ + for (i= first_tab + mat_info->tables; i <= idx; i++) + { + best_access_path(join, join->positions[i].table, rem_tables, i, FALSE, + prefix_rec_count, &curpos, &dummy); + prefix_rec_count *= curpos.records_read; + prefix_cost += curpos.read_time; + } + + /* + Use the strategy if + * it is cheaper then what we've had, or + * we haven't picked any other semi-join strategy yet + In the second case, we pick this strategy unconditionally because + comparing cost without semi-join duplicate removal with cost with + duplicate removal is not an apples-to-apples comparison. + */ + if (prefix_cost < *current_read_time || join->cur_dups_producing_tables) + { + pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN; + *current_read_time= prefix_cost; + *current_record_count= prefix_rec_count; + join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables; + + } + } + + /* 5. Duplicate Weedout strategy handler */ + { + /* + Duplicate weedout can be applied after all ON-correlated and + correlated + */ + TABLE_LIST *nest; + if ((nest= new_join_tab->emb_sj_nest)) + { + if (!pos->dupsweedout_tables) + pos->first_dupsweedout_table= idx; + + pos->dupsweedout_tables |= nest->sj_inner_tables | + nest->nested_join->sj_depends_on | + nest->nested_join->sj_corr_tables; + } + + if (pos->dupsweedout_tables && + !(remaining_tables & + ~new_join_tab->table->map & pos->dupsweedout_tables)) + { + /* + Ok, reached a state where we could put a dups weedout point. + Walk back and calculate + - the join cost (this is needed as the accumulated cost may assume + some other duplicate elimination method) + - extra fanout that will be removed by duplicate elimination + - duplicate elimination cost + There are two cases: + 1. We have other strategy/ies to remove all of the duplicates. + 2. We don't. + + We need to calculate the cost in case #2 also because we need to make + choice between this join order and others. 
+      */
+      uint first_tab= pos->first_dupsweedout_table;
+      double dups_cost;
+      double prefix_rec_count;
+      double sj_inner_fanout= 1.0;
+      double sj_outer_fanout= 1.0;
+      uint temptable_rec_size;
+      if (first_tab == join->const_tables)
+      {
+        prefix_rec_count= 1.0;
+        temptable_rec_size= 0;
+        dups_cost= 0.0;
+      }
+      else
+      {
+        dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost();
+        prefix_rec_count= join->positions[first_tab - 1].prefix_record_count;
+        temptable_rec_size= 8; /* This is not true but we'll make it so */
+      }
+
+      table_map dups_removed_fanout= 0;
+      for (uint j= pos->first_dupsweedout_table; j <= idx; j++)
+      {
+        POSITION *p= join->positions + j;
+        dups_cost += p->read_time;
+        if (p->table->emb_sj_nest)
+        {
+          sj_inner_fanout *= p->records_read;
+          dups_removed_fanout |= p->table->table->map;
+        }
+        else
+        {
+          sj_outer_fanout *= p->records_read;
+          temptable_rec_size += p->table->table->file->ref_length;
+        }
+      }
+
+      /*
+        Add the cost of temptable use. The table will have sj_outer_fanout
+        records, and we will make
+        - sj_outer_fanout table writes
+        - sj_inner_fanout*sj_outer_fanout  lookups.
+
+      */
+      double one_lookup_cost;
+      if (sj_outer_fanout*temptable_rec_size >
+          join->thd->variables.max_heap_table_size)
+        one_lookup_cost= DISK_TEMPTABLE_LOOKUP_COST;
+      else
+        one_lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST;
+
+      double write_cost= join->positions[first_tab].prefix_record_count*
+                         sj_outer_fanout * one_lookup_cost;
+      double full_lookup_cost= join->positions[first_tab].prefix_record_count*
+                               sj_outer_fanout* sj_inner_fanout *
+                               one_lookup_cost;
+      dups_cost += write_cost + full_lookup_cost;
+
+      /*
+        Use the strategy if
+         * it is cheaper than what we've had, or
+         * we haven't picked any other semi-join strategy yet
+        The second part is necessary because this strategy is the last one
+        to consider (it needs "the most" tables in the prefix) and we can't
+        leave duplicate-producing tables not handled by any strategy.
+      */
+      if (dups_cost < *current_read_time || join->cur_dups_producing_tables)
+      {
+        pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT;
+        *current_read_time= dups_cost;
+        *current_record_count= *current_record_count / sj_inner_fanout;
+        join->cur_dups_producing_tables &= ~dups_removed_fanout;
+      }
+    }
+  }
+}
+
+
+/*
+  Remove the last join tab from the join->cur_sj_inner_tables bitmap.
+  We assume remaining_tables doesn't contain @tab.
+*/
+
+void restore_prev_sj_state(const table_map remaining_tables,
+                                  const JOIN_TAB *tab, uint idx)
+{
+  TABLE_LIST *emb_sj_nest;
+  if ((emb_sj_nest= tab->emb_sj_nest))
+  {
+    /* If we're removing the last SJ-inner table, remove the sj-nest */
+    if ((remaining_tables & emb_sj_nest->sj_inner_tables) ==
+        (emb_sj_nest->sj_inner_tables & ~tab->table->map))
+    {
+      tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables;
+    }
+  }
+}
+
+
+/*
+  Given a semi-join nest, find out which of the IN-equalities are bound
+
+  SYNOPSIS
+    get_bound_sj_equalities()
+      sj_nest           Semi-join nest
+      remaining_tables  Tables that are not yet bound
+
+  DESCRIPTION
+    Given a semi-join nest, find out which of the IN-equalities have their
+    left part expression bound (i.e. the said expression doesn't refer to
+    any of remaining_tables and can be evaluated).
+
+  RETURN
+    Bitmap of bound IN-equalities.
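+
+  EXAMPLE
+    An illustrative case (table and column names are made up): for
+
+      (ot1.a, ot2.b) IN (SELECT it.p, it.q FROM it)
+
+    the outer expression list is (ot1.a, ot2.b). If ot1 is already in the
+    join prefix while ot2 is still in remaining_tables, then only the first
+    IN-equality is bound and the returned bitmap is 0x1 (bit 0 set, bit 1
+    clear).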
+*/
+
+ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest,
+                                  table_map remaining_tables)
+{
+  List_iterator<Item> li(sj_nest->nested_join->sj_outer_expr_list);
+  Item *item;
+  uint i= 0;
+  ulonglong res= 0;
+  while ((item= li++))
+  {
+    /*
+      Q: should this take into account equality propagation and how?
+      A: If e->outer_side is an Item_field, walk over the equality
+         class and see if there is an element that is bound?
+      (this is an optional feature)
+    */
+    if (!(item->used_tables() & remaining_tables))
+    {
+      res |= 1ULL << i;
+    }
+    i++;
+  }
+  return res;
+}
+
+
+/*
+  Check if the last tables of the partial join order allow using the
+  sj-materialization strategy for them
+
+  SYNOPSIS
+    at_sjmat_pos()
+      join
+      remaining_tables
+      tab                the last table's join tab
+      idx                last table's index
+      loose_scan    OUT  TRUE <=> use LooseScan
+
+  RETURN
+    Materialization info   Yes, can apply sj-materialization
+    NULL                   No, some of the requirements are not met
+*/
+
+static SJ_MATERIALIZATION_INFO *
+at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab,
+             uint idx, bool *loose_scan)
+{
+  /*
+   Check if
+    1. We're in a semi-join nest that can be run with SJ-materialization
+    2. All the tables correlated through the IN subquery are in the prefix
+  */
+  TABLE_LIST *emb_sj_nest= tab->emb_sj_nest;
+  table_map suffix= remaining_tables & ~tab->table->map;
+  if (emb_sj_nest && emb_sj_nest->sj_mat_info &&
+      !(suffix & emb_sj_nest->sj_inner_tables))
+  {
+    /*
+      Walk back and check if all immediately preceding tables are from
+      this semi-join.
+    */
+    uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables);
+    for (uint i= 1; i < n_tables ; i++)
+    {
+      if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest)
+        return NULL;
+    }
+    *loose_scan= test(remaining_tables & ~tab->table->map &
+                             (emb_sj_nest->sj_inner_tables |
+                              emb_sj_nest->nested_join->sj_depends_on));
+    if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed)
+      return NULL;
+    else
+      return emb_sj_nest->sj_mat_info;
+  }
+  return NULL;
+}
+
+
+
+/*
+  Fix semi-join strategies for the picked join order
+
+  SYNOPSIS
+    fix_semijoin_strategies_for_picked_join_order()
+      join  The join with the picked join order
+
+  DESCRIPTION
+    Fix semi-join strategies for the picked join order. This is a step that
+    needs to be done right after we have fixed the join order. What we do
+    here is switch the join's semi-join strategy description from
+    backward-based to forward-based.
+
+    When join optimization is in progress, we re-consider semi-join
+    strategies after we've added another table. Here's an illustration.
+    Suppose the join optimization is underway:
+
+    1) ot1  it1  it2
+                 sjX  -- looking at (ot1, it1, it2) join prefix, we decide
+                         to use semi-join strategy sjX.
+
+    2) ot1  it1  it2  ot2
+                 sjX  sjY -- Having added table ot2, we now may consider
+                             another semi-join strategy and decide to use a
+                             different strategy sjY. Note that the record
+                             of sjX has remained under it2. That is
+                             necessary because we need to be able to get
+                             back to (ot1, it1, it2) join prefix.
+      What makes things even worse is that there are cases where the choice
+      of sjY changes the way we should access it2.
+
+    3) [ot1  it1  it2  ot2  ot3]
+                  sjX  sjY  -- This means that after join optimization is
+                               finished, semi-join info should be read
+                               right-to-left (while nearly all plan refinement
+                               functions, EXPLAIN, etc. proceed from left to
+                               right)
+
+    This function does the needed reversal, making it possible to read the
+    join and semi-join order from left to right.
+*/ + +void fix_semijoin_strategies_for_picked_join_order(JOIN *join) +{ + uint table_count=join->tables; + uint tablenr; + table_map remaining_tables= 0; + table_map handled_tabs= 0; + for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--) + { + POSITION *pos= join->best_positions + tablenr; + JOIN_TAB *s= pos->table; + uint first; + LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it + + if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE) + { + remaining_tables |= s->table->map; + continue; + } + + if (pos->sj_strategy == SJ_OPT_MATERIALIZE) + { + SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info; + sjm->is_used= TRUE; + sjm->is_sj_scan= FALSE; + memcpy(pos - sjm->tables + 1, sjm->positions, + sizeof(POSITION) * sjm->tables); + first= tablenr - sjm->tables + 1; + join->best_positions[first].n_sj_tables= sjm->tables; + join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE; + } + else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) + { + POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner; + SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; + sjm->is_used= TRUE; + sjm->is_sj_scan= TRUE; + first= pos->sjm_scan_last_inner - sjm->tables + 1; + memcpy(join->best_positions + first, + sjm->positions, sizeof(POSITION) * sjm->tables); + join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; + join->best_positions[first].n_sj_tables= sjm->tables; + /* + Do what advance_sj_state did: re-run best_access_path for every table + in the [last_inner_table + 1; pos..) range + */ + double prefix_rec_count; + /* Get the prefix record count */ + if (first == join->const_tables) + prefix_rec_count= 1.0; + else + prefix_rec_count= join->best_positions[first-1].prefix_record_count; + + /* Add materialization record count*/ + prefix_rec_count *= sjm->rows; + + uint i; + table_map rem_tables= remaining_tables; + for (i= tablenr; i != (first + sjm->tables - 1); i--) + rem_tables |= join->best_positions[i].table->table->map; + + POSITION dummy; + join->cur_sj_inner_tables= 0; + for (i= first + sjm->tables; i <= tablenr; i++) + { + best_access_path(join, join->best_positions[i].table, rem_tables, i, FALSE, + prefix_rec_count, join->best_positions + i, &dummy); + prefix_rec_count *= join->best_positions[i].records_read; + rem_tables &= ~join->best_positions[i].table->table->map; + } + } + + if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) + { + first= pos->first_firstmatch_table; + join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; + join->best_positions[first].n_sj_tables= tablenr - first + 1; + POSITION dummy; // For loose scan paths + double record_count= (first== join->const_tables)? 
1.0: + join->best_positions[tablenr - 1].prefix_record_count; + + table_map rem_tables= remaining_tables; + uint idx; + for (idx= first; idx <= tablenr; idx++) + { + rem_tables |= join->best_positions[idx].table->table->map; + } + /* + Re-run best_access_path to produce best access methods that do not use + join buffering + */ + join->cur_sj_inner_tables= 0; + for (idx= first; idx <= tablenr; idx++) + { + if (join->best_positions[idx].use_join_buffer) + { + best_access_path(join, join->best_positions[idx].table, + rem_tables, idx, TRUE /* no jbuf */, + record_count, join->best_positions + idx, &dummy); + } + record_count *= join->best_positions[idx].records_read; + rem_tables &= ~join->best_positions[idx].table->table->map; + } + } + + if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) + { + first= pos->first_loosescan_table; + POSITION *first_pos= join->best_positions + first; + POSITION loose_scan_pos; // For loose scan paths + double record_count= (first== join->const_tables)? 1.0: + join->best_positions[tablenr - 1].prefix_record_count; + + table_map rem_tables= remaining_tables; + uint idx; + for (idx= first; idx <= tablenr; idx++) + rem_tables |= join->best_positions[idx].table->table->map; + /* + Re-run best_access_path to produce best access methods that do not use + join buffering + */ + join->cur_sj_inner_tables= 0; + for (idx= first; idx <= tablenr; idx++) + { + if (join->best_positions[idx].use_join_buffer || (idx == first)) + { + best_access_path(join, join->best_positions[idx].table, + rem_tables, idx, TRUE /* no jbuf */, + record_count, join->best_positions + idx, + &loose_scan_pos); + if (idx==first) + join->best_positions[idx]= loose_scan_pos; + } + rem_tables &= ~join->best_positions[idx].table->table->map; + record_count *= join->best_positions[idx].records_read; + } + first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; + first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); + } + + if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) + { + /* + Duplicate Weedout starting at pos->first_dupsweedout_table, ending at + this table. + */ + first= pos->first_dupsweedout_table; + join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; + join->best_positions[first].n_sj_tables= tablenr - first + 1; + } + + uint i_end= first + join->best_positions[first].n_sj_tables; + for (uint i= first; i < i_end; i++) + { + if (i != first) + join->best_positions[i].sj_strategy= SJ_OPT_NONE; + handled_tabs |= join->best_positions[i].table->table->map; + } + + if (tablenr != first) + pos->sj_strategy= SJ_OPT_NONE; + remaining_tables |= s->table->map; + } +} + +/* + Setup semi-join materialization strategy for one semi-join nest + + SYNOPSIS + + setup_sj_materialization() + tab The first tab in the semi-join + + DESCRIPTION + Setup execution structures for one semi-join materialization nest: + - Create the materialization temporary table + - If we're going to do index lookups + create TABLE_REF structure to make the lookus + - else (if we're going to do a full scan of the temptable) + create Copy_field structures to do copying. 
+ + RETURN + FALSE Ok + TRUE Error +*/ + +bool setup_sj_materialization(JOIN_TAB *tab) +{ + uint i; + DBUG_ENTER("setup_sj_materialization"); + TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding; + SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info; + THD *thd= tab->join->thd; + /* First the calls come to the materialization function */ + List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list; + + /* + Set up the table to write to, do as select_union::create_result_table does + */ + sjm->sjm_table_param.init(); + sjm->sjm_table_param.field_count= item_list.elements; + // psergey-merge: the following is not in 5.x: sjm->sjm_table_param.bit_fields_as_long= TRUE; + List_iterator<Item> it(item_list); + Item *right_expr; + while((right_expr= it++)) + sjm->sjm_table_cols.push_back(right_expr); + + if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, + sjm->sjm_table_cols, (ORDER*) 0, + TRUE /* distinct */, + 1, /*save_sum_fields*/ + thd->options | TMP_TABLE_ALL_COLUMNS, + HA_POS_ERROR /*rows_limit */, + (char*)"sj-materialize"))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjm->table->file->extra(HA_EXTRA_WRITE_CACHE); + sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); + tab->join->sj_tmp_tables.push_back(sjm->table); + tab->join->sjm_info_list.push_back(sjm); + + sjm->materialized= FALSE; + if (!sjm->is_sj_scan) + { + KEY *tmp_key; /* The only index on the temporary table. */ + uint tmp_key_parts; /* Number of keyparts in tmp_key. */ + tmp_key= sjm->table->key_info; + tmp_key_parts= tmp_key->key_parts; + + /* + Create/initialize everything we will need to index lookups into the + temptable. + */ + TABLE_REF *tab_ref; + if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + tab_ref->key= 0; /* The only temp table index. */ + tab_ref->key_length= tmp_key->key_length; + if (!(tab_ref->key_buff= + (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || + !(tab_ref->key_copy= + (store_key**) thd->alloc((sizeof(store_key*) * + (tmp_key_parts + 1)))) || + !(tab_ref->items= + (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + + tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length); + tab_ref->key_err=1; + tab_ref->null_rejecting= 1; + tab_ref->disable_cache= FALSE; + + KEY_PART_INFO *cur_key_part= tmp_key->key_part; + store_key **ref_key= tab_ref->key_copy; + uchar *cur_ref_buff= tab_ref->key_buff; + + for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) + { + tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i); + int null_count= test(cur_key_part->field->real_maybe_null()); + *ref_key= new store_key_item(thd, cur_key_part->field, + /* TODO: + the NULL byte is taken into account in + cur_key_part->store_length, so instead of + cur_ref_buff + test(maybe_null), we could + use that information instead. + */ + cur_ref_buff + null_count, + null_count ? tab_ref->key_buff : 0, + cur_key_part->length, tab_ref->items[i]); + cur_ref_buff+= cur_key_part->store_length; + } + *ref_key= NULL; /* End marker. */ + tab_ref->key_err= 1; + tab_ref->key_parts= tmp_key_parts; + sjm->tab_ref= tab_ref; + + /* + Remove the injected semi-join IN-equalities from join_tab conds. This + needs to be done because the IN-equalities refer to columns of + sj-inner tables which are not available after the materialization + has been finished. 
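+
+      For example (names are illustrative): for "ot.col1 IN (SELECT it.col2
+      FROM it)" the conversion to semi-join injected "ot.col1=it.col2" into
+      the conditions attached to the sj-inner tables; that equality is
+      removed here, and an equivalent check against the temptable column is
+      built below by create_subq_in_equalities().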
+    */
+    for (i= 0; i < sjm->tables; i++)
+    {
+      remove_sj_conds(&tab[i].select_cond);
+      if (tab[i].select)
+        remove_sj_conds(&tab[i].select->cond);
+    }
+    if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm,
+                                                      emb_sj_nest->sj_subq_pred)))
+      DBUG_RETURN(TRUE); /* purecov: inspected */
+  }
+  else
+  {
+    /*
+      We'll be doing full scan of the temptable.
+      Setup copying of temptable columns back to the record buffers
+      for their source tables. We need this because IN-equalities
+      refer to the original tables.
+
+      EXAMPLE
+
+      Consider the query:
+        SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it)
+
+      Suppose it's executed with SJ-Materialization-scan. We choose to do scan
+      if we can't do the lookup, i.e. the join order is (it, ot). The plan
+      would look as follows:
+
+        table    access method      condition
+         it      materialize+scan    -
+         ot      (whatever)          ot.col1=it.col2 (C2)
+
+      The condition C2 refers to current row of table it. The problem is
+      that by the time we evaluate C2, we would have finished with scanning
+      it itself and will be scanning the temptable.
+
+      At the moment, our solution is to copy back: when we get the next
+      temptable record, we copy its columns to their corresponding columns
+      in the record buffers for the source tables.
+    */
+    sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements];
+    it.rewind();
+    for (uint i=0; i < sjm->sjm_table_cols.elements; i++)
+    {
+      bool dummy;
+      Item_equal *item_eq;
+      Item *item= (it++)->real_item();
+      DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
+      Field *copy_to= ((Item_field*)item)->field;
+      /*
+        Tricks with Item_equal are due to the following: suppose we have a
+        query:
+
+        ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2
+                                               WHERE it1.col= it2.col)
+        then equality propagation will create an
+
+          Item_equal(it1.col, it2.col, ot.col)
+
+        then substitute_for_best_equal_field() will change the conditions
+        according to the join order:
+
+        it1
+        it2     it1.col=it2.col
+        ot      cond(it1.col)
+
+        although we've originally had "SELECT it2.col", conditions attached
+        to subsequent outer tables will refer to it1.col, so SJM-Scan will
+        need to unpack data there.
+        That is, if an element from the subquery's select list participates
+        in equality propagation, then we need to unpack it to the first
+        equality-propagation member that refers to a table within the
+        subquery.
+      */
+      item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy);
+
+      if (item_eq)
+      {
+        List_iterator<Item_field> it(item_eq->fields);
+        Item_field *item;
+        while ((item= it++))
+        {
+          if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables))
+          {
+            copy_to= item->field;
+            break;
+          }
+        }
+      }
+      sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE);
+      /* The write_set for source tables must be set up to allow the copying */
+      bitmap_set_bit(copy_to->table->write_set, copy_to->field_index);
+    }
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+
+/*
+  Create subquery IN-equalities assuming use of materialization strategy
+
+  SYNOPSIS
+    create_subq_in_equalities()
+      thd        Thread handle
+      sjm        Semi-join materialization structure
+      subq_pred  The subquery predicate
+
+  DESCRIPTION
+    Create subquery IN-equality predicates. That is, for a subquery
+
+      (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...)
+
+    create "oe1=ie1 AND oe2=ie2 AND ..." expression, such that ie1, ie2, ..
+    refer to the columns of the table that's used to materialize the
+    subquery.
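+
+    For illustration (made-up names): for "(ot.a, ot.b) IN (SELECT it.x, it.y
+    FROM it)" materialized into a temptable with columns (x, y), the created
+    condition is "ot.a = <temptable>.x AND ot.b = <temptable>.y".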
+ + RETURN + Created condition +*/ + +static Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, + Item_in_subselect *subq_pred) +{ + Item *res= NULL; + if (subq_pred->left_expr->cols() == 1) + { + if (!(res= new Item_func_eq(subq_pred->left_expr, + new Item_field(sjm->table->field[0])))) + return NULL; /* purecov: inspected */ + } + else + { + Item *conj; + for (uint i= 0; i < subq_pred->left_expr->cols(); i++) + { + if (!(conj= new Item_func_eq(subq_pred->left_expr->element_index(i), + new Item_field(sjm->table->field[i]))) || + !(res= and_items(res, conj))) + return NULL; /* purecov: inspected */ + } + } + if (res->fix_fields(thd, &res)) + return NULL; /* purecov: inspected */ + return res; +} + + + + +static void remove_sj_conds(Item **tree) +{ + if (*tree) + { + if (is_cond_sj_in_equality(*tree)) + { + *tree= NULL; + return; + } + else if ((*tree)->type() == Item::COND_ITEM) + { + Item *item; + List_iterator<Item> li(*(((Item_cond*)*tree)->argument_list())); + while ((item= li++)) + { + if (is_cond_sj_in_equality(item)) + li.replace(new Item_int(1)); + } + } + } +} + +/* Check if given Item was injected by semi-join equality */ +static bool is_cond_sj_in_equality(Item *item) +{ + if (item->type() == Item::FUNC_ITEM && + ((Item_func*)item)->functype()== Item_func::EQ_FUNC) + { + Item_func_eq *item_eq= (Item_func_eq*)item; + return test(item_eq->in_equality_no != UINT_MAX); + } + return FALSE; +} + + +/* + Create a temporary table to weed out duplicate rowid combinations + + SYNOPSIS + + create_duplicate_weedout_tmp_table() + thd Thread handle + uniq_tuple_length_arg Length of the table's column + sjtbl Update sjtbl->[start_]recinfo values which + will be needed if we'll need to convert the + created temptable from HEAP to MyISAM/Maria. + + DESCRIPTION + Create a temporary table to weed out duplicate rowid combinations. The + table has a single column that is a concatenation of all rowids in the + combination. + + Depending on the needed length, there are two cases: + + 1. When the length of the column < max_key_length: + + CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col)); + + 2. Otherwise (not a valid SQL syntax but internally supported): + + CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col)); + + The code in this function was produced by extraction of relevant parts + from create_tmp_table(). 
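+
+    For illustration (the lengths are made up): if the weedout range covers
+    two tables whose handlers have ref (rowid) lengths of 6 and 8 bytes and
+    one of the tables can be NULL-complemented, the caller passes
+    uniq_tuple_length_arg = 1 + 6 + 8 = 15 (one NULL-indicator byte plus the
+    two rowids), which is below CONVERT_IF_BIGGER_TO_BLOB, so variant 1
+    above is used.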
+ + RETURN + created table + NULL on error +*/ + +TABLE *create_duplicate_weedout_tmp_table(THD *thd, + uint uniq_tuple_length_arg, + SJ_TMP_TABLE *sjtbl) +{ + MEM_ROOT *mem_root_save, own_root; + TABLE *table; + TABLE_SHARE *share; + uint temp_pool_slot=MY_BIT_NONE; + char *tmpname,path[FN_REFLEN]; + Field **reg_field; + KEY_PART_INFO *key_part_info; + KEY *keyinfo; + uchar *group_buff; + uchar *bitmaps; + uint *blob_field; + ENGINE_COLUMNDEF *recinfo, *start_recinfo; + bool using_unique_constraint=FALSE; + bool use_packed_rows= FALSE; + Field *field, *key_field; + uint blob_count, null_pack_length, null_count; + uchar *null_flags; + uchar *pos; + DBUG_ENTER("create_duplicate_weedout_tmp_table"); + DBUG_ASSERT(!sjtbl->is_degenerate); + /* + STEP 1: Get temporary table name + */ + statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) + temp_pool_slot = bitmap_lock_set_next(&temp_pool); + + if (temp_pool_slot != MY_BIT_NONE) // we got a slot + sprintf(path, "%s_%lx_%i", tmp_file_prefix, + current_pid, temp_pool_slot); + else + { + /* if we run out of slots or we are not using tempool */ + sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid, + thd->thread_id, thd->tmp_table++); + } + fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + /* STEP 2: Figure if we'll be using a key or blob+constraint */ + if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB) + using_unique_constraint= TRUE; + + /* STEP 3: Allocate memory for temptable description */ + init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0); + if (!multi_alloc_root(&own_root, + &table, sizeof(*table), + &share, sizeof(*share), + ®_field, sizeof(Field*) * (1+1), + &blob_field, sizeof(uint)*2, + &keyinfo, sizeof(*keyinfo), + &key_part_info, sizeof(*key_part_info) * 2, + &start_recinfo, + sizeof(*recinfo)*(1*2+4), + &tmpname, (uint) strlen(path)+1, + &group_buff, (!using_unique_constraint ? 
+ uniq_tuple_length_arg : 0), + &bitmaps, bitmap_buffer_size(1)*3, + NullS)) + { + if (temp_pool_slot != MY_BIT_NONE) + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); + DBUG_RETURN(NULL); + } + strmov(tmpname,path); + + + /* STEP 4: Create TABLE description */ + bzero((char*) table,sizeof(*table)); + bzero((char*) reg_field,sizeof(Field*)*2); + + table->mem_root= own_root; + mem_root_save= thd->mem_root; + thd->mem_root= &table->mem_root; + + table->field=reg_field; + table->alias= "weedout-tmp"; + table->reginfo.lock_type=TL_WRITE; /* Will be updated */ + table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE; + table->map=1; + table->temp_pool_slot = temp_pool_slot; + table->copy_blobs= 1; + table->in_use= thd; + table->quick_keys.init(); + table->covering_keys.init(); + table->keys_in_use_for_query.init(); + + table->s= share; + init_tmp_table_share(thd, share, "", 0, tmpname, tmpname); + share->blob_field= blob_field; + share->blob_ptr_size= portable_sizeof_char_ptr; + share->db_low_byte_first=1; // True for HEAP and MyISAM + share->table_charset= NULL; + share->primary_key= MAX_KEY; // Indicate no primary key + share->keys_for_keyread.init(); + share->keys_in_use.init(); + + blob_count= 0; + + /* Create the field */ + { + /* + For the sake of uniformity, always use Field_varstring (altough we could + use Field_string for shorter keys) + */ + field= new Field_varstring(uniq_tuple_length_arg, FALSE, "rowids", share, + &my_charset_bin); + if (!field) + DBUG_RETURN(0); + field->table= table; + field->key_start.init(0); + field->part_of_key.init(0); + field->part_of_sortkey.init(0); + field->unireg_check= Field::NONE; + field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG); + field->reset_fields(); + field->init(table); + field->orig_table= NULL; + + field->field_index= 0; + + *(reg_field++)= field; + *blob_field= 0; + *reg_field= 0; + + share->fields= 1; + share->blob_fields= 0; + } + + uint reclength= field->pack_length(); + if (using_unique_constraint) + { + share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + DBUG_ASSERT(uniq_tuple_length_arg <= table->file->max_key_length()); + } + else + { + share->db_plugin= ha_lock_engine(0, heap_hton); + table->file= get_new_handler(share, &table->mem_root, + share->db_type()); + } + if (!table->file) + goto err; + + null_count=1; + + null_pack_length= 1; + reclength += null_pack_length; + + share->reclength= reclength; + { + uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1); + share->rec_buff_length= alloc_length; + if (!(table->record[0]= (uchar*) + alloc_root(&table->mem_root, alloc_length*3))) + goto err; + table->record[1]= table->record[0]+alloc_length; + share->default_values= table->record[1]+alloc_length; + } + setup_tmp_table_column_bitmaps(table, bitmaps); + + recinfo= start_recinfo; + null_flags=(uchar*) table->record[0]; + pos=table->record[0]+ null_pack_length; + if (null_pack_length) + { + bzero((uchar*) recinfo,sizeof(*recinfo)); + recinfo->type=FIELD_NORMAL; + recinfo->length=null_pack_length; + recinfo++; + bfill(null_flags,null_pack_length,255); // Set null fields + + table->null_flags= (uchar*) table->record[0]; + share->null_fields= null_count; + share->null_bytes= null_pack_length; + } + null_count=1; + + { + //Field *field= *reg_field; + uint length; + bzero((uchar*) recinfo,sizeof(*recinfo)); + field->move_field(pos,(uchar*) 0,0); + + field->reset(); + /* + Test if there is a default field value. 
The test for ->ptr is to skip + 'offset' fields generated by initalize_tables + */ + // Initialize the table field: + bzero(field->ptr, field->pack_length()); + + length=field->pack_length(); + pos+= length; + + /* Make entry for create table */ + recinfo->length=length; + if (field->flags & BLOB_FLAG) + recinfo->type= FIELD_BLOB; + else if (use_packed_rows && + field->real_type() == MYSQL_TYPE_STRING && + length >= MIN_STRING_LENGTH_TO_PACK_ROWS) + recinfo->type=FIELD_SKIP_ENDSPACE; + else + recinfo->type=FIELD_NORMAL; + + field->table_name= &table->alias; + } + + //param->recinfo=recinfo; + //store_record(table,s->default_values); // Make empty default record + + if (thd->variables.tmp_table_size == ~ (ulonglong) 0) // No limit + share->max_rows= ~(ha_rows) 0; + else + share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ? + min(thd->variables.tmp_table_size, + thd->variables.max_heap_table_size) : + thd->variables.tmp_table_size) / + share->reclength); + set_if_bigger(share->max_rows,1); // For dummy start options + + + //// keyinfo= param->keyinfo; + if (TRUE) + { + DBUG_PRINT("info",("Creating group key in temporary table")); + share->keys=1; + share->uniques= test(using_unique_constraint); + table->key_info=keyinfo; + keyinfo->key_part=key_part_info; + keyinfo->flags=HA_NOSAME; + keyinfo->usable_key_parts= keyinfo->key_parts= 1; + keyinfo->key_length=0; + keyinfo->rec_per_key=0; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->name= (char*) "weedout_key"; + { + key_part_info->null_bit=0; + key_part_info->field= field; + key_part_info->offset= field->offset(table->record[0]); + key_part_info->length= (uint16) field->key_length(); + key_part_info->type= (uint8) field->key_type(); + key_part_info->key_type = FIELDFLAG_BINARY; + if (!using_unique_constraint) + { + if (!(key_field= field->new_key_field(thd->mem_root, table, + group_buff, + field->null_ptr, + field->null_bit))) + goto err; + key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; //todo need this? + } + keyinfo->key_length+= key_part_info->length; + } + } + + if (thd->is_fatal_error) // If end of memory + goto err; + share->db_record_offset= 1; + if (share->db_type() == TMP_ENGINE_HTON) + { + recinfo++; + if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0)) + goto err; + } + sjtbl->start_recinfo= start_recinfo; + sjtbl->recinfo= recinfo; + if (open_tmp_table(table)) + goto err; + + thd->mem_root= mem_root_save; + DBUG_RETURN(table); + +err: + thd->mem_root= mem_root_save; + free_tmp_table(thd,table); /* purecov: inspected */ + if (temp_pool_slot != MY_BIT_NONE) + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); + DBUG_RETURN(NULL); /* purecov: inspected */ +} + + +/* + SemiJoinDuplicateElimination: Reset the temporary table +*/ + +int do_sj_reset(SJ_TMP_TABLE *sj_tbl) +{ + DBUG_ENTER("do_sj_reset"); + if (sj_tbl->tmp_table) + { + int rc= sj_tbl->tmp_table->file->ha_delete_all_rows(); + DBUG_RETURN(rc); + } + sj_tbl->have_degenerate_row= FALSE; + DBUG_RETURN(0); +} + +/* + SemiJoinDuplicateElimination: Weed out duplicate row combinations + + SYNPOSIS + do_sj_dups_weedout() + thd Thread handle + sjtbl Duplicate weedout table + + DESCRIPTION + Try storing current record combination of outer tables (i.e. their + rowids) in the temporary table. This records the fact that we've seen + this record combination and also tells us if we've seen it before. 
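+
+    To illustrate the stored value (rowid lengths are made up): for two
+    outer tables whose rowids are 6 and 8 bytes long, one of which can be
+    NULL-complemented, the single varchar column holds
+
+      [1 NULL-indicator byte][6-byte rowid of table 1][8-byte rowid of table 2]
+
+    and a duplicate-key error on the insert below means this row combination
+    has been seen before.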
+ + RETURN + -1 Error + 1 The row combination is a duplicate (discard it) + 0 The row combination is not a duplicate (continue) +*/ + +int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) +{ + int error; + SJ_TMP_TABLE::TAB *tab= sjtbl->tabs; + SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end; + uchar *ptr; + uchar *nulls_ptr; + + DBUG_ENTER("do_sj_dups_weedout"); + + if (sjtbl->is_degenerate) + { + if (sjtbl->have_degenerate_row) + DBUG_RETURN(1); + + sjtbl->have_degenerate_row= TRUE; + DBUG_RETURN(0); + } + + ptr= sjtbl->tmp_table->record[0] + 1; + nulls_ptr= ptr; + + /* Put the the rowids tuple into table->record[0]: */ + + // 1. Store the length + if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1) + { + *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes); + ptr++; + } + else + { + int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes); + ptr += 2; + } + + // 2. Zero the null bytes + if (sjtbl->null_bytes) + { + bzero(ptr, sjtbl->null_bytes); + ptr += sjtbl->null_bytes; + } + + // 3. Put the rowids + for (uint i=0; tab != tab_end; tab++, i++) + { + handler *h= tab->join_tab->table->file; + if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row) + { + /* It's a NULL-complemented row */ + *(nulls_ptr + tab->null_byte) |= tab->null_bit; + bzero(ptr + tab->rowid_offset, h->ref_length); + } + else + { + /* Copy the rowid value */ + memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length); + } + } + + error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]); + if (error) + { + /* create_internal_tmp_table_from_heap will generate error if needed */ + if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) + DBUG_RETURN(1); /* Duplicate */ + if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table, + sjtbl->start_recinfo, + &sjtbl->recinfo, error, 1)) + DBUG_RETURN(-1); + } + DBUG_RETURN(0); +} + + +/* + Setup the strategies to eliminate semi-join duplicates. + + SYNOPSIS + setup_semijoin_dups_elimination() + join Join to process + options Join options (needed to see if join buffering will be + used or not) + no_jbuf_after Another bit of information re where join buffering will + be used. + + DESCRIPTION + Setup the strategies to eliminate semi-join duplicates. ATM there are 4 + strategies: + + 1. DuplicateWeedout (use of temptable to remove duplicates based on rowids + of row combinations) + 2. FirstMatch (pick only the 1st matching row combination of inner tables) + 3. LooseScan (scanning the sj-inner table in a way that groups duplicates + together and picking the 1st one) + 4. SJ-Materialization. + + The join order has "duplicate-generating ranges", and every range is + served by one strategy or a combination of FirstMatch with with some + other strategy. + + "Duplicate-generating range" is defined as a range within the join order + that contains all of the inner tables of a semi-join. All ranges must be + disjoint, if tables of several semi-joins are interleaved, then the ranges + are joined together, which is equivalent to converting + SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 ) + to + SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...) + . + + Applicability conditions are as follows: + + DuplicateWeedout strategy + ~~~~~~~~~~~~~~~~~~~~~~~~~ + + (ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)* + +------+ +=========================+ +---+ + (1) (2) (3) + + (1) - Prefix of OuterTables (those that participate in + IN-equality and/or are correlated with subquery) and outer + Non-correlated tables. 
+ (2) - The handled range. The range starts with the first sj-inner + table, and covers all sj-inner and outer tables + Within the range, Inner, Outer, outer non-correlated tables + may follow in any order. + (3) - The suffix of outer non-correlated tables. + + FirstMatch strategy + ~~~~~~~~~~~~~~~~~~~ + + (ot|nt)* [ it ((it|nt)* it) ] (nt)* + +------+ +==================+ +---+ + (1) (2) (3) + + (1) - Prefix of outer and non-correlated tables + (2) - The handled range, which may contain only inner and + non-correlated tables. + (3) - The suffix of outer non-correlated tables. + + LooseScan strategy + ~~~~~~~~~~~~~~~~~~ + + (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)* + +--------+ +===========+ +=============+ +------+ + (1) (2) (3) (4) + + (1) - Prefix that may contain any outer tables. The prefix must contain + all the non-trivially correlated outer tables. (non-trivially means + that the correlation is not just through the IN-equality). + + (2) - Inner table for which the LooseScan scan is performed. + + (3) - The remainder of the duplicate-generating range. It is served by + application of FirstMatch strategy, with the exception that + outer IN-correlated tables are considered to be non-correlated. + + (4) - THe suffix of outer and outer non-correlated tables. + + + The choice between the strategies is made by the join optimizer (see + advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()). + This function sets up all fields/structures/etc needed for execution except + for setup/initialization of semi-join materialization which is done in + setup_sj_materialization() (todo: can't we move that to here also?) + + RETURN + FALSE OK + TRUE Out of memory error +*/ + +int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after) +{ + uint i; + THD *thd= join->thd; + DBUG_ENTER("setup_semijoin_dups_elimination"); + + for (i= join->const_tables ; i < join->tables ; i++) + { + JOIN_TAB *tab=join->join_tab + i; + POSITION *pos= join->best_positions + i; + uint keylen, keyno; + switch (pos->sj_strategy) { + case SJ_OPT_MATERIALIZE: + case SJ_OPT_MATERIALIZE_SCAN: + /* Do nothing */ + i += pos->n_sj_tables; + break; + case SJ_OPT_LOOSE_SCAN: + { + /* We jump from the last table to the first one */ + tab->loosescan_match_tab= tab + pos->n_sj_tables - 1; + + /* Calculate key length */ + keylen= 0; + keyno= pos->loosescan_key; + for (uint kp=0; kp < pos->loosescan_parts; kp++) + keylen += tab->table->key_info[keyno].key_part[kp].store_length; + + tab->loosescan_key_len= keylen; + if (pos->n_sj_tables > 1) + tab[pos->n_sj_tables - 1].do_firstmatch= tab; + i += pos->n_sj_tables; + break; + } + case SJ_OPT_DUPS_WEEDOUT: + { + /* + Check for join buffering. If there is one, move the first table + forwards, but do not destroy other duplicate elimination methods. 
+ */ + uint first_table= i; + for (uint j= i; j < i + pos->n_sj_tables; j++) + { + if (join->best_positions[j].use_join_buffer && j <= no_jbuf_after) + { + first_table= join->const_tables; + break; + } + } + + SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; + SJ_TMP_TABLE::TAB *last_tab= sjtabs; + uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) + uint jt_null_bits= 0; // # null bits in tuple bytes + /* + Walk through the range and remember + - tables that need their rowids to be put into temptable + - the last outer table + */ + for (JOIN_TAB *j=join->join_tab + first_table; + j < join->join_tab + i + pos->n_sj_tables; j++) + { + if (sj_table_is_included(join, j)) + { + last_tab->join_tab= j; + last_tab->rowid_offset= jt_rowid_offset; + jt_rowid_offset += j->table->file->ref_length; + if (j->table->maybe_null) + { + last_tab->null_byte= jt_null_bits / 8; + last_tab->null_bit= jt_null_bits++; + } + last_tab++; + j->table->prepare_for_position(); + j->keep_current_rowid= TRUE; + } + } + + SJ_TMP_TABLE *sjtbl; + if (jt_rowid_offset) /* Temptable has at least one rowid */ + { + uint tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || + !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + memcpy(sjtbl->tabs, sjtabs, tabs_size); + sjtbl->is_degenerate= FALSE; + sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); + sjtbl->rowid_len= jt_rowid_offset; + sjtbl->null_bits= jt_null_bits; + sjtbl->null_bytes= (jt_null_bits + 7)/8; + sjtbl->tmp_table= + create_duplicate_weedout_tmp_table(thd, + sjtbl->rowid_len + + sjtbl->null_bytes, + sjtbl); + join->sj_tmp_tables.push_back(sjtbl->tmp_table); + } + else + { + /* + This is a special case where the entire subquery predicate does + not depend on anything at all, ie this is + WHERE const IN (uncorrelated select) + */ + if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) + DBUG_RETURN(TRUE); /* purecov: inspected */ + sjtbl->tmp_table= NULL; + sjtbl->is_degenerate= TRUE; + sjtbl->have_degenerate_row= FALSE; + } + join->join_tab[first_table].flush_weedout_table= sjtbl; + join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl; + + i += pos->n_sj_tables; + break; + } + case SJ_OPT_FIRST_MATCH: + { + JOIN_TAB *j, *jump_to= tab-1; + for (j= tab; j != tab + pos->n_sj_tables; j++) + { + if (!tab->emb_sj_nest) + jump_to= tab; + else + { + j->first_sj_inner_tab= tab; + j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; + } + } + j[-1].do_firstmatch= jump_to; + i += pos->n_sj_tables; + break; + } + case SJ_OPT_NONE: + break; + } + } + DBUG_RETURN(FALSE); +} + + +/* + Destroy all temporary tables created by NL-semijoin runtime +*/ + +void destroy_sj_tmp_tables(JOIN *join) +{ + List_iterator<TABLE> it(join->sj_tmp_tables); + TABLE *table; + while ((table= it++)) + { + /* + SJ-Materialization tables are initialized for either sequential reading + or index lookup, DuplicateWeedout tables are not initialized for read + (we only write to them), so need to call ha_index_or_rnd_end. + */ + table->file->ha_index_or_rnd_end(); + free_tmp_table(join->thd, table); + } + join->sj_tmp_tables.empty(); + join->sjm_info_list.empty(); +} + + +/* + Remove all records from all temp tables used by NL-semijoin runtime + + SYNOPSIS + clear_sj_tmp_tables() + join The join to remove tables for + + DESCRIPTION + Remove all records from all temp tables used by NL-semijoin runtime. 
This
+    must be done before every join re-execution.
+*/
+
+int clear_sj_tmp_tables(JOIN *join)
+{
+  int res;
+  List_iterator<TABLE> it(join->sj_tmp_tables);
+  TABLE *table;
+  while ((table= it++))
+  {
+    if ((res= table->file->ha_delete_all_rows()))
+      return res; /* purecov: inspected */
+  }
+
+  SJ_MATERIALIZATION_INFO *sjm;
+  List_iterator<SJ_MATERIALIZATION_INFO> it2(join->sjm_info_list);
+  while ((sjm= it2++))
+  {
+    sjm->materialized= FALSE;
+  }
+  return 0;
+}
+
+
+/*
+  Check if the table's rowid is included in the temptable
+
+  SYNOPSIS
+    sj_table_is_included()
+      join      The join
+      join_tab  The table to be checked
+
+  DESCRIPTION
+    SemiJoinDuplicateElimination: check if the table's rowid should be
+    included in the temptable. This is so if
+
+    1. The table is not embedded within some semi-join nest, or
+    2. The table has been pulled out of a semi-join nest, or
+
+    3. The table is functionally dependent on some previous table
+
+    [4. This is also true for constant tables that can't be
+        NULL-complemented but this function is not called for such tables]
+
+  RETURN
+    TRUE  - Include table's rowid
+    FALSE - Don't
+*/
+
+static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab)
+{
+  if (join_tab->emb_sj_nest)
+    return FALSE;
+
+  /* Check if this table is functionally dependent on the tables that
+     are within the same outer join nest
+  */
+  TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding;
+  if (join_tab->type == JT_EQ_REF)
+  {
+    table_map depends_on= 0;
+    uint idx;
+
+    for (uint kp= 0; kp < join_tab->ref.key_parts; kp++)
+      depends_on |= join_tab->ref.items[kp]->used_tables();
+
+    Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS);
+    while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END)
+    {
+      JOIN_TAB *ref_tab= join->map2table[idx];
+      if (embedding != ref_tab->table->pos_in_table_list->embedding)
+        return TRUE;
+    }
+    /* Ok, functionally dependent */
+    return FALSE;
+  }
+  /* Not functionally dependent => need to include */
+  return TRUE;
+}
+
+
+/*
+  Index lookup-based subquery: save some flags for EXPLAIN output
+
+  SYNOPSIS
+    save_index_subquery_explain_info()
+      join_tab  Subquery's join tab (there is only one as index lookup is
+                only used for subqueries that are single-table SELECTs)
+      where     Subquery's WHERE clause
+
+  DESCRIPTION
+    For index lookup-based subquery (i.e. one executed with
+    subselect_uniquesubquery_engine or subselect_indexsubquery_engine),
+    check if its EXPLAIN output row should contain
+      "Using index" (TAB_INFO_USING_INDEX)
+      "Using Where" (TAB_INFO_USING_WHERE)
+      "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL)
+    and set appropriate flags in join_tab->packed_info.
+*/
+
+static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where)
+{
+  join_tab->packed_info= TAB_INFO_HAVE_VALUE;
+  if (join_tab->table->covering_keys.is_set(join_tab->ref.key))
+    join_tab->packed_info |= TAB_INFO_USING_INDEX;
+  if (where)
+    join_tab->packed_info |= TAB_INFO_USING_WHERE;
+  for (uint i = 0; i < join_tab->ref.key_parts; i++)
+  {
+    if (join_tab->ref.cond_guards[i])
+    {
+      join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL;
+      break;
+    }
+  }
+}
+
+
+/*
+  Check if the join can be rewritten to [unique_]indexsubquery_engine
+
+  DESCRIPTION
+    Check if the join can be changed into [unique_]indexsubquery_engine.
+ + The check is done after join optimization, the idea is that if the join + has only one table and uses a [eq_]ref access generated from subselect's + IN-equality then we replace it with a subselect_indexsubquery_engine or a + subselect_uniquesubquery_engine. + + RETURN + 0 - Ok, rewrite done (stop join optimization and return) + 1 - Fatal error (stop join optimization and return) + -1 - No rewrite performed, continue with join optimization +*/ + +int rewrite_to_index_subquery_engine(JOIN *join) +{ + THD *thd= join->thd; + JOIN_TAB* join_tab=join->join_tab; + SELECT_LEX_UNIT *unit= join->unit; + DBUG_ENTER("rewrite_to_index_subquery_engine"); + /* + is this simple IN subquery? + */ + if (!join->group_list && !join->order && + join->unit->item && + join->unit->item->substype() == Item_subselect::IN_SUBS && + join->tables == 1 && join->conds && + !join->unit->is_union()) + { + if (!join->having) + { + Item *where= join->conds; + if (join_tab[0].type == JT_EQ_REF && + join_tab[0].ref.items[0]->name == in_left_expr_name) + { + remove_subq_pushed_predicates(join, &where); + save_index_subquery_explain_info(join_tab, where); + join_tab[0].type= JT_UNIQUE_SUBQUERY; + join->error= 0; + DBUG_RETURN(unit->item-> + change_engine(new + subselect_uniquesubquery_engine(thd, + join_tab, + unit->item, + where))); + } + else if (join_tab[0].type == JT_REF && + join_tab[0].ref.items[0]->name == in_left_expr_name) + { + remove_subq_pushed_predicates(join, &where); + save_index_subquery_explain_info(join_tab, where); + join_tab[0].type= JT_INDEX_SUBQUERY; + join->error= 0; + DBUG_RETURN(unit->item-> + change_engine(new + subselect_indexsubquery_engine(thd, + join_tab, + unit->item, + where, + NULL, + 0))); + } + } else if (join_tab[0].type == JT_REF_OR_NULL && + join_tab[0].ref.items[0]->name == in_left_expr_name && + join->having->name == in_having_cond) + { + join_tab[0].type= JT_INDEX_SUBQUERY; + join->error= 0; + join->conds= remove_additional_cond(join->conds); + save_index_subquery_explain_info(join_tab, join->conds); + DBUG_RETURN(unit->item-> + change_engine(new subselect_indexsubquery_engine(thd, + join_tab, + unit->item, + join->conds, + join->having, + 1))); + } + } + + DBUG_RETURN(-1); /* Haven't done the rewrite */ +} + + +/** + Remove additional condition inserted by IN/ALL/ANY transformation. + + @param conds condition for processing + + @return + new conditions +*/ + +static Item *remove_additional_cond(Item* conds) +{ + if (conds->name == in_additional_cond) + return 0; + if (conds->type() == Item::COND_ITEM) + { + Item_cond *cnd= (Item_cond*) conds; + List_iterator<Item> li(*(cnd->argument_list())); + Item *item; + while ((item= li++)) + { + if (item->name == in_additional_cond) + { + li.remove(); + if (cnd->argument_list()->elements == 1) + return cnd->argument_list()->head(); + return conds; + } + } + } + return conds; +} + + +/* + Remove the predicates pushed down into the subquery + + SYNOPSIS + remove_subq_pushed_predicates() + where IN Must be NULL + OUT The remaining WHERE condition, or NULL + + DESCRIPTION + Given that this join will be executed using (unique|index)_subquery, + without "checking NULL", remove the predicates that were pushed down + into the subquery. 
+ + If the subquery compares scalar values, we can remove the condition that + was wrapped into trig_cond (it will be checked when needed by the subquery + engine) + + If the subquery compares row values, we need to keep the wrapped + equalities in the WHERE clause: when the left (outer) tuple has both NULL + and non-NULL values, we'll do a full table scan and will rely on the + equalities corresponding to non-NULL parts of left tuple to filter out + non-matching records. + + TODO: We can remove the equalities that will be guaranteed to be true by the + fact that subquery engine will be using index lookup. This must be done only + for cases where there are no conversion errors of significance, e.g. 257 + that is searched in a byte. But this requires homogenization of the return + codes of all Field*::store() methods. +*/ + +static void remove_subq_pushed_predicates(JOIN *join, Item **where) +{ + if (join->conds->type() == Item::FUNC_ITEM && + ((Item_func *)join->conds)->functype() == Item_func::EQ_FUNC && + ((Item_func *)join->conds)->arguments()[0]->type() == Item::REF_ITEM && + ((Item_func *)join->conds)->arguments()[1]->type() == Item::FIELD_ITEM && + test_if_ref (join->conds, + (Item_field *)((Item_func *)join->conds)->arguments()[1], + ((Item_func *)join->conds)->arguments()[0])) + { + *where= 0; + return; + } +} + + diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h new file mode 100644 index 00000000000..d8716dbb77f --- /dev/null +++ b/sql/opt_subselect.h @@ -0,0 +1,368 @@ +/* */ + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +int check_and_do_in_subquery_rewrites(JOIN *join); +bool convert_join_subqueries_to_semijoins(JOIN *join); +int pull_out_semijoin_tables(JOIN *join); +bool optimize_semijoin_nests(JOIN *join, table_map all_table_map); + +// used by Loose_scan_opt +ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, + table_map remaining_tables); + +/* + This is a class for considering possible loose index scan optimizations. + It's usage pattern is as follows: + best_access_path() + { + Loose_scan_opt opt; + + opt.init() + for each index we can do ref access with + { + opt.next_ref_key(); + for each keyuse + opt.add_keyuse(); + opt.check_ref_access(); + } + + if (some criteria for range scans) + opt.check_range_access(); + + opt.get_best_option(); + } +*/ + +class Loose_scan_opt +{ +public: + /* All methods must check this before doing anything else */ + bool try_loosescan; + + /* + If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is + called sj-equality. If oeK depends only on preceding tables then such + equality is called 'bound'. + */ + ulonglong bound_sj_equalities; + + /* Accumulated properties of ref access we're now considering: */ + ulonglong handled_sj_equalities; + key_part_map loose_scan_keyparts; + uint max_loose_keypart; + bool part1_conds_met; + + /* + Use of quick select is a special case. 
Some of its properties: + */ + uint quick_uses_applicable_index; + uint quick_max_loose_keypart; + + /* Best loose scan method so far */ + uint best_loose_scan_key; + double best_loose_scan_cost; + double best_loose_scan_records; + KEYUSE *best_loose_scan_start_key; + + uint best_max_loose_keypart; + + Loose_scan_opt(): + try_loosescan(FALSE), + bound_sj_equalities(0), + quick_uses_applicable_index(FALSE) + { + UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */ + /* The following are protected by best_loose_scan_cost!= DBL_MAX */ + UNINIT_VAR(best_loose_scan_key); + UNINIT_VAR(best_loose_scan_records); + UNINIT_VAR(best_max_loose_keypart); + UNINIT_VAR(best_loose_scan_start_key); + } + + void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables) + { + /* + Discover the bound equalities. We need to do this if + 1. The next table is an SJ-inner table, and + 2. It is the first table from that semijoin, and + 3. We're not within a semi-join range (i.e. all semi-joins either have + all or none of their tables in join_table_map), except + s->emb_sj_nest (which we've just entered, see #2). + 4. All non-IN-equality correlation references from this sj-nest are + bound + 5. But some of the IN-equalities aren't (so this can't be handled by + FirstMatch strategy) + */ + best_loose_scan_cost= DBL_MAX; + if (!join->emb_sjm_nest && s->emb_sj_nest && // (1) + s->emb_sj_nest->sj_in_exprs < 64 && + ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2) + s->emb_sj_nest->sj_inner_tables) && // (2) + join->cur_sj_inner_tables == 0 && // (3) + !(remaining_tables & + s->emb_sj_nest->nested_join->sj_corr_tables) && // (4) + remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5) + optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN)) + { + /* This table is an LooseScan scan candidate */ + bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, + remaining_tables); + try_loosescan= TRUE; + DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx", + (longlong)bound_sj_equalities)); + } + } + + void next_ref_key() + { + handled_sj_equalities=0; + loose_scan_keyparts= 0; + max_loose_keypart= 0; + part1_conds_met= FALSE; + } + + void add_keyuse(table_map remaining_tables, KEYUSE *keyuse) + { + if (try_loosescan && keyuse->sj_pred_no != UINT_MAX) + { + if (!(remaining_tables & keyuse->used_tables)) + { + /* + This allows to use equality propagation to infer that some + sj-equalities are bound. + */ + bound_sj_equalities |= 1ULL << keyuse->sj_pred_no; + } + else + { + handled_sj_equalities |= 1ULL << keyuse->sj_pred_no; + loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart; + set_if_bigger(max_loose_keypart, keyuse->keypart); + } + } + } + + bool have_a_case() { return test(handled_sj_equalities); } + + void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, + table_map found_part) + { + /* + Check if we can use LooseScan semi-join strategy. We can if + 1. This is the right table at right location + 2. All IN-equalities are either + - "bound", ie. the outer_expr part refers to the preceding tables + - "handled", ie. covered by the index we're considering + 3. Index order allows to enumerate subquery's duplicate groups in + order. 
This happens when the index definition matches this + pattern: + + (handled_col|bound_col)* (other_col|bound_col) + + */ + if (try_loosescan && // (1) + (handled_sj_equalities | bound_sj_equalities) == // (2) + PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2) + (PREV_BITS(key_part_map, max_loose_keypart+1) & // (3) + (found_part | loose_scan_keyparts)) == // (3) + (found_part | loose_scan_keyparts) && // (3) + !key_uses_partial_cols(s->table, key)) + { + /* Ok, can use the strategy */ + part1_conds_met= TRUE; + if (s->quick && s->quick->index == key && + s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) + { + quick_uses_applicable_index= TRUE; + quick_max_loose_keypart= max_loose_keypart; + } + DBUG_PRINT("info", ("Can use LooseScan scan")); + + /* + Check if this is a special case where there are no usable bound + IN-equalities, i.e. we have + + outer_expr IN (SELECT innertbl.key FROM ...) + + and outer_expr cannot be evaluated yet, so it's actually full + index scan and not a ref access + */ + if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ + s->table->covering_keys.is_set(key)) + { + DBUG_PRINT("info", ("Can use full index scan for LooseScan")); + + /* Calculate the cost of complete loose index scan. */ + double records= rows2double(s->table->file->stats.records); + + /* The cost is entire index scan cost (divided by 2) */ + double read_time= s->table->file->index_only_read_time(key, records); + + /* + Now find out how many different keys we will get (for now we + ignore the fact that we have "keypart_i=const" restriction for + some key components, that may make us think think that loose + scan will produce more distinct records than it actually will) + */ + ulong rpc; + if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart])) + records= records / rpc; + + // TODO: previous version also did /2 + if (read_time < best_loose_scan_cost) + { + best_loose_scan_key= key; + best_loose_scan_cost= read_time; + best_loose_scan_records= records; + best_max_loose_keypart= max_loose_keypart; + best_loose_scan_start_key= start_key; + } + } + } + } + + void check_ref_access_part2(uint key, KEYUSE *start_key, double records, + double read_time) + { + if (part1_conds_met && read_time < best_loose_scan_cost) + { + /* TODO use rec-per-key-based fanout calculations */ + best_loose_scan_key= key; + best_loose_scan_cost= read_time; + best_loose_scan_records= records; + best_max_loose_keypart= max_loose_keypart; + best_loose_scan_start_key= start_key; + } + } + + void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick) + { + /* TODO: this the right part restriction: */ + if (quick_uses_applicable_index && idx == join->const_tables && + quick->read_time < best_loose_scan_cost) + { + best_loose_scan_key= quick->index; + best_loose_scan_cost= quick->read_time; + /* this is ok because idx == join->const_tables */ + best_loose_scan_records= rows2double(quick->records); + best_max_loose_keypart= quick_max_loose_keypart; + best_loose_scan_start_key= NULL; + } + } + + void save_to_position(JOIN_TAB *tab, POSITION *pos) + { + pos->read_time= best_loose_scan_cost; + if (best_loose_scan_cost != DBL_MAX) + { + pos->records_read= best_loose_scan_records; + pos->key= best_loose_scan_start_key; + pos->loosescan_key= best_loose_scan_key; + pos->loosescan_parts= best_max_loose_keypart + 1; + pos->use_join_buffer= FALSE; + pos->table= tab; + // todo need ref_depend_map ? 
+ DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s", + tab->table->key_info[best_loose_scan_key].name, + best_loose_scan_start_key? "(ref access)": + "(range/index access)")); + } + } +}; + + +void advance_sj_state(JOIN *join, const table_map remaining_tables, + const JOIN_TAB *new_join_tab, uint idx, + double *current_record_count, double *current_read_time, + POSITION *loose_scan_pos); +void restore_prev_sj_state(const table_map remaining_tables, + const JOIN_TAB *tab, uint idx); + +void fix_semijoin_strategies_for_picked_join_order(JOIN *join); +bool setup_sj_materialization(JOIN_TAB *tab); + +TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg, + SJ_TMP_TABLE *sjtbl); +int do_sj_reset(SJ_TMP_TABLE *sj_tbl); +int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl); + +/* + Temporary table used by semi-join DuplicateElimination strategy + + This consists of the temptable itself and data needed to put records + into it. The table's DDL is as follows: + + CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col)); + + where the primary key can be replaced with unique constraint if n exceeds + the limit (as it is always done for query execution-time temptables). + + The record value is a concatenation of rowids of tables from the join we're + executing. If a join table is on the inner side of the outer join, we + assume that its rowid can be NULL and provide means to store this rowid in + the tuple. +*/ + +class SJ_TMP_TABLE : public Sql_alloc +{ +public: + /* + Array of pointers to tables whose rowids compose the temporary table + record. + */ + class TAB + { + public: + JOIN_TAB *join_tab; + uint rowid_offset; + ushort null_byte; + uchar null_bit; + }; + TAB *tabs; + TAB *tabs_end; + + /* + is_degenerate==TRUE means this is a special case where the temptable record + has zero length (and presence of a unique key means that the temptable can + have either 0 or 1 records). + In this case we don't create the physical temptable but instead record + its state in SJ_TMP_TABLE::have_degenerate_row. + */ + bool is_degenerate; + + /* + When is_degenerate==TRUE: the contents of the table (whether it has the + record or not). + */ + bool have_degenerate_row; + + /* table record parameters */ + uint null_bits; + uint null_bytes; + uint rowid_len; + + /* The temporary table itself (NULL means not created yet) */ + TABLE *tmp_table; + + /* + These are the members we got from temptable creation code. We'll need + them if we'll need to convert table from HEAP to MyISAM/Maria. 
+ */ + ENGINE_COLUMNDEF *start_recinfo; + ENGINE_COLUMNDEF *recinfo; + + /* Pointer to next table (next->start_idx > this->end_idx) */ + SJ_TMP_TABLE *next; +}; + +int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, + uint no_jbuf_after); +void destroy_sj_tmp_tables(JOIN *join); +int clear_sj_tmp_tables(JOIN *join); +int rewrite_to_index_subquery_engine(JOIN *join); + + + diff --git a/sql/sql_join_cache.cc b/sql/sql_join_cache.cc index 79234e6f7bf..cf7da5b5566 100644 --- a/sql/sql_join_cache.cc +++ b/sql/sql_join_cache.cc @@ -29,6 +29,7 @@ #include "mysql_priv.h" #include "sql_select.h" +#include "opt_subselect.h" /***************************************************************************** diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 2bb9fd80bfd..a4b46c54ad3 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -605,7 +605,7 @@ class st_select_lex: public st_select_lex_node List<TABLE_LIST> top_join_list; /* join list of the top level */ List<TABLE_LIST> *join_list; /* list for the currently parsed join */ TABLE_LIST *embedding; /* table embedding to the above list */ - List<TABLE_LIST> sj_nests; + List<TABLE_LIST> sj_nests; /* Semi-join nests within this join */ /* Beginning of the list of leaves in a FROM clause, where the leaves inlcude all base tables including view tables. The tables are connected diff --git a/sql/sql_select.cc b/sql/sql_select.cc index bb086a5d43a..30e06a9e7f7 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -31,17 +31,12 @@ #include "mysql_priv.h" #include "sql_select.h" #include "sql_cursor.h" +#include "opt_subselect.h" #include <m_ctype.h> #include <my_bit.h> #include <hash.h> #include <ft_global.h> -#if defined(WITH_MARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES) -#include "../storage/maria/ha_maria.h" -#define TMP_ENGINE_HTON maria_hton -#else -#define TMP_ENGINE_HTON myisam_hton -#endif const char *join_type_str[]={ "UNKNOWN","system","const","eq_ref","ref", "MAYBE_REF","ALL","range","index","fulltext", @@ -54,7 +49,6 @@ struct st_sargable_param; static void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array); static bool make_join_statistics(JOIN *join, TABLE_LIST *leaves, COND *conds, DYNAMIC_ARRAY *keyuse); -static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map); static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, JOIN_TAB *join_tab, uint tables, COND *conds, @@ -64,9 +58,9 @@ static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse, static int sort_keyuse(KEYUSE *a,KEYUSE *b); static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse, table_map used_tables); -static bool choose_plan(JOIN *join,table_map join_tables); +bool choose_plan(JOIN *join,table_map join_tables); -static void best_access_path(JOIN *join, JOIN_TAB *s, +void best_access_path(JOIN *join, JOIN_TAB *s, table_map remaining_tables, uint idx, bool disable_jbuf, double record_count, POSITION *pos, POSITION *loose_scan_pos); @@ -123,23 +117,11 @@ static void restore_prev_nj_state(JOIN_TAB *last); static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list); static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list, uint first_unused); -static -void advance_sj_state(JOIN *join, const table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos); -static void restore_prev_sj_state(const table_map remaining_tables, - const JOIN_TAB *tab, uint idx); static COND *optimize_cond(JOIN *join, 
COND *conds, List<TABLE_LIST> *join_list, Item::cond_result *cond_value); static bool const_expression_in_where(COND *conds,Item *item, Item **comp_item); -static bool open_tmp_table(TABLE *table); -static bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, - ENGINE_COLUMNDEF *start_recinfo, - ENGINE_COLUMNDEF **recinfo, - ulonglong options); static bool create_internal_tmp_table_from_heap2(THD *thd, TABLE *table, ENGINE_COLUMNDEF *start_recinfo, @@ -250,23 +232,12 @@ static bool init_sum_functions(Item_sum **func, Item_sum **end); static bool update_sum_func(Item_sum **func); static void select_describe(JOIN *join, bool need_tmp_table,bool need_order, bool distinct, const char *message=NullS); -static Item *remove_additional_cond(Item* conds); static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab); -static bool test_if_ref(COND *root_cond, - Item_field *left_item,Item *right_item); -static bool replace_where_subcondition(JOIN *join, Item **expr, - Item *old_cond, Item *new_cond, - bool do_fix_fields); void get_partial_join_cost(JOIN *join, uint idx, double *read_time_arg, double *record_count_arg); static uint make_join_orderinfo(JOIN *join); static int join_read_record_no_init(JOIN_TAB *tab); -static -bool subquery_types_allow_materialization(Item_in_subselect *in_subs); -int do_sj_reset(SJ_TMP_TABLE *sj_tbl); -TABLE *create_duplicate_weedout_tmp_table(THD *thd, uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl); Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, bool *inherited_fl); @@ -472,6 +443,7 @@ inline int setup_without_group(THD *thd, Item **ref_pointer_array, mysql_select assumes that all tables are already opened *****************************************************************************/ + /** Prepare of whole select (including sub queries in future). @@ -558,173 +530,13 @@ JOIN::prepare(Item ***rref_pointer_array, DBUG_RETURN(-1); /* purecov: inspected */ thd->lex->allow_sum_func= save_allow_sum_func; } - - /* - If - 1) this join is inside a subquery (of any type except FROM-clause - subquery) and - 2) we aren't just normalizing a VIEW - - Then perform early unconditional subquery transformations: - - Convert subquery predicate into semi-join, or - - Mark the subquery for execution using materialization, or - - Perform IN->EXISTS transformation, or - - Perform more/less ALL/ANY -> MIN/MAX rewrite - - Substitute trivial scalar-context subquery with its value - - TODO: for PS, make the whole block execute only on the first execution - */ - Item_subselect *subselect; - if (!thd->lex->view_prepare_mode && // (1) - (subselect= select_lex->master_unit()->item)) // (2) - { - Item_in_subselect *in_subs= NULL; - if (subselect->substype() == Item_subselect::IN_SUBS) - in_subs= (Item_in_subselect*)subselect; - - /* Resolve expressions and perform semantic analysis for IN query */ - if (in_subs != NULL) - /* - TODO: Add the condition below to this if statement when we have proper - support for is_correlated handling for materialized semijoins. - If we were to add this condition now, the fix_fields() call in - convert_subq_to_sj() would force the flag is_correlated to be set - erroneously for prepared queries. - - thd->stmt_arena->state != Query_arena::PREPARED) - */ - { - /* - Check if the left and right expressions have the same # of - columns, i.e. we don't have a case like - (oe1, oe2) IN (SELECT ie1, ie2, ie3 ...) - - TODO why do we have this duplicated in IN->EXISTS transformers? 
- psergey-todo: fix these: grep for duplicated_subselect_card_check - */ - if (select_lex->item_list.elements != in_subs->left_expr->cols()) - { - my_error(ER_OPERAND_COLUMNS, MYF(0), in_subs->left_expr->cols()); - DBUG_RETURN(-1); - } - - SELECT_LEX *current= thd->lex->current_select; - thd->lex->current_select= current->return_after_parsing(); - char const *save_where= thd->where; - thd->where= "IN/ALL/ANY subquery"; - - bool failure= !in_subs->left_expr->fixed && - in_subs->left_expr->fix_fields(thd, &in_subs->left_expr); - thd->lex->current_select= current; - thd->where= save_where; - if (failure) - DBUG_RETURN(-1); /* purecov: deadcode */ - } - DBUG_PRINT("info", ("Checking if subq can be converted to semi-join")); - /* - Check if we're in subquery that is a candidate for flattening into a - semi-join (which is done in flatten_subqueries()). The - requirements are: - 1. Subquery predicate is an IN/=ANY subq predicate - 2. Subquery is a single SELECT (not a UNION) - 3. Subquery does not have GROUP BY or ORDER BY - 4. Subquery does not use aggregate functions or HAVING - 5. Subquery predicate is at the AND-top-level of ON/WHERE clause - 6. We are not in a subquery of a single table UPDATE/DELETE that - doesn't have a JOIN (TODO: We should handle this at some - point by switching to multi-table UPDATE/DELETE) - 7. We're not in a confluent table-less subquery, like "SELECT 1". - 8. No execution method was already chosen (by a prepared statement) - 9. Parent select is not a confluent table-less select - 10. Neither parent nor child select have STRAIGHT_JOIN option. - */ - if (optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN) && - in_subs && // 1 - !select_lex->is_part_of_union() && // 2 - !select_lex->group_list.elements && !order && // 3 - !having && !select_lex->with_sum_func && // 4 - thd->thd_marker.emb_on_expr_nest && // 5 - select_lex->outer_select()->join && // 6 - select_lex->master_unit()->first_select()->leaf_tables && // 7 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED && // 8 - select_lex->outer_select()->leaf_tables && // 9 - !((select_options | select_lex->outer_select()->join->select_options) - & SELECT_STRAIGHT_JOIN)) // 10 - { - DBUG_PRINT("info", ("Subquery is semi-join conversion candidate")); - - (void)subquery_types_allow_materialization(in_subs); - - in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest; - - /* Register the subquery for further processing in flatten_subqueries() */ - select_lex-> - outer_select()->join->sj_subselects.append(thd->mem_root, in_subs); - in_subs->expr_join_nest= thd->thd_marker.emb_on_expr_nest; - } - else - { - DBUG_PRINT("info", ("Subquery can't be converted to semi-join")); - /* - Check if the subquery predicate can be executed via materialization. - The required conditions are: - 1. Subquery predicate is an IN/=ANY subq predicate - 2. Subquery is a single SELECT (not a UNION) - 3. Subquery is not a table-less query. In this case there is no - point in materializing. - 3A The upper query is not a confluent SELECT ... FROM DUAL. We - can't do materialization for SELECT .. FROM DUAL because it - does not call setup_subquery_materialization(). We could make - SELECT ... FROM DUAL call that function but that doesn't seem - to be the case that is worth handling. - 4. Subquery predicate is a top-level predicate - (this implies it is not negated) - TODO: this is a limitation that should be lifted once we - implement correct NULL semantics (WL#3830) - 5. 
Subquery is non-correlated - TODO: - This is an overly restrictive condition. It can be extended to: - (Subquery is non-correlated || - Subquery is correlated to any query outer to IN predicate || - (Subquery is correlated to the immediate outer query && - Subquery !contains {GROUP BY, ORDER BY [LIMIT], - aggregate functions}) && subquery predicate is not under "NOT IN")) - 6. No execution method was already chosen (by a prepared statement). - - (*) The subquery must be part of a SELECT statement. The current - condition also excludes multi-table update statements. - - We have to determine whether we will perform subquery materialization - before calling the IN=>EXISTS transformation, so that we know whether to - perform the whole transformation or only that part of it which wraps - Item_in_subselect in an Item_in_optimizer. - */ - if (optimizer_flag(thd, OPTIMIZER_SWITCH_MATERIALIZATION) && - in_subs && // 1 - !select_lex->is_part_of_union() && // 2 - select_lex->master_unit()->first_select()->leaf_tables && // 3 - thd->lex->sql_command == SQLCOM_SELECT && // * - select_lex->outer_select()->leaf_tables && // 3A - subquery_types_allow_materialization(in_subs)) - { - // psergey-todo: duplicated_subselect_card_check: where it's done? - if (in_subs->is_top_level_item() && // 4 - !in_subs->is_correlated && // 5 - in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED) // 6 - in_subs->exec_method= Item_in_subselect::MATERIALIZATION; - } - - Item_subselect::trans_res trans_res; - if ((trans_res= subselect->select_transformer(this)) != - Item_subselect::RES_OK) - { - select_lex->fix_prepare_information(thd, &conds, &having); - DBUG_RETURN((trans_res == Item_subselect::RES_ERROR)); - } - } - } + + int res= check_and_do_in_subquery_rewrites(this); select_lex->fix_prepare_information(thd, &conds, &having); + + if (res) + DBUG_RETURN(res); if (order) { @@ -855,535 +667,6 @@ JOIN::prepare(Item ***rref_pointer_array, } -/** - @brief Check if subquery's compared types allow materialization. - - @param in_subs Subquery predicate, updated as follows: - types_allow_materialization TRUE if subquery materialization is allowed. - sjm_scan_allowed If types_allow_materialization is TRUE, - indicates whether it is possible to use subquery - materialization and scan the materialized table. - - @retval TRUE If subquery types allow materialization. - @retval FALSE Otherwise. - - @details - This is a temporary fix for BUG#36752. - - There are two subquery materialization strategies: - - 1. Materialize and do index lookups in the materialized table. See - BUG#36752 for description of restrictions we need to put on the - compared expressions. - - 2. Materialize and then do a full scan of the materialized table. At the - moment, this strategy's applicability criteria are even stricter than - in #1. - - This is so because of the following: consider an uncorrelated subquery - - ...WHERE (ot1.col1, ot2.col2 ...) IN (SELECT ie1,ie2,... FROM it1 ...) - - and a join order that could be used to do sjm-materialization: - - SJM-Scan(it1, it1), ot1, ot2 - - IN-equalities will be parts of conditions attached to the outer tables: - - ot1: ot1.col1 = ie1 AND ... (C1) - ot2: ot1.col2 = ie2 AND ... (C2) - - besides those there may be additional references to ie1 and ie2 - generated by equality propagation. The problem with evaluating C1 and - C2 is that ie{1,2} refer to subquery tables' columns, while we only have - current value of materialization temptable. 
Our solution is to - * require that all ie{N} are table column references. This allows - to copy the values of materialization temptable columns to the - original table's columns (see setup_sj_materialization for more - details) - * require that compared columns have exactly the same type. This is - a temporary measure to avoid BUG#36752-type problems. -*/ - -static -bool subquery_types_allow_materialization(Item_in_subselect *in_subs) -{ - DBUG_ENTER("subquery_types_allow_materialization"); - - DBUG_ASSERT(in_subs->left_expr->fixed); - - List_iterator<Item> it(in_subs->unit->first_select()->item_list); - uint elements= in_subs->unit->first_select()->item_list.elements; - - in_subs->types_allow_materialization= FALSE; // Assign default values - in_subs->sjm_scan_allowed= FALSE; - - bool all_are_fields= TRUE; - for (uint i= 0; i < elements; i++) - { - Item *outer= in_subs->left_expr->element_index(i); - Item *inner= it++; - all_are_fields &= (outer->real_item()->type() == Item::FIELD_ITEM && - inner->real_item()->type() == Item::FIELD_ITEM); - if (outer->result_type() != inner->result_type()) - DBUG_RETURN(FALSE); - switch (outer->result_type()) { - case STRING_RESULT: - if (outer->is_datetime() != inner->is_datetime()) - DBUG_RETURN(FALSE); - - if (!(outer->collation.collation == inner->collation.collation - /*&& outer->max_length <= inner->max_length */)) - DBUG_RETURN(FALSE); - /*case INT_RESULT: - if (!(outer->unsigned_flag ^ inner->unsigned_flag)) - DBUG_RETURN(FALSE); */ - default: - ;/* suitable for materialization */ - } - } - in_subs->types_allow_materialization= TRUE; - in_subs->sjm_scan_allowed= all_are_fields; - DBUG_PRINT("info",("subquery_types_allow_materialization: ok, allowed")); - DBUG_RETURN(TRUE); -} - - -/* - Remove the predicates pushed down into the subquery - - SYNOPSIS - JOIN::remove_subq_pushed_predicates() - where IN Must be NULL - OUT The remaining WHERE condition, or NULL - - DESCRIPTION - Given that this join will be executed using (unique|index)_subquery, - without "checking NULL", remove the predicates that were pushed down - into the subquery. - - If the subquery compares scalar values, we can remove the condition that - was wrapped into trig_cond (it will be checked when needed by the subquery - engine) - - If the subquery compares row values, we need to keep the wrapped - equalities in the WHERE clause: when the left (outer) tuple has both NULL - and non-NULL values, we'll do a full table scan and will rely on the - equalities corresponding to non-NULL parts of left tuple to filter out - non-matching records. - - TODO: We can remove the equalities that will be guaranteed to be true by the - fact that subquery engine will be using index lookup. This must be done only - for cases where there are no conversion errors of significance, e.g. 257 - that is searched in a byte. But this requires homogenization of the return - codes of all Field*::store() methods. 
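The "257 searched in a byte" caveat above is easier to see in isolation. Below is a minimal standalone sketch (plain C++, not server code; store_into_tiny() is a made-up stand-in for a narrowing Field*::store()) of why the wrapped equality still has to be checked after the index lookup finds a row:

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  /* Crude stand-in for a narrowing Field::store(): clamp into a signed byte */
  static int8_t store_into_tiny(long v)
  {
    return (int8_t) std::max<long>(INT8_MIN, std::min<long>(v, INT8_MAX));
  }

  int main()
  {
    long   searched= 257;     /* constant coming from the outer query */
    int8_t col_val= 127;      /* value actually stored in the TINYINT column */

    /* The lookup key is built through store(), so the row is found ... */
    bool index_lookup_matches= (store_into_tiny(searched) == col_val);
    /* ... even though the original equality is false: */
    bool equality_holds= ((long) col_val == searched);

    printf("lookup match: %d, real equality: %d\n",
           index_lookup_matches, equality_holds);   /* prints 1, 0 */
    return 0;
  }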
-*/ - -void JOIN::remove_subq_pushed_predicates(Item **where) -{ - if (conds->type() == Item::FUNC_ITEM && - ((Item_func *)this->conds)->functype() == Item_func::EQ_FUNC && - ((Item_func *)conds)->arguments()[0]->type() == Item::REF_ITEM && - ((Item_func *)conds)->arguments()[1]->type() == Item::FIELD_ITEM && - test_if_ref (this->conds, - (Item_field *)((Item_func *)conds)->arguments()[1], - ((Item_func *)conds)->arguments()[0])) - { - *where= 0; - return; - } -} - - -/* - Index lookup-based subquery: save some flags for EXPLAIN output - - SYNOPSIS - save_index_subquery_explain_info() - join_tab Subquery's join tab (there is only one as index lookup is - only used for subqueries that are single-table SELECTs) - where Subquery's WHERE clause - - DESCRIPTION - For index lookup-based subquery (i.e. one executed with - subselect_uniquesubquery_engine or subselect_indexsubquery_engine), - check its EXPLAIN output row should contain - "Using index" (TAB_INFO_FULL_SCAN_ON_NULL) - "Using Where" (TAB_INFO_USING_WHERE) - "Full scan on NULL key" (TAB_INFO_FULL_SCAN_ON_NULL) - and set appropriate flags in join_tab->packed_info. -*/ - -static void save_index_subquery_explain_info(JOIN_TAB *join_tab, Item* where) -{ - join_tab->packed_info= TAB_INFO_HAVE_VALUE; - if (join_tab->table->covering_keys.is_set(join_tab->ref.key)) - join_tab->packed_info |= TAB_INFO_USING_INDEX; - if (where) - join_tab->packed_info |= TAB_INFO_USING_WHERE; - for (uint i = 0; i < join_tab->ref.key_parts; i++) - { - if (join_tab->ref.cond_guards[i]) - { - join_tab->packed_info |= TAB_INFO_FULL_SCAN_ON_NULL; - break; - } - } -} - - -/* - Check if the table's rowid is included in the temptable - - SYNOPSIS - sj_table_is_included() - join The join - join_tab The table to be checked - - DESCRIPTION - SemiJoinDuplicateElimination: check the table's rowid should be included - in the temptable. This is so if - - 1. The table is not embedded within some semi-join nest - 2. The has been pulled out of a semi-join nest, or - - 3. The table is functionally dependent on some previous table - - [4. This is also true for constant tables that can't be - NULL-complemented but this function is not called for such tables] - - RETURN - TRUE - Include table's rowid - FALSE - Don't -*/ - -static bool sj_table_is_included(JOIN *join, JOIN_TAB *join_tab) -{ - if (join_tab->emb_sj_nest) - return FALSE; - - /* Check if this table is functionally dependent on the tables that - are within the same outer join nest - */ - TABLE_LIST *embedding= join_tab->table->pos_in_table_list->embedding; - if (join_tab->type == JT_EQ_REF) - { - table_map depends_on= 0; - uint idx; - - for (uint kp= 0; kp < join_tab->ref.key_parts; kp++) - depends_on |= join_tab->ref.items[kp]->used_tables(); - - Table_map_iterator it(depends_on & ~PSEUDO_TABLE_BITS); - while ((idx= it.next_bit())!=Table_map_iterator::BITMAP_END) - { - JOIN_TAB *ref_tab= join->map2table[idx]; - if (embedding != ref_tab->table->pos_in_table_list->embedding) - return TRUE; - } - /* Ok, functionally dependent */ - return FALSE; - } - /* Not functionally dependent => need to include*/ - return TRUE; -} - - -/* - Setup the strategies to eliminate semi-join duplicates. - - SYNOPSIS - setup_semijoin_dups_elimination() - join Join to process - options Join options (needed to see if join buffering will be - used or not) - no_jbuf_after Another bit of information re where join buffering will - be used. - - DESCRIPTION - Setup the strategies to eliminate semi-join duplicates. ATM there are 4 - strategies: - - 1. 
DuplicateWeedout (use of temptable to remove duplicates based on rowids - of row combinations) - 2. FirstMatch (pick only the 1st matching row combination of inner tables) - 3. LooseScan (scanning the sj-inner table in a way that groups duplicates - together and picking the 1st one) - 4. SJ-Materialization. - - The join order has "duplicate-generating ranges", and every range is - served by one strategy or a combination of FirstMatch with with some - other strategy. - - "Duplicate-generating range" is defined as a range within the join order - that contains all of the inner tables of a semi-join. All ranges must be - disjoint, if tables of several semi-joins are interleaved, then the ranges - are joined together, which is equivalent to converting - SELECT ... WHERE oe1 IN (SELECT ie1 ...) AND oe2 IN (SELECT ie2 ) - to - SELECT ... WHERE (oe1, oe2) IN (SELECT ie1, ie2 ... ...) - . - - Applicability conditions are as follows: - - DuplicateWeedout strategy - ~~~~~~~~~~~~~~~~~~~~~~~~~ - - (ot|nt)* [ it ((it|ot|nt)* (it|ot))] (nt)* - +------+ +=========================+ +---+ - (1) (2) (3) - - (1) - Prefix of OuterTables (those that participate in - IN-equality and/or are correlated with subquery) and outer - Non-correlated tables. - (2) - The handled range. The range starts with the first sj-inner - table, and covers all sj-inner and outer tables - Within the range, Inner, Outer, outer non-correlated tables - may follow in any order. - (3) - The suffix of outer non-correlated tables. - - FirstMatch strategy - ~~~~~~~~~~~~~~~~~~~ - - (ot|nt)* [ it ((it|nt)* it) ] (nt)* - +------+ +==================+ +---+ - (1) (2) (3) - - (1) - Prefix of outer and non-correlated tables - (2) - The handled range, which may contain only inner and - non-correlated tables. - (3) - The suffix of outer non-correlated tables. - - LooseScan strategy - ~~~~~~~~~~~~~~~~~~ - - (ot|ct|nt) [ loosescan_tbl (ot|nt|it)* it ] (ot|nt)* - +--------+ +===========+ +=============+ +------+ - (1) (2) (3) (4) - - (1) - Prefix that may contain any outer tables. The prefix must contain - all the non-trivially correlated outer tables. (non-trivially means - that the correlation is not just through the IN-equality). - - (2) - Inner table for which the LooseScan scan is performed. - - (3) - The remainder of the duplicate-generating range. It is served by - application of FirstMatch strategy, with the exception that - outer IN-correlated tables are considered to be non-correlated. - - (4) - THe suffix of outer and outer non-correlated tables. - - - The choice between the strategies is made by the join optimizer (see - advance_sj_state() and fix_semijoin_strategies_for_picked_join_order()). - This function sets up all fields/structures/etc needed for execution except - for setup/initialization of semi-join materialization which is done in - setup_sj_materialization() (todo: can't we move that to here also?) 
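The DuplicateWeedout bookkeeping described above amounts to a unique constraint over the concatenated rowids of the tables in the handled range. A minimal standalone sketch, assuming std::set as a stand-in for the weedout temptable and ignoring NULL-able rowids and the degenerate zero-length case:

  #include <set>
  #include <string>
  #include <vector>

  struct Weedout_table                 /* toy counterpart of SJ_TMP_TABLE */
  {
    std::set<std::string> seen;        /* plays the PRIMARY KEY over the tuple */

    /*
      Returns true if this combination of rowids has been seen before (the
      row combination is a duplicate and must be skipped), false otherwise.
    */
    bool is_duplicate(const std::vector<std::string> &rowids)
    {
      std::string tuple;
      for (const std::string &rowid : rowids)
        tuple+= rowid;                 /* concatenation of the tables' rowids */
      return !seen.insert(tuple).second;
    }
  };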
- - RETURN - FALSE OK - TRUE Out of memory error -*/ - -int setup_semijoin_dups_elimination(JOIN *join, ulonglong options, - uint no_jbuf_after) -{ - uint i; - THD *thd= join->thd; - DBUG_ENTER("setup_semijoin_dups_elimination"); - - for (i= join->const_tables ; i < join->tables ; i++) - { - JOIN_TAB *tab=join->join_tab + i; - POSITION *pos= join->best_positions + i; - uint keylen, keyno; - switch (pos->sj_strategy) { - case SJ_OPT_MATERIALIZE: - case SJ_OPT_MATERIALIZE_SCAN: - /* Do nothing */ - i += pos->n_sj_tables; - break; - case SJ_OPT_LOOSE_SCAN: - { - /* We jump from the last table to the first one */ - tab->loosescan_match_tab= tab + pos->n_sj_tables - 1; - - /* Calculate key length */ - keylen= 0; - keyno= pos->loosescan_key; - for (uint kp=0; kp < pos->loosescan_parts; kp++) - keylen += tab->table->key_info[keyno].key_part[kp].store_length; - - tab->loosescan_key_len= keylen; - if (pos->n_sj_tables > 1) - tab[pos->n_sj_tables - 1].do_firstmatch= tab; - i += pos->n_sj_tables; - break; - } - case SJ_OPT_DUPS_WEEDOUT: - { - /* - Check for join buffering. If there is one, move the first table - forwards, but do not destroy other duplicate elimination methods. - */ - uint first_table= i; - for (uint j= i; j < i + pos->n_sj_tables; j++) - { - if (join->best_positions[j].use_join_buffer && j <= no_jbuf_after) - { - first_table= join->const_tables; - break; - } - } - - SJ_TMP_TABLE::TAB sjtabs[MAX_TABLES]; - SJ_TMP_TABLE::TAB *last_tab= sjtabs; - uint jt_rowid_offset= 0; // # tuple bytes are already occupied (w/o NULL bytes) - uint jt_null_bits= 0; // # null bits in tuple bytes - /* - Walk through the range and remember - - tables that need their rowids to be put into temptable - - the last outer table - */ - for (JOIN_TAB *j=join->join_tab + first_table; - j < join->join_tab + i + pos->n_sj_tables; j++) - { - if (sj_table_is_included(join, j)) - { - last_tab->join_tab= j; - last_tab->rowid_offset= jt_rowid_offset; - jt_rowid_offset += j->table->file->ref_length; - if (j->table->maybe_null) - { - last_tab->null_byte= jt_null_bits / 8; - last_tab->null_bit= jt_null_bits++; - } - last_tab++; - j->table->prepare_for_position(); - j->keep_current_rowid= TRUE; - } - } - - SJ_TMP_TABLE *sjtbl; - if (jt_rowid_offset) /* Temptable has at least one rowid */ - { - uint tabs_size= (last_tab - sjtabs) * sizeof(SJ_TMP_TABLE::TAB); - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE))) || - !(sjtbl->tabs= (SJ_TMP_TABLE::TAB*) thd->alloc(tabs_size))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - memcpy(sjtbl->tabs, sjtabs, tabs_size); - sjtbl->is_confluent= FALSE; - sjtbl->tabs_end= sjtbl->tabs + (last_tab - sjtabs); - sjtbl->rowid_len= jt_rowid_offset; - sjtbl->null_bits= jt_null_bits; - sjtbl->null_bytes= (jt_null_bits + 7)/8; - sjtbl->tmp_table= - create_duplicate_weedout_tmp_table(thd, - sjtbl->rowid_len + - sjtbl->null_bytes, - sjtbl); - join->sj_tmp_tables.push_back(sjtbl->tmp_table); - } - else - { - /* - This is confluent case where the entire subquery predicate does - not depend on anything at all, ie this is - WHERE const IN (uncorrelated select) - */ - if (!(sjtbl= (SJ_TMP_TABLE*)thd->alloc(sizeof(SJ_TMP_TABLE)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - sjtbl->tmp_table= NULL; - sjtbl->is_confluent= TRUE; - sjtbl->have_confluent_row= FALSE; - } - join->join_tab[first_table].flush_weedout_table= sjtbl; - join->join_tab[i + pos->n_sj_tables - 1].check_weed_out_table= sjtbl; - - i += pos->n_sj_tables; - break; - } - case SJ_OPT_FIRST_MATCH: - { - JOIN_TAB *j, *jump_to= 
tab-1; - for (j= tab; j != tab + pos->n_sj_tables; j++) - { - if (!tab->emb_sj_nest) - jump_to= tab; - else - { - j->first_sj_inner_tab= tab; - j->last_sj_inner_tab= tab + pos->n_sj_tables - 1; - } - } - j[-1].do_firstmatch= jump_to; - i += pos->n_sj_tables; - break; - } - case SJ_OPT_NONE: - break; - } - } - DBUG_RETURN(FALSE); -} - - -/* - Destroy all temporary tables created by NL-semijoin runtime -*/ - -static void destroy_sj_tmp_tables(JOIN *join) -{ - List_iterator<TABLE> it(join->sj_tmp_tables); - TABLE *table; - while ((table= it++)) - { - /* - SJ-Materialization tables are initialized for either sequential reading - or index lookup, DuplicateWeedout tables are not initialized for read - (we only write to them), so need to call ha_index_or_rnd_end. - */ - table->file->ha_index_or_rnd_end(); - free_tmp_table(join->thd, table); - } - join->sj_tmp_tables.empty(); - join->sjm_info_list.empty(); -} - - -/* - Remove all records from all temp tables used by NL-semijoin runtime - - SYNOPSIS - clear_sj_tmp_tables() - join The join to remove tables for - - DESCRIPTION - Remove all records from all temp tables used by NL-semijoin runtime. This - must be done before every join re-execution. -*/ - -static int clear_sj_tmp_tables(JOIN *join) -{ - int res; - List_iterator<TABLE> it(join->sj_tmp_tables); - TABLE *table; - while ((table= it++)) - { - if ((res= table->file->ha_delete_all_rows())) - return res; /* purecov: inspected */ - } - - SJ_MATERIALIZATION_INFO *sjm; - List_iterator<SJ_MATERIALIZATION_INFO> it2(join->sjm_info_list); - while ((sjm= it2++)) - { - sjm->materialized= FALSE; - } - return 0; -} - - /** global select optimisation. @@ -1411,7 +694,7 @@ JOIN::optimize() thd_proc_info(thd, "optimizing"); /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */ - if (flatten_subqueries()) + if (convert_join_subqueries_to_semijoins(this)) DBUG_RETURN(1); /* purecov: inspected */ /* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */ @@ -1926,66 +1209,10 @@ JOIN::optimize() /* Create all structures needed for materialized subquery execution. */ if (setup_subquery_materialization()) DBUG_RETURN(1); - - /* - is this simple IN subquery? 
- */ - if (!group_list && !order && - unit->item && unit->item->substype() == Item_subselect::IN_SUBS && - tables == 1 && conds && - !unit->is_union()) - { - if (!having) - { - Item *where= conds; - if (join_tab[0].type == JT_EQ_REF && - join_tab[0].ref.items[0]->name == in_left_expr_name) - { - remove_subq_pushed_predicates(&where); - save_index_subquery_explain_info(join_tab, where); - join_tab[0].type= JT_UNIQUE_SUBQUERY; - error= 0; - DBUG_RETURN(unit->item-> - change_engine(new - subselect_uniquesubquery_engine(thd, - join_tab, - unit->item, - where))); - } - else if (join_tab[0].type == JT_REF && - join_tab[0].ref.items[0]->name == in_left_expr_name) - { - remove_subq_pushed_predicates(&where); - save_index_subquery_explain_info(join_tab, where); - join_tab[0].type= JT_INDEX_SUBQUERY; - error= 0; - DBUG_RETURN(unit->item-> - change_engine(new - subselect_indexsubquery_engine(thd, - join_tab, - unit->item, - where, - NULL, - 0))); - } - } else if (join_tab[0].type == JT_REF_OR_NULL && - join_tab[0].ref.items[0]->name == in_left_expr_name && - having->name == in_having_cond) - { - join_tab[0].type= JT_INDEX_SUBQUERY; - error= 0; - conds= remove_additional_cond(conds); - save_index_subquery_explain_info(join_tab, conds); - DBUG_RETURN(unit->item-> - change_engine(new subselect_indexsubquery_engine(thd, - join_tab, - unit->item, - conds, - having, - 1))); - } - - } + + int res; + if ((res= rewrite_to_index_subquery_engine(this)) != -1) + DBUG_RETURN(res); /* Need to tell handlers that to play it safe, it should fetch all columns of the primary key of the tables: this is because MySQL may @@ -3118,547 +2345,16 @@ mysql_select(THD *thd, Item ***rref_pointer_array, } -int subq_sj_candidate_cmp(Item_in_subselect* const *el1, - Item_in_subselect* const *el2) -{ - return ((*el1)->sj_convert_priority < (*el2)->sj_convert_priority) ? 1 : - ( ((*el1)->sj_convert_priority == (*el2)->sj_convert_priority)? 0 : -1); -} +/** + Setup for execution all subqueries of a query, for which the optimizer + chose hash semi-join. - -inline Item * and_items(Item* cond, Item *item) -{ - return (cond? (new Item_cond_and(cond, item)) : item); -} - - -static TABLE_LIST *alloc_join_nest(THD *thd) -{ - TABLE_LIST *tbl; - if (!(tbl= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+ - sizeof(NESTED_JOIN)))) - return NULL; - tbl->nested_join= (NESTED_JOIN*) ((uchar*)tbl + - ALIGN_SIZE(sizeof(TABLE_LIST))); - return tbl; -} - - -void fix_list_after_tbl_changes(SELECT_LEX *new_parent, List<TABLE_LIST> *tlist) -{ - List_iterator<TABLE_LIST> it(*tlist); - TABLE_LIST *table; - while ((table= it++)) - { - if (table->on_expr) - table->on_expr->fix_after_pullout(new_parent, &table->on_expr); - if (table->nested_join) - fix_list_after_tbl_changes(new_parent, &table->nested_join->join_list); - } -} - - -/* - Convert a subquery predicate into a TABLE_LIST semi-join nest - - SYNOPSIS - convert_subq_to_sj() - parent_join Parent join, the one that has subq_pred in its WHERE/ON - clause - subq_pred Subquery predicate to be converted - - DESCRIPTION - Convert a subquery predicate into a TABLE_LIST semi-join nest. All the - prerequisites are already checked, so the conversion is always successfull. - - Prepared Statements: the transformation is permanent: - - Changes in TABLE_LIST structures are naturally permanent - - Item tree changes are performed on statement MEM_ROOT: - = we activate statement MEM_ROOT - = this function is called before the first fix_prepare_information - call. 
- - This is intended because the criteria for subquery-to-sj conversion remain - constant for the lifetime of the Prepared Statement. - - RETURN - FALSE OK - TRUE Out of memory error -*/ - -bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred) -{ - SELECT_LEX *parent_lex= parent_join->select_lex; - TABLE_LIST *emb_tbl_nest= NULL; - List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list; - THD *thd= parent_join->thd; - DBUG_ENTER("convert_subq_to_sj"); - - /* - 1. Find out where to put the predicate into. - Note: for "t1 LEFT JOIN t2" this will be t2, a leaf. - */ - if ((void*)subq_pred->expr_join_nest != (void*)1) - { - if (subq_pred->expr_join_nest->nested_join) - { - /* - We're dealing with - - ... [LEFT] JOIN ( ... ) ON (subquery AND whatever) ... - - The sj-nest will be inserted into the brackets nest. - */ - emb_tbl_nest= subq_pred->expr_join_nest; - emb_join_list= &emb_tbl_nest->nested_join->join_list; - } - else if (!subq_pred->expr_join_nest->outer_join) - { - /* - We're dealing with - - ... INNER JOIN tblX ON (subquery AND whatever) ... - - The sj-nest will be tblX's "sibling", i.e. another child of its - parent. This is ok because tblX is joined as an inner join. - */ - emb_tbl_nest= subq_pred->expr_join_nest->embedding; - if (emb_tbl_nest) - emb_join_list= &emb_tbl_nest->nested_join->join_list; - } - else if (!subq_pred->expr_join_nest->nested_join) - { - TABLE_LIST *outer_tbl= subq_pred->expr_join_nest; - TABLE_LIST *wrap_nest; - /* - We're dealing with - - ... LEFT JOIN tbl ON (on_expr AND subq_pred) ... - - we'll need to convert it into: - - ... LEFT JOIN ( tbl SJ (subq_tables) ) ON (on_expr AND subq_pred) ... - | | - |<----- wrap_nest ---->| - - Q: other subqueries may be pointing to this element. What to do? - A1: simple solution: copy *subq_pred->expr_join_nest= *parent_nest. - But we'll need to fix other pointers. - A2: Another way: have TABLE_LIST::next_ptr so the following - subqueries know the table has been nested. - A3: changes in the TABLE_LIST::outer_join will make everything work - automatically. 
- */ - if (!(wrap_nest= alloc_join_nest(parent_join->thd))) - { - DBUG_RETURN(TRUE); - } - wrap_nest->embedding= outer_tbl->embedding; - wrap_nest->join_list= outer_tbl->join_list; - wrap_nest->alias= (char*) "(sj-wrap)"; - - wrap_nest->nested_join->join_list.empty(); - wrap_nest->nested_join->join_list.push_back(outer_tbl); - - outer_tbl->embedding= wrap_nest; - outer_tbl->join_list= &wrap_nest->nested_join->join_list; - - /* - wrap_nest will take place of outer_tbl, so move the outer join flag - and on_expr - */ - wrap_nest->outer_join= outer_tbl->outer_join; - outer_tbl->outer_join= 0; - - wrap_nest->on_expr= outer_tbl->on_expr; - outer_tbl->on_expr= NULL; - - List_iterator<TABLE_LIST> li(*wrap_nest->join_list); - TABLE_LIST *tbl; - while ((tbl= li++)) - { - if (tbl == outer_tbl) - { - li.replace(wrap_nest); - break; - } - } - /* - Ok now wrap_nest 'contains' outer_tbl and we're ready to add the - semi-join nest into it - */ - emb_join_list= &wrap_nest->nested_join->join_list; - emb_tbl_nest= wrap_nest; - } - } - - TABLE_LIST *sj_nest; - NESTED_JOIN *nested_join; - if (!(sj_nest= alloc_join_nest(parent_join->thd))) - { - DBUG_RETURN(TRUE); - } - nested_join= sj_nest->nested_join; - - sj_nest->join_list= emb_join_list; - sj_nest->embedding= emb_tbl_nest; - sj_nest->alias= (char*) "(sj-nest)"; - sj_nest->sj_subq_pred= subq_pred; - /* Nests do not participate in those 'chains', so: */ - /* sj_nest->next_leaf= sj_nest->next_local= sj_nest->next_global == NULL*/ - emb_join_list->push_back(sj_nest); - - /* - nested_join->used_tables and nested_join->not_null_tables are - initialized in simplify_joins(). - */ - - /* - 2. Walk through subquery's top list and set 'embedding' to point to the - sj-nest. - */ - st_select_lex *subq_lex= subq_pred->unit->first_select(); - nested_join->join_list.empty(); - List_iterator_fast<TABLE_LIST> li(subq_lex->top_join_list); - TABLE_LIST *tl, *last_leaf; - while ((tl= li++)) - { - tl->embedding= sj_nest; - tl->join_list= &nested_join->join_list; - nested_join->join_list.push_back(tl); - } - - /* - Reconnect the next_leaf chain. - TODO: Do we have to put subquery's tables at the end of the chain? - Inserting them at the beginning would be a bit faster. - NOTE: We actually insert them at the front! That's because the order is - reversed in this list. - */ - for (tl= parent_lex->leaf_tables; tl->next_leaf; tl= tl->next_leaf) ; - tl->next_leaf= subq_lex->leaf_tables; - last_leaf= tl; - - /* - Same as above for next_local chain - (a theory: a next_local chain always starts with ::leaf_tables - because view's tables are inserted after the view) - */ - for (tl= parent_lex->leaf_tables; tl->next_local; tl= tl->next_local) ; - tl->next_local= subq_lex->leaf_tables; - - /* A theory: no need to re-connect the next_global chain */ - - /* 3. Remove the original subquery predicate from the WHERE/ON */ - - // The subqueries were replaced for Item_int(1) earlier - subq_pred->exec_method= - Item_in_subselect::SEMI_JOIN; // for subsequent executions - /*TODO: also reset the 'with_subselect' there. */ - - /* n. Adjust the parent_join->tables counter */ - uint table_no= parent_join->tables; - /* n. 
Walk through child's tables and adjust table->map */ - for (tl= subq_lex->leaf_tables; tl; tl= tl->next_leaf, table_no++) - { - tl->table->tablenr= table_no; - tl->table->map= ((table_map)1) << table_no; - SELECT_LEX *old_sl= tl->select_lex; - tl->select_lex= parent_join->select_lex; - for (TABLE_LIST *emb= tl->embedding; - emb && emb->select_lex == old_sl; - emb= emb->embedding) - emb->select_lex= parent_join->select_lex; - } - parent_join->tables += subq_lex->join->tables; - - /* - Put the subquery's WHERE into semi-join's sj_on_expr - Add the subquery-induced equalities too. - */ - SELECT_LEX *save_lex= thd->lex->current_select; - thd->lex->current_select=subq_lex; - if (!subq_pred->left_expr->fixed && - subq_pred->left_expr->fix_fields(thd, &subq_pred->left_expr)) - DBUG_RETURN(TRUE); - thd->lex->current_select=save_lex; - - sj_nest->nested_join->sj_corr_tables= subq_pred->used_tables(); - sj_nest->nested_join->sj_depends_on= subq_pred->used_tables() | - subq_pred->left_expr->used_tables(); - sj_nest->sj_on_expr= subq_lex->where; - - /* - Create the IN-equalities and inject them into semi-join's ON expression. - Additionally, for LooseScan strategy - - Record the number of IN-equalities. - - Create list of pointers to (oe1, ..., ieN). We'll need the list to - see which of the expressions are bound and which are not (for those - we'll produce a distinct stream of (ie_i1,...ie_ik). - - (TODO: can we just create a list of pointers and hope the expressions - will not substitute themselves on fix_fields()? or we need to wrap - them into Item_direct_view_refs and store pointers to those. The - pointers to Item_direct_view_refs are guaranteed to be stable as - Item_direct_view_refs doesn't substitute itself with anything in - Item_direct_view_ref::fix_fields. - */ - sj_nest->sj_in_exprs= subq_pred->left_expr->cols(); - sj_nest->nested_join->sj_outer_expr_list.empty(); - - if (subq_pred->left_expr->cols() == 1) - { - nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr); - Item_func_eq *item_eq= - new Item_func_eq(subq_pred->left_expr, subq_lex->ref_pointer_array[0]); - item_eq->in_equality_no= 0; - sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq); - } - else - { - for (uint i= 0; i < subq_pred->left_expr->cols(); i++) - { - nested_join->sj_outer_expr_list.push_back(subq_pred->left_expr-> - element_index(i)); - Item_func_eq *item_eq= - new Item_func_eq(subq_pred->left_expr->element_index(i), - subq_lex->ref_pointer_array[i]); - item_eq->in_equality_no= i; - sj_nest->sj_on_expr= and_items(sj_nest->sj_on_expr, item_eq); - } - } - /* Fix the created equality and AND */ - sj_nest->sj_on_expr->fix_fields(parent_join->thd, &sj_nest->sj_on_expr); - - /* - Walk through sj nest's WHERE and ON expressions and call - item->fix_table_changes() for all items. 
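The ON condition assembled above is the subquery's WHERE plus one IN-equality per column of the left expression. A string-based sketch of that fold (illustration only; it mirrors the and_items() helper removed elsewhere in this patch, and the sample column names are invented):

  #include <cstddef>
  #include <string>
  #include <vector>

  /* String stand-in for the and_items() fold over Item trees */
  static std::string and_items(const std::string &cond, const std::string &item)
  {
    return cond.empty() ? item : "(" + cond + " AND " + item + ")";
  }

  /* Subquery's WHERE first, then one outer=inner equality per column */
  static std::string build_sj_on_expr(const std::string &subq_where,
                                      const std::vector<std::string> &outer_cols,
                                      const std::vector<std::string> &inner_cols)
  {
    std::string on_expr= subq_where;
    for (std::size_t i= 0; i < outer_cols.size(); i++)
      on_expr= and_items(on_expr, outer_cols[i] + " = " + inner_cols[i]);
    return on_expr;
  }

  /*
    build_sj_on_expr("it.population > 1000000", {"ot.code"}, {"it.country_code"})
      returns "(it.population > 1000000 AND ot.code = it.country_code)"
  */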
- */ - sj_nest->sj_on_expr->fix_after_pullout(parent_lex, &sj_nest->sj_on_expr); - fix_list_after_tbl_changes(parent_lex, &sj_nest->nested_join->join_list); - - - /* Unlink the child select_lex so it doesn't show up in EXPLAIN: */ - subq_lex->master_unit()->exclude_level(); - - DBUG_EXECUTE("where", - print_where(sj_nest->sj_on_expr,"SJ-EXPR", QT_ORDINARY);); - - /* Inject sj_on_expr into the parent's WHERE or ON */ - if (emb_tbl_nest) - { - emb_tbl_nest->on_expr= and_items(emb_tbl_nest->on_expr, - sj_nest->sj_on_expr); - emb_tbl_nest->on_expr->fix_fields(parent_join->thd, &emb_tbl_nest->on_expr); - } - else - { - /* Inject into the WHERE */ - parent_join->conds= and_items(parent_join->conds, sj_nest->sj_on_expr); - parent_join->conds->fix_fields(parent_join->thd, &parent_join->conds); - parent_join->select_lex->where= parent_join->conds; - } - - if (subq_lex->ftfunc_list->elements) - { - Item_func_match *ifm; - List_iterator_fast<Item_func_match> li(*(subq_lex->ftfunc_list)); - while ((ifm= li++)) - parent_lex->ftfunc_list->push_front(ifm); - } - - DBUG_RETURN(FALSE); -} - - - -/* - Convert semi-join subquery predicates into semi-join join nests - - SYNOPSIS - JOIN::flatten_subqueries() - - DESCRIPTION - - Convert candidate subquery predicates into semi-join join nests. This - transformation is performed once in query lifetime and is irreversible. - - Conversion of one subquery predicate - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - We start with a join that has a semi-join subquery: - - SELECT ... - FROM ot, ... - WHERE oe IN (SELECT ie FROM it1 ... itN WHERE subq_where) AND outer_where - - and convert it into a semi-join nest: - - SELECT ... - FROM ot SEMI JOIN (it1 ... itN), ... - WHERE outer_where AND subq_where AND oe=ie - - that is, in order to do the conversion, we need to - - * Create the "SEMI JOIN (it1 .. itN)" part and add it into the parent - query's FROM structure. - * Add "AND subq_where AND oe=ie" into parent query's WHERE (or ON if - the subquery predicate was in an ON expression) - * Remove the subquery predicate from the parent query's WHERE - - Considerations when converting many predicates - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - A join may have at most MAX_TABLES tables. This may prevent us from - flattening all subqueries when the total number of tables in parent and - child selects exceeds MAX_TABLES. - We deal with this problem by flattening children's subqueries first and - then using a heuristic rule to determine each subquery predicate's - "priority". - - RETURN - FALSE OK - TRUE Error -*/ - -bool JOIN::flatten_subqueries() -{ - Query_arena *arena, backup; - Item_in_subselect **in_subq; - Item_in_subselect **in_subq_end; - DBUG_ENTER("JOIN::flatten_subqueries"); - - if (sj_subselects.elements() == 0) - DBUG_RETURN(FALSE); - - /* First, convert child join's subqueries. We proceed bottom-up here */ - for (in_subq= sj_subselects.front(), in_subq_end= sj_subselects.back(); - in_subq != in_subq_end; in_subq++) - { - st_select_lex *child_select= (*in_subq)->get_select_lex(); - JOIN *child_join= child_select->join; - child_join->outer_tables = child_join->tables; - - /* - child_select->where contains only the WHERE predicate of the - subquery itself here. We may be selecting from a VIEW, which has its - own predicate. The combined predicates are available in child_join->conds, - which was built by setup_conds() doing prepare_where() for all views. 
- */ - child_select->where= child_join->conds; - - if (child_join->flatten_subqueries()) - DBUG_RETURN(TRUE); - (*in_subq)->sj_convert_priority= - (*in_subq)->is_correlated * MAX_TABLES + child_join->outer_tables; - } - - // Temporary measure: disable semi-joins when they are together with outer - // joins. - for (TABLE_LIST *tbl= select_lex->leaf_tables; tbl; tbl=tbl->next_leaf) - { - TABLE_LIST *embedding= tbl->embedding; - if (tbl->on_expr || (tbl->embedding && !(embedding->sj_on_expr && - !embedding->embedding))) - { - in_subq= sj_subselects.front(); - arena= thd->activate_stmt_arena_if_needed(&backup); - goto skip_conversion; - } - } - - //dump_TABLE_LIST_struct(select_lex, select_lex->leaf_tables); - /* - 2. Pick which subqueries to convert: - sort the subquery array - - prefer correlated subqueries over uncorrelated; - - prefer subqueries that have greater number of outer tables; - */ - sj_subselects.sort(subq_sj_candidate_cmp); - // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES - /* Replace all subqueries to be flattened with Item_int(1) */ - arena= thd->activate_stmt_arena_if_needed(&backup); - for (in_subq= sj_subselects.front(); - in_subq != in_subq_end && - tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; - in_subq++) - { - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? - &conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(this, tree, *in_subq, new Item_int(1), - FALSE)) - DBUG_RETURN(TRUE); /* purecov: inspected */ - } - - for (in_subq= sj_subselects.front(); - in_subq != in_subq_end && - tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES; - in_subq++) - { - if (convert_subq_to_sj(this, *in_subq)) - DBUG_RETURN(TRUE); - } -skip_conversion: - /* - 3. Finalize (perform IN->EXISTS rewrite) the subqueries that we didn't - convert: - */ - for (; in_subq!= in_subq_end; in_subq++) - { - JOIN *child_join= (*in_subq)->unit->first_select()->join; - Item_subselect::trans_res res; - (*in_subq)->changed= 0; - (*in_subq)->fixed= 0; - - SELECT_LEX *save_select_lex= thd->lex->current_select; - thd->lex->current_select= (*in_subq)->unit->first_select(); - - res= (*in_subq)->select_transformer(child_join); - - thd->lex->current_select= save_select_lex; - - if (res == Item_subselect::RES_ERROR) - DBUG_RETURN(TRUE); - - (*in_subq)->changed= 1; - (*in_subq)->fixed= 1; - - Item *substitute= (*in_subq)->substitution; - bool do_fix_fields= !(*in_subq)->substitution->fixed; - Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? - &conds : &((*in_subq)->emb_on_expr_nest->on_expr); - if (replace_where_subcondition(this, tree, *in_subq, substitute, - do_fix_fields)) - DBUG_RETURN(TRUE); - (*in_subq)->substitution= NULL; - - if (!thd->stmt_arena->is_conventional()) - { - tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)? - &select_lex->prep_where : &((*in_subq)->emb_on_expr_nest->prep_on_expr); - - if (replace_where_subcondition(this, tree, *in_subq, substitute, - FALSE)) - DBUG_RETURN(TRUE); - } - } - - if (arena) - thd->restore_active_arena(arena, &backup); - sj_subselects.clear(); - DBUG_RETURN(FALSE); -} - - - -/** - Setup for execution all subqueries of a query, for which the optimizer - chose hash semi-join. 
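The candidate ordering and table-count cap applied by flatten_subqueries() above can be condensed into a small standalone sketch (illustration only; Subq_candidate is a stand-in for Item_in_subselect and 64 merely plays the role of MAX_TABLES):

  #include <algorithm>
  #include <vector>

  struct Subq_candidate                    /* stand-in for Item_in_subselect */
  {
    bool     is_correlated;
    unsigned outer_tables;                 /* tables in the parent of this subquery */
    unsigned child_tables;                 /* tables inside the subquery itself */
    unsigned priority() const              /* same shape as sj_convert_priority */
    { return is_correlated * 64 + outer_tables; }
  };

  /* Convert highest-priority candidates first, while the total stays < 64 */
  static void pick_candidates(std::vector<Subq_candidate> &cands,
                              unsigned parent_tables)
  {
    std::sort(cands.begin(), cands.end(),
              [](const Subq_candidate &a, const Subq_candidate &b)
              { return a.priority() > b.priority(); });
    for (const Subq_candidate &c : cands)
    {
      if (parent_tables + c.child_tables >= 64)
        break;                             /* the rest fall back to IN->EXISTS */
      parent_tables+= c.child_tables;      /* this one gets flattened */
    }
  }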
- - @details Iterate over all subqueries of the query, and if they are under an - IN predicate, and the optimizer chose to compute it via hash semi-join: - - try to initialize all data structures needed for the materialized execution - of the IN predicate, - - if this fails, then perform the IN=>EXISTS transformation which was - previously blocked during JOIN::prepare. + @details Iterate over all subqueries of the query, and if they are under an + IN predicate, and the optimizer chose to compute it via hash semi-join: + - try to initialize all data structures needed for the materialized execution + of the IN predicate, + - if this fails, then perform the IN=>EXISTS transformation which was + previously blocked during JOIN::prepare. This method is part of the "code generation" query processing phase. @@ -3674,7 +2370,6 @@ bool JOIN::flatten_subqueries() bool JOIN::setup_subquery_materialization() { - //psergey-merge: we don't have materialization so dont need that: for (SELECT_LEX_UNIT *un= select_lex->first_inner_unit(); un; un= un->next_unit()) { @@ -3695,267 +2390,30 @@ bool JOIN::setup_subquery_materialization() } -/* - Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate - - SYNOPSIS - find_eq_ref_candidate() - table Table to be checked - sj_inner_tables Bitmap of inner tables. eq_ref(inner_table) doesn't - count. - - DESCRIPTION - Check if table's KEYUSE elements have an eq_ref(outer_tables) candidate - - TODO - Check again if it is feasible to factor common parts with constant table - search - - RETURN - TRUE - There exists an eq_ref(outer-tables) candidate - FALSE - Otherwise -*/ +/***************************************************************************** + Create JOIN_TABS, make a guess about the table types, + Approximate how many records will be used in each table +*****************************************************************************/ -bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables) +static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select, + TABLE *table, + const key_map *keys,ha_rows limit) { - KEYUSE *keyuse= table->reginfo.join_tab->keyuse; - uint key; - - if (keyuse) + int error; + DBUG_ENTER("get_quick_record_count"); +#ifndef EMBEDDED_LIBRARY // Avoid compiler warning + uchar buff[STACK_BUFF_ALLOC]; +#endif + if (check_stack_overrun(thd, STACK_MIN_SIZE, buff)) + DBUG_RETURN(0); // Fatal error flag is set + if (select) { - while (1) /* For each key */ - { - key= keyuse->key; - KEY *keyinfo= table->key_info + key; - key_part_map bound_parts= 0; - if ((keyinfo->flags & (HA_NOSAME | HA_END_SPACE_KEY)) == HA_NOSAME) - { - do /* For all equalities on all key parts */ - { - /* Check if this is "t.keypart = expr(outer_tables) */ - if (!(keyuse->used_tables & sj_inner_tables) && - !(keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)) - { - bound_parts |= 1 << keyuse->keypart; - } - keyuse++; - } while (keyuse->key == key && keyuse->table == table); - - if (bound_parts == PREV_BITS(uint, keyinfo->key_parts)) - return TRUE; - if (keyuse->table != table) - return FALSE; - } - else - { - do - { - keyuse++; - if (keyuse->table != table) - return FALSE; - } - while (keyuse->key == key); - } - } - } - return FALSE; -} - - -/* - Pull tables out of semi-join nests, if possible - - SYNOPSIS - pull_out_semijoin_tables() - join The join where to do the semi-join flattening - - DESCRIPTION - Try to pull tables out of semi-join nests. 
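The eq_ref(outer_tables) test in find_eq_ref_candidate() above boils down to "every keypart of some unique key is equated to an expression that does not depend on the semi-join's inner tables". A standalone sketch of that keypart-bitmap check (illustration only; the types are stand-ins for KEYUSE/KEY, keyparts are assumed to number fewer than 32, and the REF_OR_NULL exclusion is omitted):

  #include <cstdint>
  #include <vector>

  struct Toy_keyuse                 /* stand-in for one KEYUSE element */
  {
    unsigned keypart;               /* which keypart the equality binds */
    bool     refers_to_inner;       /* i.e. used_tables & sj_inner_tables */
  };

  /* True if all key_parts of a unique key are bound by outer expressions */
  static bool is_eq_ref_over_outer(const std::vector<Toy_keyuse> &keyuses,
                                   unsigned key_parts)
  {
    uint32_t bound_parts= 0;
    for (const Toy_keyuse &ku : keyuses)
      if (!ku.refers_to_inner)
        bound_parts|= 1U << ku.keypart;
    /* same idea as bound_parts == PREV_BITS(uint, keyinfo->key_parts) */
    return bound_parts == ((1U << key_parts) - 1);
  }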
- - PRECONDITIONS - When this function is called, the join may have several semi-join nests - but it is guaranteed that one semi-join nest does not contain another. - - ACTION - A table can be pulled out of the semi-join nest if - - It is a constant table, or - - It is accessed via eq_ref(outer_tables) - - POSTCONDITIONS - * Tables that were pulled out have JOIN_TAB::emb_sj_nest == NULL - * Tables that were not pulled out have JOIN_TAB::emb_sj_nest pointing - to semi-join nest they are in. - * Semi-join nests' TABLE_LIST::sj_inner_tables is updated accordingly - - This operation is (and should be) performed at each PS execution since - tables may become/cease to be constant across PS reexecutions. - - NOTE - Table pullout may make uncorrelated subquery correlated. Consider this - example: - - ... WHERE oe IN (SELECT it1.primary_key WHERE p(it1, it2) ... ) - - here table it1 can be pulled out (we have it1.primary_key=oe which gives - us functional dependency). Once it1 is pulled out, all references to it1 - from p(it1, it2) become references to outside of the subquery and thus - make the subquery (i.e. its semi-join nest) correlated. - Making the subquery (i.e. its semi-join nest) correlated prevents us from - using Materialization or LooseScan to execute it. - - RETURN - 0 - OK - 1 - Out of memory error -*/ - -int pull_out_semijoin_tables(JOIN *join) -{ - TABLE_LIST *sj_nest; - DBUG_ENTER("pull_out_semijoin_tables"); - List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests); - - /* Try pulling out of the each of the semi-joins */ - while ((sj_nest= sj_list_it++)) - { - /* Action #1: Mark the constant tables to be pulled out */ - table_map pulled_tables= 0; - - List_iterator<TABLE_LIST> child_li(sj_nest->nested_join->join_list); - TABLE_LIST *tbl; - while ((tbl= child_li++)) - { - if (tbl->table) - { - tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; - if (tbl->table->map & join->const_table_map) - { - pulled_tables |= tbl->table->map; - DBUG_PRINT("info", ("Table %s pulled out (reason: constant)", - tbl->table->alias)); - } - } - } - - /* - Action #2: Find which tables we can pull out based on - update_ref_and_keys() data. Note that pulling one table out can allow - us to pull out some other tables too. - */ - bool pulled_a_table; - do - { - pulled_a_table= FALSE; - child_li.rewind(); - while ((tbl= child_li++)) - { - if (tbl->table && !(pulled_tables & tbl->table->map)) - { - if (find_eq_ref_candidate(tbl->table, - sj_nest->nested_join->used_tables & - ~pulled_tables)) - { - pulled_a_table= TRUE; - pulled_tables |= tbl->table->map; - DBUG_PRINT("info", ("Table %s pulled out (reason: func dep)", - tbl->table->alias)); - /* - Pulling a table out of uncorrelated subquery in general makes - makes it correlated. See the NOTE to this funtion. - */ - sj_nest->sj_subq_pred->is_correlated= TRUE; - sj_nest->nested_join->sj_corr_tables|= tbl->table->map; - sj_nest->nested_join->sj_depends_on|= tbl->table->map; - } - } - } - } while (pulled_a_table); - - child_li.rewind(); - /* - Action #3: Move the pulled out TABLE_LIST elements to the parents. - */ - table_map inner_tables= sj_nest->nested_join->used_tables & - ~pulled_tables; - /* Record the bitmap of inner tables */ - sj_nest->sj_inner_tables= inner_tables; - if (pulled_tables) - { - List<TABLE_LIST> *upper_join_list= (sj_nest->embedding != NULL)? 
- (&sj_nest->embedding->nested_join->join_list): - (&join->select_lex->top_join_list); - Query_arena *arena, backup; - arena= join->thd->activate_stmt_arena_if_needed(&backup); - while ((tbl= child_li++)) - { - if (tbl->table) - { - if (inner_tables & tbl->table->map) - { - /* This table is not pulled out */ - tbl->table->reginfo.join_tab->emb_sj_nest= sj_nest; - } - else - { - /* This table has been pulled out of the semi-join nest */ - tbl->table->reginfo.join_tab->emb_sj_nest= NULL; - /* - Pull the table up in the same way as simplify_joins() does: - update join_list and embedding pointers but keep next[_local] - pointers. - */ - child_li.remove(); - upper_join_list->push_back(tbl); - tbl->join_list= upper_join_list; - tbl->embedding= sj_nest->embedding; - } - } - } - - /* Remove the sj-nest itself if we've removed everything from it */ - if (!inner_tables) - { - List_iterator<TABLE_LIST> li(*upper_join_list); - /* Find the sj_nest in the list. */ - while (sj_nest != li++) ; - li.remove(); - /* Also remove it from the list of SJ-nests: */ - sj_list_it.remove(); - } - - if (arena) - join->thd->restore_active_arena(arena, &backup); - } - } - DBUG_RETURN(0); -} - - - - -/***************************************************************************** - Create JOIN_TABS, make a guess about the table types, - Approximate how many records will be used in each table -*****************************************************************************/ - -static ha_rows get_quick_record_count(THD *thd, SQL_SELECT *select, - TABLE *table, - const key_map *keys,ha_rows limit) -{ - int error; - DBUG_ENTER("get_quick_record_count"); -#ifndef EMBEDDED_LIBRARY // Avoid compiler warning - uchar buff[STACK_BUFF_ALLOC]; -#endif - if (check_stack_overrun(thd, STACK_MIN_SIZE, buff)) - DBUG_RETURN(0); // Fatal error flag is set - if (select) - { - select->head=table; - table->reginfo.impossible_range=0; - if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0, - limit, 0, FALSE)) == 1) - DBUG_RETURN(select->quick->records); - if (error == -1) + select->head=table; + table->reginfo.impossible_range=0; + if ((error= select->test_quick_select(thd, *(key_map *)keys,(table_map) 0, + limit, 0, FALSE)) == 1) + DBUG_RETURN(select->quick->records); + if (error == -1) { table->reginfo.impossible_range=1; DBUG_RETURN(0); @@ -3980,63 +2438,6 @@ typedef struct st_sargable_param } SARGABLE_PARAM; -/* - Get estimated record length for semi-join materialization temptable - - SYNOPSIS - get_tmp_table_rec_length() - items IN subquery's select list. - - DESCRIPTION - Calculate estimated record length for semi-join materialization - temptable. It's an estimate because we don't follow every bit of - create_tmp_table()'s logic. This isn't necessary as the return value of - this function is used only for cost calculations. - - RETURN - Length of the temptable record, in bytes -*/ - -static uint get_tmp_table_rec_length(List<Item> &items) -{ - uint len= 0; - Item *item; - List_iterator<Item> it(items); - while ((item= it++)) - { - switch (item->result_type()) { - case REAL_RESULT: - len += sizeof(double); - break; - case INT_RESULT: - if (item->max_length >= (MY_INT32_NUM_DECIMAL_DIGITS - 1)) - len += 8; - else - len += 4; - break; - case STRING_RESULT: - enum enum_field_types type; - /* DATE/TIME and GEOMETRY fields have STRING_RESULT result type. 
*/ - if ((type= item->field_type()) == MYSQL_TYPE_DATETIME || - type == MYSQL_TYPE_TIME || type == MYSQL_TYPE_DATE || - type == MYSQL_TYPE_TIMESTAMP || type == MYSQL_TYPE_GEOMETRY) - len += 8; - else - len += item->max_length; - break; - case DECIMAL_RESULT: - len += 10; - break; - case ROW_RESULT: - default: - DBUG_ASSERT(0); /* purecov: deadcode */ - break; - } - } - return len; -} - - /** Calculate the best possible join and initialize the join structure. @@ -4518,147 +2919,6 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds, } -/* - Optimize semi-join nests that could be run with sj-materialization - - SYNOPSIS - optimize_semijoin_nests() - join The join to optimize semi-join nests for - all_table_map Bitmap of all tables in the join - - DESCRIPTION - Optimize each of the semi-join nests that can be run with - materialization. For each of the nests, we - - Generate the best join order for this "sub-join" and remember it; - - Remember the sub-join execution cost (it's part of materialization - cost); - - Calculate other costs that will be incurred if we decide - to use materialization strategy for this semi-join nest. - - All obtained information is saved and will be used by the main join - optimization pass. - - RETURN - FALSE Ok - TRUE Out of memory error -*/ - -static bool optimize_semijoin_nests(JOIN *join, table_map all_table_map) -{ - DBUG_ENTER("optimize_semijoin_nests"); - List_iterator<TABLE_LIST> sj_list_it(join->select_lex->sj_nests); - TABLE_LIST *sj_nest; - /* - The statement may have been executed with 'semijoin=on' earlier. - We need to verify that 'semijoin=on' still holds. - */ - if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_SEMIJOIN) && - optimizer_flag(join->thd, OPTIMIZER_SWITCH_MATERIALIZATION)) - { - while ((sj_nest= sj_list_it++)) - { - /* semi-join nests with only constant tables are not valid */ - DBUG_ASSERT(sj_nest->sj_inner_tables & ~join->const_table_map); - - sj_nest->sj_mat_info= NULL; - if (sj_nest->sj_inner_tables && /* not everything was pulled out */ - !sj_nest->sj_subq_pred->is_correlated && - sj_nest->sj_subq_pred->types_allow_materialization) - { - join->emb_sjm_nest= sj_nest; - if (choose_plan(join, all_table_map)) - DBUG_RETURN(TRUE); /* purecov: inspected */ - /* - The best plan to run the subquery is now in join->best_positions, - save it. - */ - uint n_tables= my_count_bits(sj_nest->sj_inner_tables); - SJ_MATERIALIZATION_INFO* sjm; - if (!(sjm= new SJ_MATERIALIZATION_INFO) || - !(sjm->positions= (POSITION*)join->thd->alloc(sizeof(POSITION)* - n_tables))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - sjm->tables= n_tables; - sjm->is_used= FALSE; - double subjoin_out_rows, subjoin_read_time; - get_partial_join_cost(join, n_tables, - &subjoin_read_time, &subjoin_out_rows); - - sjm->materialization_cost.convert_from_cost(subjoin_read_time); - sjm->rows= subjoin_out_rows; - - List<Item> &right_expr_list= - sj_nest->sj_subq_pred->unit->first_select()->item_list; - /* - Adjust output cardinality estimates. If the subquery has form - - ... oe IN (SELECT t1.colX, t2.colY, func(X,Y,Z) ) - - then the number of distinct output record combinations has an - upper bound of product of number of records matching the tables - that are used by the SELECT clause. - TODO: - We can get a more precise estimate if we - - use rec_per_key cardinality estimates. For simple cases like - "oe IN (SELECT t.key ...)" it is trivial. - - Functional dependencies between the tables in the semi-join - nest (the payoff is probably less here?) 
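As a minimal standalone sketch of the cost and cardinality bookkeeping in optimize_semijoin_nests() (plain doubles instead of COST_VECT; the names and constants such as estimate_sjm and the *_LOOKUP_COST_SKETCH values are illustrative, not the server's):

#include <algorithm>

// Illustrative lookup costs; the real constants live in the server sources.
static const double HEAP_LOOKUP_COST_SKETCH= 0.05;
static const double DISK_LOOKUP_COST_SKETCH= 1.0;

struct SjmCostSketch
{
  double rows;                  // estimated # of distinct materialized rows
  double materialization_cost;  // run the inner join + fill the temptable
  double scan_cost;             // one full scan of the filled temptable
  double lookup_cost;           // one index lookup into the temptable
};

// subjoin_out_rows / subjoin_read_time come from the inner join's plan;
// rows_cap is the product of quick_condition_rows of the tables referenced
// by the subquery's SELECT list; rowlen approximates the temptable record.
SjmCostSketch estimate_sjm(double subjoin_out_rows, double subjoin_read_time,
                           double rows_cap, unsigned rowlen,
                           double max_heap_table_size)
{
  SjmCostSketch s;
  s.rows= std::min(subjoin_out_rows, rows_cap);
  s.lookup_cost= (rowlen * subjoin_out_rows < max_heap_table_size) ?
                 HEAP_LOOKUP_COST_SKETCH : DISK_LOOKUP_COST_SKETCH;
  s.materialization_cost= subjoin_read_time +
                          subjoin_out_rows * s.lookup_cost;
  s.scan_cost= s.rows * s.lookup_cost;
  return s;
}

These four numbers are all that the SJ-Materialization strategy handlers need later during the main join optimization pass.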
- */ - { - for (uint i=0 ; i < join->const_tables + sjm->tables ; i++) - { - JOIN_TAB *tab= join->best_positions[i].table; - join->map2table[tab->table->tablenr]= tab; - } - List_iterator<Item> it(right_expr_list); - Item *item; - table_map map= 0; - while ((item= it++)) - map |= item->used_tables(); - map= map & ~PSEUDO_TABLE_BITS; - Table_map_iterator tm_it(map); - int tableno; - double rows= 1.0; - while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END) - rows *= join->map2table[tableno]->table->quick_condition_rows; - sjm->rows= min(sjm->rows, rows); - } - memcpy(sjm->positions, join->best_positions + join->const_tables, - sizeof(POSITION) * n_tables); - - /* - Calculate temporary table parameters and usage costs - */ - uint rowlen= get_tmp_table_rec_length(right_expr_list); - double lookup_cost; - if (rowlen * subjoin_out_rows< join->thd->variables.max_heap_table_size) - lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST; - else - lookup_cost= DISK_TEMPTABLE_LOOKUP_COST; - - /* - Let materialization cost include the cost to write the data into the - temporary table: - */ - sjm->materialization_cost.add_io(subjoin_out_rows, lookup_cost); - - /* - Set the cost to do a full scan of the temptable (will need this to - consider doing sjm-scan): - */ - sjm->scan_cost.zero(); - sjm->scan_cost.add_io(sjm->rows, lookup_cost); - - sjm->lookup_cost.convert_from_cost(lookup_cost); - sj_nest->sj_mat_info= sjm; - DBUG_EXECUTE("opt", print_sjm(sjm);); - } - } - } - join->emb_sjm_nest= NULL; - DBUG_RETURN(FALSE); -} - /***************************************************************************** Check with keys are used and with tables references with tables Updates in stat: @@ -5887,362 +4147,60 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key) } -/* - Given a semi-join nest, find out which of the IN-equalities are bound - - SYNOPSIS - get_bound_sj_equalities() - sj_nest Semi-join nest - remaining_tables Tables that are not yet bound - - DESCRIPTION - Given a semi-join nest, find out which of the IN-equalities have their - left part expression bound (i.e. the said expression doesn't refer to - any of remaining_tables and can be evaluated). - - RETURN - Bitmap of bound IN-equalities. -*/ +/** + Find the best access path for an extension of a partial execution + plan and add this path to the plan. -ulonglong get_bound_sj_equalities(TABLE_LIST *sj_nest, - table_map remaining_tables) -{ - List_iterator<Item> li(sj_nest->nested_join->sj_outer_expr_list); - Item *item; - uint i= 0; - ulonglong res= 0; - while ((item= li++)) - { - /* - Q: should this take into account equality propagation and how? - A: If e->outer_side is an Item_field, walk over the equality - class and see if there is an element that is bound? - (this is an optional feature) - */ - if (!(item->used_tables() & remaining_tables)) - { - res |= 1ULL << i; - } - } - return res; -} + The function finds the best access path to table 's' from the passed + partial plan where an access path is the general term for any means to + access the data in 's'. An access path may use either an index or a scan, + whichever is cheaper. The input partial plan is passed via the array + 'join->positions' of length 'idx'. The chosen access method for 's' and its + cost are stored in 'join->positions[idx]'. 
+ @param join pointer to the structure providing all context info + for the query + @param s the table to be joined by the function + @param thd thread for the connection that submitted the query + @param remaining_tables set of tables not included into the partial plan yet + @param idx the length of the partial plan + @param disable_jbuf TRUE<=> Don't use join buffering + @param record_count estimate for the number of records returned by the + partial plan + @param pos OUT Table access plan + @param loose_scan_pos OUT Table plan that uses loosescan, or set cost to + DBL_MAX if not possible. -/* - This is a class for considering possible loose index scan optimizations. - It's usage pattern is as follows: - best_access_path() - { - Loose_scan_opt opt; - - opt.init() - for each index we can do ref access with - { - opt.next_ref_key(); - for each keyuse - opt.add_keyuse(); - opt.check_ref_access(); - } - - if (some criteria for range scans) - opt.check_range_access(); - - opt.get_best_option(); - } + @return + None */ -class Loose_scan_opt +void +best_access_path(JOIN *join, + JOIN_TAB *s, + table_map remaining_tables, + uint idx, + bool disable_jbuf, + double record_count, + POSITION *pos, + POSITION *loose_scan_pos) { -public: - /* All methods must check this before doing anything else */ - bool try_loosescan; - - /* - If we consider (oe1, .. oeN) IN (SELECT ie1, .. ieN) then ieK=oeK is - called sj-equality. If oeK depends only on preceding tables then such - equality is called 'bound'. - */ - ulonglong bound_sj_equalities; - - /* Accumulated properties of ref access we're now considering: */ - ulonglong handled_sj_equalities; - key_part_map loose_scan_keyparts; - uint max_loose_keypart; - bool part1_conds_met; + THD *thd= join->thd; + KEYUSE *best_key= 0; + uint best_max_key_part= 0; + my_bool found_constraint= 0; + double best= DBL_MAX; + double best_time= DBL_MAX; + double records= DBL_MAX; + table_map best_ref_depends_map= 0; + double tmp; + ha_rows rec; + bool best_uses_jbuf= FALSE; - /* - Use of quick select is a special case. Some of its properties: - */ - uint quick_uses_applicable_index; - uint quick_max_loose_keypart; - - /* Best loose scan method so far */ - uint best_loose_scan_key; - double best_loose_scan_cost; - double best_loose_scan_records; - KEYUSE *best_loose_scan_start_key; - - uint best_max_loose_keypart; - - Loose_scan_opt(): - try_loosescan(FALSE), - bound_sj_equalities(0), - quick_uses_applicable_index(FALSE) - { - UNINIT_VAR(quick_max_loose_keypart); /* Protected by quick_uses_applicable_index */ - /* The following are protected by best_loose_scan_cost!= DBL_MAX */ - UNINIT_VAR(best_loose_scan_key); - UNINIT_VAR(best_loose_scan_records); - UNINIT_VAR(best_max_loose_keypart); - UNINIT_VAR(best_loose_scan_start_key); - } + Loose_scan_opt loose_scan_opt; + DBUG_ENTER("best_access_path"); - void init(JOIN *join, JOIN_TAB *s, table_map remaining_tables) - { - /* - Discover the bound equalities. We need to do this if - 1. The next table is an SJ-inner table, and - 2. It is the first table from that semijoin, and - 3. We're not within a semi-join range (i.e. all semi-joins either have - all or none of their tables in join_table_map), except - s->emb_sj_nest (which we've just entered, see #2). - 4. All non-IN-equality correlation references from this sj-nest are - bound - 5. 
But some of the IN-equalities aren't (so this can't be handled by - FirstMatch strategy) - */ - best_loose_scan_cost= DBL_MAX; - if (!join->emb_sjm_nest && s->emb_sj_nest && // (1) - s->emb_sj_nest->sj_in_exprs < 64 && - ((remaining_tables & s->emb_sj_nest->sj_inner_tables) == // (2) - s->emb_sj_nest->sj_inner_tables) && // (2) - join->cur_sj_inner_tables == 0 && // (3) - !(remaining_tables & - s->emb_sj_nest->nested_join->sj_corr_tables) && // (4) - remaining_tables & s->emb_sj_nest->nested_join->sj_depends_on &&// (5) - optimizer_flag(join->thd, OPTIMIZER_SWITCH_LOOSE_SCAN)) - { - /* This table is an LooseScan scan candidate */ - bound_sj_equalities= get_bound_sj_equalities(s->emb_sj_nest, - remaining_tables); - try_loosescan= TRUE; - DBUG_PRINT("info", ("Will try LooseScan scan, bound_map=%llx", - (longlong)bound_sj_equalities)); - } - } - - void next_ref_key() - { - handled_sj_equalities=0; - loose_scan_keyparts= 0; - max_loose_keypart= 0; - part1_conds_met= FALSE; - } - - void add_keyuse(table_map remaining_tables, KEYUSE *keyuse) - { - if (try_loosescan && keyuse->sj_pred_no != UINT_MAX) - { - if (!(remaining_tables & keyuse->used_tables)) - { - /* - This allows to use equality propagation to infer that some - sj-equalities are bound. - */ - bound_sj_equalities |= 1ULL << keyuse->sj_pred_no; - } - else - { - handled_sj_equalities |= 1ULL << keyuse->sj_pred_no; - loose_scan_keyparts |= ((key_part_map)1) << keyuse->keypart; - set_if_bigger(max_loose_keypart, keyuse->keypart); - } - } - } - - bool have_a_case() { return test(handled_sj_equalities); } - - void check_ref_access_part1(JOIN_TAB *s, uint key, KEYUSE *start_key, - table_map found_part) - { - /* - Check if we can use LooseScan semi-join strategy. We can if - 1. This is the right table at right location - 2. All IN-equalities are either - - "bound", ie. the outer_expr part refers to the preceding tables - - "handled", ie. covered by the index we're considering - 3. Index order allows to enumerate subquery's duplicate groups in - order. This happens when the index definition matches this - pattern: - - (handled_col|bound_col)* (other_col|bound_col) - - */ - if (try_loosescan && // (1) - (handled_sj_equalities | bound_sj_equalities) == // (2) - PREV_BITS(ulonglong, s->emb_sj_nest->sj_in_exprs) && // (2) - (PREV_BITS(key_part_map, max_loose_keypart+1) & // (3) - (found_part | loose_scan_keyparts)) == // (3) - (found_part | loose_scan_keyparts) && // (3) - !key_uses_partial_cols(s->table, key)) - { - /* Ok, can use the strategy */ - part1_conds_met= TRUE; - if (s->quick && s->quick->index == key && - s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE) - { - quick_uses_applicable_index= TRUE; - quick_max_loose_keypart= max_loose_keypart; - } - DBUG_PRINT("info", ("Can use LooseScan scan")); - - /* - Check if this is a confluent where there are no usable bound - IN-equalities, e.g. we have - - outer_expr IN (SELECT innertbl.key FROM ...) - - and outer_expr cannot be evaluated yet, so it's actually full - index scan and not a ref access - */ - if (!(found_part & 1 ) && /* no usable ref access for 1st key part */ - s->table->covering_keys.is_set(key)) - { - DBUG_PRINT("info", ("Can use full index scan for LooseScan")); - - /* Calculate the cost of complete loose index scan. 
*/ - double records= rows2double(s->table->file->stats.records); - - /* The cost is entire index scan cost (divided by 2) */ - double read_time= s->table->file->index_only_read_time(key, records); - - /* - Now find out how many different keys we will get (for now we - ignore the fact that we have "keypart_i=const" restriction for - some key components, that may make us think think that loose - scan will produce more distinct records than it actually will) - */ - ulong rpc; - if ((rpc= s->table->key_info[key].rec_per_key[max_loose_keypart])) - records= records / rpc; - - // TODO: previous version also did /2 - if (read_time < best_loose_scan_cost) - { - best_loose_scan_key= key; - best_loose_scan_cost= read_time; - best_loose_scan_records= records; - best_max_loose_keypart= max_loose_keypart; - best_loose_scan_start_key= start_key; - } - } - } - } - - void check_ref_access_part2(uint key, KEYUSE *start_key, double records, - double read_time) - { - if (part1_conds_met && read_time < best_loose_scan_cost) - { - /* TODO use rec-per-key-based fanout calculations */ - best_loose_scan_key= key; - best_loose_scan_cost= read_time; - best_loose_scan_records= records; - best_max_loose_keypart= max_loose_keypart; - best_loose_scan_start_key= start_key; - } - } - - void check_range_access(JOIN *join, uint idx, QUICK_SELECT_I *quick) - { - /* TODO: this the right part restriction: */ - if (quick_uses_applicable_index && idx == join->const_tables && - quick->read_time < best_loose_scan_cost) - { - best_loose_scan_key= quick->index; - best_loose_scan_cost= quick->read_time; - /* this is ok because idx == join->const_tables */ - best_loose_scan_records= rows2double(quick->records); - best_max_loose_keypart= quick_max_loose_keypart; - best_loose_scan_start_key= NULL; - } - } - - void save_to_position(JOIN_TAB *tab, POSITION *pos) - { - pos->read_time= best_loose_scan_cost; - if (best_loose_scan_cost != DBL_MAX) - { - pos->records_read= best_loose_scan_records; - pos->key= best_loose_scan_start_key; - pos->loosescan_key= best_loose_scan_key; - pos->loosescan_parts= best_max_loose_keypart + 1; - pos->use_join_buffer= FALSE; - pos->table= tab; - // todo need ref_depend_map ? - DBUG_PRINT("info", ("Produced a LooseScan plan, key %s, %s", - tab->table->key_info[best_loose_scan_key].name, - best_loose_scan_start_key? "(ref access)": - "(range/index access)")); - } - } -}; - - -/** - Find the best access path for an extension of a partial execution - plan and add this path to the plan. - - The function finds the best access path to table 's' from the passed - partial plan where an access path is the general term for any means to - access the data in 's'. An access path may use either an index or a scan, - whichever is cheaper. The input partial plan is passed via the array - 'join->positions' of length 'idx'. The chosen access method for 's' and its - cost are stored in 'join->positions[idx]'. - - @param join pointer to the structure providing all context info - for the query - @param s the table to be joined by the function - @param thd thread for the connection that submitted the query - @param remaining_tables set of tables not included into the partial plan yet - @param idx the length of the partial plan - @param disable_jbuf TRUE<=> Don't use join buffering - @param record_count estimate for the number of records returned by the - partial plan - @param pos OUT Table access plan - @param loose_scan_pos OUT Table plan that uses loosescan, or set cost to - DBL_MAX if not possible. 
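The bitmap test in check_ref_access_part1() above can be restated standalone with simplified types (uint64_t in place of ulonglong/key_part_map; fewer than 64 IN-equalities and keyparts assumed, as the server also requires):

#include <cstdint>

typedef uint64_t bitmap_t;

// Bitmap with the n lowest bits set (n < 64 assumed).
static inline bitmap_t prev_bits(unsigned n)
{
  return (bitmap_t(1) << n) - 1;
}

// LooseScan is applicable when every IN-equality is either bound (its outer
// side is already in the join prefix) or handled (covered by the candidate
// index), and the keyparts used by ref access plus the handled keyparts all
// fall within the first max_loose_keypart+1 keyparts of the index.
bool loosescan_applicable(bitmap_t bound_sj_equalities,
                          bitmap_t handled_sj_equalities,
                          unsigned n_sj_equalities,
                          bitmap_t found_keyparts,
                          bitmap_t loose_scan_keyparts,
                          unsigned max_loose_keypart)
{
  if ((bound_sj_equalities | handled_sj_equalities) !=
      prev_bits(n_sj_equalities))
    return false;
  bitmap_t used= found_keyparts | loose_scan_keyparts;
  return (prev_bits(max_loose_keypart + 1) & used) == used;
}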
- - @return - None -*/ - -static void -best_access_path(JOIN *join, - JOIN_TAB *s, - table_map remaining_tables, - uint idx, - bool disable_jbuf, - double record_count, - POSITION *pos, - POSITION *loose_scan_pos) -{ - THD *thd= join->thd; - KEYUSE *best_key= 0; - uint best_max_key_part= 0; - my_bool found_constraint= 0; - double best= DBL_MAX; - double best_time= DBL_MAX; - double records= DBL_MAX; - table_map best_ref_depends_map= 0; - double tmp; - ha_rows rec; - bool best_uses_jbuf= FALSE; - - Loose_scan_opt loose_scan_opt; - DBUG_ENTER("best_access_path"); - - loose_scan_opt.init(join, s, remaining_tables); + loose_scan_opt.init(join, s, remaining_tables); if (s->keyuse) { /* Use key if possible */ @@ -6783,7 +4741,7 @@ best_access_path(JOIN *join, TRUE Fatal error */ -static bool +bool choose_plan(JOIN *join, table_map join_tables) { uint search_depth= join->thd->variables.optimizer_search_depth; @@ -7068,60 +5026,6 @@ optimize_straight_join(JOIN *join, table_map join_tables) } - -/* - Check if the last tables of the partial join order allow to use - sj-materialization strategy for them - - SYNOPSIS - at_sjmat_pos() - join - remaining_tables - tab the last table's join tab - idx last table's index - loose_scan OUT TRUE <=> use LooseScan - - RETURN - TRUE Yes, can apply sj-materialization - FALSE No, some of the requirements are not met -*/ - -SJ_MATERIALIZATION_INFO * -at_sjmat_pos(const JOIN *join, table_map remaining_tables, const JOIN_TAB *tab, - uint idx, bool *loose_scan) -{ - /* - Check if - 1. We're in a semi-join nest that can be run with SJ-materialization - 2. All the tables correlated through the IN subquery are in the prefix - */ - TABLE_LIST *emb_sj_nest= tab->emb_sj_nest; - table_map suffix= remaining_tables & ~tab->table->map; - if (emb_sj_nest && emb_sj_nest->sj_mat_info && - !(suffix & emb_sj_nest->sj_inner_tables)) - { - /* - Walk back and check if all immediately preceding tables are from - this semi-join. - */ - uint n_tables= my_count_bits(tab->emb_sj_nest->sj_inner_tables); - for (uint i= 1; i < n_tables ; i++) - { - if (join->positions[idx - i].table->emb_sj_nest != tab->emb_sj_nest) - return NULL; - } - *loose_scan= test(remaining_tables & ~tab->table->map & - (emb_sj_nest->sj_inner_tables | - emb_sj_nest->nested_join->sj_depends_on)); - if (*loose_scan && !emb_sj_nest->sj_subq_pred->sjm_scan_allowed) - return NULL; - else - return emb_sj_nest->sj_mat_info; - } - return NULL; -} - - /** Find a good, possibly optimal, query execution plan (QEP) by a greedy search. @@ -7292,8 +5196,6 @@ greedy_search(JOIN *join, } - - /* Calculate a cost of given partial join order @@ -7844,213 +5746,6 @@ prev_record_reads(JOIN *join, uint idx, table_map found_ref) } -/* - Fix semi-join strategies for the picked join order - - SYNOPSIS - fix_semijoin_strategies_for_picked_join_order() - join The join with the picked join order - - DESCRIPTION - Fix semi-join strategies for the picked join order. This is a step that - needs to be done right after we have fixed the join order. What we do - here is switch join's semi-join strategy description from backward-based - to forwards based. - - When join optimization is in progress, we re-consider semi-join - strategies after we've added another table. Here's an illustration. - Suppose the join optimization is underway: - - 1) ot1 it1 it2 - sjX -- looking at (ot1, it1, it2) join prefix, we decide - to use semi-join strategy sjX. 
- - 2) ot1 it1 it2 ot2 - sjX sjY -- Having added table ot2, we now may consider - another semi-join strategy and decide to use a - different strategy sjY. Note that the record - of sjX has remained under it2. That is - necessary because we need to be able to get - back to (ot1, it1, it2) join prefix. - what makes things even worse is that there are cases where the choice - of sjY changes the way we should access it2. - - 3) [ot1 it1 it2 ot2 ot3] - sjX sjY -- This means that after join optimization is - finished, semi-join info should be read - right-to-left (while nearly all plan refinement - functions, EXPLAIN, etc proceed from left to - right) - - This function does the needed reversal, making it possible to read the - join and semi-join order from left to right. -*/ - -static void fix_semijoin_strategies_for_picked_join_order(JOIN *join) -{ - uint table_count=join->tables; - uint tablenr; - table_map remaining_tables= 0; - table_map handled_tabs= 0; - for (tablenr= table_count - 1 ; tablenr != join->const_tables - 1; tablenr--) - { - POSITION *pos= join->best_positions + tablenr; - JOIN_TAB *s= pos->table; - uint first; - LINT_INIT(first); // Set by every branch except SJ_OPT_NONE which doesn't use it - - if ((handled_tabs & s->table->map) || pos->sj_strategy == SJ_OPT_NONE) - { - remaining_tables |= s->table->map; - continue; - } - - if (pos->sj_strategy == SJ_OPT_MATERIALIZE) - { - SJ_MATERIALIZATION_INFO *sjm= s->emb_sj_nest->sj_mat_info; - sjm->is_used= TRUE; - sjm->is_sj_scan= FALSE; - memcpy(pos - sjm->tables + 1, sjm->positions, - sizeof(POSITION) * sjm->tables); - first= tablenr - sjm->tables + 1; - join->best_positions[first].n_sj_tables= sjm->tables; - join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE; - } - else if (pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN) - { - POSITION *first_inner= join->best_positions + pos->sjm_scan_last_inner; - SJ_MATERIALIZATION_INFO *sjm= first_inner->table->emb_sj_nest->sj_mat_info; - sjm->is_used= TRUE; - sjm->is_sj_scan= TRUE; - first= pos->sjm_scan_last_inner - sjm->tables + 1; - memcpy(join->best_positions + first, - sjm->positions, sizeof(POSITION) * sjm->tables); - join->best_positions[first].sj_strategy= SJ_OPT_MATERIALIZE_SCAN; - join->best_positions[first].n_sj_tables= sjm->tables; - /* - Do what advance_sj_state did: re-run best_access_path for every table - in the [last_inner_table + 1; pos..) range - */ - double prefix_rec_count; - /* Get the prefix record count */ - if (first == join->const_tables) - prefix_rec_count= 1.0; - else - prefix_rec_count= join->best_positions[first-1].prefix_record_count; - - /* Add materialization record count*/ - prefix_rec_count *= sjm->rows; - - uint i; - table_map rem_tables= remaining_tables; - for (i= tablenr; i != (first + sjm->tables - 1); i--) - rem_tables |= join->best_positions[i].table->table->map; - - POSITION dummy; - join->cur_sj_inner_tables= 0; - for (i= first + sjm->tables; i <= tablenr; i++) - { - best_access_path(join, join->best_positions[i].table, rem_tables, i, FALSE, - prefix_rec_count, join->best_positions + i, &dummy); - prefix_rec_count *= join->best_positions[i].records_read; - rem_tables &= ~join->best_positions[i].table->table->map; - } - } - - if (pos->sj_strategy == SJ_OPT_FIRST_MATCH) - { - first= pos->first_firstmatch_table; - join->best_positions[first].sj_strategy= SJ_OPT_FIRST_MATCH; - join->best_positions[first].n_sj_tables= tablenr - first + 1; - POSITION dummy; // For loose scan paths - double record_count= (first== join->const_tables)? 
1.0: - join->best_positions[tablenr - 1].prefix_record_count; - - table_map rem_tables= remaining_tables; - uint idx; - for (idx= first; idx <= tablenr; idx++) - { - rem_tables |= join->best_positions[idx].table->table->map; - } - /* - Re-run best_access_path to produce best access methods that do not use - join buffering - */ - join->cur_sj_inner_tables= 0; - for (idx= first; idx <= tablenr; idx++) - { - if (join->best_positions[idx].use_join_buffer) - { - best_access_path(join, join->best_positions[idx].table, - rem_tables, idx, TRUE /* no jbuf */, - record_count, join->best_positions + idx, &dummy); - } - record_count *= join->best_positions[idx].records_read; - rem_tables &= ~join->best_positions[idx].table->table->map; - } - } - - if (pos->sj_strategy == SJ_OPT_LOOSE_SCAN) - { - first= pos->first_loosescan_table; - POSITION *first_pos= join->best_positions + first; - POSITION loose_scan_pos; // For loose scan paths - double record_count= (first== join->const_tables)? 1.0: - join->best_positions[tablenr - 1].prefix_record_count; - - table_map rem_tables= remaining_tables; - uint idx; - for (idx= first; idx <= tablenr; idx++) - rem_tables |= join->best_positions[idx].table->table->map; - /* - Re-run best_access_path to produce best access methods that do not use - join buffering - */ - join->cur_sj_inner_tables= 0; - for (idx= first; idx <= tablenr; idx++) - { - if (join->best_positions[idx].use_join_buffer || (idx == first)) - { - best_access_path(join, join->best_positions[idx].table, - rem_tables, idx, TRUE /* no jbuf */, - record_count, join->best_positions + idx, - &loose_scan_pos); - if (idx==first) - join->best_positions[idx]= loose_scan_pos; - } - rem_tables &= ~join->best_positions[idx].table->table->map; - record_count *= join->best_positions[idx].records_read; - } - first_pos->sj_strategy= SJ_OPT_LOOSE_SCAN; - first_pos->n_sj_tables= my_count_bits(first_pos->table->emb_sj_nest->sj_inner_tables); - } - - if (pos->sj_strategy == SJ_OPT_DUPS_WEEDOUT) - { - /* - Duplicate Weedout starting at pos->first_dupsweedout_table, ending at - this table. 
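The right-to-left fixup performed by this function can be sketched standalone as follows (a simplified POSITION with a single first-table field, no cost re-optimization, and illustrative names):

#include <vector>

enum sj_strategy_sketch { SJ_NONE_S, SJ_FIRST_MATCH_S, SJ_DUPS_WEEDOUT_S };

struct PosSketch
{
  sj_strategy_sketch sj_strategy;  // recorded at the *last* table of a range
  unsigned first;                  // first table of that strategy's range
  unsigned n_sj_tables;            // filled in by the fixup
};

// Walk the picked join order right-to-left; move each strategy record to
// the first table of its range and clear it from the remaining tables, so
// that the plan can afterwards be read left-to-right.
void fix_strategies(std::vector<PosSketch> &pos)
{
  std::vector<bool> handled(pos.size(), false);
  for (size_t i= pos.size(); i-- > 0; )
  {
    if (handled[i] || pos[i].sj_strategy == SJ_NONE_S)
      continue;
    unsigned first= pos[i].first;
    sj_strategy_sketch strategy= pos[i].sj_strategy;
    for (size_t j= first; j <= i; j++)
    {
      pos[j].sj_strategy= SJ_NONE_S;
      handled[j]= true;
    }
    pos[first].sj_strategy= strategy;
    pos[first].n_sj_tables= (unsigned)(i - first + 1);
  }
}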
- */ - first= pos->first_dupsweedout_table; - join->best_positions[first].sj_strategy= SJ_OPT_DUPS_WEEDOUT; - join->best_positions[first].n_sj_tables= tablenr - first + 1; - } - - uint i_end= first + join->best_positions[first].n_sj_tables; - for (uint i= first; i < i_end; i++) - { - if (i != first) - join->best_positions[i].sj_strategy= SJ_OPT_NONE; - handled_tabs |= join->best_positions[i].table->table->map; - } - - if (tablenr != first) - pos->sj_strategy= SJ_OPT_NONE; - remaining_tables |= s->table->map; - } -} - - /* Set up join struct according to the picked join order in @@ -9297,319 +6992,17 @@ end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records) } -/* Check if given Item was injected by semi-join equality */ -static bool is_cond_sj_in_equality(Item *item) -{ - if (item->type() == Item::FUNC_ITEM && - ((Item_func*)item)->functype()== Item_func::EQ_FUNC) - { - Item_func_eq *item_eq= (Item_func_eq*)item; - return test(item_eq->in_equality_no != UINT_MAX); - } - return FALSE; -} - - - - -void remove_sj_conds(Item **tree) -{ - if (*tree) - { - if (is_cond_sj_in_equality(*tree)) - { - *tree= NULL; - return; - } - else if ((*tree)->type() == Item::COND_ITEM) - { - Item *item; - List_iterator<Item> li(*(((Item_cond*)*tree)->argument_list())); - while ((item= li++)) - { - if (is_cond_sj_in_equality(item)) - li.replace(new Item_int(1)); - } - } - } -} - - -/* - Create subquery IN-equalities assuming use of materialization strategy - - SYNOPSIS - create_subq_in_equalities() - thd Thread handle - sjm Semi-join materialization structure - subq_pred The subquery predicate - - DESCRIPTION - Create subquery IN-equality predicates. That is, for a subquery - - (oe1, oe2, ...) IN (SELECT ie1, ie2, ... FROM ...) - - create "oe1=ie1 AND ie1=ie2 AND ..." expression, such that ie1, ie2, .. - refer to the columns of the table that's used to materialize the - subquery. - - RETURN - Created condition -*/ - -Item *create_subq_in_equalities(THD *thd, SJ_MATERIALIZATION_INFO *sjm, - Item_in_subselect *subq_pred) -{ - Item *res= NULL; - if (subq_pred->left_expr->cols() == 1) - { - if (!(res= new Item_func_eq(subq_pred->left_expr, - new Item_field(sjm->table->field[0])))) - return NULL; /* purecov: inspected */ - } - else - { - Item *conj; - for (uint i= 0; i < subq_pred->left_expr->cols(); i++) - { - if (!(conj= new Item_func_eq(subq_pred->left_expr->element_index(i), - new Item_field(sjm->table->field[i]))) || - !(res= and_items(res, conj))) - return NULL; /* purecov: inspected */ - } - } - if (res->fix_fields(thd, &res)) - return NULL; /* purecov: inspected */ - return res; -} - +/* + Check whether a join buffer can be used to join the specified table -/* - Setup semi-join materialization strategy for one semi-join nest - SYNOPSIS - - setup_sj_materialization() - tab The first tab in the semi-join - - DESCRIPTION - Setup execution structures for one semi-join materialization nest: - - Create the materialization temporary table - - If we're going to do index lookups - create TABLE_REF structure to make the lookus - - else (if we're going to do a full scan of the temptable) - create Copy_field structures to do copying. 
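The predicate built by create_subq_in_equalities() above is easier to see with a small string-based rendering of the same fold (purely illustrative, no Item classes): for (oe1,...,oeN) IN (SELECT ie1,...,ieN ...) it produces oe1=col1 AND ... AND oeN=colN against the columns of the materialization temptable.

#include <string>
#include <vector>

// and_items()-style fold: an empty accumulator means "no condition yet".
static std::string and_items_str(const std::string &cond,
                                 const std::string &item)
{
  return cond.empty() ? item : "(" + cond + " AND " + item + ")";
}

std::string subq_in_equalities(const std::vector<std::string> &outer_exprs,
                               const std::string &tmp_table)
{
  std::string res;
  for (size_t i= 0; i < outer_exprs.size(); i++)
    res= and_items_str(res, outer_exprs[i] + " = " +
                            tmp_table + ".col" + std::to_string(i));
  return res;
}

// subq_in_equalities({"ot.a", "ot.b"}, "sjm")
//   -> "(ot.a = sjm.col0 AND ot.b = sjm.col1)"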
- - RETURN - FALSE Ok - TRUE Error -*/ - -bool setup_sj_materialization(JOIN_TAB *tab) -{ - uint i; - DBUG_ENTER("setup_sj_materialization"); - TABLE_LIST *emb_sj_nest= tab->table->pos_in_table_list->embedding; - SJ_MATERIALIZATION_INFO *sjm= emb_sj_nest->sj_mat_info; - THD *thd= tab->join->thd; - /* First the calls come to the materialization function */ - List<Item> &item_list= emb_sj_nest->sj_subq_pred->unit->first_select()->item_list; - - /* - Set up the table to write to, do as select_union::create_result_table does - */ - sjm->sjm_table_param.init(); - sjm->sjm_table_param.field_count= item_list.elements; - // psergey-merge: the following is not in 5.x: sjm->sjm_table_param.bit_fields_as_long= TRUE; - List_iterator<Item> it(item_list); - Item *right_expr; - while((right_expr= it++)) - sjm->sjm_table_cols.push_back(right_expr); - - if (!(sjm->table= create_tmp_table(thd, &sjm->sjm_table_param, - sjm->sjm_table_cols, (ORDER*) 0, - TRUE /* distinct */, - 1, /*save_sum_fields*/ - thd->options | TMP_TABLE_ALL_COLUMNS, - HA_POS_ERROR /*rows_limit */, - (char*)"sj-materialize"))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - sjm->table->file->extra(HA_EXTRA_WRITE_CACHE); - sjm->table->file->extra(HA_EXTRA_IGNORE_DUP_KEY); - tab->join->sj_tmp_tables.push_back(sjm->table); - tab->join->sjm_info_list.push_back(sjm); - - sjm->materialized= FALSE; - if (!sjm->is_sj_scan) - { - KEY *tmp_key; /* The only index on the temporary table. */ - uint tmp_key_parts; /* Number of keyparts in tmp_key. */ - tmp_key= sjm->table->key_info; - tmp_key_parts= tmp_key->key_parts; - - /* - Create/initialize everything we will need to index lookups into the - temptable. - */ - TABLE_REF *tab_ref; - if (!(tab_ref= (TABLE_REF*) thd->alloc(sizeof(TABLE_REF)))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - tab_ref->key= 0; /* The only temp table index. */ - tab_ref->key_length= tmp_key->key_length; - if (!(tab_ref->key_buff= - (uchar*) thd->calloc(ALIGN_SIZE(tmp_key->key_length) * 2)) || - !(tab_ref->key_copy= - (store_key**) thd->alloc((sizeof(store_key*) * - (tmp_key_parts + 1)))) || - !(tab_ref->items= - (Item**) thd->alloc(sizeof(Item*) * tmp_key_parts))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - - tab_ref->key_buff2=tab_ref->key_buff+ALIGN_SIZE(tmp_key->key_length); - tab_ref->key_err=1; - tab_ref->null_rejecting= 1; - tab_ref->disable_cache= FALSE; - - KEY_PART_INFO *cur_key_part= tmp_key->key_part; - store_key **ref_key= tab_ref->key_copy; - uchar *cur_ref_buff= tab_ref->key_buff; - - for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++) - { - tab_ref->items[i]= emb_sj_nest->sj_subq_pred->left_expr->element_index(i); - int null_count= test(cur_key_part->field->real_maybe_null()); - *ref_key= new store_key_item(thd, cur_key_part->field, - /* TODO: - the NULL byte is taken into account in - cur_key_part->store_length, so instead of - cur_ref_buff + test(maybe_null), we could - use that information instead. - */ - cur_ref_buff + null_count, - null_count ? tab_ref->key_buff : 0, - cur_key_part->length, tab_ref->items[i]); - cur_ref_buff+= cur_key_part->store_length; - } - *ref_key= NULL; /* End marker. */ - tab_ref->key_err= 1; - tab_ref->key_parts= tmp_key_parts; - sjm->tab_ref= tab_ref; - - /* - Remove the injected semi-join IN-equalities from join_tab conds. This - needs to be done because the IN-equalities refer to columns of - sj-inner tables which are not available after the materialization - has been finished. 
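Conceptually, the lookup branch set up above behaves like probing a set keyed on all columns of the subquery's select list. The following standalone model (std::set standing in for the temptable and its unique index) shows only that behaviour, not the TABLE_REF/store_key machinery:

#include <set>
#include <vector>

typedef std::vector<long> RowSketch;

// Materialize-then-lookup, modelled with a set: duplicate combinations
// collapse on insert (the temptable is created with distinct=TRUE), and
// each outer row probes the set instead of re-executing the subquery.
struct SjmLookupModel
{
  std::set<RowSketch> materialized;

  void materialize(const std::vector<RowSketch> &subquery_rows)
  {
    materialized.insert(subquery_rows.begin(), subquery_rows.end());
  }

  bool outer_row_matches(const RowSketch &outer_values) const
  {
    return materialized.count(outer_values) != 0;
  }
};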
- */ - for (i= 0; i < sjm->tables; i++) - { - remove_sj_conds(&tab[i].select_cond); - if (tab[i].select) - remove_sj_conds(&tab[i].select->cond); - } - if (!(sjm->in_equality= create_subq_in_equalities(thd, sjm, - emb_sj_nest->sj_subq_pred))) - DBUG_RETURN(TRUE); /* purecov: inspected */ - } - else - { - /* - We'll be doing full scan of the temptable. - Setup copying of temptable columns back to the record buffers - for their source tables. We need this because IN-equalities - refer to the original tables. - - EXAMPLE - - Consider the query: - SELECT * FROM ot WHERE ot.col1 IN (SELECT it.col2 FROM it) - - Suppose it's executed with SJ-Materialization-scan. We choose to do scan - if we can't do the lookup, i.e. the join order is (it, ot). The plan - would look as follows: - - table access method condition - it materialize+scan - - ot (whatever) ot1.col1=it.col2 (C2) - - The condition C2 refers to current row of table it. The problem is - that by the time we evaluate C2, we would have finished with scanning - it itself and will be scanning the temptable. - - At the moment, our solution is to copy back: when we get the next - temptable record, we copy its columns to their corresponding columns - in the record buffers for the source tables. - */ - sjm->copy_field= new Copy_field[sjm->sjm_table_cols.elements]; - it.rewind(); - for (uint i=0; i < sjm->sjm_table_cols.elements; i++) - { - bool dummy; - Item_equal *item_eq; - Item *item= (it++)->real_item(); - DBUG_ASSERT(item->type() == Item::FIELD_ITEM); - Field *copy_to= ((Item_field*)item)->field; - /* - Tricks with Item_equal are due to the following: suppose we have a - query: - - ... WHERE cond(ot.col) AND ot.col IN (SELECT it2.col FROM it1,it2 - WHERE it1.col= it2.col) - then equality propagation will create an - - Item_equal(it1.col, it2.col, ot.col) - - then substitute_for_best_equal_field() will change the conditions - according to the join order: - - it1 - it2 it1.col=it2.col - ot cond(it1.col) - - although we've originally had "SELECT it2.col", conditions attached - to subsequent outer tables will refer to it1.col, so SJM-Scan will - need to unpack data to there. - That is, if an element from subquery's select list participates in - equality propagation, then we need to unpack it to the first - element equality propagation member that refers to table that is - within the subquery. 
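The copy-back target selection described above reduces to a bitmap test over the members of the equality class; a standalone sketch with invented FieldSketch/table_map_t types:

#include <cstdint>
#include <vector>

typedef uint64_t table_map_t;

struct FieldSketch
{
  table_map_t table_bit;  // map bit of the table this field belongs to
  int id;                 // stand-in for the Field* we would copy into
};

// Prefer the first member of the equality class that resolves to an
// sj-inner table; otherwise keep the original select-list field.
int choose_copy_target(const FieldSketch &original,
                       const std::vector<FieldSketch> &equal_class,
                       table_map_t sj_inner_tables)
{
  for (size_t i= 0; i < equal_class.size(); i++)
  {
    if ((equal_class[i].table_bit & ~sj_inner_tables) == 0)
      return equal_class[i].id;
  }
  return original.id;
}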
- */ - item_eq= find_item_equal(tab->join->cond_equal, copy_to, &dummy); - - if (item_eq) - { - List_iterator<Item_field> it(item_eq->fields); - Item_field *item; - while ((item= it++)) - { - if (!(item->used_tables() & ~emb_sj_nest->sj_inner_tables)) - { - copy_to= item->field; - break; - } - } - } - sjm->copy_field[i].set(copy_to, sjm->table->field[i], FALSE); - /* The write_set for source tables must be set up to allow the copying */ - bitmap_set_bit(copy_to->table->write_set, copy_to->field_index); - } - } - - DBUG_RETURN(FALSE); -} - - -/* - Check whether a join buffer can be used to join the specified table - - SYNOPSIS - check_join_cache_usage() - tab joined table to check join buffer usage for - join join for which the check is performed - options options of the join - no_jbuf_after don't use join buffering after table with this number - icp_other_tables_ok OUT TRUE if condition pushdown supports - other tables presence + check_join_cache_usage() + tab joined table to check join buffer usage for + join join for which the check is performed + options options of the join + no_jbuf_after don't use join buffering after table with this number + icp_other_tables_ok OUT TRUE if condition pushdown supports + other tables presence DESCRIPTION The function finds out whether the table 'tab' can be joined using a join @@ -11875,37 +9268,6 @@ change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list, } } -/** - Remove additional condition inserted by IN/ALL/ANY transformation. - - @param conds condition for processing - - @return - new conditions -*/ - -static Item *remove_additional_cond(Item* conds) -{ - if (conds->name == in_additional_cond) - return 0; - if (conds->type() == Item::COND_ITEM) - { - Item_cond *cnd= (Item_cond*) conds; - List_iterator<Item> li(*(cnd->argument_list())); - Item *item; - while ((item= li++)) - { - if (item->name == in_additional_cond) - { - li.remove(); - if (cnd->argument_list()->elements == 1) - return cnd->argument_list()->head(); - return conds; - } - } - } - return conds; -} static void propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list, @@ -12275,8 +9637,8 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top, /** Assign each nested join structure a bit in nested_join_map. - Assign each nested join structure (except "confluent" ones - those that - embed only one element) a bit in nested_join_map. + Assign each nested join structure (except ones that embed only one element + and so are redundant) a bit in nested_join_map. @param join Join being processed @param join_list List of tables @@ -12285,7 +9647,7 @@ simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top, @note This function is called after simplify_joins(), when there are no - redundant nested joins, #non_confluent_nested_joins <= #tables_in_join so + redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so we will not run out of bits in nested_join_map. @return @@ -12611,642 +9973,118 @@ void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, } -/* - Do semi-join optimization step after we've added a new tab to join prefix - - SYNOPSIS - advance_sj_state() - join The join we're optimizing - remaining_tables Tables not in the join prefix - new_join_tab Join tab we've just added to the join prefix - idx Index of this join tab (i.e. 
number of tables - in the prefix minus one) - current_record_count INOUT Estimate of #records in join prefix's output - current_read_time INOUT Cost to execute the join prefix - loose_scan_pos IN A POSITION with LooseScan plan to access - table new_join_tab - (produced by the last best_access_path call) - - DESCRIPTION - Update semi-join optimization state after we've added another tab (table - and access method) to the join prefix. - - The state is maintained in join->positions[#prefix_size]. Each of the - available strategies has its own state variables. - - for each semi-join strategy - { - update strategy's state variables; - - if (join prefix has all the tables that are needed to consider - using this strategy for the semi-join(s)) - { - calculate cost of using the strategy - if ((this is the first strategy to handle the semi-join nest(s) || - the cost is less than other strategies)) - { - // Pick this strategy - pos->sj_strategy= .. - .. - } - } - - Most of the new state is saved join->positions[idx] (and hence no undo - is necessary). Several members of class JOIN are updated also, these - changes can be rolled back with restore_prev_sj_state(). - - See setup_semijoin_dups_elimination() for a description of what kinds of - join prefixes each strategy can handle. -*/ - -static -void advance_sj_state(JOIN *join, table_map remaining_tables, - const JOIN_TAB *new_join_tab, uint idx, - double *current_record_count, double *current_read_time, - POSITION *loose_scan_pos) +static COND * +optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list, + Item::cond_result *cond_value) { - TABLE_LIST *emb_sj_nest; - POSITION *pos= join->positions + idx; - remaining_tables &= ~new_join_tab->table->map; + THD *thd= join->thd; + DBUG_ENTER("optimize_cond"); - pos->prefix_cost.convert_from_cost(*current_read_time); - pos->prefix_record_count= *current_record_count; - pos->sj_strategy= SJ_OPT_NONE; - - /* Initialize the state or copy it from prev. tables */ - if (idx == join->const_tables) - { - pos->first_firstmatch_table= MAX_TABLES; - pos->first_loosescan_table= MAX_TABLES; - pos->dupsweedout_tables= 0; - pos->sjm_scan_need_tables= 0; - LINT_INIT(pos->sjm_scan_last_inner); - } + if (!conds) + *cond_value= Item::COND_TRUE; else { - // FirstMatch - pos->first_firstmatch_table= - (pos[-1].sj_strategy == SJ_OPT_FIRST_MATCH) ? - MAX_TABLES : pos[-1].first_firstmatch_table; - pos->first_firstmatch_rtbl= pos[-1].first_firstmatch_rtbl; - pos->firstmatch_need_tables= pos[-1].firstmatch_need_tables; - - // LooseScan - pos->first_loosescan_table= - (pos[-1].sj_strategy == SJ_OPT_LOOSE_SCAN) ? - MAX_TABLES : pos[-1].first_loosescan_table; - pos->loosescan_need_tables= pos[-1].loosescan_need_tables; - - // SJ-Materialization Scan - pos->sjm_scan_need_tables= - (pos[-1].sj_strategy == SJ_OPT_MATERIALIZE_SCAN) ? - 0 : pos[-1].sjm_scan_need_tables; - pos->sjm_scan_last_inner= pos[-1].sjm_scan_last_inner; - - // Duplicate Weedout - pos->dupsweedout_tables= pos[-1].dupsweedout_tables; - pos->first_dupsweedout_table= pos[-1].first_dupsweedout_table; - } - - table_map handled_by_fm_or_ls= 0; - /* FirstMatch Strategy */ - if (new_join_tab->emb_sj_nest && - optimizer_flag(join->thd, OPTIMIZER_SWITCH_FIRSTMATCH)) - { - const table_map outer_corr_tables= - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on; - const table_map sj_inner_tables= - new_join_tab->emb_sj_nest->sj_inner_tables; - - /* - Enter condition: - 1. 
The next join tab belongs to semi-join nest - (verified for the encompassing code block above). - 2. We're not in a duplicate producer range yet - 3. All outer tables that - - the subquery is correlated with, or - - referred to from the outer_expr - are in the join prefix - 4. All inner tables are still part of remaining_tables. - */ - if (!join->cur_sj_inner_tables && // (2) - !(remaining_tables & outer_corr_tables) && // (3) - (sj_inner_tables == // (4) - ((remaining_tables | new_join_tab->table->map) & sj_inner_tables))) - { - /* Start tracking potential FirstMatch range */ - pos->first_firstmatch_table= idx; - pos->firstmatch_need_tables= sj_inner_tables; - pos->first_firstmatch_rtbl= remaining_tables; - } - - if (pos->first_firstmatch_table != MAX_TABLES) - { - if (outer_corr_tables & pos->first_firstmatch_rtbl) - { - /* - Trying to add an sj-inner table whose sj-nest has an outer correlated - table that was not in the prefix. This means FirstMatch can't be used. - */ - pos->first_firstmatch_table= MAX_TABLES; - } - else - { - /* Record that we need all of this semi-join's inner tables, too */ - pos->firstmatch_need_tables|= sj_inner_tables; - } - - if (!(pos->firstmatch_need_tables & remaining_tables)) - { - /* - Got a complete FirstMatch range. - Calculate correct costs and fanout - */ - double reopt_cost, reopt_rec_count, sj_inner_fanout; - optimize_wo_join_buffering(join, pos->first_firstmatch_table, idx, - remaining_tables, FALSE, idx, - &reopt_rec_count, &reopt_cost, - &sj_inner_fanout); - /* - We don't yet know what are the other strategies, so pick the - FirstMatch. - - We ought to save the alternate POSITIONs produced by - optimize_wo_join_buffering but the problem is that providing save - space uses too much space. Instead, we will re-calculate the - alternate POSITIONs after we've picked the best QEP. - */ - pos->sj_strategy= SJ_OPT_FIRST_MATCH; - *current_read_time= reopt_cost; - *current_record_count= reopt_rec_count / sj_inner_fanout; - handled_by_fm_or_ls= pos->firstmatch_need_tables; - } - } - } - - /* LooseScan Strategy */ - { - POSITION *first=join->positions+pos->first_loosescan_table; /* - LooseScan strategy can't handle interleaving between tables from the - semi-join that LooseScan is handling and any other tables. - - If we were considering LooseScan for the join prefix (1) - and the table we're adding creates an interleaving (2) - then - stop considering loose scan - */ - if ((pos->first_loosescan_table != MAX_TABLES) && // (1) - (first->table->emb_sj_nest->sj_inner_tables & remaining_tables) && //(2) - new_join_tab->emb_sj_nest != first->table->emb_sj_nest) //(2) - { - pos->first_loosescan_table= MAX_TABLES; - } + Build all multiple equality predicates and eliminate equality + predicates that can be inferred from these multiple equalities. + For each reference of a field included into a multiple equality + that occurs in a function set a pointer to the multiple equality + predicate. Substitute a constant instead of this field if the + multiple equality contains a constant. 
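The FirstMatch entry test in advance_sj_state() above is a pure bitmap predicate; a standalone restatement with uint64_t table maps (the condition numbers mirror the comment, and remaining_tables is assumed to already exclude the table being added, as in the caller):

#include <cstdint>

typedef uint64_t table_map_t;

bool can_start_firstmatch(table_map_t cur_sj_inner_tables, // open sj range?
                          table_map_t remaining_tables,
                          table_map_t new_table_map,
                          table_map_t outer_corr_tables,
                          table_map_t sj_inner_tables)
{
  return cur_sj_inner_tables == 0 &&                          // (2)
         (remaining_tables & outer_corr_tables) == 0 &&       // (3)
         ((remaining_tables | new_table_map) & sj_inner_tables) ==
          sj_inner_tables;                                    // (4)
}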
+ */ + DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY);); + conds= build_equal_items(join->thd, conds, NULL, join_list, + &join->cond_equal); + DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY);); + /* change field = field to field = const for each found field = const */ + propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds); /* - If we got an option to use LooseScan for the current table, start - considering using LooseScan strategy + Remove all instances of item == item + Remove all and-levels where CONST item != CONST item */ - if (loose_scan_pos->read_time != DBL_MAX) - { - pos->first_loosescan_table= idx; - pos->loosescan_need_tables= - new_join_tab->emb_sj_nest->sj_inner_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on | - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - } - - if ((pos->first_loosescan_table != MAX_TABLES) && - !(remaining_tables & pos->loosescan_need_tables)) - { - /* - Ok we have LooseScan plan and also have all LooseScan sj-nest's - inner tables and outer correlated tables into the prefix. - */ - - first=join->positions + pos->first_loosescan_table; - uint n_tables= my_count_bits(first->table->emb_sj_nest->sj_inner_tables); - /* Got a complete LooseScan range. Calculate its cost */ - double reopt_cost, reopt_rec_count, sj_inner_fanout; - /* - The same problem as with FirstMatch - we need to save POSITIONs - somewhere but reserving space for all cases would require too - much space. We will re-calculate POSITION structures later on. - */ - optimize_wo_join_buffering(join, pos->first_loosescan_table, idx, - remaining_tables, - TRUE, //first_alt - pos->first_loosescan_table + n_tables, - &reopt_rec_count, - &reopt_cost, &sj_inner_fanout); - /* - We don't yet have any other strategies that could handle this - semi-join nest (the other options are Duplicate Elimination or - Materialization, which need at least the same set of tables in - the join prefix to be considered) so unconditionally pick the - LooseScan. - */ - pos->sj_strategy= SJ_OPT_LOOSE_SCAN; - *current_read_time= reopt_cost; - *current_record_count= reopt_rec_count / sj_inner_fanout; - handled_by_fm_or_ls= first->table->emb_sj_nest->sj_inner_tables; - } - } - - /* - Update join->cur_sj_inner_tables (Used by FirstMatch in this function and - LooseScan detector in best_access_path) - */ - if ((emb_sj_nest= new_join_tab->emb_sj_nest)) - { - join->cur_sj_inner_tables |= emb_sj_nest->sj_inner_tables; - join->cur_dups_producing_tables |= emb_sj_nest->sj_inner_tables; - - /* Remove the sj_nest if all of its SJ-inner tables are in cur_table_map */ - if (!(remaining_tables & - emb_sj_nest->sj_inner_tables & ~new_join_tab->table->map)) - join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; + DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY);); + conds= remove_eq_conds(thd, conds, cond_value) ; + DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY);); } - join->cur_dups_producing_tables &= ~handled_by_fm_or_ls; + DBUG_RETURN(conds); +} - /* 4. SJ-Materialization and SJ-Materialization-scan strategy handler */ - bool sjm_scan; - SJ_MATERIALIZATION_INFO *mat_info; - if ((mat_info= at_sjmat_pos(join, remaining_tables, - new_join_tab, idx, &sjm_scan))) - { - if (sjm_scan) - { - /* - We can't yet evaluate this option yet. This is because we can't - accout for fanout of sj-inner tables yet: - ntX SJM-SCAN(it1 ... itN) | ot1 ... otN | - ^(1) ^(2) +/** + Remove const and eq items. 
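The truth-value side of remove_eq_conds() below can be modelled standalone for a single AND/OR level (cond_result values only, no Item rewriting; names are illustrative):

#include <vector>

enum cond_result_sketch { COND_OK_S, COND_TRUE_S, COND_FALSE_S };

// A FALSE argument collapses an AND level, a TRUE argument collapses an
// OR level, neutral arguments are dropped, and anything left over must
// still be evaluated at run time (COND_OK).
cond_result_sketch fold_level(bool and_level,
                              const std::vector<cond_result_sketch> &args)
{
  bool have_runtime_arg= false;
  for (size_t i= 0; i < args.size(); i++)
  {
    if (and_level && args[i] == COND_FALSE_S)
      return COND_FALSE_S;                       // always false
    if (!and_level && args[i] == COND_TRUE_S)
      return COND_TRUE_S;                        // always true
    if (args[i] == COND_OK_S)
      have_runtime_arg= true;
  }
  return have_runtime_arg ? COND_OK_S
                          : (and_level ? COND_TRUE_S : COND_FALSE_S);
}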
- we're now at position (1). SJM temptable in general has multiple - records, so at point (1) we'll get the fanout from sj-inner tables (ie - there will be multiple record combinations). + @return + Return new item, or NULL if no condition @n + cond_value is set to according: + - COND_OK : query is possible (field = constant) + - COND_TRUE : always true ( 1 = 1 ) + - COND_FALSE : always false ( 1 = 2 ) +*/ - The final join result will not contain any semi-join produced - fanout, i.e. tables within SJM-SCAN(...) will not contribute to - the cardinality of the join output. Extra fanout produced by - SJM-SCAN(...) will be 'absorbed' into fanout produced by ot1 ... otN. +COND * +remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) +{ + if (cond->type() == Item::COND_ITEM) + { + bool and_level= ((Item_cond*) cond)->functype() + == Item_func::COND_AND_FUNC; + List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); + Item::cond_result tmp_cond_value; + bool should_fix_fields=0; - The simple way to model this is to remove SJM-SCAN(...) fanout once - we reach the point #2. - */ - pos->sjm_scan_need_tables= - new_join_tab->emb_sj_nest->sj_inner_tables | - new_join_tab->emb_sj_nest->nested_join->sj_depends_on | - new_join_tab->emb_sj_nest->nested_join->sj_corr_tables; - pos->sjm_scan_last_inner= idx; - } - else + *cond_value=Item::COND_UNDEF; + Item *item; + while ((item=li++)) { - /* This is SJ-Materialization with lookups */ - COST_VECT prefix_cost; - signed int first_tab= (int)idx - mat_info->tables; - double prefix_rec_count; - if (first_tab < (int)join->const_tables) + Item *new_item=remove_eq_conds(thd, item, &tmp_cond_value); + if (!new_item) + li.remove(); + else if (item != new_item) { - prefix_cost.zero(); - prefix_rec_count= 1.0; + VOID(li.replace(new_item)); + should_fix_fields=1; } - else - { - prefix_cost= join->positions[first_tab].prefix_cost; - prefix_rec_count= join->positions[first_tab].prefix_record_count; + if (*cond_value == Item::COND_UNDEF) + *cond_value=tmp_cond_value; + switch (tmp_cond_value) { + case Item::COND_OK: // Not TRUE or FALSE + if (and_level || *cond_value == Item::COND_FALSE) + *cond_value=tmp_cond_value; + break; + case Item::COND_FALSE: + if (and_level) + { + *cond_value=tmp_cond_value; + return (COND*) 0; // Always false + } + break; + case Item::COND_TRUE: + if (!and_level) + { + *cond_value= tmp_cond_value; + return (COND*) 0; // Always true + } + break; + case Item::COND_UNDEF: // Impossible + break; /* purecov: deadcode */ } + } + if (should_fix_fields) + cond->update_used_tables(); - double mat_read_time= prefix_cost.total_cost(); - mat_read_time += mat_info->materialization_cost.total_cost() + - prefix_rec_count * mat_info->lookup_cost.total_cost(); - - if (mat_read_time < *current_read_time || join->cur_dups_producing_tables) - { - /* - NOTE: When we pick to use SJM[-Scan] we don't memcpy its POSITION - elements to join->positions as that makes it hard to return things - back when making one step back in join optimization. That's done - after the QEP has been chosen. 
- */ - pos->sj_strategy= SJ_OPT_MATERIALIZE; - *current_read_time= mat_read_time; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= - ~new_join_tab->emb_sj_nest->sj_inner_tables; - } + if (!((Item_cond*) cond)->argument_list()->elements || + *cond_value != Item::COND_OK) + return (COND*) 0; + if (((Item_cond*) cond)->argument_list()->elements == 1) + { // Remove list + item= ((Item_cond*) cond)->argument_list()->head(); + ((Item_cond*) cond)->argument_list()->empty(); + return item; } } - - /* 4.A SJM-Scan second phase check */ - if (pos->sjm_scan_need_tables && /* Have SJM-Scan prefix */ - !(pos->sjm_scan_need_tables & remaining_tables)) - { - TABLE_LIST *mat_nest= - join->positions[pos->sjm_scan_last_inner].table->emb_sj_nest; - SJ_MATERIALIZATION_INFO *mat_info= mat_nest->sj_mat_info; - - double prefix_cost; - double prefix_rec_count; - int first_tab= pos->sjm_scan_last_inner + 1 - mat_info->tables; - /* Get the prefix cost */ - if (first_tab == (int)join->const_tables) - { - prefix_rec_count= 1.0; - prefix_cost= 0.0; - } - else - { - prefix_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; - } - - /* Add materialization cost */ - prefix_cost += mat_info->materialization_cost.total_cost() + - prefix_rec_count * mat_info->scan_cost.total_cost(); - prefix_rec_count *= mat_info->rows; - - uint i; - table_map rem_tables= remaining_tables; - for (i= idx; i != (first_tab + mat_info->tables - 1); i--) - rem_tables |= join->positions[i].table->table->map; - - POSITION curpos, dummy; - /* Need to re-run best-access-path as we prefix_rec_count has changed */ - for (i= first_tab + mat_info->tables; i <= idx; i++) - { - best_access_path(join, join->positions[i].table, rem_tables, i, FALSE, - prefix_rec_count, &curpos, &dummy); - prefix_rec_count *= curpos.records_read; - prefix_cost += curpos.read_time; - } - - /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - In the second case, we pick this strategy unconditionally because - comparing cost without semi-join duplicate removal with cost with - duplicate removal is not an apples-to-apples comparison. - */ - if (prefix_cost < *current_read_time || join->cur_dups_producing_tables) - { - pos->sj_strategy= SJ_OPT_MATERIALIZE_SCAN; - *current_read_time= prefix_cost; - *current_record_count= prefix_rec_count; - join->cur_dups_producing_tables&= ~mat_nest->sj_inner_tables; - - } - } - - /* 5. Duplicate Weedout strategy handler */ - { - /* - Duplicate weedout can be applied after all ON-correlated and - correlated - */ - TABLE_LIST *nest; - if ((nest= new_join_tab->emb_sj_nest)) - { - if (!pos->dupsweedout_tables) - pos->first_dupsweedout_table= idx; - - pos->dupsweedout_tables |= nest->sj_inner_tables | - nest->nested_join->sj_depends_on | - nest->nested_join->sj_corr_tables; - } - - if (pos->dupsweedout_tables && - !(remaining_tables & - ~new_join_tab->table->map & pos->dupsweedout_tables)) - { - /* - Ok, reached a state where we could put a dups weedout point. - Walk back and calculate - - the join cost (this is needed as the accumulated cost may assume - some other duplicate elimination method) - - extra fanout that will be removed by duplicate elimination - - duplicate elimination cost - There are two cases: - 1. We have other strategy/ies to remove all of the duplicates. - 2. We don't. 
- - We need to calculate the cost in case #2 also because we need to make - choice between this join order and others. - */ - uint first_tab= pos->first_dupsweedout_table; - double dups_cost; - double prefix_rec_count; - double sj_inner_fanout= 1.0; - double sj_outer_fanout= 1.0; - uint temptable_rec_size; - if (first_tab == join->const_tables) - { - prefix_rec_count= 1.0; - temptable_rec_size= 0; - dups_cost= 0.0; - } - else - { - dups_cost= join->positions[first_tab - 1].prefix_cost.total_cost(); - prefix_rec_count= join->positions[first_tab - 1].prefix_record_count; - temptable_rec_size= 8; /* This is not true but we'll make it so */ - } - - table_map dups_removed_fanout= 0; - for (uint j= pos->first_dupsweedout_table; j <= idx; j++) - { - POSITION *p= join->positions + j; - dups_cost += p->read_time; - if (p->table->emb_sj_nest) - { - sj_inner_fanout *= p->records_read; - dups_removed_fanout |= p->table->table->map; - } - else - { - sj_outer_fanout *= p->records_read; - temptable_rec_size += p->table->table->file->ref_length; - } - } - - /* - Add the cost of temptable use. The table will have sj_outer_fanout - records, and we will make - - sj_outer_fanout table writes - - sj_inner_fanout*sj_outer_fanout lookups. - - */ - double one_lookup_cost; - if (sj_outer_fanout*temptable_rec_size > - join->thd->variables.max_heap_table_size) - one_lookup_cost= DISK_TEMPTABLE_LOOKUP_COST; - else - one_lookup_cost= HEAP_TEMPTABLE_LOOKUP_COST; - - double write_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout * one_lookup_cost; - double full_lookup_cost= join->positions[first_tab].prefix_record_count* - sj_outer_fanout* sj_inner_fanout * - one_lookup_cost; - dups_cost += write_cost + full_lookup_cost; - - /* - Use the strategy if - * it is cheaper then what we've had, or - * we haven't picked any other semi-join strategy yet - The second part is necessary because this strategy is the last one - to consider (it needs "the most" tables in the prefix) and we can't - leave duplicate-producing tables not handled by any strategy. - */ - if (dups_cost < *current_read_time || join->cur_dups_producing_tables) - { - pos->sj_strategy= SJ_OPT_DUPS_WEEDOUT; - *current_read_time= dups_cost; - *current_record_count= *current_record_count / sj_inner_fanout; - join->cur_dups_producing_tables &= ~dups_removed_fanout; - } - } - } -} - - -/* - Remove the last join tab from from join->cur_sj_inner_tables bitmap - we assume remaining_tables doesnt contain @tab. -*/ - -static void restore_prev_sj_state(const table_map remaining_tables, - const JOIN_TAB *tab, uint idx) -{ - TABLE_LIST *emb_sj_nest; - if ((emb_sj_nest= tab->emb_sj_nest)) - { - /* If we're removing the last SJ-inner table, remove the sj-nest */ - if ((remaining_tables & emb_sj_nest->sj_inner_tables) == - (emb_sj_nest->sj_inner_tables & ~tab->table->map)) - { - tab->join->cur_sj_inner_tables &= ~emb_sj_nest->sj_inner_tables; - } - } -} - - -static COND * -optimize_cond(JOIN *join, COND *conds, List<TABLE_LIST> *join_list, - Item::cond_result *cond_value) -{ - THD *thd= join->thd; - DBUG_ENTER("optimize_cond"); - - if (!conds) - *cond_value= Item::COND_TRUE; - else - { - /* - Build all multiple equality predicates and eliminate equality - predicates that can be inferred from these multiple equalities. - For each reference of a field included into a multiple equality - that occurs in a function set a pointer to the multiple equality - predicate. Substitute a constant instead of this field if the - multiple equality contains a constant. 
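The DuplicateWeedout cost computed above, restated standalone (the *_SKETCH lookup-cost constants are illustrative stand-ins for the server's heap/disk temptable lookup costs):

static const double HEAP_TEMPTABLE_LOOKUP_COST_SKETCH= 0.05;
static const double DISK_TEMPTABLE_LOOKUP_COST_SKETCH= 1.0;

// prefix_rec_count: rows entering the weedout range
// inner_join_cost:  sum of read_time over the tables in the range
double dups_weedout_cost(double prefix_rec_count,
                         double inner_join_cost,
                         double sj_inner_fanout,
                         double sj_outer_fanout,
                         double temptable_rec_size,
                         double max_heap_table_size)
{
  double one_lookup_cost=
    (sj_outer_fanout * temptable_rec_size > max_heap_table_size) ?
     DISK_TEMPTABLE_LOOKUP_COST_SKETCH : HEAP_TEMPTABLE_LOOKUP_COST_SKETCH;
  double write_cost= prefix_rec_count * sj_outer_fanout * one_lookup_cost;
  double full_lookup_cost= prefix_rec_count * sj_outer_fanout *
                           sj_inner_fanout * one_lookup_cost;
  return inner_join_cost + write_cost + full_lookup_cost;
}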
- */ - DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY);); - conds= build_equal_items(join->thd, conds, NULL, join_list, - &join->cond_equal); - DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY);); - - /* change field = field to field = const for each found field = const */ - propagate_cond_constants(thd, (I_List<COND_CMP> *) 0, conds, conds); - /* - Remove all instances of item == item - Remove all and-levels where CONST item != CONST item - */ - DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY);); - conds= remove_eq_conds(thd, conds, cond_value) ; - DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY);); - } - DBUG_RETURN(conds); -} - - -/** - Remove const and eq items. - - @return - Return new item, or NULL if no condition @n - cond_value is set to according: - - COND_OK : query is possible (field = constant) - - COND_TRUE : always true ( 1 = 1 ) - - COND_FALSE : always false ( 1 = 2 ) -*/ - -COND * -remove_eq_conds(THD *thd, COND *cond, Item::cond_result *cond_value) -{ - if (cond->type() == Item::COND_ITEM) - { - bool and_level= ((Item_cond*) cond)->functype() - == Item_func::COND_AND_FUNC; - List_iterator<Item> li(*((Item_cond*) cond)->argument_list()); - Item::cond_result tmp_cond_value; - bool should_fix_fields=0; - - *cond_value=Item::COND_UNDEF; - Item *item; - while ((item=li++)) - { - Item *new_item=remove_eq_conds(thd, item, &tmp_cond_value); - if (!new_item) - li.remove(); - else if (item != new_item) - { - VOID(li.replace(new_item)); - should_fix_fields=1; - } - if (*cond_value == Item::COND_UNDEF) - *cond_value=tmp_cond_value; - switch (tmp_cond_value) { - case Item::COND_OK: // Not TRUE or FALSE - if (and_level || *cond_value == Item::COND_FALSE) - *cond_value=tmp_cond_value; - break; - case Item::COND_FALSE: - if (and_level) - { - *cond_value=tmp_cond_value; - return (COND*) 0; // Always false - } - break; - case Item::COND_TRUE: - if (!and_level) - { - *cond_value= tmp_cond_value; - return (COND*) 0; // Always true - } - break; - case Item::COND_UNDEF: // Impossible - break; /* purecov: deadcode */ - } - } - if (should_fix_fields) - cond->update_used_tables(); - - if (!((Item_cond*) cond)->argument_list()->elements || - *cond_value != Item::COND_OK) - return (COND*) 0; - if (((Item_cond*) cond)->argument_list()->elements == 1) - { // Remove list - item= ((Item_cond*) cond)->argument_list()->head(); - ((Item_cond*) cond)->argument_list()->empty(); - return item; - } - } - else if (cond->type() == Item::FUNC_ITEM && - ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC) + else if (cond->type() == Item::FUNC_ITEM && + ((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC) { /* Handles this special case for some ODBC applications: @@ -13743,948 +10581,263 @@ Field *create_tmp_field(THD *thd, TABLE *table,Item *item, Item::Type type, case Item::FUNC_ITEM: if (((Item_func *) item)->functype() == Item_func::FUNC_SP) { - Item_func_sp *item_func_sp= (Item_func_sp *) item; - Field *sp_result_field= item_func_sp->get_sp_result_field(); - - if (make_copy_field) - { - DBUG_ASSERT(item_func_sp->result_field); - *from_field= item_func_sp->result_field; - } - else - { - *((*copy_func)++)= item; - } - - Field *result_field= - create_tmp_field_from_field(thd, - sp_result_field, - item_func_sp->name, - table, - NULL, - convert_blob_length); - - if (modify_item) - item->set_result_field(result_field); - - return result_field; - } - - /* Fall through */ - case Item::COND_ITEM: - case Item::FIELD_AVG_ITEM: 
- case Item::FIELD_STD_ITEM: - case Item::SUBSELECT_ITEM: - /* The following can only happen with 'CREATE TABLE ... SELECT' */ - case Item::PROC_ITEM: - case Item::INT_ITEM: - case Item::REAL_ITEM: - case Item::DECIMAL_ITEM: - case Item::STRING_ITEM: - case Item::REF_ITEM: - case Item::NULL_ITEM: - case Item::VARBIN_ITEM: - if (make_copy_field) - { - DBUG_ASSERT(((Item_result_field*)item)->result_field); - *from_field= ((Item_result_field*)item)->result_field; - } - return create_tmp_field_from_item(thd, item, table, - (make_copy_field ? 0 : copy_func), - modify_item, convert_blob_length); - case Item::TYPE_HOLDER: - result= ((Item_type_holder *)item)->make_field_by_type(table); - result->set_derivation(item->collation.derivation); - return result; - default: // Dosen't have to be stored - return 0; - } -} - -/* - Set up column usage bitmaps for a temporary table - - IMPLEMENTATION - For temporary tables, we need one bitmap with all columns set and - a tmp_set bitmap to be used by things like filesort. -*/ - -void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps) -{ - uint field_count= table->s->fields; - bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count, - FALSE); - bitmap_init(&table->tmp_set, - (my_bitmap_map*) (bitmaps+ bitmap_buffer_size(field_count)), - field_count, FALSE); - bitmap_init(&table->vcol_set, - (my_bitmap_map*) (bitmaps+ 2+bitmap_buffer_size(field_count)), - field_count, FALSE); - - /* write_set and all_set are copies of read_set */ - table->def_write_set= table->def_read_set; - table->s->all_set= table->def_read_set; - bitmap_set_all(&table->s->all_set); - table->default_column_bitmaps(); -} - - -/** - Create a temp table according to a field list. - - Given field pointers are changed to point at tmp_table for - send_fields. The table object is self contained: it's - allocated in its own memory root, as well as Field objects - created for table columns. - This function will replace Item_sum items in 'fields' list with - corresponding Item_field items, pointing at the fields in the - temporary table, unless this was prohibited by TRUE - value of argument save_sum_fields. The Item_field objects - are created in THD memory root. - - @param thd thread handle - @param param a description used as input to create the table - @param fields list of items that will be used to define - column types of the table (also see NOTES) - @param group TODO document - @param distinct should table rows be distinct - @param save_sum_fields see NOTES - @param select_options - @param rows_limit - @param table_alias possible name of the temporary table that can - be used for name resolving; can be "". 
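The AND/OR folding that remove_eq_conds() performs above follows a small set of rules; a standalone sketch with plain enums instead of Item trees (simplified, and it does not rewrite the surviving children):

// One FALSE child kills an AND level, one TRUE child settles an OR level;
// TRUE-in-AND and FALSE-in-OR children are simply dropped; if nothing
// "ordinary" survives, the whole level degenerates to a constant.
#include <vector>

enum Cond_result_sketch { SK_COND_OK, SK_COND_TRUE, SK_COND_FALSE };

static Cond_result_sketch fold_cond_level(bool and_level,
                                          std::vector<Cond_result_sketch> children)
{
  unsigned kept= 0;
  for (Cond_result_sketch c : children)
  {
    if (and_level  && c == SK_COND_FALSE) return SK_COND_FALSE;
    if (!and_level && c == SK_COND_TRUE)  return SK_COND_TRUE;
    if (c == SK_COND_OK)
      kept++;                               // an ordinary predicate stays
  }
  if (kept == 0)
    return and_level ? SK_COND_TRUE : SK_COND_FALSE;
  return SK_COND_OK;                        // level survives, possibly shorter
}
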
-*/ - -#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 -#define AVG_STRING_LENGTH_TO_PACK_ROWS 64 -#define RATIO_TO_PACK_ROWS 2 -#define MIN_STRING_LENGTH_TO_PACK_ROWS 10 - -TABLE * -create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, - ORDER *group, bool distinct, bool save_sum_fields, - ulonglong select_options, ha_rows rows_limit, - char *table_alias) -{ - MEM_ROOT *mem_root_save, own_root; - TABLE *table; - TABLE_SHARE *share; - uint i,field_count,null_count,null_pack_length; - uint copy_func_count= param->func_count; - uint hidden_null_count, hidden_null_pack_length, hidden_field_count; - uint blob_count,group_null_items, string_count; - uint temp_pool_slot=MY_BIT_NONE; - uint fieldnr= 0; - ulong reclength, string_total_length; - bool using_unique_constraint= 0; - bool use_packed_rows= 0; - bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS); - char *tmpname,path[FN_REFLEN], tmp_table_name[50]; - uchar *pos, *group_buff, *bitmaps; - uchar *null_flags; - Field **reg_field, **from_field, **default_field; - uint *blob_field; - Copy_field *copy=0; - KEY *keyinfo; - KEY_PART_INFO *key_part_info; - Item **copy_func; - ENGINE_COLUMNDEF *recinfo; - uint total_uneven_bit_length= 0; - bool force_copy_fields= param->force_copy_fields; - /* Treat sum functions as normal ones when loose index scan is used. */ - save_sum_fields|= param->precomputed_group_by; - DBUG_ENTER("create_tmp_table"); - DBUG_PRINT("enter", - ("distinct: %d save_sum_fields: %d rows_limit: %lu group: %d", - (int) distinct, (int) save_sum_fields, - (ulong) rows_limit,test(group))); - - status_var_increment(thd->status_var.created_tmp_tables); - thd->query_plan_flags|= QPLAN_TMP_TABLE; - - if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) - temp_pool_slot = bitmap_lock_set_next(&temp_pool); - - if (temp_pool_slot != MY_BIT_NONE) // we got a slot - sprintf(tmp_table_name, "%s_%lx_%i", tmp_file_prefix, - current_pid, temp_pool_slot); - else - { - /* if we run out of slots or we are not using tempool */ - sprintf(tmp_table_name, "%s%lx_%lx_%x", tmp_file_prefix,current_pid, - thd->thread_id, thd->tmp_table++); - } - - /* - No need to change table name to lower case as we are only creating - MyISAM, Maria or HEAP tables here - */ - fn_format(path, tmp_table_name, mysql_tmpdir, "", - MY_REPLACE_EXT|MY_UNPACK_FILENAME); - - if (group) - { - if (!param->quick_group) - group=0; // Can't use group key - else for (ORDER *tmp=group ; tmp ; tmp=tmp->next) - { - /* - marker == 4 means two things: - - store NULLs in the key, and - - convert BIT fields to 64-bit long, needed because MEMORY tables - can't index BIT fields. - */ - (*tmp->item)->marker=4; // Store null in key - if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB) - using_unique_constraint=1; - } - if (param->group_length >= MAX_BLOB_WIDTH) - using_unique_constraint=1; - if (group) - distinct=0; // Can't use distinct - } - - field_count=param->field_count+param->func_count+param->sum_func_count; - hidden_field_count=param->hidden_field_count; - - /* - When loose index scan is employed as access method, it already - computes all groups and the result of all aggregate functions. We - make space for the items of the aggregate function in the list of - functions TMP_TABLE_PARAM::items_to_copy, so that the values of - these items are stored in the temporary table. 
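The decision above on how GROUP BY is enforced in the temporary table can be read as one predicate: use a normal group key while every group column is short enough, otherwise fall back to a unique constraint because HEAP/MyISAM keys cannot cover such values. A hedged sketch (threshold parameters mirror CONVERT_IF_BIGGER_TO_BLOB and MAX_BLOB_WIDTH; the real code additionally re-checks the chosen engine's max_key_parts/max_key_length later):

static bool group_needs_unique_constraint(unsigned max_group_item_length,
                                          unsigned total_group_length,
                                          unsigned convert_if_bigger_to_blob,
                                          unsigned max_blob_width)
{
  // Very long group columns cannot be indexed by an ordinary group key,
  // so a unique constraint (hash) over the row is used instead.
  return max_group_item_length >= convert_if_bigger_to_blob ||
         total_group_length >= max_blob_width;
}
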
- */ - if (param->precomputed_group_by) - copy_func_count+= param->sum_func_count; - - init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0); - - if (!multi_alloc_root(&own_root, - &table, sizeof(*table), - &share, sizeof(*share), - ®_field, sizeof(Field*) * (field_count+1), - &default_field, sizeof(Field*) * (field_count), - &blob_field, sizeof(uint)*(field_count+1), - &from_field, sizeof(Field*)*field_count, - ©_func, sizeof(*copy_func)*(copy_func_count+1), - ¶m->keyinfo, sizeof(*param->keyinfo), - &key_part_info, - sizeof(*key_part_info)*(param->group_parts+1), - ¶m->start_recinfo, - sizeof(*param->recinfo)*(field_count*2+4), - &tmpname, (uint) strlen(tmp_table_name)+1, - &group_buff, (group && ! using_unique_constraint ? - param->group_length : 0), - &bitmaps, bitmap_buffer_size(field_count)*3, - NullS)) - { - if (temp_pool_slot != MY_BIT_NONE) - bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); /* purecov: inspected */ - } - /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ - if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count])) - { - if (temp_pool_slot != MY_BIT_NONE) - bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - free_root(&own_root, MYF(0)); /* purecov: inspected */ - DBUG_RETURN(NULL); /* purecov: inspected */ - } - param->items_to_copy= copy_func; - strmov(tmpname, tmp_table_name); - /* make table according to fields */ - - bzero((char*) table,sizeof(*table)); - bzero((char*) reg_field,sizeof(Field*)*(field_count+1)); - bzero((char*) default_field, sizeof(Field*) * (field_count)); - bzero((char*) from_field,sizeof(Field*)*field_count); - - table->mem_root= own_root; - mem_root_save= thd->mem_root; - thd->mem_root= &table->mem_root; - - table->field=reg_field; - table->alias= table_alias; - table->reginfo.lock_type=TL_WRITE; /* Will be updated */ - table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE; - table->map=1; - table->temp_pool_slot = temp_pool_slot; - table->copy_blobs= 1; - table->in_use= thd; - table->quick_keys.init(); - table->covering_keys.init(); - table->merge_keys.init(); - table->keys_in_use_for_query.init(); - - table->s= share; - init_tmp_table_share(thd, share, "", 0, tmpname, tmpname); - share->blob_field= blob_field; - share->blob_ptr_size= portable_sizeof_char_ptr; - share->db_low_byte_first=1; // True for HEAP, MyISAM and Maria - share->table_charset= param->table_charset; - share->primary_key= MAX_KEY; // Indicate no primary key - share->keys_for_keyread.init(); - share->keys_in_use.init(); - - /* Calculate which type of fields we will store in the temporary table */ - - reclength= string_total_length= 0; - blob_count= string_count= null_count= hidden_null_count= group_null_items= 0; - param->using_indirect_summary_function=0; - - List_iterator_fast<Item> li(fields); - Item *item; - Field **tmp_from_field=from_field; - while ((item=li++)) - { - Item::Type type=item->type(); - if (not_all_columns) - { - if (item->with_sum_func && type != Item::SUM_FUNC_ITEM) - { - if (item->used_tables() & OUTER_REF_TABLE_BIT) - item->update_used_tables(); - if (type == Item::SUBSELECT_ITEM || - (item->used_tables() & ~OUTER_REF_TABLE_BIT)) - { - /* - Mark that the we have ignored an item that refers to a summary - function. We need to know this if someone is going to use - DISTINCT on the result. 
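The multi_alloc_root() call above carves one allocation out of the table's memory root into every piece the temp table needs (TABLE, TABLE_SHARE, field arrays, key info, record descriptors, name, buffers), so a single free_root() later releases all of it. A plain-C++ sketch of the same pattern, not the MEM_ROOT API:

#include <cstddef>

struct Piece_sketch { void **ptr; std::size_t size; };

static char *alloc_pieces(Piece_sketch *pieces, std::size_t n_pieces)
{
  std::size_t total= 0;
  for (std::size_t i= 0; i < n_pieces; i++)
    total+= (pieces[i].size + 7) & ~std::size_t(7);   // keep 8-byte alignment
  char *block= new char[total];                       // one allocation for all pieces
  char *p= block;
  for (std::size_t i= 0; i < n_pieces; i++)
  {
    *pieces[i].ptr= p;                                // hand out consecutive slices
    p+= (pieces[i].size + 7) & ~std::size_t(7);
  }
  return block;                  // delete[] block releases every piece at once
}
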
- */ - param->using_indirect_summary_function=1; - continue; - } - } - if (item->const_item() && (int) hidden_field_count <= 0) - continue; // We don't have to store this - } - if (type == Item::SUM_FUNC_ITEM && !group && !save_sum_fields) - { /* Can't calc group yet */ - Item_sum *sum_item= (Item_sum *) item; - sum_item->result_field=0; - for (i=0 ; i < sum_item->get_arg_count() ; i++) - { - Item *arg= sum_item->get_arg(i); - if (!arg->const_item()) - { - Field *new_field= - create_tmp_field(thd, table, arg, arg->type(), ©_func, - tmp_from_field, &default_field[fieldnr], - group != 0,not_all_columns, - distinct, 0, - param->convert_blob_length); - if (!new_field) - goto err; // Should be OOM - tmp_from_field++; - reclength+=new_field->pack_length(); - if (new_field->flags & BLOB_FLAG) - { - *blob_field++= fieldnr; - blob_count++; - } - if (new_field->type() == MYSQL_TYPE_BIT) - total_uneven_bit_length+= new_field->field_length & 7; - *(reg_field++)= new_field; - if (new_field->real_type() == MYSQL_TYPE_STRING || - new_field->real_type() == MYSQL_TYPE_VARCHAR) - { - string_count++; - string_total_length+= new_field->pack_length(); - } - thd->mem_root= mem_root_save; - arg= sum_item->set_arg(i, thd, new Item_field(new_field)); - thd->mem_root= &table->mem_root; - if (!(new_field->flags & NOT_NULL_FLAG)) - { - null_count++; - /* - new_field->maybe_null() is still false, it will be - changed below. But we have to setup Item_field correctly - */ - arg->maybe_null=1; - } - new_field->field_index= fieldnr++; - } - } - } - else - { - /* - The last parameter to create_tmp_field() is a bit tricky: - - We need to set it to 0 in union, to get fill_record() to modify the - temporary table. - We need to set it to 1 on multi-table-update and in select to - write rows to the temporary table. - We here distinguish between UNION and multi-table-updates by the fact - that in the later case group is set to the row pointer. - - The test for item->marker == 4 is ensure we don't create a group-by - key over a bit field as heap tables can't handle that. - */ - Field *new_field= (param->schema_table) ? 
- create_tmp_field_for_schema(thd, item, table) : - create_tmp_field(thd, table, item, type, ©_func, - tmp_from_field, &default_field[fieldnr], - group != 0, - !force_copy_fields && - (not_all_columns || group !=0), - item->marker == 4, force_copy_fields, - param->convert_blob_length); - - if (!new_field) - { - if (thd->is_fatal_error) - goto err; // Got OOM - continue; // Some kindf of const item - } - if (type == Item::SUM_FUNC_ITEM) - ((Item_sum *) item)->result_field= new_field; - tmp_from_field++; - reclength+=new_field->pack_length(); - if (!(new_field->flags & NOT_NULL_FLAG)) - null_count++; - if (new_field->type() == MYSQL_TYPE_BIT) - total_uneven_bit_length+= new_field->field_length & 7; - if (new_field->flags & BLOB_FLAG) - { - *blob_field++= fieldnr; - blob_count++; - } - if (new_field->real_type() == MYSQL_TYPE_STRING || - new_field->real_type() == MYSQL_TYPE_VARCHAR) - { - string_count++; - string_total_length+= new_field->pack_length(); - } - if (item->marker == 4 && item->maybe_null) - { - group_null_items++; - new_field->flags|= GROUP_FLAG; - } - new_field->field_index= fieldnr++; - *(reg_field++)= new_field; - } - if (!--hidden_field_count) - { - /* - This was the last hidden field; Remember how many hidden fields could - have null - */ - hidden_null_count=null_count; - /* - We need to update hidden_field_count as we may have stored group - functions with constant arguments - */ - param->hidden_field_count= fieldnr; - null_count= 0; - } - } - DBUG_ASSERT(fieldnr == (uint) (reg_field - table->field)); - DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); - field_count= fieldnr; - *reg_field= 0; - *blob_field= 0; // End marker - share->fields= field_count; - - /* If result table is small; use a heap */ - /* future: storage engine selection can be made dynamic? 
*/ - if (blob_count || using_unique_constraint || - (select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) == - OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM)) - { - share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); - table->file= get_new_handler(share, &table->mem_root, - share->db_type()); - if (group && - (param->group_parts > table->file->max_key_parts() || - param->group_length > table->file->max_key_length())) - using_unique_constraint=1; - } - else - { - share->db_plugin= ha_lock_engine(0, heap_hton); - table->file= get_new_handler(share, &table->mem_root, - share->db_type()); - } - if (!table->file) - goto err; - - if (!using_unique_constraint) - reclength+= group_null_items; // null flag is stored separately - - share->blob_fields= blob_count; - if (blob_count == 0) - { - /* We need to ensure that first byte is not 0 for the delete link */ - if (param->hidden_field_count) - hidden_null_count++; - else - null_count++; - } - hidden_null_pack_length=(hidden_null_count+7)/8; - null_pack_length= (hidden_null_pack_length + - (null_count + total_uneven_bit_length + 7) / 8); - reclength+=null_pack_length; - if (!reclength) - reclength=1; // Dummy select - /* Use packed rows if there is blobs or a lot of space to gain */ - if (blob_count || - (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && - (reclength / string_total_length <= RATIO_TO_PACK_ROWS || - string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS))) - use_packed_rows= 1; - - share->reclength= reclength; - { - uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1); - share->rec_buff_length= alloc_length; - if (!(table->record[0]= (uchar*) - alloc_root(&table->mem_root, alloc_length*3))) - goto err; - table->record[1]= table->record[0]+alloc_length; - share->default_values= table->record[1]+alloc_length; - } - copy_func[0]=0; // End marker - param->func_count= copy_func - param->items_to_copy; - - setup_tmp_table_column_bitmaps(table, bitmaps); - - recinfo=param->start_recinfo; - null_flags=(uchar*) table->record[0]; - pos=table->record[0]+ null_pack_length; - if (null_pack_length) - { - bzero((uchar*) recinfo,sizeof(*recinfo)); - recinfo->type=FIELD_NORMAL; - recinfo->length=null_pack_length; - recinfo++; - bfill(null_flags,null_pack_length,255); // Set null fields - - table->null_flags= (uchar*) table->record[0]; - share->null_fields= null_count+ hidden_null_count; - share->null_bytes= null_pack_length; - } - null_count= (blob_count == 0) ? 
1 : 0; - hidden_field_count=param->hidden_field_count; - for (i=0,reg_field=table->field; i < field_count; i++,reg_field++,recinfo++) - { - Field *field= *reg_field; - uint length; - bzero((uchar*) recinfo,sizeof(*recinfo)); - - if (!(field->flags & NOT_NULL_FLAG)) - { - if (field->flags & GROUP_FLAG && !using_unique_constraint) - { - /* - We have to reserve one byte here for NULL bits, - as this is updated by 'end_update()' - */ - *pos++=0; // Null is stored here - recinfo->length=1; - recinfo->type=FIELD_NORMAL; - recinfo++; - bzero((uchar*) recinfo,sizeof(*recinfo)); - } - else - { - recinfo->null_bit= 1 << (null_count & 7); - recinfo->null_pos= null_count/8; - } - field->move_field(pos,null_flags+null_count/8, - 1 << (null_count & 7)); - null_count++; - } - else - field->move_field(pos,(uchar*) 0,0); - if (field->type() == MYSQL_TYPE_BIT) - { - /* We have to reserve place for extra bits among null bits */ - ((Field_bit*) field)->set_bit_ptr(null_flags + null_count / 8, - null_count & 7); - null_count+= (field->field_length & 7); - } - field->reset(); - - /* - Test if there is a default field value. The test for ->ptr is to skip - 'offset' fields generated by initalize_tables - */ - if (default_field[i] && default_field[i]->ptr) - { - /* - default_field[i] is set only in the cases when 'field' can - inherit the default value that is defined for the field referred - by the Item_field object from which 'field' has been created. - */ - my_ptrdiff_t diff; - Field *orig_field= default_field[i]; - /* Get the value from default_values */ - diff= (my_ptrdiff_t) (orig_field->table->s->default_values- - orig_field->table->record[0]); - orig_field->move_field_offset(diff); // Points now at default_values - if (orig_field->is_real_null()) - field->set_null(); - else - { - field->set_notnull(); - memcpy(field->ptr, orig_field->ptr, field->pack_length()); - } - orig_field->move_field_offset(-diff); // Back to record[0] - } - - if (from_field[i]) - { /* Not a table Item */ - copy->set(field,from_field[i],save_sum_fields); - copy++; - } - length=field->pack_length(); - pos+= length; - - /* Make entry for create table */ - recinfo->length=length; - if (field->flags & BLOB_FLAG) - recinfo->type= FIELD_BLOB; - else if (use_packed_rows && - field->real_type() == MYSQL_TYPE_STRING && - length >= MIN_STRING_LENGTH_TO_PACK_ROWS) - recinfo->type= FIELD_SKIP_ENDSPACE; - else if (field->real_type() == MYSQL_TYPE_VARCHAR) - recinfo->type= FIELD_VARCHAR; - else - recinfo->type= FIELD_NORMAL; - - if (!--hidden_field_count) - null_count=(null_count+7) & ~7; // move to next byte - - // fix table name in field entry - field->table_name= &table->alias; - } - - param->copy_field_end=copy; - param->recinfo=recinfo; - store_record(table,s->default_values); // Make empty default record - - if (thd->variables.tmp_table_size == ~ (ulonglong) 0) // No limit - share->max_rows= ~(ha_rows) 0; - else - share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ? - min(thd->variables.tmp_table_size, - thd->variables.max_heap_table_size) : - thd->variables.tmp_table_size) / - share->reclength); - set_if_bigger(share->max_rows,1); // For dummy start options - /* - Push the LIMIT clause to the temporary table creation, so that we - materialize only up to 'rows_limit' records instead of all result records. 
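The max_rows sizing above, including the LIMIT pushdown, amounts to: cap the table by the session byte limits, never go below one row, and stop at the query's row limit. A sketch (the real code also treats tmp_table_size == ~0 as "no limit"):

typedef unsigned long long ha_rows_sketch;

static ha_rows_sketch tmp_table_max_rows(bool is_heap_table,
                                         ha_rows_sketch tmp_table_size,
                                         ha_rows_sketch max_heap_table_size,
                                         ha_rows_sketch reclength,
                                         ha_rows_sketch rows_limit)
{
  ha_rows_sketch byte_limit= tmp_table_size;
  if (is_heap_table && max_heap_table_size < byte_limit)
    byte_limit= max_heap_table_size;     // HEAP tables obey both variables
  ha_rows_sketch max_rows= byte_limit / reclength;
  if (max_rows < 1)
    max_rows= 1;                         // "for dummy start options"
  if (rows_limit < max_rows)
    max_rows= rows_limit;                // LIMIT pushdown: stop materializing early
  return max_rows;
}
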
- */ - set_if_smaller(share->max_rows, rows_limit); - param->end_write_records= rows_limit; - - keyinfo= param->keyinfo; - - if (group) - { - DBUG_PRINT("info",("Creating group key in temporary table")); - table->group=group; /* Table is grouped by key */ - param->group_buff=group_buff; - share->keys=1; - share->uniques= test(using_unique_constraint); - table->key_info=keyinfo; - keyinfo->key_part=key_part_info; - keyinfo->flags=HA_NOSAME; - keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; - keyinfo->key_length=0; - keyinfo->rec_per_key=0; - keyinfo->algorithm= HA_KEY_ALG_UNDEF; - keyinfo->name= (char*) "group_key"; - ORDER *cur_group= group; - for (; cur_group ; cur_group= cur_group->next, key_part_info++) - { - Field *field=(*cur_group->item)->get_tmp_table_field(); - DBUG_ASSERT(field->table == table); - bool maybe_null=(*cur_group->item)->maybe_null; - key_part_info->null_bit=0; - key_part_info->field= field; - key_part_info->offset= field->offset(table->record[0]); - key_part_info->length= (uint16) field->key_length(); - key_part_info->type= (uint8) field->key_type(); - key_part_info->key_type = - ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT || - (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 || - (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ? - 0 : FIELDFLAG_BINARY; - if (!using_unique_constraint) - { - cur_group->buff=(char*) group_buff; - if (!(cur_group->field= field->new_key_field(thd->mem_root,table, - group_buff + - test(maybe_null), - field->null_ptr, - field->null_bit))) - goto err; /* purecov: inspected */ - if (maybe_null) - { - /* - To be able to group on NULL, we reserved place in group_buff - for the NULL flag just before the column. (see above). - The field data is after this flag. - The NULL flag is updated in 'end_update()' and 'end_write()' - */ - keyinfo->flags|= HA_NULL_ARE_EQUAL; // def. that NULL == NULL - key_part_info->null_bit=field->null_bit; - key_part_info->null_offset= (uint) (field->null_ptr - - (uchar*) table->record[0]); - cur_group->buff++; // Pointer to field data - group_buff++; // Skipp null flag - } - /* In GROUP BY 'a' and 'a ' are equal for VARCHAR fields */ - key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; - group_buff+= cur_group->field->pack_length(); - } - keyinfo->key_length+= key_part_info->length; - } - } - - if (distinct && field_count != param->hidden_field_count) - { - /* - Create an unique key or an unique constraint over all columns - that should be in the result. In the temporary table, there are - 'param->hidden_field_count' extra columns, whose null bits are stored - in the first 'hidden_null_pack_length' bytes of the row. - */ - DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count)); - - if (blob_count) - { - /* - Special mode for index creation in MyISAM used to support unique - indexes on blobs with arbitrary length. Such indexes cannot be - used for lookups. - */ - //// psergey-merge: using_unique_constraint=1; - share->uniques= 1; - } - null_pack_length-=hidden_null_pack_length; - keyinfo->key_parts= ((field_count-param->hidden_field_count)+ - (share->uniques ? 
test(null_pack_length) : 0)); - table->distinct= 1; - share->keys= 1; - if (!(key_part_info= (KEY_PART_INFO*) - alloc_root(&table->mem_root, - keyinfo->key_parts * sizeof(KEY_PART_INFO)))) - goto err; - bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); - table->key_info=keyinfo; - keyinfo->key_part=key_part_info; - keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; - keyinfo->key_length= 0; // Will compute the sum of the parts below. - keyinfo->name= (char*) "distinct_key"; - keyinfo->algorithm= HA_KEY_ALG_UNDEF; - keyinfo->rec_per_key=0; - - /* - Create an extra field to hold NULL bits so that unique indexes on - blobs can distinguish NULL from 0. This extra field is not needed - when we do not use UNIQUE indexes for blobs. - */ - if (null_pack_length && share->uniques) - { - key_part_info->null_bit=0; - key_part_info->offset=hidden_null_pack_length; - key_part_info->length=null_pack_length; - key_part_info->field= new Field_string(table->record[0], - (uint32) key_part_info->length, - (uchar*) 0, - (uint) 0, - Field::NONE, - NullS, &my_charset_bin); - if (!key_part_info->field) - goto err; - key_part_info->field->init(table); - key_part_info->key_type=FIELDFLAG_BINARY; - key_part_info->type= HA_KEYTYPE_BINARY; - key_part_info++; - } - /* Create a distinct key over the columns we are going to return */ - for (i=param->hidden_field_count, reg_field=table->field + i ; - i < field_count; - i++, reg_field++, key_part_info++) - { - key_part_info->null_bit=0; - key_part_info->field= *reg_field; - key_part_info->offset= (*reg_field)->offset(table->record[0]); - key_part_info->length= (uint16) (*reg_field)->pack_length(); - /* TODO: - The below method of computing the key format length of the - key part is a copy/paste from opt_range.cc, and table.cc. - This should be factored out, e.g. as a method of Field. - In addition it is not clear if any of the Field::*_length - methods is supposed to compute the same length. If so, it - might be reused. - */ - key_part_info->store_length= key_part_info->length; + Item_func_sp *item_func_sp= (Item_func_sp *) item; + Field *sp_result_field= item_func_sp->get_sp_result_field(); - if ((*reg_field)->real_maybe_null()) - key_part_info->store_length+= HA_KEY_NULL_LENGTH; - if ((*reg_field)->type() == MYSQL_TYPE_BLOB || - (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR) - key_part_info->store_length+= HA_KEY_BLOB_LENGTH; + if (make_copy_field) + { + DBUG_ASSERT(item_func_sp->result_field); + *from_field= item_func_sp->result_field; + } + else + { + *((*copy_func)++)= item; + } - keyinfo->key_length+= key_part_info->store_length; + Field *result_field= + create_tmp_field_from_field(thd, + sp_result_field, + item_func_sp->name, + table, + NULL, + convert_blob_length); - key_part_info->type= (uint8) (*reg_field)->key_type(); - key_part_info->key_type = - ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT || - (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 || - (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 
- 0 : FIELDFLAG_BINARY; + if (modify_item) + item->set_result_field(result_field); + + return result_field; } - } - if (thd->is_fatal_error) // If end of memory - goto err; /* purecov: inspected */ - share->db_record_offset= 1; - if (share->db_type() == TMP_ENGINE_HTON) - { - if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo, - ¶m->recinfo, select_options)) - goto err; + /* Fall through */ + case Item::COND_ITEM: + case Item::FIELD_AVG_ITEM: + case Item::FIELD_STD_ITEM: + case Item::SUBSELECT_ITEM: + /* The following can only happen with 'CREATE TABLE ... SELECT' */ + case Item::PROC_ITEM: + case Item::INT_ITEM: + case Item::REAL_ITEM: + case Item::DECIMAL_ITEM: + case Item::STRING_ITEM: + case Item::REF_ITEM: + case Item::NULL_ITEM: + case Item::VARBIN_ITEM: + if (make_copy_field) + { + DBUG_ASSERT(((Item_result_field*)item)->result_field); + *from_field= ((Item_result_field*)item)->result_field; + } + return create_tmp_field_from_item(thd, item, table, + (make_copy_field ? 0 : copy_func), + modify_item, convert_blob_length); + case Item::TYPE_HOLDER: + result= ((Item_type_holder *)item)->make_field_by_type(table); + result->set_derivation(item->collation.derivation); + return result; + default: // Dosen't have to be stored + return 0; } - if (open_tmp_table(table)) - goto err; - - thd->mem_root= mem_root_save; - - DBUG_RETURN(table); - -err: - thd->mem_root= mem_root_save; - free_tmp_table(thd,table); /* purecov: inspected */ - if (temp_pool_slot != MY_BIT_NONE) - bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); /* purecov: inspected */ } - /* - Create a temporary table to weed out duplicate rowid combinations - - SYNOPSIS - - create_duplicate_weedout_tmp_table() - thd Thread handle - uniq_tuple_length_arg Length of the table's column - sjtbl Update sjtbl->[start_]recinfo values which - will be needed if we'll need to convert the - created temptable from HEAP to MyISAM/Maria. - - DESCRIPTION - Create a temporary table to weed out duplicate rowid combinations. The - table has a single column that is a concatenation of all rowids in the - combination. + Set up column usage bitmaps for a temporary table - Depending on the needed length, there are two cases: + IMPLEMENTATION + For temporary tables, we need one bitmap with all columns set and + a tmp_set bitmap to be used by things like filesort. +*/ - 1. When the length of the column < max_key_length: +void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps) +{ + uint field_count= table->s->fields; + bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count, + FALSE); + bitmap_init(&table->tmp_set, + (my_bitmap_map*) (bitmaps+ bitmap_buffer_size(field_count)), + field_count, FALSE); + bitmap_init(&table->vcol_set, + (my_bitmap_map*) (bitmaps+ 2+bitmap_buffer_size(field_count)), + field_count, FALSE); - CREATE TABLE tmp (col VARBINARY(n) NOT NULL, UNIQUE KEY(col)); + /* write_set and all_set are copies of read_set */ + table->def_write_set= table->def_read_set; + table->s->all_set= table->def_read_set; + bitmap_set_all(&table->s->all_set); + table->default_column_bitmaps(); +} - 2. Otherwise (not a valid SQL syntax but internally supported): - CREATE TABLE tmp (col VARBINARY NOT NULL, UNIQUE CONSTRAINT(col)); +/** + Create a temp table according to a field list. - The code in this function was produced by extraction of relevant parts - from create_tmp_table(). + Given field pointers are changed to point at tmp_table for + send_fields. 
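For the distinct-key parts built above, the stored length per part is the packed column plus a NULL indicator and, for BLOB/VARCHAR columns, the key's length prefix. A sketch of that formula (parameters stand in for HA_KEY_NULL_LENGTH and HA_KEY_BLOB_LENGTH):

static unsigned key_part_store_length(unsigned pack_length,
                                      bool real_maybe_null,
                                      bool is_blob_or_varchar,
                                      unsigned null_length,
                                      unsigned blob_length_bytes)
{
  unsigned store_length= pack_length;
  if (real_maybe_null)
    store_length+= null_length;          // NULL indicator stored in the key
  if (is_blob_or_varchar)
    store_length+= blob_length_bytes;    // length prefix for variable-size parts
  return store_length;
}
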
The table object is self contained: it's + allocated in its own memory root, as well as Field objects + created for table columns. + This function will replace Item_sum items in 'fields' list with + corresponding Item_field items, pointing at the fields in the + temporary table, unless this was prohibited by TRUE + value of argument save_sum_fields. The Item_field objects + are created in THD memory root. - RETURN - created table - NULL on error + @param thd thread handle + @param param a description used as input to create the table + @param fields list of items that will be used to define + column types of the table (also see NOTES) + @param group TODO document + @param distinct should table rows be distinct + @param save_sum_fields see NOTES + @param select_options + @param rows_limit + @param table_alias possible name of the temporary table that can + be used for name resolving; can be "". */ -TABLE *create_duplicate_weedout_tmp_table(THD *thd, - uint uniq_tuple_length_arg, - SJ_TMP_TABLE *sjtbl) +TABLE * +create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + char *table_alias) { MEM_ROOT *mem_root_save, own_root; TABLE *table; TABLE_SHARE *share; + uint i,field_count,null_count,null_pack_length; + uint copy_func_count= param->func_count; + uint hidden_null_count, hidden_null_pack_length, hidden_field_count; + uint blob_count,group_null_items, string_count; uint temp_pool_slot=MY_BIT_NONE; - char *tmpname,path[FN_REFLEN]; - Field **reg_field; - KEY_PART_INFO *key_part_info; - KEY *keyinfo; - uchar *group_buff; - uchar *bitmaps; - uint *blob_field; - ENGINE_COLUMNDEF *recinfo, *start_recinfo; - bool using_unique_constraint=FALSE; - bool use_packed_rows= FALSE; - Field *field, *key_field; - uint blob_count, null_pack_length, null_count; + uint fieldnr= 0; + ulong reclength, string_total_length; + bool using_unique_constraint= 0; + bool use_packed_rows= 0; + bool not_all_columns= !(select_options & TMP_TABLE_ALL_COLUMNS); + char *tmpname,path[FN_REFLEN], tmp_table_name[50]; + uchar *pos, *group_buff, *bitmaps; uchar *null_flags; - uchar *pos; - DBUG_ENTER("create_duplicate_weedout_tmp_table"); - DBUG_ASSERT(!sjtbl->is_confluent); - /* - STEP 1: Get temporary table name - */ - statistic_increment(thd->status_var.created_tmp_tables, &LOCK_status); + Field **reg_field, **from_field, **default_field; + uint *blob_field; + Copy_field *copy=0; + KEY *keyinfo; + KEY_PART_INFO *key_part_info; + Item **copy_func; + ENGINE_COLUMNDEF *recinfo; + uint total_uneven_bit_length= 0; + bool force_copy_fields= param->force_copy_fields; + /* Treat sum functions as normal ones when loose index scan is used. 
*/ + save_sum_fields|= param->precomputed_group_by; + DBUG_ENTER("create_tmp_table"); + DBUG_PRINT("enter", + ("distinct: %d save_sum_fields: %d rows_limit: %lu group: %d", + (int) distinct, (int) save_sum_fields, + (ulong) rows_limit,test(group))); + + status_var_increment(thd->status_var.created_tmp_tables); + thd->query_plan_flags|= QPLAN_TMP_TABLE; + if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES)) temp_pool_slot = bitmap_lock_set_next(&temp_pool); if (temp_pool_slot != MY_BIT_NONE) // we got a slot - sprintf(path, "%s_%lx_%i", tmp_file_prefix, - current_pid, temp_pool_slot); + sprintf(tmp_table_name, "%s_%lx_%i", tmp_file_prefix, + current_pid, temp_pool_slot); else { /* if we run out of slots or we are not using tempool */ - sprintf(path,"%s%lx_%lx_%x", tmp_file_prefix,current_pid, + sprintf(tmp_table_name, "%s%lx_%lx_%x", tmp_file_prefix,current_pid, thd->thread_id, thd->tmp_table++); } - fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME); - /* STEP 2: Figure if we'll be using a key or blob+constraint */ - if (uniq_tuple_length_arg >= CONVERT_IF_BIGGER_TO_BLOB) - using_unique_constraint= TRUE; + /* + No need to change table name to lower case as we are only creating + MyISAM, Maria or HEAP tables here + */ + fn_format(path, tmp_table_name, mysql_tmpdir, "", + MY_REPLACE_EXT|MY_UNPACK_FILENAME); + + if (group) + { + if (!param->quick_group) + group=0; // Can't use group key + else for (ORDER *tmp=group ; tmp ; tmp=tmp->next) + { + /* + marker == 4 means two things: + - store NULLs in the key, and + - convert BIT fields to 64-bit long, needed because MEMORY tables + can't index BIT fields. + */ + (*tmp->item)->marker=4; // Store null in key + if ((*tmp->item)->max_length >= CONVERT_IF_BIGGER_TO_BLOB) + using_unique_constraint=1; + } + if (param->group_length >= MAX_BLOB_WIDTH) + using_unique_constraint=1; + if (group) + distinct=0; // Can't use distinct + } + + field_count=param->field_count+param->func_count+param->sum_func_count; + hidden_field_count=param->hidden_field_count; - /* STEP 3: Allocate memory for temptable description */ + /* + When loose index scan is employed as access method, it already + computes all groups and the result of all aggregate functions. We + make space for the items of the aggregate function in the list of + functions TMP_TABLE_PARAM::items_to_copy, so that the values of + these items are stored in the temporary table. + */ + if (param->precomputed_group_by) + copy_func_count+= param->sum_func_count; + init_sql_alloc(&own_root, TABLE_ALLOC_BLOCK_SIZE, 0); + if (!multi_alloc_root(&own_root, &table, sizeof(*table), &share, sizeof(*share), - ®_field, sizeof(Field*) * (1+1), - &blob_field, sizeof(uint)*2, - &keyinfo, sizeof(*keyinfo), - &key_part_info, sizeof(*key_part_info) * 2, - &start_recinfo, - sizeof(*recinfo)*(1*2+4), - &tmpname, (uint) strlen(path)+1, - &group_buff, (!using_unique_constraint ? - uniq_tuple_length_arg : 0), - &bitmaps, bitmap_buffer_size(1)*3, + ®_field, sizeof(Field*) * (field_count+1), + &default_field, sizeof(Field*) * (field_count), + &blob_field, sizeof(uint)*(field_count+1), + &from_field, sizeof(Field*)*field_count, + ©_func, sizeof(*copy_func)*(copy_func_count+1), + ¶m->keyinfo, sizeof(*param->keyinfo), + &key_part_info, + sizeof(*key_part_info)*(param->group_parts+1), + ¶m->start_recinfo, + sizeof(*param->recinfo)*(field_count*2+4), + &tmpname, (uint) strlen(tmp_table_name)+1, + &group_buff, (group && ! using_unique_constraint ? 
+ param->group_length : 0), + &bitmaps, bitmap_buffer_size(field_count)*3, NullS)) { if (temp_pool_slot != MY_BIT_NONE) bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); - DBUG_RETURN(NULL); + DBUG_RETURN(NULL); /* purecov: inspected */ } - strmov(tmpname,path); - + /* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */ + if (!(param->copy_field= copy= new (thd->mem_root) Copy_field[field_count])) + { + if (temp_pool_slot != MY_BIT_NONE) + bitmap_lock_clear_bit(&temp_pool, temp_pool_slot); + free_root(&own_root, MYF(0)); /* purecov: inspected */ + DBUG_RETURN(NULL); /* purecov: inspected */ + } + param->items_to_copy= copy_func; + strmov(tmpname, tmp_table_name); + /* make table according to fields */ - /* STEP 4: Create TABLE description */ bzero((char*) table,sizeof(*table)); - bzero((char*) reg_field,sizeof(Field*)*2); + bzero((char*) reg_field,sizeof(Field*)*(field_count+1)); + bzero((char*) default_field, sizeof(Field*) * (field_count)); + bzero((char*) from_field,sizeof(Field*)*field_count); table->mem_root= own_root; mem_root_save= thd->mem_root; thd->mem_root= &table->mem_root; table->field=reg_field; - table->alias= "weedout-tmp"; + table->alias= table_alias; table->reginfo.lock_type=TL_WRITE; /* Will be updated */ table->db_stat=HA_OPEN_KEYFILE+HA_OPEN_RNDFILE; table->map=1; @@ -14693,57 +10846,194 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, table->in_use= thd; table->quick_keys.init(); table->covering_keys.init(); + table->merge_keys.init(); table->keys_in_use_for_query.init(); table->s= share; init_tmp_table_share(thd, share, "", 0, tmpname, tmpname); share->blob_field= blob_field; share->blob_ptr_size= portable_sizeof_char_ptr; - share->db_low_byte_first=1; // True for HEAP and MyISAM - share->table_charset= NULL; + share->db_low_byte_first=1; // True for HEAP, MyISAM and Maria + share->table_charset= param->table_charset; share->primary_key= MAX_KEY; // Indicate no primary key share->keys_for_keyread.init(); share->keys_in_use.init(); - blob_count= 0; + /* Calculate which type of fields we will store in the temporary table */ + + reclength= string_total_length= 0; + blob_count= string_count= null_count= hidden_null_count= group_null_items= 0; + param->using_indirect_summary_function=0; - /* Create the field */ + List_iterator_fast<Item> li(fields); + Item *item; + Field **tmp_from_field=from_field; + while ((item=li++)) { - /* - For the sake of uniformity, always use Field_varstring (altough we could - use Field_string for shorter keys) - */ - field= new Field_varstring(uniq_tuple_length_arg, FALSE, "rowids", share, - &my_charset_bin); - if (!field) - DBUG_RETURN(0); - field->table= table; - field->key_start.init(0); - field->part_of_key.init(0); - field->part_of_sortkey.init(0); - field->unireg_check= Field::NONE; - field->flags= (NOT_NULL_FLAG | BINARY_FLAG | NO_DEFAULT_VALUE_FLAG); - field->reset_fields(); - field->init(table); - field->orig_table= NULL; - - field->field_index= 0; - - *(reg_field++)= field; - *blob_field= 0; - *reg_field= 0; + Item::Type type=item->type(); + if (not_all_columns) + { + if (item->with_sum_func && type != Item::SUM_FUNC_ITEM) + { + if (item->used_tables() & OUTER_REF_TABLE_BIT) + item->update_used_tables(); + if (type == Item::SUBSELECT_ITEM || + (item->used_tables() & ~OUTER_REF_TABLE_BIT)) + { + /* + Mark that the we have ignored an item that refers to a summary + function. We need to know this if someone is going to use + DISTINCT on the result. 
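The check right above skips items that merely refer to a summary function and records that fact, because a later DISTINCT then has to be handled without relying on the temp table. A simplified predicate for the same test (bitmask arguments replaced by booleans, names illustrative):

static bool is_indirect_summary_reference(bool with_sum_func,
                                          bool is_sum_func_item,
                                          bool is_subselect_item,
                                          bool depends_on_real_tables)
{
  // Not itself an aggregate, but depends on one and on something beyond
  // outer references: don't store it, just remember it was skipped.
  return with_sum_func && !is_sum_func_item &&
         (is_subselect_item || depends_on_real_tables);
}
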
+ */ + param->using_indirect_summary_function=1; + continue; + } + } + if (item->const_item() && (int) hidden_field_count <= 0) + continue; // We don't have to store this + } + if (type == Item::SUM_FUNC_ITEM && !group && !save_sum_fields) + { /* Can't calc group yet */ + Item_sum *sum_item= (Item_sum *) item; + sum_item->result_field=0; + for (i=0 ; i < sum_item->get_arg_count() ; i++) + { + Item *arg= sum_item->get_arg(i); + if (!arg->const_item()) + { + Field *new_field= + create_tmp_field(thd, table, arg, arg->type(), ©_func, + tmp_from_field, &default_field[fieldnr], + group != 0,not_all_columns, + distinct, 0, + param->convert_blob_length); + if (!new_field) + goto err; // Should be OOM + tmp_from_field++; + reclength+=new_field->pack_length(); + if (new_field->flags & BLOB_FLAG) + { + *blob_field++= fieldnr; + blob_count++; + } + if (new_field->type() == MYSQL_TYPE_BIT) + total_uneven_bit_length+= new_field->field_length & 7; + *(reg_field++)= new_field; + if (new_field->real_type() == MYSQL_TYPE_STRING || + new_field->real_type() == MYSQL_TYPE_VARCHAR) + { + string_count++; + string_total_length+= new_field->pack_length(); + } + thd->mem_root= mem_root_save; + arg= sum_item->set_arg(i, thd, new Item_field(new_field)); + thd->mem_root= &table->mem_root; + if (!(new_field->flags & NOT_NULL_FLAG)) + { + null_count++; + /* + new_field->maybe_null() is still false, it will be + changed below. But we have to setup Item_field correctly + */ + arg->maybe_null=1; + } + new_field->field_index= fieldnr++; + } + } + } + else + { + /* + The last parameter to create_tmp_field() is a bit tricky: + + We need to set it to 0 in union, to get fill_record() to modify the + temporary table. + We need to set it to 1 on multi-table-update and in select to + write rows to the temporary table. + We here distinguish between UNION and multi-table-updates by the fact + that in the later case group is set to the row pointer. - share->fields= 1; - share->blob_fields= 0; + The test for item->marker == 4 is ensure we don't create a group-by + key over a bit field as heap tables can't handle that. + */ + Field *new_field= (param->schema_table) ? 
+ create_tmp_field_for_schema(thd, item, table) : + create_tmp_field(thd, table, item, type, ©_func, + tmp_from_field, &default_field[fieldnr], + group != 0, + !force_copy_fields && + (not_all_columns || group !=0), + item->marker == 4, force_copy_fields, + param->convert_blob_length); + + if (!new_field) + { + if (thd->is_fatal_error) + goto err; // Got OOM + continue; // Some kindf of const item + } + if (type == Item::SUM_FUNC_ITEM) + ((Item_sum *) item)->result_field= new_field; + tmp_from_field++; + reclength+=new_field->pack_length(); + if (!(new_field->flags & NOT_NULL_FLAG)) + null_count++; + if (new_field->type() == MYSQL_TYPE_BIT) + total_uneven_bit_length+= new_field->field_length & 7; + if (new_field->flags & BLOB_FLAG) + { + *blob_field++= fieldnr; + blob_count++; + } + if (new_field->real_type() == MYSQL_TYPE_STRING || + new_field->real_type() == MYSQL_TYPE_VARCHAR) + { + string_count++; + string_total_length+= new_field->pack_length(); + } + if (item->marker == 4 && item->maybe_null) + { + group_null_items++; + new_field->flags|= GROUP_FLAG; + } + new_field->field_index= fieldnr++; + *(reg_field++)= new_field; + } + if (!--hidden_field_count) + { + /* + This was the last hidden field; Remember how many hidden fields could + have null + */ + hidden_null_count=null_count; + /* + We need to update hidden_field_count as we may have stored group + functions with constant arguments + */ + param->hidden_field_count= fieldnr; + null_count= 0; + } } + DBUG_ASSERT(fieldnr == (uint) (reg_field - table->field)); + DBUG_ASSERT(field_count >= (uint) (reg_field - table->field)); + field_count= fieldnr; + *reg_field= 0; + *blob_field= 0; // End marker + share->fields= field_count; - uint reclength= field->pack_length(); - if (using_unique_constraint) - { + /* If result table is small; use a heap */ + /* future: storage engine selection can be made dynamic? 
*/ + if (blob_count || using_unique_constraint || + (select_options & (OPTION_BIG_TABLES | SELECT_SMALL_RESULT)) == + OPTION_BIG_TABLES || (select_options & TMP_TABLE_FORCE_MYISAM)) + { share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON); table->file= get_new_handler(share, &table->mem_root, share->db_type()); - DBUG_ASSERT(uniq_tuple_length_arg <= table->file->max_key_length()); + if (group && + (param->group_parts > table->file->max_key_parts() || + param->group_length > table->file->max_key_length())) + using_unique_constraint=1; } else { @@ -14754,14 +11044,34 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, if (!table->file) goto err; - null_count=1; - - null_pack_length= 1; - reclength += null_pack_length; + if (!using_unique_constraint) + reclength+= group_null_items; // null flag is stored separately + + share->blob_fields= blob_count; + if (blob_count == 0) + { + /* We need to ensure that first byte is not 0 for the delete link */ + if (param->hidden_field_count) + hidden_null_count++; + else + null_count++; + } + hidden_null_pack_length=(hidden_null_count+7)/8; + null_pack_length= (hidden_null_pack_length + + (null_count + total_uneven_bit_length + 7) / 8); + reclength+=null_pack_length; + if (!reclength) + reclength=1; // Dummy select + /* Use packed rows if there is blobs or a lot of space to gain */ + if (blob_count || + (string_total_length >= STRING_TOTAL_LENGTH_TO_PACK_ROWS && + (reclength / string_total_length <= RATIO_TO_PACK_ROWS || + string_total_length / string_count >= AVG_STRING_LENGTH_TO_PACK_ROWS))) + use_packed_rows= 1; share->reclength= reclength; { - uint alloc_length=ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1); + uint alloc_length=ALIGN_SIZE(reclength+MI_UNIQUE_HASH_LENGTH+1); share->rec_buff_length= alloc_length; if (!(table->record[0]= (uchar*) alloc_root(&table->mem_root, alloc_length*3))) @@ -14769,9 +11079,12 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, table->record[1]= table->record[0]+alloc_length; share->default_values= table->record[1]+alloc_length; } + copy_func[0]=0; // End marker + param->func_count= copy_func - param->items_to_copy; + setup_tmp_table_column_bitmaps(table, bitmaps); - recinfo= start_recinfo; + recinfo=param->start_recinfo; null_flags=(uchar*) table->record[0]; pos=table->record[0]+ null_pack_length; if (null_pack_length) @@ -14783,25 +11096,83 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, bfill(null_flags,null_pack_length,255); // Set null fields table->null_flags= (uchar*) table->record[0]; - share->null_fields= null_count; + share->null_fields= null_count+ hidden_null_count; share->null_bytes= null_pack_length; } - null_count=1; - + null_count= (blob_count == 0) ? 
1 : 0; + hidden_field_count=param->hidden_field_count; + for (i=0,reg_field=table->field; i < field_count; i++,reg_field++,recinfo++) { - //Field *field= *reg_field; + Field *field= *reg_field; uint length; bzero((uchar*) recinfo,sizeof(*recinfo)); - field->move_field(pos,(uchar*) 0,0); + if (!(field->flags & NOT_NULL_FLAG)) + { + if (field->flags & GROUP_FLAG && !using_unique_constraint) + { + /* + We have to reserve one byte here for NULL bits, + as this is updated by 'end_update()' + */ + *pos++=0; // Null is stored here + recinfo->length=1; + recinfo->type=FIELD_NORMAL; + recinfo++; + bzero((uchar*) recinfo,sizeof(*recinfo)); + } + else + { + recinfo->null_bit= 1 << (null_count & 7); + recinfo->null_pos= null_count/8; + } + field->move_field(pos,null_flags+null_count/8, + 1 << (null_count & 7)); + null_count++; + } + else + field->move_field(pos,(uchar*) 0,0); + if (field->type() == MYSQL_TYPE_BIT) + { + /* We have to reserve place for extra bits among null bits */ + ((Field_bit*) field)->set_bit_ptr(null_flags + null_count / 8, + null_count & 7); + null_count+= (field->field_length & 7); + } field->reset(); + /* Test if there is a default field value. The test for ->ptr is to skip 'offset' fields generated by initalize_tables */ - // Initialize the table field: - bzero(field->ptr, field->pack_length()); + if (default_field[i] && default_field[i]->ptr) + { + /* + default_field[i] is set only in the cases when 'field' can + inherit the default value that is defined for the field referred + by the Item_field object from which 'field' has been created. + */ + my_ptrdiff_t diff; + Field *orig_field= default_field[i]; + /* Get the value from default_values */ + diff= (my_ptrdiff_t) (orig_field->table->s->default_values- + orig_field->table->record[0]); + orig_field->move_field_offset(diff); // Points now at default_values + if (orig_field->is_real_null()) + field->set_null(); + else + { + field->set_notnull(); + memcpy(field->ptr, orig_field->ptr, field->pack_length()); + } + orig_field->move_field_offset(-diff); // Back to record[0] + } + if (from_field[i]) + { /* Not a table Item */ + copy->set(field,from_field[i],save_sum_fields); + copy++; + } length=field->pack_length(); pos+= length; @@ -14812,15 +11183,22 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, else if (use_packed_rows && field->real_type() == MYSQL_TYPE_STRING && length >= MIN_STRING_LENGTH_TO_PACK_ROWS) - recinfo->type=FIELD_SKIP_ENDSPACE; + recinfo->type= FIELD_SKIP_ENDSPACE; + else if (field->real_type() == MYSQL_TYPE_VARCHAR) + recinfo->type= FIELD_VARCHAR; else - recinfo->type=FIELD_NORMAL; + recinfo->type= FIELD_NORMAL; + + if (!--hidden_field_count) + null_count=(null_count+7) & ~7; // move to next byte + // fix table name in field entry field->table_name= &table->alias; } - //param->recinfo=recinfo; - //store_record(table,s->default_values); // Make empty default record + param->copy_field_end=copy; + param->recinfo=recinfo; + store_record(table,s->default_values); // Make empty default record if (thd->variables.tmp_table_size == ~ (ulonglong) 0) // No limit share->max_rows= ~(ha_rows) 0; @@ -14831,57 +11209,189 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, thd->variables.tmp_table_size) / share->reclength); set_if_bigger(share->max_rows,1); // For dummy start options + /* + Push the LIMIT clause to the temporary table creation, so that we + materialize only up to 'rows_limit' records instead of all result records. 
+ */ + set_if_smaller(share->max_rows, rows_limit); + param->end_write_records= rows_limit; + keyinfo= param->keyinfo; - //// keyinfo= param->keyinfo; - if (TRUE) + if (group) { DBUG_PRINT("info",("Creating group key in temporary table")); + table->group=group; /* Table is grouped by key */ + param->group_buff=group_buff; share->keys=1; share->uniques= test(using_unique_constraint); table->key_info=keyinfo; keyinfo->key_part=key_part_info; keyinfo->flags=HA_NOSAME; - keyinfo->usable_key_parts= keyinfo->key_parts= 1; + keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; keyinfo->key_length=0; keyinfo->rec_per_key=0; keyinfo->algorithm= HA_KEY_ALG_UNDEF; - keyinfo->name= (char*) "weedout_key"; + keyinfo->name= (char*) "group_key"; + ORDER *cur_group= group; + for (; cur_group ; cur_group= cur_group->next, key_part_info++) { + Field *field=(*cur_group->item)->get_tmp_table_field(); + DBUG_ASSERT(field->table == table); + bool maybe_null=(*cur_group->item)->maybe_null; key_part_info->null_bit=0; key_part_info->field= field; key_part_info->offset= field->offset(table->record[0]); key_part_info->length= (uint16) field->key_length(); key_part_info->type= (uint8) field->key_type(); - key_part_info->key_type = FIELDFLAG_BINARY; + key_part_info->key_type = + ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ? + 0 : FIELDFLAG_BINARY; if (!using_unique_constraint) { - if (!(key_field= field->new_key_field(thd->mem_root, table, - group_buff, - field->null_ptr, - field->null_bit))) - goto err; - key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; //todo need this? + cur_group->buff=(char*) group_buff; + if (!(cur_group->field= field->new_key_field(thd->mem_root,table, + group_buff + + test(maybe_null), + field->null_ptr, + field->null_bit))) + goto err; /* purecov: inspected */ + if (maybe_null) + { + /* + To be able to group on NULL, we reserved place in group_buff + for the NULL flag just before the column. (see above). + The field data is after this flag. + The NULL flag is updated in 'end_update()' and 'end_write()' + */ + keyinfo->flags|= HA_NULL_ARE_EQUAL; // def. that NULL == NULL + key_part_info->null_bit=field->null_bit; + key_part_info->null_offset= (uint) (field->null_ptr - + (uchar*) table->record[0]); + cur_group->buff++; // Pointer to field data + group_buff++; // Skipp null flag + } + /* In GROUP BY 'a' and 'a ' are equal for VARCHAR fields */ + key_part_info->key_part_flag|= HA_END_SPACE_ARE_EQUAL; + group_buff+= cur_group->field->pack_length(); } keyinfo->key_length+= key_part_info->length; } } + if (distinct && field_count != param->hidden_field_count) + { + /* + Create an unique key or an unique constraint over all columns + that should be in the result. In the temporary table, there are + 'param->hidden_field_count' extra columns, whose null bits are stored + in the first 'hidden_null_pack_length' bytes of the row. + */ + DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count)); + + if (blob_count) + { + /* + Special mode for index creation in MyISAM used to support unique + indexes on blobs with arbitrary length. Such indexes cannot be + used for lookups. + */ + //// psergey-merge: using_unique_constraint=1; + share->uniques= 1; + } + null_pack_length-=hidden_null_pack_length; + keyinfo->key_parts= ((field_count-param->hidden_field_count)+ + (share->uniques ? 
test(null_pack_length) : 0)); + table->distinct= 1; + share->keys= 1; + if (!(key_part_info= (KEY_PART_INFO*) + alloc_root(&table->mem_root, + keyinfo->key_parts * sizeof(KEY_PART_INFO)))) + goto err; + bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); + table->key_info=keyinfo; + keyinfo->key_part=key_part_info; + keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; + keyinfo->key_length= 0; // Will compute the sum of the parts below. + keyinfo->name= (char*) "distinct_key"; + keyinfo->algorithm= HA_KEY_ALG_UNDEF; + keyinfo->rec_per_key=0; + + /* + Create an extra field to hold NULL bits so that unique indexes on + blobs can distinguish NULL from 0. This extra field is not needed + when we do not use UNIQUE indexes for blobs. + */ + if (null_pack_length && share->uniques) + { + key_part_info->null_bit=0; + key_part_info->offset=hidden_null_pack_length; + key_part_info->length=null_pack_length; + key_part_info->field= new Field_string(table->record[0], + (uint32) key_part_info->length, + (uchar*) 0, + (uint) 0, + Field::NONE, + NullS, &my_charset_bin); + if (!key_part_info->field) + goto err; + key_part_info->field->init(table); + key_part_info->key_type=FIELDFLAG_BINARY; + key_part_info->type= HA_KEYTYPE_BINARY; + key_part_info++; + } + /* Create a distinct key over the columns we are going to return */ + for (i=param->hidden_field_count, reg_field=table->field + i ; + i < field_count; + i++, reg_field++, key_part_info++) + { + key_part_info->null_bit=0; + key_part_info->field= *reg_field; + key_part_info->offset= (*reg_field)->offset(table->record[0]); + key_part_info->length= (uint16) (*reg_field)->pack_length(); + /* TODO: + The below method of computing the key format length of the + key part is a copy/paste from opt_range.cc, and table.cc. + This should be factored out, e.g. as a method of Field. + In addition it is not clear if any of the Field::*_length + methods is supposed to compute the same length. If so, it + might be reused. + */ + key_part_info->store_length= key_part_info->length; + + if ((*reg_field)->real_maybe_null()) + key_part_info->store_length+= HA_KEY_NULL_LENGTH; + if ((*reg_field)->type() == MYSQL_TYPE_BLOB || + (*reg_field)->real_type() == MYSQL_TYPE_VARCHAR) + key_part_info->store_length+= HA_KEY_BLOB_LENGTH; + + keyinfo->key_length+= key_part_info->store_length; + + key_part_info->type= (uint8) (*reg_field)->key_type(); + key_part_info->key_type = + ((ha_base_keytype) key_part_info->type == HA_KEYTYPE_TEXT || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT1 || + (ha_base_keytype) key_part_info->type == HA_KEYTYPE_VARTEXT2) ? 
+ 0 : FIELDFLAG_BINARY; + } + } + if (thd->is_fatal_error) // If end of memory - goto err; + goto err; /* purecov: inspected */ share->db_record_offset= 1; if (share->db_type() == TMP_ENGINE_HTON) { - recinfo++; - if (create_internal_tmp_table(table, keyinfo, start_recinfo, &recinfo, 0)) + if (create_internal_tmp_table(table, param->keyinfo, param->start_recinfo, + ¶m->recinfo, select_options)) goto err; } - sjtbl->start_recinfo= start_recinfo; - sjtbl->recinfo= recinfo; if (open_tmp_table(table)) goto err; thd->mem_root= mem_root_save; + DBUG_RETURN(table); err: @@ -14893,6 +11403,7 @@ TABLE *create_duplicate_weedout_tmp_table(THD *thd, } + /****************************************************************************/ /** @@ -15021,7 +11532,7 @@ TABLE *create_virtual_tmp_table(THD *thd, List<Create_field> &field_list) } -static bool open_tmp_table(TABLE *table) +bool open_tmp_table(TABLE *table) { int error; if ((error= table->file->ha_open(table, table->s->table_name.str, O_RDWR, @@ -15071,10 +11582,10 @@ static bool open_tmp_table(TABLE *table) /* Create internal Maria temporary table */ -static bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, - ENGINE_COLUMNDEF *start_recinfo, - ENGINE_COLUMNDEF **recinfo, - ulonglong options) +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + ENGINE_COLUMNDEF *start_recinfo, + ENGINE_COLUMNDEF **recinfo, + ulonglong options) { int error; MARIA_KEYDEF keydef; @@ -16167,117 +12678,6 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) } -/* - SemiJoinDuplicateElimination: Weed out duplicate row combinations - - SYNPOSIS - do_sj_dups_weedout() - thd Thread handle - sjtbl Duplicate weedout table - - DESCRIPTION - Try storing current record combination of outer tables (i.e. their - rowids) in the temporary table. This records the fact that we've seen - this record combination and also tells us if we've seen it before. - - RETURN - -1 Error - 1 The row combination is a duplicate (discard it) - 0 The row combination is not a duplicate (continue) -*/ - -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl) -{ - int error; - SJ_TMP_TABLE::TAB *tab= sjtbl->tabs; - SJ_TMP_TABLE::TAB *tab_end= sjtbl->tabs_end; - uchar *ptr; - uchar *nulls_ptr; - - DBUG_ENTER("do_sj_dups_weedout"); - - if (sjtbl->is_confluent) - { - if (sjtbl->have_confluent_row) - DBUG_RETURN(1); - - sjtbl->have_confluent_row= TRUE; - DBUG_RETURN(0); - } - - ptr= sjtbl->tmp_table->record[0] + 1; - nulls_ptr= ptr; - - /* Put the the rowids tuple into table->record[0]: */ - - // 1. Store the length - if (((Field_varstring*)(sjtbl->tmp_table->field[0]))->length_bytes == 1) - { - *ptr= (uchar)(sjtbl->rowid_len + sjtbl->null_bytes); - ptr++; - } - else - { - int2store(ptr, sjtbl->rowid_len + sjtbl->null_bytes); - ptr += 2; - } - - // 2. Zero the null bytes - if (sjtbl->null_bytes) - { - bzero(ptr, sjtbl->null_bytes); - ptr += sjtbl->null_bytes; - } - - // 3. 
Put the rowids - for (uint i=0; tab != tab_end; tab++, i++) - { - handler *h= tab->join_tab->table->file; - if (tab->join_tab->table->maybe_null && tab->join_tab->table->null_row) - { - /* It's a NULL-complemented row */ - *(nulls_ptr + tab->null_byte) |= tab->null_bit; - bzero(ptr + tab->rowid_offset, h->ref_length); - } - else - { - /* Copy the rowid value */ - memcpy(ptr + tab->rowid_offset, h->ref, h->ref_length); - } - } - - error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]); - if (error) - { - /* create_internal_tmp_table_from_heap will generate error if needed */ - if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP)) - DBUG_RETURN(1); /* Duplicate */ - if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table, - sjtbl->start_recinfo, - &sjtbl->recinfo, error, 1)) - DBUG_RETURN(-1); - } - DBUG_RETURN(0); -} - - -/* - SemiJoinDuplicateElimination: Reset the temporary table -*/ - -int do_sj_reset(SJ_TMP_TABLE *sj_tbl) -{ - DBUG_ENTER("do_sj_reset"); - if (sj_tbl->tmp_table) - { - int rc= sj_tbl->tmp_table->file->ha_delete_all_rows(); - DBUG_RETURN(rc); - } - sj_tbl->have_confluent_row= FALSE; - DBUG_RETURN(0); -} - - /** @brief Process one row of the nested loop join. @@ -17619,7 +14019,7 @@ end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), 1 if right_item is used removable reference key on left_item */ -static bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) +bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) { Field *field=left_item->field; JOIN_TAB *join_tab= field->table->reginfo.join_tab; @@ -17667,56 +14067,6 @@ static bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item) } -/** - @brief Replaces an expression destructively inside the expression tree of - the WHERE clase. - - @note Because of current requirements for semijoin flattening, we do not - need to recurse here, hence this function will only examine the top-level - AND conditions. (see JOIN::prepare, comment starting with "Check if the - subquery predicate can be executed via materialization". - - @param join The top-level query. - @param old_cond The expression to be replaced. - @param new_cond The expression to be substituted. - @param do_fix_fields If true, Item::fix_fields(THD*, Item**) is called for - the new expression. - @return <code>true</code> if there was an error, <code>false</code> if - successful. -*/ -static bool replace_where_subcondition(JOIN *join, Item **expr, - Item *old_cond, Item *new_cond, - bool do_fix_fields) -{ - //Item **expr= (emb_nest == (TABLE_LIST*)1)? &join->conds : &emb_nest->on_expr; - if (*expr == old_cond) - { - *expr= new_cond; - if (do_fix_fields) - new_cond->fix_fields(join->thd, expr); - join->select_lex->where= *expr; - return FALSE; - } - - if ((*expr)->type() == Item::COND_ITEM) - { - List_iterator<Item> li(*((Item_cond*)(*expr))->argument_list()); - Item *item; - while ((item= li++)) - { - if (item == old_cond) - { - li.replace(new_cond); - if (do_fix_fields) - new_cond->fix_fields(join->thd, li.ref()); - return FALSE; - } - } - } - // If we came here it means there were an error during prerequisites check. 
- DBUG_ASSERT(0); - return TRUE; -} /* Extract a condition that can be checked after reading given table diff --git a/sql/sql_select.h b/sql/sql_select.h index 4af1de01f67..8f509a9d270 100644 --- a/sql/sql_select.h +++ b/sql/sql_select.h @@ -28,6 +28,12 @@ #include "procedure.h" #include <myisam.h> +#if defined(WITH_MARIA_STORAGE_ENGINE) && defined(USE_MARIA_FOR_TMP_TABLES) +#include "../storage/maria/ha_maria.h" +#define TMP_ENGINE_HTON maria_hton +#else +#define TMP_ENGINE_HTON myisam_hton +#endif /* Values in optimize */ #define KEY_OPTIMIZE_EXISTS 1 #define KEY_OPTIMIZE_REF_OR_NULL 2 @@ -1195,7 +1201,6 @@ enum_nested_loop_state sub_select(JOIN *join,JOIN_TAB *join_tab, bool end_of_records); enum_nested_loop_state sub_select_sjm(JOIN *join, JOIN_TAB *join_tab, bool end_of_records); -int do_sj_dups_weedout(THD *thd, SJ_TMP_TABLE *sjtbl); enum_nested_loop_state end_send_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)), @@ -1329,74 +1334,6 @@ typedef struct st_rollup List<Item> *fields; } ROLLUP; -/* - Temporary table used by semi-join DuplicateElimination strategy - - This consists of the temptable itself and data needed to put records - into it. The table's DDL is as follows: - - CREATE TABLE tmptable (col VARCHAR(n) BINARY, PRIMARY KEY(col)); - - where the primary key can be replaced with unique constraint if n exceeds - the limit (as it is always done for query execution-time temptables). - - The record value is a concatenation of rowids of tables from the join we're - executing. If a join table is on the inner side of the outer join, we - assume that its rowid can be NULL and provide means to store this rowid in - the tuple. -*/ - -class SJ_TMP_TABLE : public Sql_alloc -{ -public: - /* - Array of pointers to tables whose rowids compose the temporary table - record. - */ - class TAB - { - public: - JOIN_TAB *join_tab; - uint rowid_offset; - ushort null_byte; - uchar null_bit; - }; - TAB *tabs; - TAB *tabs_end; - - /* - is_confluent==TRUE means this is a special case where the temptable record - has zero length (and presence of a unique key means that the temptable can - have either 0 or 1 records). - In this case we don't create the physical temptable but instead record - its state in SJ_TMP_TABLE::have_confluent_record. - */ - bool is_confluent; - - /* - When is_confluent==TRUE: the contents of the table (whether it has the - record or not). - */ - bool have_confluent_row; - - /* table record parameters */ - uint null_bits; - uint null_bytes; - uint rowid_len; - - /* The temporary table itself (NULL means not created yet) */ - TABLE *tmp_table; - - /* - These are the members we got from temptable creation code. We'll need - them if we'll need to convert table from HEAP to MyISAM/Maria. - */ - ENGINE_COLUMNDEF *start_recinfo; - ENGINE_COLUMNDEF *recinfo; - - /* Pointer to next table (next->start_idx > this->end_idx) */ - SJ_TMP_TABLE *next; -}; #define SJ_OPT_NONE 0 #define SJ_OPT_DUPS_WEEDOUT 1 @@ -1711,7 +1648,6 @@ class JOIN :public Sql_alloc Item_sum ***func); int rollup_send_data(uint idx); int rollup_write_data(uint idx, TABLE *table); - void remove_subq_pushed_predicates(Item **where); /** Release memory and, if possible, the open tables held by this execution plan (and nested plans). 
It's used to release some tables before @@ -1763,11 +1699,6 @@ void TEST_join(JOIN *join); /* Extern functions in sql_select.cc */ bool store_val_in_field(Field *field, Item *val, enum_check_fields check_flag); -TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, - ORDER *group, bool distinct, bool save_sum_fields, - ulonglong select_options, ha_rows rows_limit, - char* alias); -void free_tmp_table(THD *thd, TABLE *entry); void count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param, List<Item> &fields, bool reset_with_sum_func); bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, @@ -1776,10 +1707,6 @@ bool setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param, uint elements, List<Item> &fields); void copy_fields(TMP_TABLE_PARAM *param); void copy_funcs(Item **func_ptr); -bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, - ENGINE_COLUMNDEF *start_recinfo, - ENGINE_COLUMNDEF **recinfo, - int error, bool ignore_last_dupp_key_error); uint find_shortest_key(TABLE *table, const key_map *usable_keys); Field* create_tmp_field_from_field(THD *thd, Field* org_field, const char *name, TABLE *table, @@ -1955,13 +1882,59 @@ int test_if_item_cache_changed(List<Cached_item> &list); void calc_used_field_length(THD *thd, JOIN_TAB *join_tab); int join_init_read_record(JOIN_TAB *tab); void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); +inline Item * and_items(Item* cond, Item *item) +{ + return (cond? (new Item_cond_and(cond, item)) : item); +} +bool choose_plan(JOIN *join,table_map join_tables); +void get_partial_join_cost(JOIN *join, uint n_tables, double *read_time_arg, + double *record_count_arg); +void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab, + table_map last_remaining_tables, + bool first_alt, uint no_jbuf_before, + double *reopt_rec_count, double *reopt_cost, + double *sj_inner_fanout); +Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field, + bool *inherited_fl); +bool test_if_ref(COND *root_cond, + Item_field *left_item,Item *right_item); inline bool optimizer_flag(THD *thd, uint flag) { return (thd->variables.optimizer_switch & flag); } +/* Table elimination entry point function */ void eliminate_tables(JOIN *join); +/* Index Condition Pushdown entry point function */ void push_index_cond(JOIN_TAB *tab, uint keyno, bool other_tbls_ok); +/**************************************************************************** + Temporary table support for SQL Runtime + ***************************************************************************/ + +#define STRING_TOTAL_LENGTH_TO_PACK_ROWS 128 +#define AVG_STRING_LENGTH_TO_PACK_ROWS 64 +#define RATIO_TO_PACK_ROWS 2 +#define MIN_STRING_LENGTH_TO_PACK_ROWS 10 + +TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields, + ORDER *group, bool distinct, bool save_sum_fields, + ulonglong select_options, ha_rows rows_limit, + char* alias); +void free_tmp_table(THD *thd, TABLE *entry); +bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table, + ENGINE_COLUMNDEF *start_recinfo, + ENGINE_COLUMNDEF **recinfo, + int error, bool ignore_last_dupp_key_error); +bool create_internal_tmp_table(TABLE *table, KEY *keyinfo, + ENGINE_COLUMNDEF *start_recinfo, + ENGINE_COLUMNDEF **recinfo, + ulonglong options); +bool open_tmp_table(TABLE *table); +void setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps); + + + + -- 2.30.9
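Note for reviewers: the hunks above drop SJ_TMP_TABLE, do_sj_dups_weedout() and do_sj_reset() from sql_select.{cc,h}; per the commit message they are factored out into sql/opt_subselect.{h,cc} rather than deleted. For anyone reviewing the move without the new file at hand, the sketch below restates the duplicate-weedout idea those functions implement. It is purely illustrative: DuplicateWeedout, WeedoutTab and the std::set are stand-ins for SJ_TMP_TABLE, SJ_TMP_TABLE::TAB and the unique-keyed temporary table, not the actual MariaDB types.

    // Conceptual model only: the server uses a temporary table with a unique
    // key (or a unique constraint when the key would be too long), not std::set.
    #include <set>
    #include <string>
    #include <vector>

    struct WeedoutTab {            // stand-in for SJ_TMP_TABLE::TAB
      bool null_row;               // table is NULL-complemented in this row
      std::string rowid;           // stand-in for handler::ref / ref_length
    };

    class DuplicateWeedout {       // stand-in for SJ_TMP_TABLE
      std::set<std::string> seen_; // models the unique key on the temp table
    public:
      // True if this combination of rowids was seen before, i.e. the current
      // joined row is a duplicate and must be discarded.
      bool is_duplicate(const std::vector<WeedoutTab> &tabs)
      {
        std::string key;
        // Null flags go first, so a NULL-complemented row cannot collide with
        // a real rowid that happens to contain the same bytes.
        for (const WeedoutTab &t : tabs)
          key.push_back(t.null_row ? char(1) : char(0));
        // Then the concatenated rowids; zeroed for NULL-complemented rows,
        // mirroring the bzero() of the rowid slot in do_sj_dups_weedout().
        for (const WeedoutTab &t : tabs)
          key+= t.null_row ? std::string(t.rowid.size(), '\0') : t.rowid;
        // insert() reports whether the key was already present.
        return !seen_.insert(key).second;
      }
      void reset() { seen_.clear(); } // analogue of do_sj_reset()
    };

The degenerate case the commit message renames (formerly "confluent") is the one where the rowid tuple has zero length: the real code then skips the temporary table entirely and keeps a single boolean, have_confluent_row, which in the model above amounts to seen_ never holding anything but the empty string.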