From b2c57cedbfd0f8ce2b4f67021a2aca5dedf9b6c9 Mon Sep 17 00:00:00 2001
From: Sergey Petrunya <psergey@askmonty.org>
Date: Sun, 25 Apr 2010 12:23:52 +0400
Subject: [PATCH] Code cleanup in subquery optimizations

---
 sql/item_subselect.h | 14 ++++++++++++-
 sql/opt_subselect.cc | 50 ++++++++++++++------------------------------
 sql/opt_subselect.h  |  8 ++++++-
 sql/sql_select.cc    |  8 +------
 sql/sql_select.h     | 12 ++---------
 sql/table.h          | 11 ++++++++--
 6 files changed, 48 insertions(+), 55 deletions(-)

diff --git a/sql/item_subselect.h b/sql/item_subselect.h
index c26977982d..bd1c11fa29 100644
--- a/sql/item_subselect.h
+++ b/sql/item_subselect.h
@@ -382,8 +382,20 @@ public:
   };
   enum_exec_method exec_method;
 
-  /* JTBM: temporary measure to tell JTBM predicates from SJ predicates */
+  /*
+    JTBM: temporary measure to tell JTBM predicates from SJ predicates
+    psergey-jtbm-todo: can't we do without this?
+     - either remove it altogether
+     - or put into enum_exec_method
+    
+    We can't remove it altogether as it is used to classify contents in
+    join->sj_subselects.
+  */
   bool convert_to_semi_join;
+  
+  /*
+    Cost to populate the temporary table (set on an as-needed basis).
+  */
   double startup_cost;
 
   bool *get_cond_guard(int i)
diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc
index 8bf263da17..6b892398be 100644
--- a/sql/opt_subselect.cc
+++ b/sql/opt_subselect.cc
@@ -2,7 +2,7 @@
   @file
 
   @brief
-    Subquery optimization code here.
+    Semi-join subquery optimization code
 
 */
 
@@ -228,15 +228,17 @@ int check_and_do_in_subquery_rewrites(JOIN *join)
         if (in_subs->exec_method == Item_in_subselect::NOT_TRANSFORMED)
           in_subs->exec_method= Item_in_subselect::MATERIALIZATION;
 
-        // psergey-jtbm: "if we're top-level, register for
-        // conversion-to-join-tab".
+        /*
+          If the subquery is an AND-part of the WHERE clause, register it to be
+          processed with the jtbm strategy.
+        */
         if (in_subs->exec_method == Item_in_subselect::MATERIALIZATION &&
             thd->thd_marker.emb_on_expr_nest == (TABLE_LIST*)0x1)
         {
           in_subs->emb_on_expr_nest= thd->thd_marker.emb_on_expr_nest;
-          in_subs->convert_to_semi_join= FALSE; //JTBM
+          in_subs->convert_to_semi_join= FALSE;
           select_lex->outer_select()->
-            join->sj_subselects.append(thd->mem_root, in_subs);//JTBM
+            join->sj_subselects.append(thd->mem_root, in_subs);
         }
       }
 
@@ -406,6 +408,8 @@ static bool make_in_exists_conversion(THD *thd, JOIN *join, Item_in_subselect *i
   }
   DBUG_RETURN(FALSE);
 }
+
+
 /*
   Convert semi-join subquery predicates into semi-join join nests
 
@@ -513,29 +517,6 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
   // #tables-in-parent-query + #tables-in-subquery < MAX_TABLES
   /* Replace all subqueries to be flattened with Item_int(1) */
   arena= thd->activate_stmt_arena_if_needed(&backup);
-#if 0  
-  for (in_subq= join->sj_subselects.front(); 
-       in_subq != in_subq_end && 
-       join->tables + (*in_subq)->unit->first_select()->join->tables < MAX_TABLES;
-       in_subq++)
-  {
-    Item **tree= ((*in_subq)->emb_on_expr_nest == (TABLE_LIST*)1)?
-                   &join->conds : &((*in_subq)->emb_on_expr_nest->on_expr);
-    Item *replace_me= *in_subq;
-    /*
-      JTBM: the subquery was already mapped with Item_in_optimizer, so we
-      should search for that, not for original Item_in_subselect.
-      TODO: what about delaying that rewrite until here?
-    */
-    if (!(*in_subq)->convert_to_semi_join)
-    {
-      replace_me= (*in_subq)->optimizer;
-    }
-    if (replace_where_subcondition(join, tree, replace_me, new Item_int(1),
-                                   FALSE))
-      DBUG_RETURN(TRUE); /* purecov: inspected */
-  }
-#endif
  
   for (in_subq= join->sj_subselects.front(); 
        in_subq != in_subq_end && 
@@ -543,8 +524,6 @@ bool convert_join_subqueries_to_semijoins(JOIN *join)
        in_subq++)
   {
     bool remove_item= TRUE;
-    //psergey-jtbm: todo: here: check if we should convert to semi-join or 
-    // to JTBM nest.
     if ((*in_subq)->convert_to_semi_join) 
     {
       if (convert_subq_to_sj(join, *in_subq))
@@ -668,6 +647,7 @@ void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
   *scan_time= data_size/IO_SIZE + 2;
 } 
 
+
 /**
    @brief Replaces an expression destructively inside the expression tree of
    the WHERE clase.
@@ -685,6 +665,7 @@ void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
    @return <code>true</code> if there was an error, <code>false</code> if
    successful.
 */
+
 static bool replace_where_subcondition(JOIN *join, Item **expr, 
                                        Item *old_cond, Item *new_cond,
                                        bool do_fix_fields)
@@ -920,8 +901,7 @@ static bool convert_subq_to_sj(JOIN *parent_join, Item_in_subselect *subq_pred)
   /* 3. Remove the original subquery predicate from the WHERE/ON */
 
   // The subqueries were replaced for Item_int(1) earlier
-  subq_pred->exec_method=
-    Item_in_subselect::SEMI_JOIN;         // for subsequent executions
+  subq_pred->exec_method= Item_in_subselect::SEMI_JOIN; // for subsequent executions
   /*TODO: also reset the 'with_subselect' there. */
 
   /* n. Adjust the parent_join->tables counter */
@@ -1046,7 +1026,7 @@ static bool convert_subq_to_jtbm(JOIN *parent_join,
   SELECT_LEX *parent_lex= parent_join->select_lex;
   List<TABLE_LIST> *emb_join_list= &parent_lex->top_join_list;
   TABLE_LIST *emb_tbl_nest= NULL; // will change when we learn to handle outer joins
-  TABLE_LIST *tl;//, *last_leaf;
+  TABLE_LIST *tl;
   DBUG_ENTER("convert_subq_to_jtbm");
 
   if (subq_pred->setup_engine(TRUE))
@@ -1071,7 +1051,6 @@ static bool convert_subq_to_jtbm(JOIN *parent_join,
 
   jtbm->join_list= emb_join_list;
   jtbm->embedding= emb_tbl_nest;
-  jtbm->alias= (char*)"(jtbm)"; 
   jtbm->jtbm_subselect= subq_pred;
   jtbm->nested_join= NULL;
 
@@ -1504,6 +1483,7 @@ bool optimize_semijoin_nests(JOIN *join, table_map all_table_map)
   DBUG_RETURN(FALSE);
 }
 
+
 /*
   Get estimated record length for semi-join materialization temptable
   
@@ -1627,6 +1607,7 @@ bool find_eq_ref_candidate(TABLE *table, table_map sj_inner_tables)
   return FALSE;
 }
 
+
 /*
   Do semi-join optimization step after we've added a new tab to join prefix
 
@@ -3762,6 +3743,7 @@ static void remove_subq_pushed_predicates(JOIN *join, Item **where)
   }
 }
 
+
 int do_jtbm_materialization_if_needed(JOIN_TAB *tab)
 {
   Item_in_subselect *in_subs;
diff --git a/sql/opt_subselect.h b/sql/opt_subselect.h
index d8716dbb77..a9ed25fe31 100644
--- a/sql/opt_subselect.h
+++ b/sql/opt_subselect.h
@@ -1,4 +1,6 @@
-/* */
+/*
+  Semi-join subquery optimization code definitions
+*/
 
 #ifdef USE_PRAGMA_INTERFACE
 #pragma interface			/* gcc class implementation */
@@ -365,4 +367,8 @@ int clear_sj_tmp_tables(JOIN *join);
 int rewrite_to_index_subquery_engine(JOIN *join);
 
 
+void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
+                          ha_rows *scan_time);
+
+int do_jtbm_materialization_if_needed(JOIN_TAB *tab);
 
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 96bba985dc..99d88168fb 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -243,10 +243,6 @@ join_read_record_no_init(JOIN_TAB *tab);
 Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
                             bool *inherited_fl);
 
-void get_temptable_params(Item_in_subselect *item, ha_rows *out_rows,
-                          ha_rows *scan_time);
-int do_jtbm_materialization_if_needed(JOIN_TAB *tab);
-
 /**
   This handles SELECT with and without UNION.
 */
@@ -2634,7 +2630,6 @@ make_join_statistics(JOIN *join, TABLE_LIST *tables_arg, COND *conds,
       no_rows_const_tables |= table->map;
     }
   }
-  //psergey-todo: inject jtbm JOIN_TABS here.
 
   stat_vector[i]=0;
   join->outer_join=outer_join;
@@ -7723,7 +7718,7 @@ void JOIN_TAB::cleanup()
       table->file->extra(HA_EXTRA_NO_KEYREAD);
     }
     table->file->ha_index_or_rnd_end();
-    //psergey-jtbm2:
+
     if (table->pos_in_table_list && 
         table->pos_in_table_list->jtbm_subselect)
     {
@@ -11570,7 +11565,6 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
     {
       key_part_info->null_bit=0;
       key_part_info->field=    *reg_field;
-      //psergey-jtbm:
       (*reg_field)->flags |= PART_KEY_FLAG;
       if (key_part_info == keyinfo->key_part)
         (*reg_field)->key_start.set_bit(0);
diff --git a/sql/sql_select.h b/sql/sql_select.h
index a0a722b2fc..733ea310ad 100644
--- a/sql/sql_select.h
+++ b/sql/sql_select.h
@@ -1552,18 +1552,11 @@ public:
   bool optimized; ///< flag to avoid double optimization in EXPLAIN
 
   /* 
-    Subqueries that will need to be converted to semi-join nests (the list 
-    is emptied when conversion is done
+    Subqueries that will need to be converted to semi-join nests, including
+    those converted to jtbm nests. The list is emptied when conversion is done.
   */
   Array<Item_in_subselect> sj_subselects;
   
-  /*
-    Subqueries that will need to be converted to JOIN_TABs
-    (Note this is different from the above in the respect that it's part 
-    of WHERE clause or something like that?)
-  */
-  //Array<Item_in_subselect> jtbm_subselects;
-
   /* Temporary tables used to weed-out semi-join duplicates */
   List<TABLE> sj_tmp_tables;
   List<SJ_MATERIALIZATION_INFO> sjm_info_list;
@@ -1586,7 +1579,6 @@ public:
   JOIN(THD *thd_arg, List<Item> &fields_arg, ulonglong select_options_arg,
        select_result *result_arg)
     :fields_list(fields_arg), sj_subselects(thd_arg->mem_root, 4)
-     //jtbm_subselects(thd_arg->mem_root, 4)
   {
     init(thd_arg, fields_arg, select_options_arg, result_arg);
   }
diff --git a/sql/table.h b/sql/table.h
index 5898c5bb12..b564d76254 100644
--- a/sql/table.h
+++ b/sql/table.h
@@ -1131,7 +1131,7 @@ class Item_in_subselect;
   1) table (TABLE_LIST::view == NULL)
      - base table
        (TABLE_LIST::derived == NULL)
-     - subquery - TABLE_LIST::table is a temp table
+     - FROM-clause subquery - TABLE_LIST::table is a temp table
        (TABLE_LIST::derived != NULL)
      - information schema table
        (TABLE_LIST::schema_table != NULL)
@@ -1150,6 +1150,8 @@ class Item_in_subselect;
        (TABLE_LIST::natural_join != NULL)
        - JOIN ... USING
          (TABLE_LIST::join_using_fields != NULL)
+     - semi-join nest (sj_on_expr != NULL && sj_subq_pred != NULL)
+  4) jtbm semi-join (jtbm_subselect != NULL)
 */
 
 class Index_hint;
@@ -1192,9 +1194,14 @@ struct TABLE_LIST
   */
   table_map     sj_inner_tables;
   /* Number of IN-compared expressions */
-  uint          sj_in_exprs; 
+  uint          sj_in_exprs;
+  
+  /* If this is a non-jtbm semi-join nest: corresponding subselect predicate */
   Item_in_subselect  *sj_subq_pred;
+
+  /* If this is a jtbm semi-join object: corresponding subselect predicate */
   Item_in_subselect  *jtbm_subselect;
+
   SJ_MATERIALIZATION_INFO *sj_mat_info;
 
   /*
-- 
2.30.9