MDEV-33306 Optimizer choosing incorrect index in 10.6, 10.5 but not in 10.4

In MariaDB up to 10.11, the test_if_cheaper_ordering() code (that tries to optimizer how GROUP BY is executed) assumes that if a table scan is used then if there is any index usable by GROUP BY it will be used. The reason MySQL 10.4 provides a better plan is because of two differences: - Plans using 'ref' has a cost of 1/10 of what it should be (as a protection against table scans). This is why 'ref' is used in 10.4 and not in 10.5. - When 'ref' is used, then GROUP BY will not use an index for GROUP BY. In MariaDB 10.5 the chosen plan is a table scan (as it calculated to be faster) but as 'ref' is not used, the test_if_cheaper_ordering() optimizer phase decides (as ref is not usd) to use an index for GROUP BY, which has bad performance. Description of fix: - All new code is protected by the "optimizer_adjust_secondary_key_costs" variable, which is now a bit map, and is only executed if the option "disable_forced_index_in_group_by" set. - Corrects GROUP BY handling in test_if_cheaper_ordering() by making the choise of using and index with GROUP BY cost based instead of rule based. - Adds TIME_FOR_COMPARE to all costs, when using group by, to make read_time, index_scan_time and range_cost comparable. Other things: - Made optimizer_adjust_secondary_key_costs a bit map (compatible with old code). Notes: Current code ignores costs for the algorithm used when doing GROUP BY on the first table: - Create an in-memory temporary table for handling group by and doing a filesort of the result file We can probably in 10.6 continue to ignore this cost. This patch should NOT be merged to 11.0 series (not needed in 11.0).

MDEV-33306 Optimizer choosing incorrect index in 10.6, 10.5 but not in 10.4
In MariaDB up to 10.11, the test_if_cheaper_ordering() code (that tries to optimizer how GROUP BY is executed) assumes that if a table scan is used then if there is any index usable by GROUP BY it will be used. The reason MySQL 10.4 provides a better plan is because of two differences: - Plans using 'ref' has a cost of 1/10 of what it should be (as a protection against table scans). This is why 'ref' is used in 10.4 and not in 10.5. - When 'ref' is used, then GROUP BY will not use an index for GROUP BY. In MariaDB 10.5 the chosen plan is a table scan (as it calculated to be faster) but as 'ref' is not used, the test_if_cheaper_ordering() optimizer phase decides (as ref is not usd) to use an index for GROUP BY, which has bad performance. Description of fix: - All new code is protected by the "optimizer_adjust_secondary_key_costs" variable, which is now a bit map, and is only executed if the option "disable_forced_index_in_group_by" set. - Corrects GROUP BY handling in test_if_cheaper_ordering() by making the choise of using and index with GROUP BY cost based instead of rule based. - Adds TIME_FOR_COMPARE to all costs, when using group by, to make read_time, index_scan_time and range_cost comparable. Other things: - Made optimizer_adjust_secondary_key_costs a bit map (compatible with old code). Notes: Current code ignores costs for the algorithm used when doing GROUP BY on the first table: - Create an in-memory temporary table for handling group by and doing a filesort of the result file We can probably in 10.6 continue to ignore this cost. This patch should NOT be merged to 11.0 series (not needed in 11.0).
3907345e · Monty · 4106974b · 3907345e · 3907345e · 3907345e
Commit 3907345e authored Feb 09, 2024 by Monty
9 changed files
--- a/mysql-test/main/mysqld--help.result
+++ b/mysql-test/main/mysqld--help.result
@@ -712,13 +712,17 @@ The following specify which files/extra groups are read (specified before remain
 max_connections*5 or max_connections + table_cache*2
 (whichever is larger) number of file descriptors
 (Automatically configured unless set explicitly)
- --optimizer-adjust-secondary-key-costs=# 
- 0 = No changes. 1 = Update secondary key costs for ranges
- to be at least 5x of clustered primary key costs. 2 =
- Remove 'max_seek optimization' for secondary keys and
- slight adjustment of filter cost. This option will be
- deleted in MariaDB 11.0 as it is not needed with the new
- 11.0 optimizer.
+ --optimizer-adjust-secondary-key-costs=name 
+ A bit field with the following values:
+ adjust_secondary_key_cost = Update secondary key costs
+ for ranges to be at least 5x of clustered primary key
+ costs. disable_max_seek = Disable 'max_seek optimization'
+ for secondary keys and slight adjustment of filter cost.
+ disable_forced_index_in_group_by = Disable automatic
+ forced index in GROUP BY. This variable will be deleted
+ in MariaDB 11.0 as it is not needed with the new 11.0
+ optimizer.
+ Use 'ALL' to set all combinations.
 --optimizer-max-sel-arg-weight=# 
 The maximum weight of the SEL_ARG graph. Set to 0 for no
 limit
@@ -1691,7 +1695,7 @@ old-alter-table DEFAULT
 old-mode UTF8_IS_UTF8MB3
 old-passwords FALSE
 old-style-user-limits FALSE
-optimizer-adjust-secondary-key-costs 0
+optimizer-adjust-secondary-key-costs 
 optimizer-max-sel-arg-weight 32000
 optimizer-max-sel-args 16000
 optimizer-prune-level 1

--- a/mysql-test/main/secondary_key_costs.result
+++ b/mysql-test/main/secondary_key_costs.result
@@ -80,3 +80,38 @@ json_detailed(json_extract(@trace, '$**.considered_access_paths'))
    ]
 ]
 drop table t1, name, flag2;
+select @@optimizer_adjust_secondary_key_costs;
+@@optimizer_adjust_secondary_key_costs
+
+set @@optimizer_adjust_secondary_key_costs=7;
+select @@optimizer_adjust_secondary_key_costs;
+@@optimizer_adjust_secondary_key_costs
+adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
+set @@optimizer_adjust_secondary_key_costs=default;
+#
+# MDEV-33306 Optimizer choosing incorrect index in 10.6, 10.5 but not in 10.4
+#
+create table t1 (a int primary key, b int, c int, d int, key(b),key(c)) engine=innodb;
+insert into t1 select seq, mod(seq,10), mod(seq,2), seq from seq_1_to_50000;
+explain select b, sum(d) from t1 where c=0 group by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	index	c	b	5	NULL	#	Using where
+select b, sum(d) from t1 where c=0 group by b;
+b	sum(d)
+0	125025000
+2	124985000
+4	124995000
+6	125005000
+8	125015000
+set @@optimizer_adjust_secondary_key_costs="disable_forced_index_in_group_by";
+explain select b, sum(d) from t1 where c=0 group by b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	ALL	c	NULL	NULL	NULL	#	Using where; Using temporary; Using filesort
+select b, sum(d) from t1 where c=0 group by b;
+b	sum(d)
+0	125025000
+2	124985000
+4	124995000
+6	125005000
+8	125015000
+drop table t1;
--- a/mysql-test/main/secondary_key_costs.test
+++ b/mysql-test/main/secondary_key_costs.test
 --source include/have_sequence.inc
 --source include/not_embedded.inc
+--source include/have_innodb.inc

 #
 # Show the costs for rowid filter
@@ -51,3 +52,23 @@ select json_detailed(json_extract(@trace, '$**.considered_access_paths'));
 --enable_ps_protocol

 drop table t1, name, flag2;
+
+select @@optimizer_adjust_secondary_key_costs;
+set @@optimizer_adjust_secondary_key_costs=7;
+select @@optimizer_adjust_secondary_key_costs;
+set @@optimizer_adjust_secondary_key_costs=default;
+
+--echo #
+--echo # MDEV-33306 Optimizer choosing incorrect index in 10.6, 10.5 but not in 10.4
+--echo #
+
+create table t1 (a int primary key, b int, c int, d int, key(b),key(c)) engine=innodb;
+insert into t1 select seq, mod(seq,10), mod(seq,2), seq from seq_1_to_50000;
+--replace_column 9 #
+explain select b, sum(d) from t1 where c=0 group by b;
+select b, sum(d) from t1 where c=0 group by b;
+set @@optimizer_adjust_secondary_key_costs="disable_forced_index_in_group_by";
+--replace_column 9 #
+explain select b, sum(d) from t1 where c=0 group by b;
+select b, sum(d) from t1 where c=0 group by b;
+drop table t1;
--- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@@ -2274,12 +2274,12 @@ READ_ONLY	YES
 COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
 VARIABLE_SCOPE	SESSION
-VARIABLE_TYPE	BIGINT UNSIGNED
-VARIABLE_COMMENT	0 = No changes. 1 = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. 2 = Remove 'max_seek optimization' for secondary keys and slight adjustment of filter cost. This option will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
-NUMERIC_MIN_VALUE	0
-NUMERIC_MAX_VALUE	2
-NUMERIC_BLOCK_SIZE	1
-ENUM_VALUE_LIST	NULL
+VARIABLE_TYPE	SET
+VARIABLE_COMMENT	A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
+NUMERIC_MIN_VALUE	NULL
+NUMERIC_MAX_VALUE	NULL
+NUMERIC_BLOCK_SIZE	NULL
+ENUM_VALUE_LIST	adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
 READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_MAX_SEL_ARGS

--- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result
@@ -2434,12 +2434,12 @@ READ_ONLY	YES
 COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
 VARIABLE_SCOPE	SESSION
-VARIABLE_TYPE	BIGINT UNSIGNED
-VARIABLE_COMMENT	0 = No changes. 1 = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. 2 = Remove 'max_seek optimization' for secondary keys and slight adjustment of filter cost. This option will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
-NUMERIC_MIN_VALUE	0
-NUMERIC_MAX_VALUE	2
-NUMERIC_BLOCK_SIZE	1
-ENUM_VALUE_LIST	NULL
+VARIABLE_TYPE	SET
+VARIABLE_COMMENT	A bit field with the following values: adjust_secondary_key_cost = Update secondary key costs for ranges to be at least 5x of clustered primary key costs. disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight adjustment of filter cost. disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
+NUMERIC_MIN_VALUE	NULL
+NUMERIC_MAX_VALUE	NULL
+NUMERIC_BLOCK_SIZE	NULL
+ENUM_VALUE_LIST	adjust_secondary_key_cost,disable_max_seek,disable_forced_index_in_group_by
 READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_MAX_SEL_ARGS

--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -712,6 +712,11 @@ typedef struct system_variables
  ulonglong sortbuff_size;
  ulonglong default_regex_flags;
  ulonglong max_mem_used;
+  /*
+    A bitmap of OPTIMIZER_ADJ_* flags (defined in sql_priv.h).
+    See sql_vars.cc:adjust_secondary_key_cost for symbolic names.
+  */
+  ulonglong optimizer_adjust_secondary_key_costs;

  /**
     Place holders to store Multi-source variables in sys_var.cc during
@@ -761,7 +766,6 @@ typedef struct system_variables
  ulong optimizer_max_sel_arg_weight;
  ulong optimizer_max_sel_args;
  ulong optimizer_trace_max_mem_size;
-  ulong optimizer_adjust_secondary_key_costs;
  ulong use_stat_tables;
  double sample_percentage;
  ulong histogram_size;

--- a/sql/sql_priv.h
+++ b/sql/sql_priv.h
@@ -268,6 +268,13 @@
                                    OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING | \
                                    OPTIMIZER_SWITCH_OPTIMIZE_JOIN_BUFFER_SIZE)

+/*
+  See adjust_secondary_key_cost in sys_vars.cc for symbolic names.
+*/
+#define OPTIMIZER_ADJ_SEC_KEY_COST (1)
+#define OPTIMIZER_ADJ_DISABLE_MAX_SEEKS (2)
+#define OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY (4)
+
 /*
  Replication uses 8 bytes to store SQL_MODE in the binary log. The day you
  use strictly more than 64 bits by adding one more define above, you should

--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -5897,7 +5897,8 @@ make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
      */
      /* Largest integer that can be stored in double (no compiler warning) */
      s->worst_seeks= (double) (1ULL << 53);
-      if (thd->variables.optimizer_adjust_secondary_key_costs != 2)
+      if ((thd->variables.optimizer_adjust_secondary_key_costs &
+           OPTIMIZER_ADJ_DISABLE_MAX_SEEKS) == 0)
      {
        s->worst_seeks= MY_MIN((double) s->found_records / 10,
                               (double) s->read_time*3);
@@ -7824,7 +7825,8 @@ double cost_for_index_read(const THD *thd, const TABLE *table, uint key,
  {
    cost= ((file->keyread_time(key, 0, records) +
            file->read_time(key, 1, MY_MIN(records, worst_seeks))));
-    if (thd->variables.optimizer_adjust_secondary_key_costs == 1 &&
+    if ((thd->variables.optimizer_adjust_secondary_key_costs &
+         OPTIMIZER_ADJ_SEC_KEY_COST) &&
        file->is_clustering_key(0))
    {
      /*
@@ -8020,8 +8022,9 @@ best_access_path(JOIN      *join,
    higher to ensure that ref|filter is not less than range over same
    number of rows
  */
-  double filter_setup_cost= (thd->variables.
-                             optimizer_adjust_secondary_key_costs == 2 ?
+  double filter_setup_cost= ((thd->variables.
+                              optimizer_adjust_secondary_key_costs &
+                              OPTIMIZER_ADJ_DISABLE_MAX_SEEKS) ?
                             1.0 : 0.0);
  MY_BITMAP *eq_join_set= &s->table->eq_join_set;
  KEYUSE *hj_start_key= 0;
@@ -30119,12 +30122,13 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
  uint best_key_parts= 0;
  int best_key_direction= 0;
  ha_rows best_records= 0;
-  double read_time;
+  double read_time, records;
  int best_key= -1;
  bool is_best_covering= FALSE;
  double fanout= 1;
  ha_rows table_records= table->stat_records();
  bool group= join && join->group && order == join->group_list;
+  bool group_forces_index_usage= group;
  ha_rows refkey_rows_estimate= table->opt_range_condition_rows;
  const bool has_limit= (select_limit_arg != HA_POS_ERROR);
  THD* thd= join ? join->thd : table->in_use;
@@ -30161,6 +30165,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
  {
    uint tablenr= (uint)(tab - join->join_tab);
    read_time= join->best_positions[tablenr].read_time;
+    records= join->best_positions[tablenr].records_read;
    for (uint i= tablenr+1; i < join->table_count; i++)
    {
      fanout*= join->best_positions[i].records_read; // fanout is always >= 1
@@ -30169,8 +30174,23 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
    }
  }
  else
+  {
    read_time= table->file->scan_time();
+    records= rows2double(table_records);
+  }
  
+  if ((thd->variables.optimizer_adjust_secondary_key_costs &
+       OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
+  {
+    /*
+      read_time does not include TIME_FOR_COMPARE while opt_range.cost, which
+      is used by index_scan_time contains it.
+      Ensure that read_time and index_scan_time always include it to make
+      costs comparable.
+    */
+    read_time+= records/TIME_FOR_COMPARE;
+  }
+
  trace_cheaper_ordering.add("fanout", fanout);
  /*
    TODO: add cost of sorting here.
@@ -30361,30 +30381,62 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
        possible_key.add("updated_limit", select_limit);
        rec_per_key= keyinfo->actual_rec_per_key(keyinfo->user_defined_key_parts-1);
        set_if_bigger(rec_per_key, 1);
-        /*
-          Here we take into account the fact that rows are
-          accessed in sequences rec_per_key records in each.
-          Rows in such a sequence are supposed to be ordered
-          by rowid/primary key. When reading the data
-          in a sequence we'll touch not more pages than the
-          table file contains.
-          TODO. Use the formula for a disk sweep sequential access
-          to calculate the cost of accessing data rows for one 
-          index entry.
-        */
-        index_scan_time= select_limit/rec_per_key *
-                         MY_MIN(rec_per_key, table->file->scan_time());
-        double range_scan_time;
-        if (get_range_limit_read_cost(tab, table, table_records, nr,
-                                      select_limit, &range_scan_time))
+
+        if ((thd->variables.optimizer_adjust_secondary_key_costs &
+             OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
+        {
+          /* Special optimization to avoid forcing an index when group by is used */
+          group_forces_index_usage= 0;
+
+          if (table->opt_range_keys.is_set(nr))
+          {
+            /* opt_range includes TIME_FOR_COMPARE */
+            index_scan_time= (double) table->opt_range[nr].cost;
+          }
+          else
+          {
+            /* Enable secondary_key_cost and disable max_seek option */
+            ulonglong save= thd->variables.optimizer_adjust_secondary_key_costs;
+            thd->variables.optimizer_adjust_secondary_key_costs|=
+                OPTIMIZER_ADJ_SEC_KEY_COST | OPTIMIZER_ADJ_DISABLE_MAX_SEEKS;
+
+            index_scan_time= cost_for_index_read(thd, table, nr,
+                                                 table_records, HA_ROWS_MAX);
+            index_scan_time+= rows2double(table_records) / TIME_FOR_COMPARE;
+            thd->variables.optimizer_adjust_secondary_key_costs= save;
+          }
+          /* Assume data is proportionalyl distributed */
+          index_scan_time*= MY_MIN(select_limit, rec_per_key) / rec_per_key;
+        }
+        else
        {
-          possible_key.add("range_scan_time", range_scan_time);
-          if (range_scan_time < index_scan_time)
-            index_scan_time= range_scan_time;
+          /*
+            Here we take into account the fact that rows are
+            accessed in sequences rec_per_key records in each.
+            Rows in such a sequence are supposed to be ordered
+            by rowid/primary key. When reading the data
+            in a sequence we'll touch not more pages than the
+            table file contains.
+            TODO. Use the formula for a disk sweep sequential access
+            to calculate the cost of accessing data rows for one
+            index entry.
+          */
+          index_scan_time= select_limit/rec_per_key *
+            MY_MIN(rec_per_key, table->file->scan_time());
+
+          double range_scan_time;
+          if (get_range_limit_read_cost(tab, table, table_records, nr,
+                                        select_limit, &range_scan_time))
+          {
+            possible_key.add("range_scan_time", range_scan_time);
+            if (range_scan_time < index_scan_time)
+              index_scan_time= range_scan_time;
+          }
        }
        possible_key.add("index_scan_time", index_scan_time);

-        if ((ref_key < 0 && (group || table->force_index || is_covering)) ||
+        if ((ref_key < 0 &&
+             (group_forces_index_usage || table->force_index || is_covering)) ||
            index_scan_time < read_time)
        {
          ha_rows quick_records= table_records;
@@ -30406,6 +30458,7 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
            possible_key.add("cause", "ref estimates better");
            continue;
          }
+
          if (table->opt_range_keys.is_set(nr))
            quick_records= table->opt_range[nr].rows;
          possible_key.add("records", quick_records);
@@ -30424,6 +30477,9 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
            is_best_covering= is_covering;
            best_key_direction= direction; 
            best_select_limit= select_limit;
+            if ((thd->variables.optimizer_adjust_secondary_key_costs &
+                 OPTIMIZER_ADJ_DISABLE_FORCE_INDEX_GROUP_BY) && group)
+              set_if_smaller(read_time, index_scan_time);
          }
          else
          {
--- a/sql/sys_vars.cc
+++ b/sql/sys_vars.cc
@@ -2817,17 +2817,28 @@ static Sys_var_ulong Sys_optimizer_trace_max_mem_size(
    SESSION_VAR(optimizer_trace_max_mem_size), CMD_LINE(REQUIRED_ARG),
    VALID_RANGE(0, ULONG_MAX), DEFAULT(1024 * 1024), BLOCK_SIZE(1));

-static Sys_var_ulong Sys_optimizer_adjust_secondary_key_costs(
+
+/*
+  Symbolic names for OPTIMIZER_ADJ_* flags in sql_priv.h
+*/
+static const char *adjust_secondary_key_cost[]=
+{
+  "adjust_secondary_key_cost", "disable_max_seek", "disable_forced_index_in_group_by", 0
+};
+
+
+static Sys_var_set Sys_optimizer_adjust_secondary_key_costs(
    "optimizer_adjust_secondary_key_costs",
-    "0 = No changes. "
-    "1 = Update secondary key costs for ranges to be at least 5x of clustered "
-    "primary key costs. "
-    "2 = Remove 'max_seek optimization' for secondary keys and slight "
+    "A bit field with the following values: "
+    "adjust_secondary_key_cost = Update secondary key costs for ranges to be at least "
+    "5x of clustered primary key costs. "
+    "disable_max_seek = Disable 'max_seek optimization' for secondary keys and slight "
    "adjustment of filter cost. "
-    "This option will be deleted in MariaDB 11.0 as it is not needed with the "
+    "disable_forced_index_in_group_by = Disable automatic forced index in GROUP BY. "
+    "This variable will be deleted in MariaDB 11.0 as it is not needed with the "
    "new 11.0 optimizer.",
    SESSION_VAR(optimizer_adjust_secondary_key_costs), CMD_LINE(REQUIRED_ARG),
-    VALID_RANGE(0, 2), DEFAULT(0), BLOCK_SIZE(1));
+    adjust_secondary_key_cost, DEFAULT(0));


 static Sys_var_charptr_fscs Sys_pid_file(