MDEV-34993: Incorrect cardinality estimation causes poor query plan

When calculate_cond_selectivity_for_table() takes into account multi-column selectivities from range access, it tries to allow for the fact that the selectivity of some columns may have already been counted. For example, for range access on IDX1 using {kp1, kp2}, the selectivity of restrictions on "kp2" might have already been taken into account to some extent. So, the code tries to "discount" that using rec_per_key[] estimates. This seems to be wrong and unreliable: the "discounting" may produce a selectivity_multiplier number that implies that the overall selectivity of range access on IDX1 was greater than 1. Do a conservative fix: if we arrive at the conclusion that the selectivity of the range access condition on IDX1 is > 1.0, clip it down to 1.

MDEV-34993: Incorrect cardinality estimation causes poor query plan
When calculate_cond_selectivity_for_table() takes into account multi-column selectivities from range access, it tries to allow for the fact that the selectivity of some columns may have already been counted. For example, for range access on IDX1 using {kp1, kp2}, the selectivity of restrictions on "kp2" might have already been taken into account to some extent. So, the code tries to "discount" that using rec_per_key[] estimates. This seems to be wrong and unreliable: the "discounting" may produce a selectivity_multiplier number that implies that the overall selectivity of range access on IDX1 was greater than 1. Do a conservative fix: if we arrive at the conclusion that the selectivity of the range access condition on IDX1 is > 1.0, clip it down to 1.
8166a5d3 · Sergei Petrunia · 9021f40b · 8166a5d3 · 8166a5d3 · 8166a5d3
Commit 8166a5d3 authored Sep 24, 2024 by Sergei Petrunia
6 changed files
--- a/mysql-test/main/selectivity_innodb_notembedded.result
+++ b/mysql-test/main/selectivity_innodb_notembedded.result
@@ -208,6 +208,80 @@ JS
 set optimizer_trace=@trace_tmp;
 drop table t1;
 #
+# MDEV-34993: Incorrect cardinality estimation causes poor query plan
+#
+create table t1 (
+pk int,
+key1 int,
+filler char(100),
+index (key1, pk),
+primary key (pk)
+);
+insert into t1
+select 
+seq, FLOOR(seq/100), 'filler'
+from 
+seq_1_to_1000;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	OK
+set optimizer_trace=1;
+explain select * from t1 
+where
+pk in (1,2,3,4,5) and
+key1 <= 4;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	PRIMARY,key1	PRIMARY	4	NULL	5	Using where
+# Must have a note that "multiplier is too high":
+select 
+json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
+from 
+information_schema.optimizer_trace;
+JS
+[
+    [
+        {
+            "index_name": "PRIMARY",
+            "selectivity_from_index": 0.005
+        },
+        {
+            "index_name": "key1",
+            "selectivity_from_index": 0.399,
+            "selectivity_multiplier": 90.9091,
+            "note": "multiplier too high, clipping",
+            "clipped_multiplier": 2.506265664
+        }
+    ]
+]
+# Must not include 1.79...e308 as cost:
+select 
+json_detailed(json_extract(trace,'$**.best_access_path')) as JS
+from 
+information_schema.optimizer_trace;
+JS
+[
+    {
+        "considered_access_paths": 
+        [
+            {
+                "access_type": "range",
+                "resulting_rows": 5,
+                "cost": 6.666533161,
+                "chosen": true
+            }
+        ],
+        "chosen_access_method": 
+        {
+            "type": "range",
+            "records": 5,
+            "cost": 6.666533161,
+            "uses_join_buffering": false
+        }
+    }
+]
+drop table t1;
+#
 # Clean up
 #
 set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;

--- a/mysql-test/main/selectivity_innodb_notembedded.test
+++ b/mysql-test/main/selectivity_innodb_notembedded.test
@@ -3,6 +3,7 @@
 --source include/big_test.inc
 --source include/default_optimizer_switch.inc
 --source include/not_embedded.inc
+--source ./include/innodb_stable_estimates.inc

 SET SESSION STORAGE_ENGINE='InnoDB';


--- a/mysql-test/main/selectivity_notembedded.result
+++ b/mysql-test/main/selectivity_notembedded.result
@@ -203,6 +203,80 @@ JS
 set optimizer_trace=@trace_tmp;
 drop table t1;
 #
+# MDEV-34993: Incorrect cardinality estimation causes poor query plan
+#
+create table t1 (
+pk int,
+key1 int,
+filler char(100),
+index (key1, pk),
+primary key (pk)
+);
+insert into t1
+select 
+seq, FLOOR(seq/100), 'filler'
+from 
+seq_1_to_1000;
+analyze table t1;
+Table	Op	Msg_type	Msg_text
+test.t1	analyze	status	Engine-independent statistics collected
+test.t1	analyze	status	Table is already up to date
+set optimizer_trace=1;
+explain select * from t1 
+where
+pk in (1,2,3,4,5) and
+key1 <= 4;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	PRIMARY,key1	PRIMARY	4	NULL	5	Using index condition; Using where
+# Must have a note that "multiplier is too high":
+select 
+json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
+from 
+information_schema.optimizer_trace;
+JS
+[
+    [
+        {
+            "index_name": "PRIMARY",
+            "selectivity_from_index": 0.005
+        },
+        {
+            "index_name": "key1",
+            "selectivity_from_index": 0.391,
+            "selectivity_multiplier": 90.9091,
+            "note": "multiplier too high, clipping",
+            "clipped_multiplier": 2.557544757
+        }
+    ]
+]
+# Must not include 1.79...e308 as cost:
+select 
+json_detailed(json_extract(trace,'$**.best_access_path')) as JS
+from 
+information_schema.optimizer_trace;
+JS
+[
+    {
+        "considered_access_paths": 
+        [
+            {
+                "access_type": "range",
+                "resulting_rows": 5,
+                "cost": 6.647684891,
+                "chosen": true
+            }
+        ],
+        "chosen_access_method": 
+        {
+            "type": "range",
+            "records": 5,
+            "cost": 6.647684891,
+            "uses_join_buffering": false
+        }
+    }
+]
+drop table t1;
+#
 # Clean up
 #
 set optimizer_use_condition_selectivity= @save_optimizer_use_condition_selectivity;

--- a/mysql-test/main/selectivity_notembedded.test
+++ b/mysql-test/main/selectivity_notembedded.test
@@ -220,6 +220,45 @@ from
 set optimizer_trace=@trace_tmp;
 drop table t1;

+--echo #
+--echo # MDEV-34993: Incorrect cardinality estimation causes poor query plan
+--echo #
+
+create table t1 (
+  pk int,
+  key1 int,
+  filler char(100),
+  index (key1, pk),
+  primary key (pk)
+);
+
+insert into t1
+select 
+  seq, FLOOR(seq/100), 'filler'
+from 
+  seq_1_to_1000;
+analyze table t1;
+
+set optimizer_trace=1;
+explain select * from t1 
+where
+  pk in (1,2,3,4,5) and
+  key1 <= 4;
+
+--echo # Must have a note that "multiplier is too high":
+select 
+  json_detailed(json_extract(trace,'$**.selectivity_for_indexes')) as JS
+from 
+  information_schema.optimizer_trace;
+
+--echo # Must not include 1.79...e308 as cost:
+select 
+  json_detailed(json_extract(trace,'$**.best_access_path')) as JS
+from 
+  information_schema.optimizer_trace;
+
+drop table t1;
+
 --echo #
 --echo # Clean up
 --echo #

--- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result
@@ -2242,6 +2242,16 @@ NUMERIC_BLOCK_SIZE	1
 ENUM_VALUE_LIST	NULL
 READ_ONLY	YES
 COMMAND_LINE_ARGUMENT	REQUIRED
+VARIABLE_NAME	OPTIMIZER_ADJUST_SECONDARY_KEY_COSTS
+VARIABLE_SCOPE	SESSION
+VARIABLE_TYPE	SET
+VARIABLE_COMMENT	A bit field with the following values: fix_card_multiplier = Fix the computation in selectivity_for_indexes. selectivity_multiplier. This variable will be deleted in MariaDB 11.0 as it is not needed with the new 11.0 optimizer.
+NUMERIC_MIN_VALUE	NULL
+NUMERIC_MAX_VALUE	NULL
+NUMERIC_BLOCK_SIZE	NULL
+ENUM_VALUE_LIST	fix_card_multiplier
+READ_ONLY	NO
+COMMAND_LINE_ARGUMENT	REQUIRED
 VARIABLE_NAME	OPTIMIZER_MAX_SEL_ARG_WEIGHT
 VARIABLE_SCOPE	SESSION
 VARIABLE_TYPE	BIGINT UNSIGNED

--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
@@ -3503,9 +3503,33 @@ bool calculate_cond_selectivity_for_table(THD *thd, TABLE *table, Item **cond)
              */
              selectivity_mult= ((double)(i+1)) / i;
            }
-            table->cond_selectivity*= selectivity_mult;
            selectivity_for_index.add("selectivity_multiplier",
                                      selectivity_mult);
+
+            /*
+              Ok, now we assume that selectivity that range condition on
+              this index adds over selectivities on indexes that we've already
+              examined is
+
+                $SEL= (quick_cond_selectivity * selectivity_mult)
+
+              The heuristic that we used to obtain selectivity_mult may not be
+              correct (actually is known to be incorrect in simple cases), so
+              we make sure here that $SEL <= 1.0.
+
+              We adjust selectivity_mult (table->cond_selectivity was already
+              multiplied by quick_cond_selectivity above, so we will only
+              multiply it with selectivity_mult).
+            */
+            if (selectivity_mult > 1.0 / quick_cond_selectivity)
+            {
+              selectivity_for_index.add("note", "multiplier too high, clipping");
+              selectivity_mult= 1.0/quick_cond_selectivity;
+              selectivity_for_index.add("clipped_multiplier", selectivity_mult);
+              DBUG_ASSERT(quick_cond_selectivity * selectivity_mult <= 1.0);
+            }
+
+            table->cond_selectivity*= selectivity_mult;
          }
          /*
            We need to set selectivity for fields supported by indexes.