Fix cost calculation for get_best_group_min_max()

If the final range restrictions (SEL_ARG tree) over the GROUP BY columns are all single-point intervals, we can compute the number of GROUP BY groups directly from the ranges.

Example: in the query

    SELECT ... FROM tbl WHERE keypart1 IN (1,2,3) and keypart2 IN ('foo','bar')

if the query groups by keypart1, keypart2, there can be at most 3 * 2 = 6 groups.

Other things:
- Fixed cost calculation to more correctly count the number of blocks that may be read. The old code could use the total number of blocks in the file even if a range was available.

Fix cost calculation for get_best_group_min_max()
If the final range restrictions (SEL_ARG tree) over the GROUP BY columns are all single-point intervals, we can compute the number of GROUP BY groups directly from the ranges.

Example: in the query

    SELECT ... FROM tbl WHERE keypart1 IN (1,2,3) and keypart2 IN ('foo','bar')

if the query groups by keypart1, keypart2, there can be at most 3 * 2 = 6 groups.

Other things:
- Fixed cost calculation to more correctly count the number of blocks that may be read. The old code could use the total number of blocks in the file even if a range was available.
7afa819f · Monty · Sergei Petrunia · 009db228 · 7afa819f · 7afa819f
Commit 7afa819f, authored Sep 25, 2022 by Monty; committed by Sergei Petrunia on Feb 02, 2023
4 changed files
--- a/mysql-test/main/group_by.result
+++ b/mysql-test/main/group_by.result
@@ -552,12 +552,12 @@ a	b
 3	1
 explain select t1.a,t2.b from t1,t2 where t1.a=t2.a group by t1.a,t2.b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using temporary; Using filesort
-1	SIMPLE	t2	ALL	a	NULL	NULL	NULL	4	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t2	ALL	a	NULL	NULL	NULL	4	Using temporary; Using filesort
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using where; Using join buffer (flat, BNL join)
 explain select t1.a,t2.b from t1,t2 where t1.a=t2.a group by t1.a,t2.b ORDER BY NULL;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using temporary
-1	SIMPLE	t2	ALL	a	NULL	NULL	NULL	4	Using where; Using join buffer (flat, BNL join)
+1	SIMPLE	t2	ALL	a	NULL	NULL	NULL	4	Using temporary
+1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	6	Using where; Using join buffer (flat, BNL join)
 drop table t1,t2;
 SET @@optimizer_use_condition_selectivity=@save_optimizer_use_condition_selectivity,@@optimizer_switch=@save_optimizer_switch;
 create table t1 (a int, b int);
@@ -890,7 +890,8 @@ Level	Code	Message
 drop table t1, t2;
 CREATE TABLE t1 (a tinyint(3), b varchar(255), PRIMARY KEY  (a));
 INSERT INTO t1 VALUES (1,'-----'), (6,'Allemagne'), (17,'Autriche'), 
-(25,'Belgique'), (54,'Danemark'), (62,'Espagne'), (68,'France');
+(25,'Belgique'), (54,'Danemark'), (62,'Espagne'), (68,'France'),
+(100,"No land"), (101,"No land");
 CREATE TABLE t2 (a tinyint(3), b tinyint(3), PRIMARY KEY  (a), KEY b (b));
 INSERT INTO t2 VALUES (1,1), (2,1), (6,6), (18,17), (15,25), (16,25),
 (17,25), (10,54), (5,62),(3,68);
@@ -1352,7 +1353,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	range	PRIMARY,i2	PRIMARY	4	NULL	1	Using where; Using index
 EXPLAIN SELECT a FROM t1 WHERE a < 2 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	PRIMARY,i2	PRIMARY	4	NULL	1	Using where; Using index
+1	SIMPLE	t1	range	PRIMARY,i2	i2	4	NULL	1	Using where; Using index for group-by
 EXPLAIN SELECT a FROM t1 IGNORE INDEX (PRIMARY,i2);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 1	SIMPLE	t1	ALL	NULL	NULL	NULL	NULL	144	
@@ -1701,7 +1702,7 @@ NULL	1
 1	2
 EXPLAIN SELECT a from t2 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	index	NULL	a	10	NULL	6	Using index
+1	SIMPLE	t2	range	NULL	a	5	NULL	6	Using index for group-by
 SELECT a from t2 GROUP BY a;
 a
 NULL
@@ -1715,10 +1716,17 @@ NULL
 1
 2
 insert into t2 SELECT NULL, NULL from seq_1_to_10;
+EXPLAIN SELECT b from t2 GROUP BY a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t2	index	NULL	a	10	NULL	16	Using index
 # Expect: Using index for group-by
+analyze table t2;
+Table	Op	Msg_type	Msg_text
+test.t2	analyze	status	Engine-independent statistics collected
+test.t2	analyze	status	OK
 EXPLAIN SELECT b from t2 GROUP BY a;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	a	5	NULL	9	Using index for group-by
+1	SIMPLE	t2	range	NULL	a	5	NULL	6	Using index for group-by
 DROP TABLE t1;
 DROP TABLE t2;
 CREATE TABLE t1 ( a INT, b INT );
@@ -2255,11 +2263,11 @@ INSERT INTO t2(col1, col2) VALUES
 explain
 select col1 f1, col2 f2, col1 f3 from t2 group by f1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	idx	5	NULL	7	Using index for group-by
+1	SIMPLE	t2	index	NULL	idx	10	NULL	20	Using index
 explain
 select SQL_BIG_RESULT col1 f1, col2 f2, col1 f3 from t2 group by f1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	idx	5	NULL	7	Using index for group-by
+1	SIMPLE	t2	index	NULL	idx	10	NULL	20	Using index; Using filesort
 explain
 select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -2267,7 +2275,7 @@ id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
 explain
 select col1 f1, col1 f2 from t2 group by f1, 1+1;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t2	range	NULL	idx	5	NULL	7	Using index for group-by
+1	SIMPLE	t2	index	NULL	idx	10	NULL	20	Using index
 explain
 select col1 f1, col2 f2, col1 f3 from t2 group by f1, f2, f3+0;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
@@ -2457,7 +2465,7 @@ test.t1	analyze	status	OK

 EXPLAIN SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	b	b	9	NULL	2	Using where; Using index for group-by; Using temporary
+1	SIMPLE	t1	range	b	b	9	NULL	1	Using where; Using index for group-by; Using temporary

 SELECT SQL_BUFFER_RESULT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 MIN(a)	b
@@ -2465,7 +2473,7 @@ MIN(a)	b

 EXPLAIN SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	SIMPLE	t1	range	b	b	9	NULL	2	Using where; Using index for group-by
+1	SIMPLE	t1	range	b	b	9	NULL	1	Using where; Using index for group-by

 SELECT MIN(a), b FROM t1 WHERE t1.b = 'a' GROUP BY b;
 MIN(a)	b
@@ -2994,3 +3002,34 @@ drop table t20, t21, t22;
 #
 # End of 10.3 tests
 #
+#
+# Test new group_min_max optimization
+#
+create table t1 (a int, b int, c int, key(a,b,c));
+insert into t1 select mod(seq,23),mod(seq,13), mod(seq,5) from seq_1_to_10000;
+explain select a from t1 where a in (1,2,3) group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	5	NULL	3	Using where; Using index for group-by
+explain select a from t1 where a in (1,2,3) or a = 22 group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	5	NULL	4	Using where; Using index for group-by
+explain select a from t1 where a in (1,2,3) and a < 3 group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	5	NULL	2	Using where; Using index for group-by
+explain select a,b from t1 where (a) in (1,2,3) and b in (5,6,7) group by a,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	10	NULL	9	Using where; Using index for group-by
+explain select a,b from t1 where (a,b) in ((1,1),(2,2),(3,3)) group by a,b;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	10	NULL	3	Using where; Using index for group-by
+explain select a,b,c from t1 where (a,b) in ((1,1),(2,2),(3,3)) and c=3 group by a,b,c;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	15	NULL	3	Using where; Using index for group-by
+# Will not use index for group-by
+explain select a from t1 where a in (1,2,3) and b>1 group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	10	NULL	983	Using where; Using index
+explain select a from t1 where a in (1,2,3) and c=1 group by a;
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	5	NULL	1161	Using where; Using index
+drop table t1;
--- a/mysql-test/main/group_by.test
+++ b/mysql-test/main/group_by.test
@@ -718,7 +718,8 @@ drop table t1, t2;
 CREATE TABLE t1 (a tinyint(3), b varchar(255), PRIMARY KEY  (a));

 INSERT INTO t1 VALUES (1,'-----'), (6,'Allemagne'), (17,'Autriche'), 
-    (25,'Belgique'), (54,'Danemark'), (62,'Espagne'), (68,'France');
+    (25,'Belgique'), (54,'Danemark'), (62,'Espagne'), (68,'France'),
+    (100,"No land"), (101,"No land");

 CREATE TABLE t2 (a tinyint(3), b tinyint(3), PRIMARY KEY  (a), KEY b (b));

@@ -1168,7 +1169,9 @@ SELECT b from t2 GROUP BY b;

 # Show that we are using 'range' when there is more NULL rows in the table
 insert into t2 SELECT NULL, NULL from seq_1_to_10;
+EXPLAIN SELECT b from t2 GROUP BY a;
 --echo # Expect: Using index for group-by
+analyze table t2;
 EXPLAIN SELECT b from t2 GROUP BY a;

 DROP TABLE t1;
@@ -2131,3 +2134,22 @@ drop table t20, t21, t22;
 --echo #
 --echo # End of 10.3 tests
 --echo #
+
+--echo #
+--echo # Test new group_min_max optimization
+--echo #
+
+create table t1 (a int, b int, c int, key(a,b,c));
+insert into t1 select mod(seq,23),mod(seq,13), mod(seq,5) from seq_1_to_10000;
+
+explain select a from t1 where a in (1,2,3) group by a;
+explain select a from t1 where a in (1,2,3) or a = 22 group by a;
+explain select a from t1 where a in (1,2,3) and a < 3 group by a;
+explain select a,b from t1 where (a) in (1,2,3) and b in (5,6,7) group by a,b;
+explain select a,b from t1 where (a,b) in ((1,1),(2,2),(3,3)) group by a,b;
+explain select a,b,c from t1 where (a,b) in ((1,1),(2,2),(3,3)) and c=3 group by a,b,c;
+
+--echo # Will not use index for group-by
+explain select a from t1 where a in (1,2,3) and b>1 group by a;
+explain select a from t1 where a in (1,2,3) and c=1 group by a;
+drop table t1;
--- a/sql/opt_range.cc
+++ b/sql/opt_range.cc
--- a/sql/opt_range.h
+++ b/sql/opt_range.h
@@ -302,6 +302,7 @@ class SEL_ARG :public Sql_alloc
 {
  static int sel_cmp(Field *field, uchar *a, uchar *b, uint8 a_flag,
                     uint8 b_flag);
+  bool min_max_are_equal() const;
 public:
  uint8 min_flag,max_flag,maybe_flag;
  uint8 part;					// Which key part
@@ -401,6 +402,7 @@ class SEL_ARG :public Sql_alloc
      return false;
    return true;
  }
+  int number_of_eq_groups(uint group_key_parts) const;
  inline void merge_flags(SEL_ARG *arg) { maybe_flag|=arg->maybe_flag; }
  inline void maybe_smaller() { maybe_flag=1; }
  /* Return true iff it's a single-point null interval */