Commit 38058c04 authored by Monty's avatar Monty

MDEV-26585 Wrong query results when `using index for group-by`

The problem was that "group_min_max optimization" does not work if
some aggregate functions, like COUNT(*), is used.
The function get_best_group_min_max() is using the join->sum_funcs
array to check which aggregate functions are used.
The bug was that aggregates in HAVING where not yet added to
join->sum_funcs at the time get_best_group_min_max() was called.

Fixed by populate join->sum_funcs already in prepare, which means that
all sum functions will be in join->sum_funcs in get_best_group_min_max().
A benefit of this approach is that we can remove several calls to
make_sum_func_list() from the code and simplify the function.

I removed some wrong setting of 'sort_and_group'.
This variable is set when alloc_group_fields() is called, as part
of allocating the cache needed by end_send_group() and does not need
to be set by other functions.

One problematic thing was that Spider is using *join->sum_funcs to detect
at which stage the optimizer is and do internal calculations of aggregate
functions. Updating join->sum_funcs early caused Spider to fail when trying
to find min/max values in opt_sum_query().
Fixed by temporarily resetting sum_funcs during opt_sum_query().

Reviewer: Sergei Petrunia
parent d314bd26
......@@ -4043,3 +4043,24 @@ b
101
102
DROP TABLE t1;
#
# MDEV-26585 Wrong query results when `using index for group-by`
#
CREATE TABLE `t1` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`owner_id` int(11) DEFAULT NULL,
`foo` tinyint(1) DEFAULT 0,
`whatever` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `index_t1_on_owner_id_and_foo` (`owner_id`,`foo`)
) engine=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO t1 (owner_id, foo, whatever)
VALUES (1, TRUE, "yello"), (1, FALSE, "yello"), (2, TRUE, "yello"),
(2, TRUE, "yello"), (2, FALSE, "yello");
EXPLAIN SELECT DISTINCT owner_id FROM t1 WHERE foo = true GROUP BY owner_id HAVING (COUNT(*) = 1);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 index NULL index_t1_on_owner_id_and_foo 7 NULL 5 Using where; Using index
SELECT DISTINCT owner_id FROM t1 WHERE foo = true GROUP BY owner_id HAVING (COUNT(*) = 1);
owner_id
1
DROP TABLE t1;
......@@ -4,7 +4,7 @@
#
--source include/default_optimizer_switch.inc
--source include/have_innodb.inc
#
# TODO:
# Add queries with:
......@@ -1700,3 +1700,23 @@ INSERT INTO t1 VALUES (0,100),(2,100),(2,101),(3,102);
explain SELECT DISTINCT b FROM t1 WHERE EXISTS ( SELECT 1 FROM DUAL WHERE a > 1 );
SELECT DISTINCT b FROM t1 WHERE EXISTS ( SELECT 1 FROM DUAL WHERE a > 1 );
DROP TABLE t1;
--echo #
--echo # MDEV-26585 Wrong query results when `using index for group-by`
--echo #
CREATE TABLE `t1` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`owner_id` int(11) DEFAULT NULL,
`foo` tinyint(1) DEFAULT 0,
`whatever` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `index_t1_on_owner_id_and_foo` (`owner_id`,`foo`)
) engine=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO t1 (owner_id, foo, whatever)
VALUES (1, TRUE, "yello"), (1, FALSE, "yello"), (2, TRUE, "yello"),
(2, TRUE, "yello"), (2, FALSE, "yello");
EXPLAIN SELECT DISTINCT owner_id FROM t1 WHERE foo = true GROUP BY owner_id HAVING (COUNT(*) = 1);
SELECT DISTINCT owner_id FROM t1 WHERE foo = true GROUP BY owner_id HAVING (COUNT(*) = 1);
DROP TABLE t1;
......@@ -1589,7 +1589,8 @@ bool JOIN::prepare_stage2()
#endif
if (select_lex->olap == ROLLUP_TYPE && rollup_init())
goto err;
if (alloc_func_list())
if (alloc_func_list() ||
make_sum_func_list(all_fields, fields_list, false))
goto err;
res= FALSE;
......@@ -2204,7 +2205,21 @@ JOIN::optimize_inner()
If all items were resolved by opt_sum_query, there is no need to
open any tables.
*/
if ((res=opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds)))
/*
The following resetting and restoring of sum_funcs is needed to
go around a bug in spider where it assumes that
make_sum_func_list() has not been called yet and do logical
choices based on this if special handling of min/max functions should
be done. We disable this special handling while we are trying to find
out if we can replace MIN/MAX values with constants.
*/
Item_sum **save_func_sums= sum_funcs, *tmp_sum_funcs= 0;
sum_funcs= &tmp_sum_funcs;
res= opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds);
sum_funcs= save_func_sums;
if (res)
{
DBUG_ASSERT(res >= 0);
if (res == HA_ERR_KEY_NOT_FOUND)
......@@ -2776,13 +2791,15 @@ int JOIN::optimize_stage2()
calc_group_buffer(this, group_list);
}
if (test_if_subpart(group_list, order) ||
(!group_list && tmp_table_param.sum_func_count))
{
/*
We can ignore ORDER BY if it's a prefix of the GROUP BY list
(as MariaDB is by default sorting on GROUP BY) or
if there is no GROUP BY and aggregate functions are used
(as the result will only contain one row).
*/
if (order && (test_if_subpart(group_list, order) ||
(!group_list && tmp_table_param.sum_func_count)))
order=0;
if (is_indexed_agg_distinct(this, NULL))
sort_and_group= 0;
}
// Can't use sort on head table if using join buffering
if (full_join || hash_join)
......@@ -2814,7 +2831,6 @@ int JOIN::optimize_stage2()
if (select_lex->have_window_funcs())
simple_order= FALSE;
/*
If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
whose columns are required to be returned in a sorted order, then
......@@ -3540,7 +3556,7 @@ bool JOIN::make_aggr_tables_info()
// for the first table
if (group_list || tmp_table_param.sum_func_count)
{
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
DBUG_RETURN(true);
if (prepare_sum_aggregators(sum_funcs,
!join_tab->is_using_agg_loose_index_scan()))
......@@ -3650,7 +3666,7 @@ bool JOIN::make_aggr_tables_info()
last_tab->all_fields= &tmp_all_fields3;
last_tab->fields= &tmp_fields_list3;
}
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true, true))
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
DBUG_RETURN(true);
if (prepare_sum_aggregators(sum_funcs,
!join_tab ||
......@@ -3846,8 +3862,6 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
}
else
{
if (make_sum_func_list(all_fields, fields_list, false))
goto err;
if (prepare_sum_aggregators(sum_funcs,
!join_tab->is_using_agg_loose_index_scan()))
goto err;
......@@ -7089,7 +7103,6 @@ void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
Check for the presence of AGGFN(DISTINCT a) queries that may be subject
to loose index scan.
Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
(QUICK_GROUP_MIN_MAX_SELECT).
Optionally (if out_args is supplied) will push the arguments of
......@@ -7128,9 +7141,6 @@ is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
return false;
if (join->make_sum_func_list(join->all_fields, join->fields_list, true))
return false;
Bitmap<MAX_FIELDS> first_aggdistinct_fields;
bool first_aggdistinct_fields_initialized= false;
for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
......@@ -7232,16 +7242,23 @@ add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
while ((item= select_items_it++))
item->walk(&Item::collect_item_field_processor, 0, &indexed_fields);
}
else if (join->tmp_table_param.sum_func_count &&
is_indexed_agg_distinct(join, &indexed_fields))
else if (!join->tmp_table_param.sum_func_count ||
!is_indexed_agg_distinct(join, &indexed_fields))
{
join->sort_and_group= 1;
}
else
/*
There where no GROUP BY fields and also either no aggregate
functions or not all aggregate functions where used with the
same DISTINCT (or MIN() / MAX() that works similarly).
Nothing to do there.
*/
return;
}
if (indexed_fields.elements == 0)
{
/* There where no index we could use to satisfy the GROUP BY */
return;
}
/* Intersect the keys of all group fields. */
cur_item= indexed_fields_it++;
......@@ -25692,16 +25709,13 @@ bool JOIN::alloc_func_list()
bool JOIN::make_sum_func_list(List<Item> &field_list,
List<Item> &send_result_set_metadata,
bool before_group_by, bool recompute)
bool before_group_by)
{
List_iterator_fast<Item> it(field_list);
Item_sum **func;
Item *item;
DBUG_ENTER("make_sum_func_list");
if (*sum_funcs && !recompute)
DBUG_RETURN(FALSE); /* We have already initialized sum_funcs. */
func= sum_funcs;
while ((item=it++))
{
......@@ -25848,7 +25862,7 @@ change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
Change all funcs to be fields in tmp table.
@param thd THD pointer
@param ref_pointer_array array of pointers to top elements of filed list
@param ref_pointer_array array of pointers to top elements of field list
@param res_selected_fields new list of items of select item list
@param res_all_fields new list of all items
@param elements number of elements in select item list
......
......@@ -1196,7 +1196,17 @@ class JOIN :public Sql_alloc
Indicates that grouping will be performed on the result set during
query execution. This field belongs to query execution.
@see make_group_fields, alloc_group_fields, JOIN::exec
If 'sort_and_group' is set, then the optimizer is going to use on of
the following algorithms to resolve GROUP BY.
- If one table, sort the table and then calculate groups on the fly.
- If more than one table, create a temporary table to hold the join,
sort it and then resolve group by on the fly.
The 'on the fly' calculation is done in end_send_group()
@see make_group_fields, alloc_group_fields, JOIN::exec,
setup_end_select_func
*/
bool sort_and_group;
bool first_record,full_join, no_field_update;
......@@ -1654,7 +1664,7 @@ class JOIN :public Sql_alloc
bool make_range_rowid_filters();
bool init_range_rowid_filters();
bool make_sum_func_list(List<Item> &all_fields, List<Item> &send_fields,
bool before_group_by, bool recompute= FALSE);
bool before_group_by);
/// Initialzes a slice, see comments for ref_ptrs above.
Ref_ptr_array ref_ptr_array_slice(size_t slice_num)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment