Commit 3ab5751a authored by Jorgen Loland's avatar Jorgen Loland

Bug#47280 - strange results from count(*) with order by multiple

            columns without where/group
                     
Simple SELECT with implicit grouping used to return many rows if
the query was ordered by the aggregated column in the SELECT
list. This was incorrect because queries with implicit grouping
should only return a single record.
                              
The problem was that when JOIN:exec() decided if execution needed
to handle grouping, it was assumed that sum_func_count==0 meant
that there were no aggregate functions in the query. This
assumption was not correct in JOIN::exec() because the aggregate
functions might have been optimized away during JOIN::optimize().
                  
The reason why queries without ordering behaved correctly was
that sum_func_count is only recalculated if the optimizer chooses
to use temporary tables (which it does in the ordered case).
Hence, non-ordered queries were correctly treated as grouped.
                  
The fix for this bug was to remove the assumption that
sum_func_count==0 means that there is no need for grouping. This
was done by introducing variable "bool implicit_grouping" in the
JOIN object.

mysql-test/r/func_group.result:
  Add test for BUG#47280
mysql-test/t/func_group.test:
  Add test for BUG#47280
sql/opt_sum.cc:
  Improve comment for opt_sum_query()
sql/sql_class.h:
  Add comment for variables in TMP_TABLE_PARAM
sql/sql_select.cc:
  Introduce and use variable implicit_grouping instead of (!group_list && sum_func_count) in places that need to test if grouping is required. Also added comments for: optimization of aggregate fields for implicitly grouped queries  (JOIN::optimize) and choice of end_select method (JOIN::execute)
sql/sql_select.h:
  Add variable implicit_grouping, which will be TRUE for queries that contain aggregate functions but no GROUP BY clause. Also added comment to sort_and_group variable.
parent dafe440f
......@@ -1477,3 +1477,47 @@ COUNT(*)
SET SQL_MODE=default;
DROP TABLE t1;
End of 5.0 tests
#
# BUG#47280 - strange results from count(*) with order by multiple
# columns without where/group
#
#
# Initialize test
#
CREATE TABLE t1 (
pk INT NOT NULL,
i INT,
PRIMARY KEY (pk)
);
INSERT INTO t1 VALUES (1,11),(2,12),(3,13);
#
# Start test
# All the following queries shall return 1 record
#
# Masking all correct values {11...13} for column i in this result.
SELECT MAX(pk) as max, i
FROM t1
ORDER BY max;
max i
3 #
EXPLAIN
SELECT MAX(pk) as max, i
FROM t1
ORDER BY max;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 3 Using temporary
# Only 11 is correct for collumn i in this result
SELECT MAX(pk) as max, i
FROM t1
WHERE pk<2
ORDER BY max;
max i
1 11
#
# Cleanup
#
DROP TABLE t1;
End of 5.1 tests
......@@ -1006,3 +1006,51 @@ DROP TABLE t1;
###
--echo End of 5.0 tests
--echo #
--echo # BUG#47280 - strange results from count(*) with order by multiple
--echo # columns without where/group
--echo #
--echo #
--echo # Initialize test
--echo #
CREATE TABLE t1 (
pk INT NOT NULL,
i INT,
PRIMARY KEY (pk)
);
INSERT INTO t1 VALUES (1,11),(2,12),(3,13);
--echo #
--echo # Start test
--echo # All the following queries shall return 1 record
--echo #
--echo
--echo # Masking all correct values {11...13} for column i in this result.
--replace_column 2 #
SELECT MAX(pk) as max, i
FROM t1
ORDER BY max;
--echo
EXPLAIN
SELECT MAX(pk) as max, i
FROM t1
ORDER BY max;
--echo
--echo # Only 11 is correct for collumn i in this result
SELECT MAX(pk) as max, i
FROM t1
WHERE pk<2
ORDER BY max;
--echo #
--echo # Cleanup
--echo #
DROP TABLE t1;
--echo End of 5.1 tests
......@@ -97,7 +97,8 @@ static ulonglong get_exact_record_count(TABLE_LIST *tables)
@note
This function is only called for queries with sum functions and no
GROUP BY part.
GROUP BY part. This means that the result set shall contain a single
row only
@retval
0 no errors
......
......@@ -2647,7 +2647,32 @@ class TMP_TABLE_PARAM :public Sql_alloc
MI_COLUMNDEF *recinfo,*start_recinfo;
KEY *keyinfo;
ha_rows end_write_records;
uint field_count,sum_func_count,func_count;
/**
Number of normal fields in the query, including those referred to
from aggregate functions. Hence, "SELECT `field1`,
SUM(`field2`) from t1" sets this counter to 2.
@see count_field_types
*/
uint field_count;
/**
Number of fields in the query that have functions. Includes both
aggregate functions (e.g., SUM) and non-aggregates (e.g., RAND).
Also counts functions referred to from aggregate functions, i.e.,
"SELECT SUM(RAND())" sets this counter to 2.
@see count_field_types
*/
uint func_count;
/**
Number of fields in the query that have aggregate functions. Note
that the optimizer may choose to optimize away these fields by
replacing them with constants, in which case sum_func_count will
need to be updated.
@see opt_sum_query, count_field_types
*/
uint sum_func_count;
uint hidden_field_count;
uint group_parts,group_length,group_null_parts;
uint quick_group;
......
......@@ -644,8 +644,11 @@ JOIN::prepare(Item ***rref_pointer_array,
this->group= group_list != 0;
unit= unit_arg;
if (tmp_table_param.sum_func_count && !group_list)
implicit_grouping= TRUE;
#ifdef RESTRICTED_GROUP
if (sum_func_count && !group_list && (func_count || field_count))
if (implicit_grouping)
{
my_message(ER_WRONG_SUM_SELECT,ER(ER_WRONG_SUM_SELECT),MYF(0));
goto err;
......@@ -881,15 +884,23 @@ JOIN::optimize()
}
#endif
/* Optimize count(*), min() and max() */
if (tables_list && tmp_table_param.sum_func_count && ! group_list)
/*
Try to optimize count(*), min() and max() to const fields if
there is implicit grouping (aggregate functions but no
group_list). In this case, the result set shall only contain one
row.
*/
if (tables_list && implicit_grouping)
{
int res;
/*
opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
to the WHERE conditions,
or 1 if all items were resolved,
or 1 if all items were resolved (optimized away),
or 0, or an error number HA_ERR_...
If all items were resolved by opt_sum_query, there is no need to
open any tables.
*/
if ((res=opt_sum_query(select_lex->leaf_tables, all_fields, conds)))
{
......@@ -2024,7 +2035,7 @@ JOIN::exec()
count_field_types(select_lex, &curr_join->tmp_table_param,
*curr_all_fields, 0);
if (curr_join->group || curr_join->tmp_table_param.sum_func_count ||
if (curr_join->group || curr_join->implicit_grouping ||
(procedure && (procedure->flags & PROC_GROUP)))
{
if (make_group_fields(this, curr_join))
......@@ -10811,6 +10822,12 @@ Next_select_func setup_end_select_func(JOIN *join)
}
else
{
/*
Choose method for presenting result to user. Use end_send_group
if the query requires grouping (has a GROUP BY clause and/or one or
more aggregate functions). Use end_send if the query should not
be grouped.
*/
if ((join->sort_and_group ||
(join->procedure && join->procedure->flags & PROC_GROUP)) &&
!tmp_tbl->precomputed_group_by)
......
......@@ -278,7 +278,14 @@ class JOIN :public Sql_alloc
TABLE **table,**all_tables,*sort_by_table;
uint tables,const_tables;
uint send_group_parts;
bool sort_and_group,first_record,full_join,group, no_field_update;
/**
Indicates that grouping will be performed on the result set during
query execution. This field belongs to query execution.
@see make_group_fields, alloc_group_fields, JOIN::exec
*/
bool sort_and_group;
bool first_record,full_join,group, no_field_update;
bool do_send_rows;
/**
TRUE when we want to resume nested loop iterations when
......@@ -428,6 +435,7 @@ class JOIN :public Sql_alloc
tables= 0;
const_tables= 0;
join_list= 0;
implicit_grouping= FALSE;
sort_and_group= 0;
first_record= 0;
do_send_rows= 1;
......@@ -533,6 +541,11 @@ class JOIN :public Sql_alloc
select_lex == unit->fake_select_lex));
}
private:
/**
TRUE if the query contains an aggregate function but has no GROUP
BY clause.
*/
bool implicit_grouping;
bool make_simple_join(JOIN *join, TABLE *tmp_table);
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment