Commit 57e5f848 authored by Gleb Shchepa's avatar Gleb Shchepa

Bug #45640: optimizer bug produces wrong results

Grouping by a subquery in a query with a distinct aggregate
function lead to a wrong result (wrong and unordered
grouping values).

There are two related problems:

1) The query like this:

   SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
   FROM t1 GROUP BY aa

returned wrong result, because the outer reference "t1.a"
in the subquery was substituted with the Item_ref item.

The Item_ref item obtains data from the result_field object
that refreshes once after the end of each group. This data
is not applicable to filesort since filesort() doesn't care
about groups (and doesn't update result_field objects with
copy_fields() and so on). Also that data is not applicable
to group separation algorithm: end_send_group() checks every
record with test_if_group_changed() that evaluates Item_ref
items, but it refreshes those Item_ref-s only after the end
of group, that is a vicious circle and the grouped column
values in the output are shifted.

Fix: if
       a) we grouping by a subquery and
       b) that subquery has outer references to FROM list
          of the grouping query,
     then we substitute these outer references with
     Item_direct_ref like references under aggregate
     functions: Item_direct_ref obtains data directly
     from the current record.

2) The query with a non-trivial grouping expression like:

   SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
   FROM t1 GROUP BY aa+0

also returned wrong result, since JOIN::exec() substitutes
references to top-level aliases in SELECT list with Item_copy
caching items. Item_copy items have same refreshing policy
as Item_ref items, so the whole groping expression with
Item_copy inside returns wrong result in filesort() and
end_send_group().

Fix: include aliased items into GROUP BY item tree instead
     of Item_ref references to them.
parent a26ab94e
......@@ -1703,3 +1703,91 @@ COUNT(i)
1
DROP TABLE t1;
SET @@sql_mode = @old_sql_mode;
#
# Bug #45640: optimizer bug produces wrong results
#
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30);
# should return 4 ordered records:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
# should return the same result in a reverse order:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
aa COUNT(DISTINCT b)
4 1
3 1
2 1
1 1
# execution plan should not use temporary table:
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1
Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by ((select `test`.`t1`.`a` AS `a`) + 0)
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1
Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by -((select `test`.`t1`.`a` AS `a`))
# should return only one record
SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1
GROUP BY aa;
aa COUNT(DISTINCT b)
4 4
CREATE TABLE t2 SELECT DISTINCT a FROM t1;
# originally reported queries (1st two columns of next two query
# results should be same):
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b;
aa b COUNT(DISTINCT b)
1 10 1
2 20 1
3 30 1
4 40 1
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b;
aa b COUNT( b)
1 10 1
2 20 2
3 30 1
4 40 1
# ORDER BY for sure:
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
aa b COUNT(DISTINCT b)
4 40 1
3 30 1
2 20 1
1 10 1
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
aa b COUNT( b)
4 40 1
3 30 1
2 20 2
1 10 1
DROP TABLE t1, t2;
#
# End of 5.1 tests
......@@ -1158,3 +1158,53 @@ SELECT COUNT(i) FROM t1 WHERE i > 1;
DROP TABLE t1;
SET @@sql_mode = @old_sql_mode;
--echo #
--echo # Bug #45640: optimizer bug produces wrong results
--echo #
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30);
--echo # should return 4 ordered records:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
--echo # should return the same result in a reverse order:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
--echo # execution plan should not use temporary table:
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
--echo # should return only one record
SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1
GROUP BY aa;
CREATE TABLE t2 SELECT DISTINCT a FROM t1;
--echo # originally reported queries (1st two columns of next two query
--echo # results should be same):
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b;
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b;
--echo # ORDER BY for sure:
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
DROP TABLE t1, t2;
--echo #
--echo # End of 5.1 tests
......@@ -4300,17 +4300,33 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
It's not an Item_field in the select list so we must make a new
Item_ref to point to the Item in the select list and replace the
Item_field created by the parser with the new Item_ref.
NOTE: If we are fixing an alias reference inside ORDER/GROUP BY
item tree, then we use new Item_ref as an intermediate value
to resolve referenced item only.
In this case the new Item_ref item is unused.
*/
Item_ref *rf= new Item_ref(context, db_name,table_name,field_name);
if (!rf)
return 1;
thd->change_item_tree(reference, rf);
bool save_group_fix_field= thd->lex->current_select->group_fix_field;
/*
Because Item_ref never substitutes itself with other items
in Item_ref::fix_fields(), we can safely use the original
pointer to it even after fix_fields()
No need for recursive resolving of aliases.
*/
return rf->fix_fields(thd, reference) || rf->check_cols(1);
thd->lex->current_select->group_fix_field= 0;
bool ret= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1);
thd->lex->current_select->group_fix_field= save_group_fix_field;
if (ret)
return TRUE;
if (save_group_fix_field && alias_name_used)
thd->change_item_tree(reference, *rf->ref);
else
thd->change_item_tree(reference, rf);
return FALSE;
}
}
}
......
......@@ -901,6 +901,7 @@ class Item {
virtual bool change_context_processor(uchar *context) { return 0; }
virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
virtual bool is_expensive_processor(uchar *arg) { return 0; }
virtual bool find_item_processor(uchar *arg) { return this == (void *) arg; }
virtual bool register_field_in_read_map(uchar *arg) { return 0; }
/*
Check if a partition function is allowed
......@@ -2275,7 +2276,10 @@ class Item_ref :public Item_ident
return ref ? (*ref)->real_item() : this;
}
bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
{ return (*ref)->walk(processor, walk_subquery, arg); }
{
return (*ref)->walk(processor, walk_subquery, arg) ||
(this->*processor)(arg);
}
virtual void print(String *str, enum_query_type query_type);
bool result_as_longlong()
{
......
......@@ -1192,7 +1192,7 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
List<Item> &fields, List<Item> &all_fields, ORDER *order,
bool *hidden_group_fields);
bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
Item **ref_pointer_array);
Item **ref_pointer_array, ORDER *group_list= NULL);
bool handle_select(THD *thd, LEX *lex, select_result *result,
ulong setup_tables_done_option);
......
......@@ -1592,6 +1592,7 @@ void st_select_lex::init_query()
having= prep_having= where= prep_where= 0;
olap= UNSPECIFIED_OLAP_TYPE;
having_fix_field= 0;
group_fix_field= 0;
context.select_lex= this;
context.init();
/*
......
......@@ -647,6 +647,8 @@ class st_select_lex: public st_select_lex_node
bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */
/* TRUE when having fix field called in processing of this SELECT */
bool having_fix_field;
/* TRUE when GROUP BY fix field called in processing of this SELECT */
bool group_fix_field;
/* List of references to fields referenced from inner selects */
List<Item_outer_ref> inner_refs_list;
/* Number of Item_sum-derived objects in this SELECT */
......
......@@ -287,6 +287,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
all_fields List of all fields used in select
select Current select
ref_pointer_array Array of references to Items used in current select
group_list GROUP BY list (is NULL by default)
DESCRIPTION
The function serves 3 purposes - adds fields referenced from inner
......@@ -305,6 +306,8 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
function is aggregated in the select where the outer field was
resolved or in some more inner select then the Item_direct_ref
class should be used.
Also it should be used if we are grouping by a subquery containing
the outer field.
The resolution is done here and not at the fix_fields() stage as
it can be done only after sum functions are fixed and pulled up to
selects where they are have to be aggregated.
......@@ -321,7 +324,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
bool
fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
Item **ref_pointer_array)
Item **ref_pointer_array, ORDER *group_list)
{
Item_outer_ref *ref;
bool res= FALSE;
......@@ -371,6 +374,22 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
}
}
}
else
{
/*
Check if GROUP BY item trees contain the outer ref:
in this case we have to use Item_direct_ref instead of Item_ref.
*/
for (ORDER *group= group_list; group; group= group->next)
{
if ((*group->item)->walk(&Item::find_item_processor, TRUE,
(uchar *) ref))
{
direct_ref= TRUE;
break;
}
}
}
new_ref= direct_ref ?
new Item_direct_ref(ref->context, item_ref, ref->table_name,
ref->field_name, ref->alias_name_used) :
......@@ -577,7 +596,8 @@ JOIN::prepare(Item ***rref_pointer_array,
}
if (select_lex->inner_refs_list.elements &&
fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array))
fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array,
group_list))
DBUG_RETURN(-1);
if (group_list)
......@@ -14547,11 +14567,29 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
We check order_item->fixed because Item_func_group_concat can put
arguments for which fix_fields already was called.
*/
if (!order_item->fixed &&
group_fix_field= TRUE is to resolve aliases from the SELECT list
without creating of Item_ref-s: JOIN::exec() wraps aliased items
in SELECT list with Item_copy items. To re-evaluate such a tree
that includes Item_copy items we have to refresh Item_copy caches,
but:
- filesort() never refresh Item_copy items,
- end_send_group() checks every record for group boundary by the
test_if_group_changed function that obtain data from these
Item_copy items, but the copy_fields function that
refreshes Item copy items is called after group boundaries only -
that is a vicious circle.
So we prevent inclusion of Item_copy items.
*/
bool save_group_fix_field= thd->lex->current_select->group_fix_field;
if (is_group_field)
thd->lex->current_select->group_fix_field= TRUE;
bool ret= (!order_item->fixed &&
(order_item->fix_fields(thd, order->item) ||
(order_item= *order->item)->check_cols(1) ||
thd->is_fatal_error))
thd->is_fatal_error));
thd->lex->current_select->group_fix_field= save_group_fix_field;
if (ret)
return TRUE; /* Wrong field. */
uint el= all_fields.elements;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment