Commit ccac0a6e authored by Gleb Shchepa's avatar Gleb Shchepa

Bug #45640: optimizer bug produces wrong results

Grouping by a subquery in a query with a distinct aggregate
function lead to a wrong result (wrong and unordered
grouping values).

There are two related problems:

1) The query like this:

   SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
   FROM t1 GROUP BY aa

returned wrong result, because the outer reference "t1.a"
in the subquery was substituted with the Item_ref item.

The Item_ref item obtains data from the result_field object
that refreshes once after the end of each group. This data
is not applicable to filesort since filesort() doesn't care
about groups (and doesn't update result_field objects with
copy_fields() and so on). Also that data is not applicable
to group separation algorithm: end_send_group() checks every
record with test_if_group_changed() that evaluates Item_ref
items, but it refreshes those Item_ref-s only after the end
of group, that is a vicious circle and the grouped column
values in the output are shifted.

Fix: if
       a) we grouping by a subquery and
       b) that subquery has outer references to FROM list
          of the grouping query,
     then we substitute these outer references with
     Item_direct_ref like references under aggregate
     functions: Item_direct_ref obtains data directly
     from the current record.

2) The query with a non-trivial grouping expression like:

   SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) c
   FROM t1 GROUP BY aa+0

also returned wrong result, since JOIN::exec() substitutes
references to top-level aliases in SELECT list with Item_copy
caching items. Item_copy items have same refreshing policy
as Item_ref items, so the whole groping expression with
Item_copy inside returns wrong result in filesort() and
end_send_group().

Fix: include aliased items into GROUP BY item tree instead
     of Item_ref references to them.



mysql-test/r/group_by.result:
  Test case for bug #45640
mysql-test/t/group_by.test:
  Test case for bug #45640
sql/item.cc:
  Bug #45640: optimizer bug produces wrong results
  
  Item_field::fix_fields() has been modified to resolve
  aliases in GROUP BY item trees into aliased items instead
  of Item_ref items.
sql/item.h:
  Bug #45640: optimizer bug produces wrong results
  
  - Item::find_item_processor() has been introduced.
  - Item_ref::walk() has been modified to apply processors
    to itself too (not only to referenced item).
sql/mysql_priv.h:
  Bug #45640: optimizer bug produces wrong results
  
  fix_inner_refs() has been modified to accept group_list
  parameter.
sql/sql_lex.cc:
  Bug #45640: optimizer bug produces wrong results
  
  Initialization of st_select_lex::group_fix_field has
  been added.
sql/sql_lex.h:
  Bug #45640: optimizer bug produces wrong results
  
  The st_select_lex::group_fix_field field has been introduced
  to control alias resolution in Itef_fied::fix_fields.
sql/sql_select.cc:
  Bug #45640: optimizer bug produces wrong results
  
  - The fix_inner_refs function has been modified to treat
    subquery outer references like outer fields under aggregate
    functions, if they are included in GROUP BY item tree.
  
  - The find_order_in_list function has been modified to
    fix Item_field alias fields included in the GROUP BY item
    trees in a special manner.
parent fd10dda9
...@@ -1703,3 +1703,91 @@ COUNT(i) ...@@ -1703,3 +1703,91 @@ COUNT(i)
1 1
DROP TABLE t1; DROP TABLE t1;
SET @@sql_mode = @old_sql_mode; SET @@sql_mode = @old_sql_mode;
#
# Bug #45640: optimizer bug produces wrong results
#
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30);
# should return 4 ordered records:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
aa COUNT(DISTINCT b)
1 1
2 1
3 1
4 1
# should return the same result in a reverse order:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
aa COUNT(DISTINCT b)
4 1
3 1
2 1
1 1
# execution plan should not use temporary table:
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1
Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by ((select `test`.`t1`.`a` AS `a`) + 0)
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
id select_type table type possible_keys key key_len ref rows filtered Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 5 100.00 Using filesort
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL NULL No tables used
Warnings:
Note 1276 Field or reference 'test.t1.a' of SELECT #2 was resolved in SELECT #1
Note 1003 select (select `test`.`t1`.`a` AS `a`) AS `aa`,count(distinct `test`.`t1`.`b`) AS `COUNT(DISTINCT b)` from `test`.`t1` group by -((select `test`.`t1`.`a` AS `a`))
# should return only one record
SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1
GROUP BY aa;
aa COUNT(DISTINCT b)
4 4
CREATE TABLE t2 SELECT DISTINCT a FROM t1;
# originally reported queries (1st two columns of next two query
# results should be same):
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b;
aa b COUNT(DISTINCT b)
1 10 1
2 20 1
3 30 1
4 40 1
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b;
aa b COUNT( b)
1 10 1
2 20 2
3 30 1
4 40 1
# ORDER BY for sure:
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
aa b COUNT(DISTINCT b)
4 40 1
3 30 1
2 20 1
1 10 1
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
aa b COUNT( b)
4 40 1
3 30 1
2 20 2
1 10 1
DROP TABLE t1, t2;
#
# End of 5.1 tests
...@@ -1158,3 +1158,53 @@ SELECT COUNT(i) FROM t1 WHERE i > 1; ...@@ -1158,3 +1158,53 @@ SELECT COUNT(i) FROM t1 WHERE i > 1;
DROP TABLE t1; DROP TABLE t1;
SET @@sql_mode = @old_sql_mode; SET @@sql_mode = @old_sql_mode;
--echo #
--echo # Bug #45640: optimizer bug produces wrong results
--echo #
CREATE TABLE t1 (a INT, b INT);
INSERT INTO t1 VALUES (4, 40), (1, 10), (2, 20), (2, 20), (3, 30);
--echo # should return 4 ordered records:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
SELECT (SELECT (SELECT t1.a)) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa;
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
--echo # should return the same result in a reverse order:
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
--echo # execution plan should not use temporary table:
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY aa+0;
EXPLAIN EXTENDED
SELECT (SELECT t1.a) aa, COUNT(DISTINCT b) FROM t1 GROUP BY -aa;
--echo # should return only one record
SELECT (SELECT tt.a FROM t1 tt LIMIT 1) aa, COUNT(DISTINCT b) FROM t1
GROUP BY aa;
CREATE TABLE t2 SELECT DISTINCT a FROM t1;
--echo # originally reported queries (1st two columns of next two query
--echo # results should be same):
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b;
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b;
--echo # ORDER BY for sure:
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT(DISTINCT b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
SELECT (SELECT t2.a FROM t2 WHERE t2.a = t1.a) aa, b, COUNT( b)
FROM t1 GROUP BY aa, b ORDER BY -aa, -b;
DROP TABLE t1, t2;
--echo #
--echo # End of 5.1 tests
...@@ -4300,17 +4300,33 @@ bool Item_field::fix_fields(THD *thd, Item **reference) ...@@ -4300,17 +4300,33 @@ bool Item_field::fix_fields(THD *thd, Item **reference)
It's not an Item_field in the select list so we must make a new It's not an Item_field in the select list so we must make a new
Item_ref to point to the Item in the select list and replace the Item_ref to point to the Item in the select list and replace the
Item_field created by the parser with the new Item_ref. Item_field created by the parser with the new Item_ref.
NOTE: If we are fixing an alias reference inside ORDER/GROUP BY
item tree, then we use new Item_ref as an intermediate value
to resolve referenced item only.
In this case the new Item_ref item is unused.
*/ */
Item_ref *rf= new Item_ref(context, db_name,table_name,field_name); Item_ref *rf= new Item_ref(context, db_name,table_name,field_name);
if (!rf) if (!rf)
return 1; return 1;
thd->change_item_tree(reference, rf);
bool save_group_fix_field= thd->lex->current_select->group_fix_field;
/* /*
Because Item_ref never substitutes itself with other items No need for recursive resolving of aliases.
in Item_ref::fix_fields(), we can safely use the original */
pointer to it even after fix_fields() thd->lex->current_select->group_fix_field= 0;
*/
return rf->fix_fields(thd, reference) || rf->check_cols(1); bool ret= rf->fix_fields(thd, (Item **) &rf) || rf->check_cols(1);
thd->lex->current_select->group_fix_field= save_group_fix_field;
if (ret)
return TRUE;
if (save_group_fix_field && alias_name_used)
thd->change_item_tree(reference, *rf->ref);
else
thd->change_item_tree(reference, rf);
return FALSE;
} }
} }
} }
......
...@@ -901,6 +901,7 @@ class Item { ...@@ -901,6 +901,7 @@ class Item {
virtual bool change_context_processor(uchar *context) { return 0; } virtual bool change_context_processor(uchar *context) { return 0; }
virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; } virtual bool reset_query_id_processor(uchar *query_id_arg) { return 0; }
virtual bool is_expensive_processor(uchar *arg) { return 0; } virtual bool is_expensive_processor(uchar *arg) { return 0; }
virtual bool find_item_processor(uchar *arg) { return this == (void *) arg; }
virtual bool register_field_in_read_map(uchar *arg) { return 0; } virtual bool register_field_in_read_map(uchar *arg) { return 0; }
/* /*
Check if a partition function is allowed Check if a partition function is allowed
...@@ -2275,7 +2276,10 @@ class Item_ref :public Item_ident ...@@ -2275,7 +2276,10 @@ class Item_ref :public Item_ident
return ref ? (*ref)->real_item() : this; return ref ? (*ref)->real_item() : this;
} }
bool walk(Item_processor processor, bool walk_subquery, uchar *arg) bool walk(Item_processor processor, bool walk_subquery, uchar *arg)
{ return (*ref)->walk(processor, walk_subquery, arg); } {
return (*ref)->walk(processor, walk_subquery, arg) ||
(this->*processor)(arg);
}
virtual void print(String *str, enum_query_type query_type); virtual void print(String *str, enum_query_type query_type);
bool result_as_longlong() bool result_as_longlong()
{ {
......
...@@ -1192,7 +1192,7 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, ...@@ -1192,7 +1192,7 @@ int setup_group(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
List<Item> &fields, List<Item> &all_fields, ORDER *order, List<Item> &fields, List<Item> &all_fields, ORDER *order,
bool *hidden_group_fields); bool *hidden_group_fields);
bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, bool fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
Item **ref_pointer_array); Item **ref_pointer_array, ORDER *group_list= NULL);
bool handle_select(THD *thd, LEX *lex, select_result *result, bool handle_select(THD *thd, LEX *lex, select_result *result,
ulong setup_tables_done_option); ulong setup_tables_done_option);
......
...@@ -1592,6 +1592,7 @@ void st_select_lex::init_query() ...@@ -1592,6 +1592,7 @@ void st_select_lex::init_query()
having= prep_having= where= prep_where= 0; having= prep_having= where= prep_where= 0;
olap= UNSPECIFIED_OLAP_TYPE; olap= UNSPECIFIED_OLAP_TYPE;
having_fix_field= 0; having_fix_field= 0;
group_fix_field= 0;
context.select_lex= this; context.select_lex= this;
context.init(); context.init();
/* /*
......
...@@ -647,6 +647,8 @@ class st_select_lex: public st_select_lex_node ...@@ -647,6 +647,8 @@ class st_select_lex: public st_select_lex_node
bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */ bool braces; /* SELECT ... UNION (SELECT ... ) <- this braces */
/* TRUE when having fix field called in processing of this SELECT */ /* TRUE when having fix field called in processing of this SELECT */
bool having_fix_field; bool having_fix_field;
/* TRUE when GROUP BY fix field called in processing of this SELECT */
bool group_fix_field;
/* List of references to fields referenced from inner selects */ /* List of references to fields referenced from inner selects */
List<Item_outer_ref> inner_refs_list; List<Item_outer_ref> inner_refs_list;
/* Number of Item_sum-derived objects in this SELECT */ /* Number of Item_sum-derived objects in this SELECT */
......
...@@ -287,6 +287,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, ...@@ -287,6 +287,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
all_fields List of all fields used in select all_fields List of all fields used in select
select Current select select Current select
ref_pointer_array Array of references to Items used in current select ref_pointer_array Array of references to Items used in current select
group_list GROUP BY list (is NULL by default)
DESCRIPTION DESCRIPTION
The function serves 3 purposes - adds fields referenced from inner The function serves 3 purposes - adds fields referenced from inner
...@@ -305,6 +306,8 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, ...@@ -305,6 +306,8 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
function is aggregated in the select where the outer field was function is aggregated in the select where the outer field was
resolved or in some more inner select then the Item_direct_ref resolved or in some more inner select then the Item_direct_ref
class should be used. class should be used.
Also it should be used if we are grouping by a subquery containing
the outer field.
The resolution is done here and not at the fix_fields() stage as The resolution is done here and not at the fix_fields() stage as
it can be done only after sum functions are fixed and pulled up to it can be done only after sum functions are fixed and pulled up to
selects where they are have to be aggregated. selects where they are have to be aggregated.
...@@ -321,7 +324,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result, ...@@ -321,7 +324,7 @@ bool handle_select(THD *thd, LEX *lex, select_result *result,
bool bool
fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
Item **ref_pointer_array) Item **ref_pointer_array, ORDER *group_list)
{ {
Item_outer_ref *ref; Item_outer_ref *ref;
bool res= FALSE; bool res= FALSE;
...@@ -371,6 +374,22 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select, ...@@ -371,6 +374,22 @@ fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
} }
} }
} }
else
{
/*
Check if GROUP BY item trees contain the outer ref:
in this case we have to use Item_direct_ref instead of Item_ref.
*/
for (ORDER *group= group_list; group; group= group->next)
{
if ((*group->item)->walk(&Item::find_item_processor, TRUE,
(uchar *) ref))
{
direct_ref= TRUE;
break;
}
}
}
new_ref= direct_ref ? new_ref= direct_ref ?
new Item_direct_ref(ref->context, item_ref, ref->table_name, new Item_direct_ref(ref->context, item_ref, ref->table_name,
ref->field_name, ref->alias_name_used) : ref->field_name, ref->alias_name_used) :
...@@ -577,7 +596,8 @@ JOIN::prepare(Item ***rref_pointer_array, ...@@ -577,7 +596,8 @@ JOIN::prepare(Item ***rref_pointer_array,
} }
if (select_lex->inner_refs_list.elements && if (select_lex->inner_refs_list.elements &&
fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array)) fix_inner_refs(thd, all_fields, select_lex, ref_pointer_array,
group_list))
DBUG_RETURN(-1); DBUG_RETURN(-1);
if (group_list) if (group_list)
...@@ -14547,11 +14567,29 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables, ...@@ -14547,11 +14567,29 @@ find_order_in_list(THD *thd, Item **ref_pointer_array, TABLE_LIST *tables,
We check order_item->fixed because Item_func_group_concat can put We check order_item->fixed because Item_func_group_concat can put
arguments for which fix_fields already was called. arguments for which fix_fields already was called.
group_fix_field= TRUE is to resolve aliases from the SELECT list
without creating of Item_ref-s: JOIN::exec() wraps aliased items
in SELECT list with Item_copy items. To re-evaluate such a tree
that includes Item_copy items we have to refresh Item_copy caches,
but:
- filesort() never refresh Item_copy items,
- end_send_group() checks every record for group boundary by the
test_if_group_changed function that obtain data from these
Item_copy items, but the copy_fields function that
refreshes Item copy items is called after group boundaries only -
that is a vicious circle.
So we prevent inclusion of Item_copy items.
*/ */
if (!order_item->fixed && bool save_group_fix_field= thd->lex->current_select->group_fix_field;
if (is_group_field)
thd->lex->current_select->group_fix_field= TRUE;
bool ret= (!order_item->fixed &&
(order_item->fix_fields(thd, order->item) || (order_item->fix_fields(thd, order->item) ||
(order_item= *order->item)->check_cols(1) || (order_item= *order->item)->check_cols(1) ||
thd->is_fatal_error)) thd->is_fatal_error));
thd->lex->current_select->group_fix_field= save_group_fix_field;
if (ret)
return TRUE; /* Wrong field. */ return TRUE; /* Wrong field. */
uint el= all_fields.elements; uint el= all_fields.elements;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment