Bug#21904 (parser problem when using IN with a double "(())")

Before this fix, a IN predicate of the form: "IN (( subselect ))", with two
parenthesis, would be evaluated as a single row subselect: if the subselect
returns more that 1 row, the statement would fail.

The SQL:2003 standard defines a special exception in the specification,
and mandates that this particular form of IN predicate shall be equivalent
to "IN ( subselect )", which involves a table subquery and works with more
than 1 row.

This fix implements "IN (( subselect ))", "IN ((( subselect )))" etc
as per the SQL:2003 requirement.

All the details related to the implementation of this change have been
commented in the code, and the relevant sections of the SQL:2003 spec
are given for reference, so they are not repeated here.

Having access to the spec is a requirement to review in depth this patch.
parent ba7a0375
......@@ -3592,3 +3592,116 @@ FROM t1) t;
COUNT(*)
3000
DROP TABLE t1,t2;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t1xt2;
CREATE TABLE t1 (
id_1 int(5) NOT NULL,
t varchar(4) DEFAULT NULL
);
CREATE TABLE t2 (
id_2 int(5) NOT NULL,
t varchar(4) DEFAULT NULL
);
CREATE TABLE t1xt2 (
id_1 int(5) NOT NULL,
id_2 int(5) NOT NULL
);
INSERT INTO t1 VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd');
INSERT INTO t2 VALUES (2, 'bb'), (3, 'cc'), (4, 'dd'), (12, 'aa');
INSERT INTO t1xt2 VALUES (2, 2), (3, 3), (4, 4);
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
id_1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
id_1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
1
2
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 where t1.id_1 = t1xt2.id_1)));
id_1
1
2
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 where t1.id_1 = t1xt2.id_1))));
id_1
1
2
3
4
insert INTO t1xt2 VALUES (1, 12);
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
id_1
1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
id_1
1
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
2
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
id_1
2
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
id_1
2
3
4
insert INTO t1xt2 VALUES (2, 12);
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
1
2
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
id_1
1
2
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
id_1
1
2
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
id_1
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
id_1
3
4
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
id_1
3
4
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t1xt2;
......@@ -2496,3 +2496,103 @@ SELECT SQL_NO_CACHE COUNT(*)
FROM t1) t;
DROP TABLE t1,t2;
#
# Bug#21904 (parser problem when using IN with a double "(())")
#
--disable_warnings
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
DROP TABLE IF EXISTS t1xt2;
--enable_warnings
CREATE TABLE t1 (
id_1 int(5) NOT NULL,
t varchar(4) DEFAULT NULL
);
CREATE TABLE t2 (
id_2 int(5) NOT NULL,
t varchar(4) DEFAULT NULL
);
CREATE TABLE t1xt2 (
id_1 int(5) NOT NULL,
id_2 int(5) NOT NULL
);
INSERT INTO t1 VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd');
INSERT INTO t2 VALUES (2, 'bb'), (3, 'cc'), (4, 'dd'), (12, 'aa');
INSERT INTO t1xt2 VALUES (2, 2), (3, 3), (4, 4);
# subselect returns 0 rows
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 where t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 where t1.id_1 = t1xt2.id_1))));
insert INTO t1xt2 VALUES (1, 12);
# subselect returns 1 row
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
insert INTO t1xt2 VALUES (2, 12);
# subselect returns more than 1 row
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN ((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1)));
SELECT DISTINCT t1.id_1 FROM t1 WHERE
(12 NOT IN (((SELECT t1xt2.id_2 FROM t1xt2 WHERE t1.id_1 = t1xt2.id_1))));
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t1xt2;
......@@ -52,6 +52,10 @@ Item_subselect::Item_subselect():
void Item_subselect::init(st_select_lex *select_lex,
select_subselect *result)
{
/*
Please see Item_singlerow_subselect::invalidate_and_restore_select_lex(),
which depends on alterations to the parse tree implemented here.
*/
DBUG_ENTER("Item_subselect::init");
DBUG_PRINT("enter", ("select_lex: 0x%lx", (long) select_lex));
......@@ -92,6 +96,12 @@ void Item_subselect::init(st_select_lex *select_lex,
DBUG_VOID_RETURN;
}
st_select_lex *
Item_subselect::get_select_lex()
{
return unit->first_select();
}
void Item_subselect::cleanup()
{
DBUG_ENTER("Item_subselect::cleanup");
......@@ -265,6 +275,26 @@ Item_singlerow_subselect::Item_singlerow_subselect(st_select_lex *select_lex)
DBUG_VOID_RETURN;
}
st_select_lex *
Item_singlerow_subselect::invalidate_and_restore_select_lex()
{
DBUG_ENTER("Item_singlerow_subselect::invalidate_and_restore_select_lex");
st_select_lex *result= get_select_lex();
DBUG_ASSERT(result);
/*
This code restore the parse tree in it's state before the execution of
Item_singlerow_subselect::Item_singlerow_subselect(),
and in particular decouples this object from the SELECT_LEX,
so that the SELECT_LEX can be used with a different flavor
or Item_subselect instead, as part of query rewriting.
*/
unit->item= NULL;
DBUG_RETURN(result);
}
Item_maxmin_subselect::Item_maxmin_subselect(THD *thd_param,
Item_subselect *parent,
st_select_lex *select_lex,
......
......@@ -126,6 +126,12 @@ class Item_subselect :public Item_result_field
virtual void reset_value_registration() {}
enum_parsing_place place() { return parsing_place; }
/**
Get the SELECT_LEX structure associated with this Item.
@return the SELECT_LEX structure associated with this Item
*/
st_select_lex* get_select_lex();
friend class select_subselect;
friend class Item_in_optimizer;
friend bool Item_field::fix_fields(THD *, Item **);
......@@ -169,6 +175,16 @@ class Item_singlerow_subselect :public Item_subselect
bool null_inside();
void bring_value();
/**
This method is used to implement a special case of semantic tree
rewriting, mandated by a SQL:2003 exception in the specification.
The only caller of this method is handle_sql2003_note184_exception(),
see the code there for more details.
Do not call this method for other purposes.
@return the SELECT_LEX structure that was given in the constructor.
*/
st_select_lex* invalidate_and_restore_select_lex();
friend class select_singlerow_subselect;
};
......
......@@ -277,6 +277,81 @@ void case_stmt_action_end_case(LEX *lex, bool simple)
lex->sphead->do_cont_backpatch();
}
/**
Helper to resolve the SQL:2003 Syntax exception 1) in <in predicate>.
See SQL:2003, Part 2, section 8.4 <in predicate>, Note 184, page 383.
This function returns the proper item for the SQL expression
<code>left [NOT] IN ( expr )</code>
@param thd the current thread
@param left the in predicand
@param equal true for IN predicates, false for NOT IN predicates
@param expr first and only expression of the in value list
@return an expression representing the IN predicate.
*/
Item* handle_sql2003_note184_exception(THD *thd, Item* left, bool equal,
Item *expr)
{
/*
Relevant references for this issue:
- SQL:2003, Part 2, section 8.4 <in predicate>, page 383,
- SQL:2003, Part 2, section 7.2 <row value expression>, page 296,
- SQL:2003, Part 2, section 6.3 <value expression primary>, page 174,
- SQL:2003, Part 2, section 7.15 <subquery>, page 370,
- SQL:2003 Feature F561, "Full value expressions".
The exception in SQL:2003 Note 184 means:
Item_singlerow_subselect, which corresponds to a <scalar subquery>,
should be re-interpreted as an Item_in_subselect, which corresponds
to a <table subquery> when used inside an <in predicate>.
Our reading of Note 184 is reccursive, so that all:
- IN (( <subquery> ))
- IN ((( <subquery> )))
- IN '('^N <subquery> ')'^N
- etc
should be interpreted as a <table subquery>, no matter how deep in the
expression the <subquery> is.
*/
Item *result;
DBUG_ENTER("handle_sql2003_note184_exception");
if (expr->type() == Item::SUBSELECT_ITEM)
{
Item_subselect *expr2 = (Item_subselect*) expr;
if (expr2->substype() == Item_subselect::SINGLEROW_SUBS)
{
Item_singlerow_subselect *expr3 = (Item_singlerow_subselect*) expr2;
st_select_lex *subselect;
/*
Implement the mandated change, by altering the semantic tree:
left IN Item_singlerow_subselect(subselect)
is modified to
left IN (subselect)
which is represented as
Item_in_subselect(left, subselect)
*/
subselect= expr3->invalidate_and_restore_select_lex();
result= new (thd->mem_root) Item_in_subselect(left, subselect);
if (! equal)
result = negate_expression(thd, result);
DBUG_RETURN(result);
}
}
if (equal)
result= new (thd->mem_root) Item_func_eq(left, expr);
else
result= new (thd->mem_root) Item_func_ne(left, expr);
DBUG_RETURN(result);
}
%}
%union {
int num;
......@@ -4402,28 +4477,34 @@ bool_pri:
predicate:
bit_expr IN_SYM '(' subselect ')'
{ $$= new Item_in_subselect($1, $4); }
{
$$= new (YYTHD->mem_root) Item_in_subselect($1, $4);
}
| bit_expr not IN_SYM '(' subselect ')'
{ $$= negate_expression(YYTHD, new Item_in_subselect($1, $5)); }
{
THD *thd= YYTHD;
Item *item= new (thd->mem_root) Item_in_subselect($1, $5);
$$= negate_expression(thd, item);
}
| bit_expr IN_SYM '(' expr ')'
{
$$= new Item_func_eq($1, $4);
$$= handle_sql2003_note184_exception(YYTHD, $1, true, $4);
}
| bit_expr IN_SYM '(' expr ',' expr_list ')'
{
$6->push_front($4);
$6->push_front($1);
$$= new Item_func_in(*$6);
$$= new (YYTHD->mem_root) Item_func_in(*$6);
}
| bit_expr not IN_SYM '(' expr ')'
{
$$= new Item_func_ne($1, $5);
$$= handle_sql2003_note184_exception(YYTHD, $1, false, $5);
}
| bit_expr not IN_SYM '(' expr ',' expr_list ')'
{
$7->push_front($5);
$7->push_front($1);
Item_func_in *item = new Item_func_in(*$7);
Item_func_in *item = new (YYTHD->mem_root) Item_func_in(*$7);
item->negate();
$$= item;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment