Commit 8540fa83 authored by Igor Babaev's avatar Igor Babaev

MDEV-19421 Basic 3-way join queries are not parsed.

The parser returned a syntax error message for the queries with join
expressions like this t1 JOIN t2 [LEFT | RIGHT] JOIN t3 ON ... ON ... when
the second operand of the outer JOIN operation with ON clause was another
join expression with ON clause. In this expression the JOIN operator is
right-associative, i.e. the expression has to be parsed as
t1 JOIN (t2 [LEFT | RIGHT] JOIN t3 ON ... ) ON ...
Such join expressions are hard to parse because the outer JOIN is
left-associative if there is no ON clause for the first outer JOIN operator.
The patch implements a solution in which the JOIN operator is always parsed
as right-associative, so the right-associative tree is built first. If it
turns out that there is no corresponding ON clause for this operator, the
tree is converted to a left-associative one.

The idea of the solution was taken from the patch by Martin Hansson
"WL#8083: Fixed the join_table rule" from MySQL-8.0 code line.
As the grammar rules related to join expressions in MySQL-8.0 and
MariaDB-5.5+ are quite different, the MariaDB solution could not borrow
any code from the MySQL-8.0 solution.
parent 8997f20f
This diff is collapsed.
......@@ -486,7 +486,7 @@ Note 1051 Unknown table 't2'
Note 1051 Unknown table 't3'
create table t2(a int,unique key (a)) engine=innodb;
create table t3(b int) engine=innodb;
create table t1(a int,b int)engine=innodb;
create table t1(c int,b int)engine=innodb;
set @mdev4270_opl= @@optimizer_prune_level;
set @mdev4270_osd= @@optimizer_search_depth;
set optimizer_prune_level=0;
......
This diff is collapsed.
......@@ -358,7 +358,7 @@ drop table t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,t11,t12,t13,t14,t15,t16;
drop table if exists t1,t2,t3;
create table t2(a int,unique key (a)) engine=innodb;
create table t3(b int) engine=innodb;
create table t1(a int,b int)engine=innodb;
create table t1(c int,b int)engine=innodb;
set @mdev4270_opl= @@optimizer_prune_level;
set @mdev4270_osd= @@optimizer_search_depth;
......
......@@ -957,6 +957,8 @@ class st_select_lex: public st_select_lex_node
TABLE_LIST *end_nested_join(THD *thd);
TABLE_LIST *nest_last_join(THD *thd);
void add_joined_table(TABLE_LIST *table);
bool add_cross_joined_table(TABLE_LIST *left_op, TABLE_LIST *right_op,
bool straight_fl);
TABLE_LIST *convert_right_join();
List<Item>* get_item_list();
ulong get_table_join_options();
......@@ -2745,9 +2747,9 @@ struct LEX: public Query_tables_list
return context_stack.push_front(context);
}
void pop_context()
Name_resolution_context *pop_context()
{
context_stack.pop();
return context_stack.pop();
}
bool copy_db_to(char **p_db, size_t *p_db_length) const;
......
......@@ -6390,6 +6390,7 @@ TABLE_LIST *st_select_lex::end_nested_join(THD *thd)
join_list= ptr->join_list;
embedding= ptr->embedding;
nested_join= ptr->nested_join;
nested_join->nest_type= 0;
if (nested_join->join_list.elements == 1)
{
TABLE_LIST *embedded= nested_join->join_list.head();
......@@ -6399,6 +6400,8 @@ TABLE_LIST *st_select_lex::end_nested_join(THD *thd)
join_list->push_front(embedded);
ptr= embedded;
embedded->lifted= 1;
if (embedded->nested_join)
embedded->nested_join->nest_type= 0;
}
else if (nested_join->join_list.elements == 0)
{
......@@ -6429,6 +6432,16 @@ TABLE_LIST *st_select_lex::nest_last_join(THD *thd)
List<TABLE_LIST> *embedded_list;
DBUG_ENTER("nest_last_join");
TABLE_LIST *head= join_list->head();
if (head->nested_join && head->nested_join->nest_type & REBALANCED_NEST)
{
List_iterator<TABLE_LIST> li(*join_list);
li++;
while (li++)
li.remove();
DBUG_RETURN(head);
}
if (!(ptr= (TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+
sizeof(NESTED_JOIN))))
DBUG_RETURN(0);
......@@ -6440,6 +6453,7 @@ TABLE_LIST *st_select_lex::nest_last_join(THD *thd)
ptr->alias= (char*) "(nest_last_join)";
embedded_list= &nested_join->join_list;
embedded_list->empty();
nested_join->nest_type= JOIN_OP_NEST;
for (uint i=0; i < 2; i++)
{
......@@ -6490,6 +6504,227 @@ void st_select_lex::add_joined_table(TABLE_LIST *table)
}
/**
@brief
Create a node for JOIN/INNER JOIN/CROSS JOIN/STRAIGHT_JOIN operation
@param left_op the node for the left operand constructed by the parser
@param right_op the node for the right operand constructed by the parser
@param straight_fl TRUE if STRAIGHT_JOIN is used
@retval
false on success
true otherwise
@details
JOIN operator can be left-associative with other join operators in one
context and right-associative in another context.
In this query
SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a (Q1)
JOIN is left-associative and the query Q1 is interpreted as
SELECT * FROM (t1 JOIN t2) LEFT JOIN t3 ON t2.a=t3.a.
While in this query
SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a ON t1.b=t2.b (Q2)
JOIN is right-associative and the query Q2 is interpreted as
SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a) ON t1.b=t2.b
JOIN is right-associative if it is used with ON clause or with USING clause.
Otherwise it is left-associative.
When parsing a join expression with JOIN operator we can't determine
whether this operation is left- or right-associative until either we read the
corresponding ON clause or we reach the end of the expression. This creates
a problem for the parser to build a proper internal representation of the
used join expression.
For Q1 and Q2 the trees representing the used join expressions look like
        LJ - ON                   J - ON
       /  \                      / \
      J    t3   (TQ1)          t1   LJ - ON   (TQ2)
     / \                           /  \
   t1   t2                       t2    t3
To build TQ1 the parser has to reduce the expression for JOIN right after
it has read the reference to t2. To build TQ2 the parser reduces JOIN
when it has read the whole join expression. There is no way to determine
whether an early reduction is needed until the whole join expression is
read.
A solution here is always to do a late reduction. In this case the parser
first builds an incorrect tree TQ1* that has to be rebalanced right after
it has been constructed.
      J                               LJ - ON
     / \                             /  \
   t1   LJ - ON     (TQ1*)    =>    J    t3
       /  \                        / \
     t2    t3                    t1   t2
Actually the transformation is performed over the nodes t1 and LJ before the
node for J is created in the function st_select_lex::add_cross_joined_table.
The function creates a node for J which replaces the node t2. Then it
attaches the nodes t1 and t2 to this newly created node. The node LJ becomes
the top node of the tree.
For the query
SELECT * FROM t1 JOIN t2 RIGHT JOIN t3 ON t2.a=t3.a (Q3)
the transformation looks slightly different because the parser
replaces the RIGHT JOIN tree with an equivalent LEFT JOIN tree.
      J                               LJ - ON
     / \                             /  \
   t1   LJ - ON     (TQ3*)    =>    J    t2
       /  \                        / \
     t3    t2                    t1   t3
With several left associative JOINs
SELECT * FROM t1 JOIN t2 JOIN t3 LEFT JOIN t4 ON t3.a=t4.a (Q4)
the newly created node for JOIN replaces the left most node of the tree:
      J1                              LJ - ON
     /  \                            /  \
   t1    LJ - ON                   J2    t4
        /  \            =>        /  \
      J2    t4                  J1    t3
     /  \                      /  \
   t2    t3                  t1    t2
Here's another example:
SELECT *
FROM t1 JOIN t2 LEFT JOIN t3 JOIN t4 ON t3.a=t4.a ON t2.b=t3.b (Q5)
      J                               LJ - ON
     / \                             /  \
   t1   LJ - ON                    J      J - ON
       /  \             =>        / \    / \
     t2    J - ON               t1  t2  t3  t4
          / \
        t3   t4
If the transformed nested join node is a natural join node like in
the following query
SELECT * FROM t1 JOIN t2 LEFT JOIN t3 USING(a) (Q6)
the transformation additionally has to take care about setting proper
references in the field natural_join for both operands of the natural
join operation.
The function also has to change the name resolution context for ON
expressions used in the transformed join expression to take into
account the tables of the left_op node.
*/
/*
  Implementation outline:
    1. If right_op is not a nest flagged JOIN_OP_NEST, only record the
       STRAIGHT_JOIN flag on right_op and return.
    2. Otherwise allocate an empty nest cj_nest for the new JOIN node.
    3. Walk down right_op's tree, following JOIN_OP_NEST nests, until the
       operand tbl that has to be paired with left_op is found; on the way,
       widen the name resolution context of every ON expression met.
    4. Put cj_nest into the tree in place of tbl, then make tbl and left_op
       the two members of cj_nest.
    5. Flag right_op with REBALANCED_NEST.
  Returns true only when an allocation or a list insertion fails.
*/
bool st_select_lex::add_cross_joined_table(TABLE_LIST *left_op,
TABLE_LIST *right_op,
bool straight_fl)
{
DBUG_ENTER("add_cross_joined_table");
THD *thd= parent_lex->thd;
/* No re-balancing is needed unless right_op is a nest flagged JOIN_OP_NEST */
if (!(right_op->nested_join &&
(right_op->nested_join->nest_type & JOIN_OP_NEST)))
{
/*
This handles the cases when the right operand is not a nested join.
like in queries
SELECT * FROM t1 JOIN t2;
SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.a JOIN t3
*/
right_op->straight= straight_fl;
DBUG_RETURN(false);
}
/* tbl: the operand of the right_op tree that will be joined with left_op */
TABLE_LIST *tbl;
/* jl: the join list currently inspected; starts at right_op's own list */
List<TABLE_LIST> *jl= &right_op->nested_join->join_list;
TABLE_LIST *cj_nest;
/*
Create the node NJ for a new nested join for the future inclusion
of left_op in it. Initially the nest is empty.
*/
/*
  TABLE_LIST and NESTED_JOIN are carved out of a single allocation:
  the NESTED_JOIN part starts at the aligned end of the TABLE_LIST part.
*/
if (unlikely(!(cj_nest=
(TABLE_LIST*) thd->calloc(ALIGN_SIZE(sizeof(TABLE_LIST))+
sizeof(NESTED_JOIN)))))
DBUG_RETURN(true);
cj_nest->nested_join=
((NESTED_JOIN*) ((uchar*) cj_nest + ALIGN_SIZE(sizeof(TABLE_LIST))));
cj_nest->nested_join->nest_type= JOIN_OP_NEST;
/* cjl: the (so far empty) member list of the new nest */
List<TABLE_LIST> *cjl= &cj_nest->nested_join->join_list;
cjl->empty();
/* Look for the left most node tbl of the right_op tree */
for ( ; ; )
{
TABLE_LIST *pair_tbl= 0; /* useful only for operands of natural joins */
List_iterator<TABLE_LIST> li(*jl);
tbl= li++;
/* Expand name resolution context */
/*
  When the first element of the list carries an ON context, make the
  resolution of its ON expression start from the first leaf of left_op.
*/
Name_resolution_context *on_context;
if ((on_context= tbl->on_context))
{
on_context->first_name_resolution_table=
left_op->first_leaf_for_name_resolution();
}
/*
  If the first element is not flagged JOIN_TYPE_RIGHT the operand we are
  after is the second element of the list; the first one is remembered
  as its partner.
  NOTE(review): presumably the flagged case corresponds to a converted
  RIGHT JOIN, whose operands are swapped - confirm against
  convert_right_join().
*/
if (!(tbl->outer_join & JOIN_TYPE_RIGHT))
{
pair_tbl= tbl;
tbl= li++;
}
/* The chosen operand is itself a join-operation nest: descend into it */
if (tbl->nested_join &&
tbl->nested_join->nest_type & JOIN_OP_NEST)
{
jl= &tbl->nested_join->join_list;
continue;
}
/* Replace the tbl node in the tree for the newly created NJ node */
/* NJ inherits the join attributes tbl had and takes its slot in jl */
cj_nest->outer_join= tbl->outer_join;
cj_nest->on_expr= tbl->on_expr;
cj_nest->embedding= tbl->embedding;
cj_nest->join_list= jl;
cj_nest->alias= (char*) "(nest_last_join)";
li.replace(cj_nest);
/*
If tbl is an operand of a natural join set properly the references
in the fields natural_join for both operands of the operation.
*/
if(tbl->embedding && tbl->embedding->is_natural_join)
{
/*
  pair_tbl is still unset when tbl was the first element of jl;
  the partner is then the next element delivered by the iterator.
*/
if (!pair_tbl)
pair_tbl= li++;
pair_tbl->natural_join= cj_nest;
cj_nest->natural_join= pair_tbl;
}
break;
}
/* Attach tbl as the right operand of NJ */
if (unlikely(cjl->push_back(tbl, thd->mem_root)))
DBUG_RETURN(true);
/*
  The outer join flags and the ON expression of tbl were copied to NJ above,
  so they are reset here; tbl becomes a plain member of NJ.
*/
tbl->outer_join= 0;
tbl->on_expr= 0;
tbl->straight= straight_fl;
tbl->natural_join= 0;
tbl->embedding= cj_nest;
tbl->join_list= cjl;
/* Add left_op as the left operand of NJ */
if (unlikely(cjl->push_back(left_op, thd->mem_root)))
DBUG_RETURN(true);
left_op->embedding= cj_nest;
left_op->join_list= cjl;
/*
Mark right_op as a rebalanced nested join in order not to
create a new top level nested join node.
*/
right_op->nested_join->nest_type|= REBALANCED_NEST;
DBUG_RETURN(false);
}
/**
Convert a right join into equivalent left join.
......
......@@ -1428,9 +1428,9 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token IMPOSSIBLE_ACTION /* To avoid warning for yyerrlab1 */
%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT
/* A dummy token to force the priority of table_ref production in a join. */
%left TABLE_REF_PRIORITY
%left CONDITIONLESS_JOIN
%left JOIN_SYM INNER_SYM STRAIGHT_JOIN CROSS LEFT RIGHT ON_SYM USING
%left SET_VAR
%left OR_OR_SYM OR_SYM OR2_SYM
%left XOR
......@@ -9607,9 +9607,9 @@ join_table_list:
and are ignored.
*/
esc_table_ref:
table_ref { $$=$1; }
| '{' ident table_ref '}' { $$=$3; }
;
table_ref { $$=$1; }
| '{' ident table_ref '}' { $$=$3; }
;
/* Equivalent to <table reference list> in the SQL:2003 standard. */
/* Warning - may return NULL in case of incomplete SELECT */
......@@ -9622,23 +9622,24 @@ derived_table_list:
;
/*
Notice that JOIN is a left-associative operation, and it must be parsed
as such, that is, the parser must process first the left join operand
then the right one. Such order of processing ensures that the parser
produces correct join trees which is essential for semantic analysis
and subsequent optimization phases.
Notice that JOIN can be a left-associative operator in one context and
a right-associative operator in another context (see the comment for
st_select_lex::add_cross_joined_table).
*/
join_table:
/* INNER JOIN variants */
/*
Use %prec to evaluate production 'table_ref' before 'normal_join'
so that [INNER | CROSS] JOIN is properly nested as other
left-associative joins.
*/
table_ref normal_join table_ref %prec TABLE_REF_PRIORITY
{ MYSQL_YYABORT_UNLESS($1 && ($$=$3)); }
| table_ref STRAIGHT_JOIN table_factor
{ MYSQL_YYABORT_UNLESS($1 && ($$=$3)); $3->straight=1; }
table_ref normal_join table_ref %prec CONDITIONLESS_JOIN
{
MYSQL_YYABORT_UNLESS($1 && ($$=$3));
if (unlikely(Select->add_cross_joined_table($1, $3, false)))
MYSQL_YYABORT;
}
| table_ref STRAIGHT_JOIN table_ref %prec CONDITIONLESS_JOIN
{
MYSQL_YYABORT_UNLESS($1 && ($$=$3));
if (unlikely(Select->add_cross_joined_table($1, $3, true)))
MYSQL_YYABORT;
}
| table_ref normal_join table_ref
ON
{
......@@ -9651,10 +9652,10 @@ join_table:
expr
{
add_join_on($3,$6);
Lex->pop_context();
$3->on_context= Lex->pop_context();
Select->parsing_place= NO_MATTER;
}
| table_ref STRAIGHT_JOIN table_factor
| table_ref STRAIGHT_JOIN table_ref
ON
{
MYSQL_YYABORT_UNLESS($1 && $3);
......@@ -9667,7 +9668,7 @@ join_table:
{
$3->straight=1;
add_join_on($3,$6);
Lex->pop_context();
$3->on_context= Lex->pop_context();
Select->parsing_place= NO_MATTER;
}
| table_ref normal_join table_ref
......@@ -9696,7 +9697,7 @@ join_table:
expr
{
add_join_on($5,$8);
Lex->pop_context();
$5->on_context= Lex->pop_context();
$5->outer_join|=JOIN_TYPE_LEFT;
$$=$5;
Select->parsing_place= NO_MATTER;
......@@ -9735,7 +9736,7 @@ join_table:
if (!($$= lex->current_select->convert_right_join()))
MYSQL_YYABORT;
add_join_on($$, $8);
Lex->pop_context();
$1->on_context= Lex->pop_context();
Select->parsing_place= NO_MATTER;
}
| table_ref RIGHT opt_outer JOIN_SYM table_factor
......
......@@ -46,6 +46,7 @@ struct TABLE_LIST;
class ACL_internal_schema_access;
class ACL_internal_table_access;
class Field;
struct Name_resolution_context;
/*
Used to identify NESTED_JOIN structures within a join (applicable only to
......@@ -1618,6 +1619,7 @@ struct TABLE_LIST
char *db, *alias, *table_name, *schema_table_name;
char *option; /* Used by cache index */
Item *on_expr; /* Used with outer join */
Name_resolution_context *on_context; /* For ON expressions */
Item *sj_on_expr;
/*
......@@ -2332,9 +2334,31 @@ class Field_iterator_table_ref: public Field_iterator
};
/* Bit flags stored in the nest_type field of st_nested_join; they may be OR-ed. */
/* Nest created for a JOIN operation used as an operand in a join expression. */
#define JOIN_OP_NEST 1
/*
  Set in addition to JOIN_OP_NEST on a nest that has been re-balanced by
  st_select_lex::add_cross_joined_table().
*/
#define REBALANCED_NEST 2
typedef struct st_nested_join
{
List<TABLE_LIST> join_list; /* list of elements in the nested join */
/*
Currently the valid values for nest type are:
JOIN_OP_NEST - for nest created for JOIN operation used as an operand in
a join expression, contains 2 elements;
JOIN_OP_NEST | REBALANCED_NEST - nest created after tree re-balancing
in st_select_lex::add_cross_joined_table(), contains 1 element;
0 - for all other nests.
Examples:
1. SELECT * FROM t1 JOIN t2 LEFT JOIN t3 ON t2.a=t3.a;
Here the nest created for LEFT JOIN at first has nest_type==JOIN_OP_NEST.
After re-balancing in st_select_lex::add_cross_joined_table() this nest
has nest_type==JOIN_OP_NEST | REBALANCED_NEST. The nest for JOIN created
in st_select_lex::add_cross_joined_table() has nest_type== JOIN_OP_NEST.
2. SELECT * FROM t1 JOIN (t2 LEFT JOIN t3 ON t2.a=t3.a)
Here the nest created for LEFT JOIN has nest_type==0, because it's not
an operand in a join expression. The nest created for JOIN has nest_type
set to JOIN_OP_NEST.
*/
uint nest_type;
/*
Bitmap of tables within this nested join (including those embedded within
its children), including tables removed by table elimination.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment