Bug #30355: Incorrect ordering of UDF results

There's currently no way of knowing the determinicity of an UDF.
And the optimizer and the sequence() UDFs were making wrong
assumptions about what the is_const member means.
Plus there was no implementation of update_system_tables()
causing the optimizer to overwrite the information returned by
the <udf>_init function.

Fixed by equating the assumptions about the semantics of 
is_const and providing a implementation of update_used_tables().
Added a TODO item for the UDF API change needed to make a better 
implementation.
parent 07b83736
...@@ -393,6 +393,10 @@ typedef struct st_udf_init ...@@ -393,6 +393,10 @@ typedef struct st_udf_init
char *ptr; /* free pointer for function data */ char *ptr; /* free pointer for function data */
my_bool const_item; /* 0 if result is independent of arguments */ my_bool const_item; /* 0 if result is independent of arguments */
} UDF_INIT; } UDF_INIT;
/*
TODO: add a notion for determinism of the UDF.
See Item_udf_func::update_used_tables ()
*/
/* Constants when using compression */ /* Constants when using compression */
#define NET_HEADER_SIZE 4 /* standard header size */ #define NET_HEADER_SIZE 4 /* standard header size */
......
...@@ -327,4 +327,31 @@ DROP FUNCTION check_const_len; ...@@ -327,4 +327,31 @@ DROP FUNCTION check_const_len;
DROP PROCEDURE check_const_len_sp; DROP PROCEDURE check_const_len_sp;
DROP TRIGGER check_const_len_trigger; DROP TRIGGER check_const_len_trigger;
DROP TABLE const_len_bug; DROP TABLE const_len_bug;
CREATE FUNCTION sequence RETURNS INTEGER SONAME "UDF_EXAMPLE_LIB";
CREATE TABLE t1 (a INT);
CREATE TABLE t2 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (4),(3),(2),(1);
INSERT INTO t2 SELECT * FROM t1;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq ASC;
seq a
1 4
2 3
3 2
4 1
SELECT sequence() AS seq, a FROM t1 ORDER BY seq DESC;
seq a
4 1
3 2
2 3
1 4
SELECT * FROM t1 WHERE a = sequence();
a
SELECT * FROM t2 WHERE a = sequence();
a
1
2
3
4
DROP FUNCTION sequence;
DROP TABLE t1,t2;
End of 5.0 tests. End of 5.0 tests.
...@@ -362,4 +362,25 @@ DROP PROCEDURE check_const_len_sp; ...@@ -362,4 +362,25 @@ DROP PROCEDURE check_const_len_sp;
DROP TRIGGER check_const_len_trigger; DROP TRIGGER check_const_len_trigger;
DROP TABLE const_len_bug; DROP TABLE const_len_bug;
#
# Bug #30355: Incorrect ordering of UDF results
#
--replace_result $UDF_EXAMPLE_LIB UDF_EXAMPLE_LIB
eval CREATE FUNCTION sequence RETURNS INTEGER SONAME "$UDF_EXAMPLE_LIB";
CREATE TABLE t1 (a INT);
CREATE TABLE t2 (a INT PRIMARY KEY);
INSERT INTO t1 VALUES (4),(3),(2),(1);
INSERT INTO t2 SELECT * FROM t1;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq ASC;
SELECT sequence() AS seq, a FROM t1 ORDER BY seq DESC;
SELECT * FROM t1 WHERE a = sequence();
SELECT * FROM t2 WHERE a = sequence();
DROP FUNCTION sequence;
DROP TABLE t1,t2;
--echo End of 5.0 tests. --echo End of 5.0 tests.
...@@ -2968,6 +2968,12 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func, ...@@ -2968,6 +2968,12 @@ udf_handler::fix_fields(THD *thd, Item_result_field *func,
func->max_length=min(initid.max_length,MAX_BLOB_WIDTH); func->max_length=min(initid.max_length,MAX_BLOB_WIDTH);
func->maybe_null=initid.maybe_null; func->maybe_null=initid.maybe_null;
const_item_cache=initid.const_item; const_item_cache=initid.const_item;
/*
Keep used_tables_cache in sync with const_item_cache.
See the comment in Item_udf_func::update_used tables.
*/
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
func->decimals=min(initid.decimals,NOT_FIXED_DEC); func->decimals=min(initid.decimals,NOT_FIXED_DEC);
} }
initialized=1; initialized=1;
......
...@@ -1016,6 +1016,56 @@ public: ...@@ -1016,6 +1016,56 @@ public:
fixed= 1; fixed= 1;
return res; return res;
} }
void update_used_tables()
{
/*
TODO: Make a member in UDF_INIT and return if a UDF is deterministic or
not.
Currently UDF_INIT has a member (const_item) that is an in/out
parameter to the init() call.
The code in udf_handler::fix_fields also duplicates the arguments
handling code in Item_func::fix_fields().
The lack of information if a UDF is deterministic makes writing
a correct update_used_tables() for UDFs impossible.
One solution to this would be :
- Add a is_deterministic member of UDF_INIT
- (optionally) deprecate the const_item member of UDF_INIT
- Take away the duplicate code from udf_handler::fix_fields() and
make Item_udf_func call Item_func::fix_fields() to process its
arguments as for any other function.
- Store the deterministic flag returned by <udf>_init into the
udf_handler.
- Don't implement Item_udf_func::fix_fields, implement
Item_udf_func::fix_length_and_dec() instead (similar to non-UDF
functions).
- Override Item_func::update_used_tables to call
Item_func::update_used_tables() and add a RAND_TABLE_BIT to the
result of Item_func::update_used_tables() if the UDF is
non-deterministic.
- (optionally) rename RAND_TABLE_BIT to NONDETERMINISTIC_BIT to
better describe its usage.
The above would require a change of the UDF API.
Until that change is done here's how the current code works:
We call Item_func::update_used_tables() only when we know that
the function depends on real non-const tables and is deterministic.
This can be done only because we know that the optimizer will
call update_used_tables() only when there's possibly a new const
table. So update_used_tables() can only make a Item_func more
constant than it is currently.
That's why we don't need to do anything if a function is guaranteed
to return non-constant (it's non-deterministic) or is already a
const.
*/
if ((used_tables_cache & ~PSEUDO_TABLE_BITS) &&
!(used_tables_cache & RAND_TABLE_BIT))
{
Item_func::update_used_tables();
if (!const_item_cache && !used_tables_cache)
used_tables_cache= RAND_TABLE_BIT;
}
}
void cleanup(); void cleanup();
Item_result result_type () const { return udf.result_type(); } Item_result result_type () const { return udf.result_type(); }
table_map not_null_tables() const { return 0; } table_map not_null_tables() const { return 0; }
......
...@@ -648,13 +648,11 @@ my_bool sequence_init(UDF_INIT *initid, UDF_ARGS *args, char *message) ...@@ -648,13 +648,11 @@ my_bool sequence_init(UDF_INIT *initid, UDF_ARGS *args, char *message)
return 1; return 1;
} }
bzero(initid->ptr,sizeof(longlong)); bzero(initid->ptr,sizeof(longlong));
/* /*
Fool MySQL to think that this function is a constant sequence() is a non-deterministic function : it has different value
This will ensure that MySQL only evalutes the function even if called with the same arguments.
when the rows are sent to the client and not before any ORDER BY
clauses
*/ */
initid->const_item=1; initid->const_item=0;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment