Commit 53cf265b authored by Vicențiu Ciorbaru's avatar Vicențiu Ciorbaru

Implement LEAD and LAG and NTH_VALUE functions

Refactour out (into a copy for now) the logic of Item_sum_hybrid, to
allow for multiple arguments. It does not contain the comparator
members. The result is the class Item_sum_hybrid_simple.

LEAD and LAG make use of this Item to store previous rows in a chache.
It also helps in specifying the field type. Currently LEAD/LAG do not
support default values.

NTH_VALUE behaves identical to LEAD and LAG, except that the starting
position cursor is placed on the top of the frame instead of the current
row.
parent 29b227c3
create table t1 (
pk int primary key,
a int,
b int,
c char(10),
d decimal(10, 3),
e real
);
insert into t1 values
( 1, 0, 1, 'one', 0.1, 0.001),
( 2, 0, 2, 'two', 0.2, 0.002),
( 3, 0, 3, 'three', 0.3, 0.003),
( 4, 1, 2, 'three', 0.4, 0.004),
( 5, 1, 1, 'two', 0.5, 0.005),
( 6, 1, 1, 'one', 0.6, 0.006),
( 7, 2, NULL, 'n_one', 0.5, 0.007),
( 8, 2, 1, 'n_two', NULL, 0.008),
( 9, 2, 2, NULL, 0.7, 0.009),
(10, 2, 0, 'n_four', 0.8, 0.010),
(11, 2, 10, NULL, 0.9, NULL);
select pk,
lead(pk) over (order by pk),
lead(pk, 1) over (order by pk),
lead(pk, 2) over (order by pk),
lead(pk, 0) over (order by pk),
lead(pk, -1) over (order by pk),
lead(pk, -2) over (order by pk)
from t1
order by pk asc;
pk lead(pk) over (order by pk) lead(pk, 1) over (order by pk) lead(pk, 2) over (order by pk) lead(pk, 0) over (order by pk) lead(pk, -1) over (order by pk) lead(pk, -2) over (order by pk)
1 2 2 3 1 NULL NULL
2 3 3 4 2 1 NULL
3 4 4 5 3 2 1
4 5 5 6 4 3 2
5 6 6 7 5 4 3
6 7 7 8 6 5 4
7 8 8 9 7 6 5
8 9 9 10 8 7 6
9 10 10 11 9 8 7
10 11 11 NULL 10 9 8
11 NULL NULL NULL 11 10 9
select pk,
lag(pk) over (order by pk),
lag(pk, 1) over (order by pk),
lag(pk, 2) over (order by pk),
lag(pk, 0) over (order by pk),
lag(pk, -1) over (order by pk),
lag(pk, -2) over (order by pk)
from t1
order by pk asc;
pk lag(pk) over (order by pk) lag(pk, 1) over (order by pk) lag(pk, 2) over (order by pk) lag(pk, 0) over (order by pk) lag(pk, -1) over (order by pk) lag(pk, -2) over (order by pk)
1 NULL NULL NULL 1 2 3
2 1 1 NULL 2 3 4
3 2 2 1 3 4 5
4 3 3 2 4 5 6
5 4 4 3 5 6 7
6 5 5 4 6 7 8
7 6 6 5 7 8 9
8 7 7 6 8 9 10
9 8 8 7 9 10 11
10 9 9 8 10 11 NULL
11 10 10 9 11 NULL NULL
select pk, pk - 2,
lag(pk, pk - 2) over (order by pk),
lead(pk, pk - 2) over (order by pk)
from t1
order by pk asc;
pk pk - 2 lag(pk, pk - 2) over (order by pk) lead(pk, pk - 2) over (order by pk)
1 -1 2 NULL
2 0 2 2
3 1 2 4
4 2 2 6
5 3 2 8
6 4 2 10
7 5 2 NULL
8 6 2 NULL
9 7 2 NULL
10 8 2 NULL
11 9 2 NULL
select pk, pk - 2,
lag(pk, pk + 2) over (order by pk),
lead(pk, pk + 2) over (order by pk)
from t1
order by pk asc;
pk pk - 2 lag(pk, pk + 2) over (order by pk) lead(pk, pk + 2) over (order by pk)
1 -1 NULL 4
2 0 NULL 6
3 1 NULL 8
4 2 NULL 10
5 3 NULL NULL
6 4 NULL NULL
7 5 NULL NULL
8 6 NULL NULL
9 7 NULL NULL
10 8 NULL NULL
11 9 NULL NULL
select pk, a,
lead(pk) over (partition by a order by pk),
lead(pk, 1) over (partition by a order by pk),
lead(pk, 2) over (partition by a order by pk),
lead(pk, 0) over (partition by a order by pk),
lead(pk, -1) over (partition by a order by pk),
lead(pk, -2) over (partition by a order by pk)
from t1
order by pk asc;
pk a lead(pk) over (partition by a order by pk) lead(pk, 1) over (partition by a order by pk) lead(pk, 2) over (partition by a order by pk) lead(pk, 0) over (partition by a order by pk) lead(pk, -1) over (partition by a order by pk) lead(pk, -2) over (partition by a order by pk)
1 0 2 2 3 1 NULL NULL
2 0 3 3 NULL 2 1 NULL
3 0 NULL NULL NULL 3 2 1
4 1 5 5 6 4 NULL NULL
5 1 6 6 NULL 5 4 NULL
6 1 NULL NULL NULL 6 5 4
7 2 8 8 9 7 NULL NULL
8 2 9 9 10 8 7 NULL
9 2 10 10 11 9 8 7
10 2 11 11 NULL 10 9 8
11 2 NULL NULL NULL 11 10 9
select pk, a,
lag(pk) over (partition by a order by pk),
lag(pk, 1) over (partition by a order by pk),
lag(pk, 2) over (partition by a order by pk),
lag(pk, 0) over (partition by a order by pk),
lag(pk, -1) over (partition by a order by pk),
lag(pk, -2) over (partition by a order by pk)
from t1
order by pk asc;
pk a lag(pk) over (partition by a order by pk) lag(pk, 1) over (partition by a order by pk) lag(pk, 2) over (partition by a order by pk) lag(pk, 0) over (partition by a order by pk) lag(pk, -1) over (partition by a order by pk) lag(pk, -2) over (partition by a order by pk)
1 0 NULL NULL NULL 1 2 3
2 0 1 1 NULL 2 3 NULL
3 0 2 2 1 3 NULL NULL
4 1 NULL NULL NULL 4 5 6
5 1 4 4 NULL 5 6 NULL
6 1 5 5 4 6 NULL NULL
7 2 NULL NULL NULL 7 8 9
8 2 7 7 NULL 8 9 10
9 2 8 8 7 9 10 11
10 2 9 9 8 10 11 NULL
11 2 10 10 9 11 NULL NULL
select pk, a, pk - 2,
lag(pk, pk - 2) over (partition by a order by pk),
lead(pk, pk - 2) over (partition by a order by pk),
lag(pk, a - 2) over (partition by a order by pk),
lead(pk, a - 2) over (partition by a order by pk)
from t1
order by pk asc;
pk a pk - 2 lag(pk, pk - 2) over (partition by a order by pk) lead(pk, pk - 2) over (partition by a order by pk) lag(pk, a - 2) over (partition by a order by pk) lead(pk, a - 2) over (partition by a order by pk)
1 0 -1 2 NULL 3 NULL
2 0 0 2 2 NULL NULL
3 0 1 2 NULL NULL 1
4 1 2 NULL 6 5 NULL
5 1 3 NULL NULL 6 4
6 1 4 NULL NULL NULL 5
7 2 5 NULL NULL 7 7
8 2 6 NULL NULL 8 8
9 2 7 NULL NULL 9 9
10 2 8 NULL NULL 10 10
11 2 9 NULL NULL 11 11
select pk, a, pk - 2,
lag(pk, pk + 2) over (partition by a order by pk),
lead(pk, pk + 2) over (partition by a order by pk),
lag(pk, a + 2) over (partition by a order by pk),
lead(pk, a + 2) over (partition by a order by pk)
from t1
order by pk asc;
pk a pk - 2 lag(pk, pk + 2) over (partition by a order by pk) lead(pk, pk + 2) over (partition by a order by pk) lag(pk, a + 2) over (partition by a order by pk) lead(pk, a + 2) over (partition by a order by pk)
1 0 -1 NULL NULL NULL 3
2 0 0 NULL NULL NULL NULL
3 0 1 NULL NULL 1 NULL
4 1 2 NULL NULL NULL NULL
5 1 3 NULL NULL NULL NULL
6 1 4 NULL NULL NULL NULL
7 2 5 NULL NULL NULL 11
8 2 6 NULL NULL NULL NULL
9 2 7 NULL NULL NULL NULL
10 2 8 NULL NULL NULL NULL
11 2 9 NULL NULL 7 NULL
select pk, a, b, c, d, e,
lag(a) over (partition by a order by pk),
lag(b) over (partition by a order by pk),
lag(c) over (partition by a order by pk),
lag(d) over (partition by a order by pk),
lag(e) over (partition by a order by pk)
from t1
order by pk asc;
pk a b c d e lag(a) over (partition by a order by pk) lag(b) over (partition by a order by pk) lag(c) over (partition by a order by pk) lag(d) over (partition by a order by pk) lag(e) over (partition by a order by pk)
1 0 1 one 0.100 0.001 NULL NULL NULL NULL NULL
2 0 2 two 0.200 0.002 0 1 one 0.100 0.001
3 0 3 three 0.300 0.003 0 2 two 0.200 0.002
4 1 2 three 0.400 0.004 NULL NULL NULL NULL NULL
5 1 1 two 0.500 0.005 1 2 three 0.400 0.004
6 1 1 one 0.600 0.006 1 1 two 0.500 0.005
7 2 NULL n_one 0.500 0.007 NULL NULL NULL NULL NULL
8 2 1 n_two NULL 0.008 2 NULL n_one 0.500 0.007
9 2 2 NULL 0.700 0.009 2 1 n_two NULL 0.008
10 2 0 n_four 0.800 0.01 2 2 NULL 0.700 0.009
11 2 10 NULL 0.900 NULL 2 0 n_four 0.800 0.01
select pk, a, b, a+b,
lag(a + b) over (partition by a order by pk)
from t1
order by pk asc;
pk a b a+b lag(a + b) over (partition by a order by pk)
1 0 1 1 NULL
2 0 2 2 1
3 0 3 3 2
4 1 2 3 NULL
5 1 1 2 3
6 1 1 2 2
7 2 NULL NULL NULL
8 2 1 3 NULL
9 2 2 4 3
10 2 0 2 4
11 2 10 12 2
select pk, a, b, a+b,
lag(a + b) over (partition by a order by pk) + pk
from t1
order by pk asc;
pk a b a+b lag(a + b) over (partition by a order by pk) + pk
1 0 1 1 NULL
2 0 2 2 3
3 0 3 3 5
4 1 2 3 NULL
5 1 1 2 8
6 1 1 2 8
7 2 NULL NULL NULL
8 2 1 3 NULL
9 2 2 4 12
10 2 0 2 14
11 2 10 12 13
drop table t1;
create table t1 (
pk int primary key,
a int,
b int,
c char(10),
d decimal(10, 3),
e real
);
insert into t1 values
( 1, 0, 1, 'one', 0.1, 0.001),
( 2, 0, 2, 'two', 0.2, 0.002),
( 3, 0, 3, 'three', 0.3, 0.003),
( 4, 1, 2, 'three', 0.4, 0.004),
( 5, 1, 1, 'two', 0.5, 0.005),
( 6, 1, 1, 'one', 0.6, 0.006),
( 7, 2, NULL, 'n_one', 0.5, 0.007),
( 8, 2, 1, 'n_two', NULL, 0.008),
( 9, 2, 2, NULL, 0.7, 0.009),
(10, 2, 0, 'n_four', 0.8, 0.010),
(11, 2, 10, NULL, 0.9, NULL);
select pk,
lead(pk) over (order by pk),
lead(pk, 1) over (order by pk),
lead(pk, 2) over (order by pk),
lead(pk, 0) over (order by pk),
lead(pk, -1) over (order by pk),
lead(pk, -2) over (order by pk)
from t1
order by pk asc;
select pk,
lag(pk) over (order by pk),
lag(pk, 1) over (order by pk),
lag(pk, 2) over (order by pk),
lag(pk, 0) over (order by pk),
lag(pk, -1) over (order by pk),
lag(pk, -2) over (order by pk)
from t1
order by pk asc;
select pk, pk - 2,
lag(pk, pk - 2) over (order by pk),
lead(pk, pk - 2) over (order by pk)
from t1
order by pk asc;
select pk, pk - 2,
lag(pk, pk + 2) over (order by pk),
lead(pk, pk + 2) over (order by pk)
from t1
order by pk asc;
select pk, a,
lead(pk) over (partition by a order by pk),
lead(pk, 1) over (partition by a order by pk),
lead(pk, 2) over (partition by a order by pk),
lead(pk, 0) over (partition by a order by pk),
lead(pk, -1) over (partition by a order by pk),
lead(pk, -2) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a,
lag(pk) over (partition by a order by pk),
lag(pk, 1) over (partition by a order by pk),
lag(pk, 2) over (partition by a order by pk),
lag(pk, 0) over (partition by a order by pk),
lag(pk, -1) over (partition by a order by pk),
lag(pk, -2) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a, pk - 2,
lag(pk, pk - 2) over (partition by a order by pk),
lead(pk, pk - 2) over (partition by a order by pk),
lag(pk, a - 2) over (partition by a order by pk),
lead(pk, a - 2) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a, pk - 2,
lag(pk, pk + 2) over (partition by a order by pk),
lead(pk, pk + 2) over (partition by a order by pk),
lag(pk, a + 2) over (partition by a order by pk),
lead(pk, a + 2) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a, b, c, d, e,
lag(a) over (partition by a order by pk),
lag(b) over (partition by a order by pk),
lag(c) over (partition by a order by pk),
lag(d) over (partition by a order by pk),
lag(e) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a, b, a+b,
lag(a + b) over (partition by a order by pk)
from t1
order by pk asc;
select pk, a, b, a+b,
lag(a + b) over (partition by a order by pk) + pk
from t1
order by pk asc;
drop table t1;
......@@ -350,7 +350,8 @@ class Item_sum :public Item_func_or_sum
AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, STD_FUNC,
VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC,
ROW_NUMBER_FUNC, RANK_FUNC, DENSE_RANK_FUNC, PERCENT_RANK_FUNC,
CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC
CUME_DIST_FUNC, NTILE_FUNC, FIRST_VALUE_FUNC, LAST_VALUE_FUNC,
NTH_VALUE_FUNC, LEAD_FUNC, LAG_FUNC
};
Item **ref_by; /* pointer to a ref to the object used to register it */
......
......@@ -229,16 +229,220 @@ bool Item_sum_first_value::add()
For this usecase we can actually get rid of arg_cache. arg_cache is just
for running a comparison function. */
value_added= true;
arg_cache->cache_value();
value->store(arg_cache);
null_value= arg_cache->null_value;
Item_sum_hybrid_simple::add();
return false;
}
bool Item_sum_last_value::add()
bool Item_sum_hybrid_simple::fix_fields(THD *thd, Item **ref)
{
arg_cache->cache_value();
value->store(arg_cache);
null_value= arg_cache->null_value;
DBUG_ASSERT(fixed == 0);
if (init_sum_func_check(thd))
return TRUE;
for (uint i= 0; i < arg_count; i++)
{
Item *item= args[i];
// 'item' can be changed during fix_fields
if ((!item->fixed && item->fix_fields(thd, args)) ||
(item= args[i])->check_cols(1))
return TRUE;
}
Type_std_attributes::set(args[0]);
for (uint i= 0; i < arg_count && !with_subselect; i++)
with_subselect= with_subselect || args[i]->with_subselect;
Item *item2= args[0]->real_item();
if (item2->type() == Item::FIELD_ITEM)
set_handler_by_field_type(((Item_field*) item2)->field->type());
else if (args[0]->cmp_type() == TIME_RESULT)
set_handler_by_field_type(item2->field_type());
else
set_handler_by_result_type(item2->result_type(),
max_length, collation.collation);
switch (Item_sum_hybrid_simple::result_type()) {
case INT_RESULT:
case DECIMAL_RESULT:
case STRING_RESULT:
break;
case REAL_RESULT:
max_length= float_length(decimals);
break;
case ROW_RESULT:
case TIME_RESULT:
DBUG_ASSERT(0); // XXX(cvicentiu) Should this never happen?
return TRUE;
};
setup_hybrid(thd, args[0]);
/* MIN/MAX can return NULL for empty set indepedent of the used column */
maybe_null= 1;
result_field=0;
null_value=1;
fix_length_and_dec();
if (check_sum_func(thd, ref))
return TRUE;
for (uint i= 0; i < arg_count; i++)
{
orig_args[i]= args[i];
}
fixed= 1;
return FALSE;
}
bool Item_sum_hybrid_simple::add()
{
value->store(args[0]);
value->cache_value();
null_value= value->null_value;
return false;
}
void Item_sum_hybrid_simple::setup_hybrid(THD *thd, Item *item)
{
if (!(value= Item_cache::get_cache(thd, item, item->cmp_type())))
return;
value->setup(thd, item);
value->store(item);
if (!item->const_item())
value->set_used_tables(RAND_TABLE_BIT);
collation.set(item->collation);
}
double Item_sum_hybrid_simple::val_real()
{
DBUG_ASSERT(fixed == 1);
if (null_value)
return 0.0;
double retval= value->val_real();
if ((null_value= value->null_value))
DBUG_ASSERT(retval == 0.0);
return retval;
}
longlong Item_sum_hybrid_simple::val_int()
{
DBUG_ASSERT(fixed == 1);
if (null_value)
return 0;
longlong retval= value->val_int();
if ((null_value= value->null_value))
DBUG_ASSERT(retval == 0);
return retval;
}
my_decimal *Item_sum_hybrid_simple::val_decimal(my_decimal *val)
{
DBUG_ASSERT(fixed == 1);
if (null_value)
return 0;
my_decimal *retval= value->val_decimal(val);
if ((null_value= value->null_value))
DBUG_ASSERT(retval == NULL);
return retval;
}
String *
Item_sum_hybrid_simple::val_str(String *str)
{
DBUG_ASSERT(fixed == 1);
if (null_value)
return 0;
String *retval= value->val_str(str);
if ((null_value= value->null_value))
DBUG_ASSERT(retval == NULL);
return retval;
}
Field *Item_sum_hybrid_simple::create_tmp_field(bool group, TABLE *table)
{
DBUG_ASSERT(0);
return NULL;
}
void Item_sum_hybrid_simple::reset_field()
{
switch(Item_sum_hybrid_simple::result_type()) {
case STRING_RESULT:
{
char buff[MAX_FIELD_WIDTH];
String tmp(buff,sizeof(buff),result_field->charset()),*res;
res=args[0]->val_str(&tmp);
if (args[0]->null_value)
{
result_field->set_null();
result_field->reset();
}
else
{
result_field->set_notnull();
result_field->store(res->ptr(),res->length(),tmp.charset());
}
break;
}
case INT_RESULT:
{
longlong nr=args[0]->val_int();
if (maybe_null)
{
if (args[0]->null_value)
{
nr=0;
result_field->set_null();
}
else
result_field->set_notnull();
}
result_field->store(nr, unsigned_flag);
break;
}
case REAL_RESULT:
{
double nr= args[0]->val_real();
if (maybe_null)
{
if (args[0]->null_value)
{
nr=0.0;
result_field->set_null();
}
else
result_field->set_notnull();
}
result_field->store(nr);
break;
}
case DECIMAL_RESULT:
{
my_decimal value_buff, *arg_dec= args[0]->val_decimal(&value_buff);
if (maybe_null)
{
if (args[0]->null_value)
result_field->set_null();
else
result_field->set_notnull();
}
/*
We must store zero in the field as we will use the field value in
add()
*/
if (!arg_dec) // Null
arg_dec= &decimal_zero;
result_field->store_decimal(arg_dec);
break;
}
case ROW_RESULT:
case TIME_RESULT:
DBUG_ASSERT(0);
}
}
void Item_sum_hybrid_simple::update_field()
{
DBUG_ASSERT(0);
}
......@@ -279,23 +279,59 @@ class Item_sum_dense_rank: public Item_sum_int
{ return get_item_copy<Item_sum_dense_rank>(thd, mem_root, this); }
};
class Item_sum_hybrid_simple : public Item_sum,
public Type_handler_hybrid_field_type
{
public:
Item_sum_hybrid_simple(THD *thd, Item *arg):
Item_sum(thd, arg),
Type_handler_hybrid_field_type(MYSQL_TYPE_LONGLONG),
value(NULL)
{ collation.set(&my_charset_bin); }
Item_sum_hybrid_simple(THD *thd, Item *arg1, Item *arg2):
Item_sum(thd, arg1, arg2),
Type_handler_hybrid_field_type(MYSQL_TYPE_LONGLONG),
value(NULL)
{ collation.set(&my_charset_bin); }
bool add();
bool fix_fields(THD *, Item **);
void setup_hybrid(THD *thd, Item *item);
double val_real();
longlong val_int();
my_decimal *val_decimal(my_decimal *);
void reset_field();
String *val_str(String *);
/* TODO(cvicentiu) copied from Item_sum_hybrid, what does it do? */
bool keep_field_type(void) const { return 1; }
enum Item_result result_type() const
{ return Type_handler_hybrid_field_type::result_type(); }
enum Item_result cmp_type() const
{ return Type_handler_hybrid_field_type::cmp_type(); }
enum enum_field_types field_type() const
{ return Type_handler_hybrid_field_type::field_type(); }
void update_field();
Field *create_tmp_field(bool group, TABLE *table);
void clear()
{
value->clear();
null_value= 1;
}
private:
Item_cache *value;
};
/*
This item will remember the first value added to it. It will not update
the value unless it is cleared.
TODO(cvicentiu) Item_sum_hybrid is a pretty heavyweight class. It holds
logic that allows comparing values. It was generally thought out for MIN/MAX
functions, but we can use it here as well.
Refactor Item_sum_hybrid to only include basic field handling and
make a more specialized class for min/max. It might be useful if we'd like
to optimize how min/max is computed as a window function. We can potentially
implement a PQ within the specialized class to support removal.
*/
class Item_sum_first_value : public Item_sum_hybrid
class Item_sum_first_value : public Item_sum_hybrid_simple
{
public:
Item_sum_first_value(THD* thd, Item* arg_expr) :
Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */),
Item_sum_hybrid_simple(thd, arg_expr),
value_added(false) {}
bool add();
......@@ -303,7 +339,7 @@ class Item_sum_first_value : public Item_sum_hybrid
void clear()
{
value_added= false;
Item_sum_hybrid::clear();
Item_sum_hybrid_simple::clear();
}
enum Sumfunctype sum_func () const
......@@ -329,14 +365,13 @@ class Item_sum_first_value : public Item_sum_hybrid
This item does not support removal, and can be cleared only by calling
clear().
*/
class Item_sum_last_value : public Item_sum_hybrid
class Item_sum_last_value : public Item_sum_hybrid_simple
{
public:
Item_sum_last_value(THD* thd, Item* arg_expr) :
Item_sum_hybrid(thd, arg_expr, -1 /* This cmp parameter is not needed */) {}
Item_sum_hybrid_simple(thd, arg_expr) {}
bool add();
enum Sumfunctype sum_func () const
enum Sumfunctype sum_func() const
{
return LAST_VALUE_FUNC;
}
......@@ -350,6 +385,91 @@ class Item_sum_last_value : public Item_sum_hybrid
{ return get_item_copy<Item_sum_last_value>(thd, mem_root, this); }
};
class Item_sum_nth_value : public Item_sum_last_value
{
public:
Item_sum_nth_value(THD *thd, Item *arg_expr, Item* offset_expr) :
Item_sum_last_value(thd, arg_expr) {
/* TODO(cvicentiu) This is messy. Item_args starts with 2 args by chance.
Clean this up by pulling out the common code from Item_sum_hybrid! */
arg_count= 2;
args[1]= offset_expr;
}
bool fix_fields(THD *thd, Item **ref)
{
Item *offset= args[1];
if (offset->fix_fields(thd, args))
return true;
/* Fix fields for the second argument as well. */
orig_args[1]= offset;
/* Item_sum_last_value fixes fields for first argument only. */
if (Item_sum_last_value::fix_fields(thd, ref))
return true;
return false;
}
bool add()
{
Item_sum_last_value::add();
return false;
}
enum Sumfunctype sum_func() const
{
return NTH_VALUE_FUNC;
}
const char*func_name() const
{
return "nth_value";
}
Item *get_copy(THD *thd, MEM_ROOT *mem_root)
{ return get_item_copy<Item_sum_nth_value>(thd, mem_root, this); }
};
class Item_sum_lead : public Item_sum_hybrid_simple
{
public:
Item_sum_lead(THD *thd, Item *arg_expr, Item* offset_expr) :
Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {}
enum Sumfunctype sum_func() const
{
return LEAD_FUNC;
}
const char*func_name() const
{
return "lead";
}
Item *get_copy(THD *thd, MEM_ROOT *mem_root)
{ return get_item_copy<Item_sum_lead>(thd, mem_root, this); }
};
class Item_sum_lag : public Item_sum_hybrid_simple
{
public:
Item_sum_lag(THD *thd, Item *arg_expr, Item* offset_expr) :
Item_sum_hybrid_simple(thd, arg_expr, offset_expr) {}
enum Sumfunctype sum_func() const
{
return LAG_FUNC;
}
const char*func_name() const
{
return "lag";
}
Item *get_copy(THD *thd, MEM_ROOT *mem_root)
{ return get_item_copy<Item_sum_lag>(thd, mem_root, this); }
};
/*
A base window function (aggregate) that also holds a counter for the number
of rows.
......@@ -668,6 +788,8 @@ class Item_window_func : public Item_func_or_sum
switch (window_func()->sum_func()) {
case Item_sum::FIRST_VALUE_FUNC:
case Item_sum::LAST_VALUE_FUNC:
case Item_sum::LAG_FUNC:
case Item_sum::LEAD_FUNC:
return true;
default:
return false;
......
......@@ -697,10 +697,13 @@ static SYMBOL sql_functions[] = {
{ "EXTRACT", SYM(EXTRACT_SYM)},
{ "FIRST_VALUE", SYM(FIRST_VALUE_SYM)},
{ "GROUP_CONCAT", SYM(GROUP_CONCAT_SYM)},
{ "LAG", SYM(LAG_SYM)},
{ "LEAD", SYM(LEAD_SYM)},
{ "MAX", SYM(MAX_SYM)},
{ "MID", SYM(SUBSTRING)}, /* unireg function */
{ "MIN", SYM(MIN_SYM)},
{ "NOW", SYM(NOW_SYM)},
{ "NTH_VALUE", SYM(NTH_VALUE_SYM)},
{ "NTILE", SYM(NTILE_SYM)},
{ "POSITION", SYM(POSITION_SYM)},
{ "PERCENT_RANK", SYM(PERCENT_RANK_SYM)},
......
......@@ -26243,7 +26243,15 @@ AGGR_OP::end_send()
corresponding temp table fields. Do this for each row in the table.
*/
if (join_tab->window_funcs_step)
copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd);
{
Item **func_ptr= join_tab->tmp_table_param->items_to_copy;
Item *func;
for (; (func = *func_ptr) ; func_ptr++)
{
if (func->with_window_func)
func->save_in_result_field(true);
}
}
rc= evaluate_join_record(join, join_tab, 0);
}
}
......
......@@ -686,7 +686,17 @@ class Partition_read_cursor : public Table_read_cursor
if ((res= Table_read_cursor::next()) ||
(res= fetch()))
{
/* TODO(cvicentiu) This does not consider table read failures.
Perhaps assuming end of table like this is fine in that case. */
/* This row is the final row in the table. To maintain semantics
that cursors always point to the last valid row, move back one step,
but mark end_of_partition as true. */
Table_read_cursor::prev();
end_of_partition= true;
return res;
}
if (bound_tracker.compare_with_cache())
{
......@@ -1886,19 +1896,25 @@ class Frame_scan_cursor : public Frame_cursor
/* A cursor that follows a target cursor. Each time a new row is added,
the window functions are cleared and only have the row at which the target
is point at added to them.
The window functions are cleared if the bounds or the position cursors are
outside computational bounds.
*/
class Frame_positional_cursor : public Frame_cursor
{
public:
Frame_positional_cursor(const Frame_cursor &position_cursor) :
position_cursor(position_cursor), bound(NULL), offset(NULL),
position_cursor(position_cursor), top_bound(NULL),
bottom_bound(NULL), offset(NULL), overflowed(false),
negative_offset(false) {}
Frame_positional_cursor(const Frame_cursor &position_cursor,
const Frame_cursor &bound,
const Frame_cursor &top_bound,
const Frame_cursor &bottom_bound,
Item &offset,
bool negative_offset) :
position_cursor(position_cursor), bound(&bound), offset(&offset),
position_cursor(position_cursor), top_bound(&top_bound),
bottom_bound(&bottom_bound), offset(&offset),
negative_offset(negative_offset) {}
void init(READ_RECORD *info)
......@@ -1908,35 +1924,26 @@ class Frame_positional_cursor : public Frame_cursor
void pre_next_partition(ha_rows rownum)
{
clear_sum_functions();
/* The offset is dependant on the current row values. We can only get
* it here accurately. When fetching other rows, it changes. */
save_offset_value();
}
void next_partition(ha_rows rownum)
{
ha_rows position= get_current_position();
if (position_is_within_bounds(position))
{
cursor.move_to(position);
cursor.fetch();
add_value_to_items();
}
save_positional_value();
}
void pre_next_row()
{
/* The offset is dependant on the current row values. We can only get
* it here accurately. When fetching other rows, it changes. */
save_offset_value();
}
void next_row()
{
ha_rows position= get_current_position();
if (!position_is_within_bounds(position))
clear_sum_functions();
else
{
cursor.move_to(position_cursor.get_curr_rownum());
cursor.fetch();
add_value_to_items();
}
save_positional_value();
}
ha_rows get_curr_rownum() const
......@@ -1947,30 +1954,25 @@ class Frame_positional_cursor : public Frame_cursor
private:
/* Check if a our position is within bounds.
* The position is passed as a parameter to avoid recalculating it. */
bool position_is_within_bounds(ha_rows position)
bool position_is_within_bounds()
{
if (!offset)
return !position_cursor.is_outside_computation_bounds();
if (overflowed)
return false;
/* No valid bound to compare to. */
if (position_cursor.is_outside_computation_bounds() ||
bound->is_outside_computation_bounds())
top_bound->is_outside_computation_bounds() ||
bottom_bound->is_outside_computation_bounds())
return false;
if (negative_offset)
{
if (position_cursor.get_curr_rownum() < position)
return false; /* Overflow below 0. */
if (position < bound->get_curr_rownum()) /* We are over the bound. */
/* We are over the bound. */
if (position < top_bound->get_curr_rownum())
return false;
}
else
{
if (position_cursor.get_curr_rownum() > position)
return false; /* Overflow over MAX_HA_ROWS. */
if (position > bound->get_curr_rownum()) /* We are over the bound. */
if (position > bottom_bound->get_curr_rownum())
return false;
}
return true;
}
......@@ -1978,18 +1980,55 @@ class Frame_positional_cursor : public Frame_cursor
/* Get the current position, accounting for the offset value, if present.
NOTE: This function does not check over/underflow.
*/
ha_rows get_current_position()
void get_current_position()
{
ha_rows position = position_cursor.get_curr_rownum();
position = position_cursor.get_curr_rownum();
overflowed= false;
if (offset)
position += offset->val_int() * (negative_offset ? -1 : 1);
return position;
{
if (offset_value < 0 &&
position + offset_value > position)
{
overflowed= true;
}
if (offset_value > 0 &&
position + offset_value < position)
{
overflowed= true;
}
position += offset_value;
}
}
void save_offset_value()
{
if (offset)
offset_value= offset->val_int() * (negative_offset ? -1 : 1);
else
offset_value= 0;
}
void save_positional_value()
{
get_current_position();
if (!position_is_within_bounds())
clear_sum_functions();
else
{
cursor.move_to(position);
cursor.fetch();
add_value_to_items();
}
}
const Frame_cursor &position_cursor;
const Frame_cursor *bound;
const Frame_cursor *top_bound;
const Frame_cursor *bottom_bound;
Item *offset;
Table_read_cursor cursor;
ha_rows position;
longlong offset_value;
bool overflowed;
bool negative_offset;
};
......@@ -2107,6 +2146,7 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager,
{
Window_spec *spec= window_func->window_spec;
Item_sum *item_sum= window_func->window_func();
DBUG_PRINT("info", ("Get arg count: %d", item_sum->get_arg_count()));
Frame_cursor *fc;
switch (item_sum->sum_func())
{
......@@ -2135,6 +2175,44 @@ void add_special_frame_cursors(THD *thd, Cursor_manager *cursor_manager,
fc->add_sum_func(item_sum);
cursor_manager->add_cursor(fc);
break;
case Item_sum::LEAD_FUNC:
case Item_sum::LAG_FUNC:
{
Frame_cursor *bottom_bound= new Frame_unbounded_following(thd,
spec->partition_list,
spec->order_list);
Frame_cursor *top_bound= new Frame_unbounded_preceding(thd,
spec->partition_list,
spec->order_list);
Frame_cursor *current_row_pos= new Frame_rows_current_row_bottom;
cursor_manager->add_cursor(bottom_bound);
cursor_manager->add_cursor(top_bound);
cursor_manager->add_cursor(current_row_pos);
DBUG_ASSERT(item_sum->fixed);
bool negative_offset= item_sum->sum_func() == Item_sum::LAG_FUNC;
fc= new Frame_positional_cursor(*current_row_pos,
*top_bound, *bottom_bound,
*item_sum->get_arg(1),
negative_offset);
fc->add_sum_func(item_sum);
cursor_manager->add_cursor(fc);
break;
}
case Item_sum::NTH_VALUE_FUNC:
{
Frame_cursor *bottom_bound= get_frame_cursor(thd, spec, false);
Frame_cursor *top_bound= get_frame_cursor(thd, spec, true);
cursor_manager->add_cursor(bottom_bound);
cursor_manager->add_cursor(top_bound);
DBUG_ASSERT(item_sum->fixed);
fc= new Frame_positional_cursor(*top_bound,
*top_bound, *bottom_bound,
*item_sum->get_arg(1),
false);
fc->add_sum_func(item_sum);
cursor_manager->add_cursor(fc);
break;
}
default:
fc= new Frame_unbounded_preceding(
thd, spec->partition_list, spec->order_list);
......
......@@ -1245,7 +1245,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token FAULTS_SYM
%token FETCH_SYM /* SQL-2003-R */
%token FILE_SYM
%token FIRST_VALUE_SYM /* SQL-2012 */
%token FIRST_VALUE_SYM /* SQL-2011 */
%token FIRST_SYM /* SQL-2003-N */
%token FIXED_SYM
%token FLOAT_NUM
......@@ -1273,6 +1273,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token GRANTS
%token GROUP_SYM /* SQL-2003-R */
%token GROUP_CONCAT_SYM
%token LAG_SYM /* SQL-2011 */
%token LEAD_SYM /* SQL-2011 */
%token HANDLER_SYM
%token HARD_SYM
%token HASH_SYM
......@@ -1431,6 +1433,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b, ulong *yystacksize);
%token NUM
%token NUMBER_SYM /* SQL-2003-N */
%token NUMERIC_SYM /* SQL-2003-R */
%token NTH_VALUE_SYM /* SQL-2011 */
%token NVARCHAR_SYM
%token OFFSET_SYM
%token OLD_PASSWORD_SYM
......@@ -10518,6 +10521,49 @@ simple_window_func:
if ($$ == NULL)
MYSQL_YYABORT;
}
|
NTH_VALUE_SYM '(' expr ',' expr ')'
{
$$= new (thd->mem_root) Item_sum_nth_value(thd, $3, $5);
if ($$ == NULL)
MYSQL_YYABORT;
}
|
LEAD_SYM '(' expr ')'
{
/* No second argument defaults to 1. */
Item* item_offset= new (thd->mem_root) Item_uint(thd, 1);
if (item_offset == NULL)
MYSQL_YYABORT;
$$= new (thd->mem_root) Item_sum_lead(thd, $3, item_offset);
if ($$ == NULL)
MYSQL_YYABORT;
}
|
LEAD_SYM '(' expr ',' expr ')'
{
$$= new (thd->mem_root) Item_sum_lead(thd, $3, $5);
if ($$ == NULL)
MYSQL_YYABORT;
}
|
LAG_SYM '(' expr ')'
{
/* No second argument defaults to 1. */
Item* item_offset= new (thd->mem_root) Item_uint(thd, 1);
if (item_offset == NULL)
MYSQL_YYABORT;
$$= new (thd->mem_root) Item_sum_lag(thd, $3, item_offset);
if ($$ == NULL)
MYSQL_YYABORT;
}
|
LAG_SYM '(' expr ',' expr ')'
{
$$= new (thd->mem_root) Item_sum_lag(thd, $3, $5);
if ($$ == NULL)
MYSQL_YYABORT;
}
;
window_name:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment