Commit de35787a authored by Vicențiu Ciorbaru's avatar Vicențiu Ciorbaru

Merge branch 'cume_dist' into bb-10.2-mdev9543

parents 4fe6fbbb 3544fe01
create table t1 (
pk int primary key,
a int,
b int
);
insert into t1 values
( 1 , 0, 10),
( 2 , 0, 10),
( 3 , 1, 10),
( 4 , 1, 10),
( 8 , 2, 10),
( 5 , 2, 20),
( 6 , 2, 20),
( 7 , 2, 20),
( 9 , 4, 20),
(10 , 4, 20);
select pk, a, b,
percent_rank() over (order by a),
cume_dist() over (order by a)
from t1;
pk a b percent_rank() over (order by a) cume_dist() over (order by a)
1 0 10 0.0000000000 0.2000000000
2 0 10 0.0000000000 0.2000000000
3 1 10 0.2222222222 0.4000000000
4 1 10 0.2222222222 0.4000000000
8 2 10 0.4444444444 0.8000000000
5 2 20 0.4444444444 0.8000000000
6 2 20 0.4444444444 0.8000000000
7 2 20 0.4444444444 0.8000000000
9 4 20 0.8888888889 1.0000000000
10 4 20 0.8888888889 1.0000000000
select pk, a, b,
percent_rank() over (order by pk),
cume_dist() over (order by pk)
from t1 order by pk;
pk a b percent_rank() over (order by pk) cume_dist() over (order by pk)
1 0 10 0.0000000000 0.1000000000
2 0 10 0.1111111111 0.2000000000
3 1 10 0.2222222222 0.3000000000
4 1 10 0.3333333333 0.4000000000
5 2 20 0.4444444444 0.5000000000
6 2 20 0.5555555556 0.6000000000
7 2 20 0.6666666667 0.7000000000
8 2 10 0.7777777778 0.8000000000
9 4 20 0.8888888889 0.9000000000
10 4 20 1.0000000000 1.0000000000
select pk, a, b,
percent_rank() over (partition by a order by a),
cume_dist() over (partition by a order by a)
from t1;
pk a b percent_rank() over (partition by a order by a) cume_dist() over (partition by a order by a)
1 0 10 0.0000000000 1.0000000000
2 0 10 0.0000000000 1.0000000000
3 1 10 0.0000000000 1.0000000000
4 1 10 0.0000000000 1.0000000000
8 2 10 0.0000000000 1.0000000000
5 2 20 0.0000000000 1.0000000000
6 2 20 0.0000000000 1.0000000000
7 2 20 0.0000000000 1.0000000000
9 4 20 0.0000000000 1.0000000000
10 4 20 0.0000000000 1.0000000000
drop table t1;
create table t1 (
pk int primary key,
a int,
b int
);
insert into t1 values
( 1 , 0, 10),
( 2 , 0, 10),
( 3 , 1, 10),
( 4 , 1, 10),
( 8 , 2, 10),
( 5 , 2, 20),
( 6 , 2, 20),
( 7 , 2, 20),
( 9 , 4, 20),
(10 , 4, 20);
select pk, a, b,
percent_rank() over (order by a),
cume_dist() over (order by a)
from t1;
select pk, a, b,
percent_rank() over (order by pk),
cume_dist() over (order by pk)
from t1 order by pk;
select pk, a, b,
percent_rank() over (partition by a order by a),
cume_dist() over (partition by a order by a)
from t1;
drop table t1;
...@@ -317,12 +317,18 @@ class Item_context ...@@ -317,12 +317,18 @@ class Item_context
NOTE: All two pass window functions need to implement NOTE: All two pass window functions need to implement
this interface. this interface.
*/ */
class Item_sum_window_with_context : public Item_sum_num, class Item_sum_window_with_row_count : public Item_sum_num
public Item_context
{ {
public: public:
Item_sum_window_with_context(THD *thd) Item_sum_window_with_row_count(THD *thd) : Item_sum_num(thd),
: Item_sum_num(thd), Item_context() {} partition_row_count_(0){}
void set_row_count(ulonglong count) { partition_row_count_ = count; }
protected:
longlong get_row_count() { return partition_row_count_; }
private:
ulonglong partition_row_count_;
}; };
/* /*
...@@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num, ...@@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num,
This is held within the row_count context. This is held within the row_count context.
- Second pass to compute rank of current row and the value of the function - Second pass to compute rank of current row and the value of the function
*/ */
class Item_sum_percent_rank: public Item_sum_window_with_context, class Item_sum_percent_rank: public Item_sum_window_with_row_count
public Window_context_row_count
{ {
public: public:
Item_sum_percent_rank(THD *thd) Item_sum_percent_rank(THD *thd)
: Item_sum_window_with_context(thd), cur_rank(1) {} : Item_sum_window_with_row_count(thd), cur_rank(1) {}
longlong val_int() longlong val_int()
{ {
...@@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, ...@@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
We can not get the real value without knowing the number of rows We can not get the real value without knowing the number of rows
in the partition. Don't divide by 0. in the partition. Don't divide by 0.
*/ */
if (!get_context_()) ulonglong partition_rows = get_row_count();
{ null_value= partition_rows > 0 ? false : true;
// Calling this kind of function with a context makes no sense.
DBUG_ASSERT(0);
return 0;
}
longlong partition_rows = get_context_()->get_field_context(result_field);
return partition_rows > 1 ? return partition_rows > 1 ?
static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0; static_cast<double>(cur_rank - 1) / (partition_rows - 1) : 0;
} }
...@@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, ...@@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
return "percent_rank"; return "percent_rank";
} }
bool create_window_context()
{
// TODO-cvicentiu: Currently this means we must make sure to delete
// the window context. We can potentially allocate this on the THD memroot.
// At the same time, this is only necessary for a small portion of the
// query execution and it does not make sense to keep it for all of it.
context_ = new Window_context_row_count();
if (context_ == NULL)
return true;
return false;
}
void delete_window_context()
{
if (context_)
delete get_context_();
context_ = NULL;
}
void update_field() {} void update_field() {}
void clear() void clear()
...@@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, ...@@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
void cleanup() void cleanup()
{ {
peer_tracker.cleanup(); peer_tracker.cleanup();
Item_sum_window_with_context::cleanup();
}
/* Helper function so that we don't cast the context every time. */
Window_context_row_count* get_context_()
{
return static_cast<Window_context_row_count *>(context_);
} }
}; };
...@@ -448,27 +422,62 @@ class Item_sum_percent_rank: public Item_sum_window_with_context, ...@@ -448,27 +422,62 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
window ordering of the window partition of R window ordering of the window partition of R
- NR is defined to be the number of rows in the window partition of R. - NR is defined to be the number of rows in the window partition of R.
Just like with Item_sum_percent_rank, compuation of this function requires Just like with Item_sum_percent_rank, computation of this function requires
two passes. two passes.
*/ */
class Item_sum_cume_dist: public Item_sum_percent_rank class Item_sum_cume_dist: public Item_sum_window_with_row_count
{ {
public: public:
Item_sum_cume_dist(THD *thd) Item_sum_cume_dist(THD *thd) : Item_sum_window_with_row_count(thd),
: Item_sum_percent_rank(thd) {} current_row_count_(0) {}
double val_real()
{
if (get_row_count() == 0)
{
null_value= true;
return 0;
}
ulonglong partition_row_count= get_row_count();
null_value= false;
return static_cast<double>(current_row_count_) / partition_row_count;
}
double val_real() { return 0; } bool add()
{
current_row_count_++;
return false;
}
enum Sumfunctype sum_func () const enum Sumfunctype sum_func () const
{ {
return CUME_DIST_FUNC; return CUME_DIST_FUNC;
} }
void clear()
{
current_row_count_= 0;
set_row_count(0);
}
const char*func_name() const const char*func_name() const
{ {
return "cume_dist"; return "cume_dist";
} }
void update_field() {}
enum Item_result result_type () const { return REAL_RESULT; }
enum_field_types field_type() const { return MYSQL_TYPE_DOUBLE; }
void fix_length_and_dec()
{
decimals = 10; // TODO-cvicentiu find out how many decimals the standard
// requires.
}
private:
ulonglong current_row_count_;
}; };
...@@ -499,11 +508,11 @@ class Item_window_func : public Item_func_or_sum ...@@ -499,11 +508,11 @@ class Item_window_func : public Item_func_or_sum
force_return_blank(true), force_return_blank(true),
read_value_from_result_field(false) {} read_value_from_result_field(false) {}
Item_sum *window_func() { return (Item_sum *) args[0]; } Item_sum *window_func() const { return (Item_sum *) args[0]; }
void update_used_tables(); void update_used_tables();
bool is_frame_prohibited() bool is_frame_prohibited() const
{ {
switch (window_func()->sum_func()) { switch (window_func()->sum_func()) {
case Item_sum::ROW_NUMBER_FUNC: case Item_sum::ROW_NUMBER_FUNC:
...@@ -517,7 +526,28 @@ class Item_window_func : public Item_func_or_sum ...@@ -517,7 +526,28 @@ class Item_window_func : public Item_func_or_sum
} }
} }
bool is_order_list_mandatory() bool requires_partition_size() const
{
switch (window_func()->sum_func()) {
case Item_sum::PERCENT_RANK_FUNC:
case Item_sum::CUME_DIST_FUNC:
return true;
default:
return false;
}
}
bool requires_peer_size() const
{
switch (window_func()->sum_func()) {
case Item_sum::CUME_DIST_FUNC:
return true;
default:
return false;
}
}
bool is_order_list_mandatory() const
{ {
switch (window_func()->sum_func()) { switch (window_func()->sum_func()) {
case Item_sum::RANK_FUNC: case Item_sum::RANK_FUNC:
......
...@@ -865,15 +865,18 @@ class Frame_unbounded_preceding : public Frame_cursor ...@@ -865,15 +865,18 @@ class Frame_unbounded_preceding : public Frame_cursor
} }
}; };
/* /*
UNBOUNDED FOLLOWING frame bound UNBOUNDED FOLLOWING frame bound
*/ */
class Frame_unbounded_following : public Frame_cursor class Frame_unbounded_following : public Frame_cursor
{ {
Table_read_cursor cursor;
protected:
Table_read_cursor cursor;
Group_bound_tracker bound_tracker; Group_bound_tracker bound_tracker;
public: public:
void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list, void init(THD *thd, READ_RECORD *info, SQL_I_List<ORDER> *partition_list,
SQL_I_List<ORDER> *order_list) SQL_I_List<ORDER> *order_list)
...@@ -910,6 +913,35 @@ class Frame_unbounded_following : public Frame_cursor ...@@ -910,6 +913,35 @@ class Frame_unbounded_following : public Frame_cursor
}; };
class Frame_unbounded_following_set_count : public Frame_unbounded_following
{
void next_partition(longlong rownum, Item_sum* item)
{
ulonglong num_rows_in_partition= 0;
if (!rownum)
{
/* Read the first row */
if (cursor.get_next())
return;
}
num_rows_in_partition++;
/* Remember which partition we are in */
bound_tracker.check_if_next_group();
/* Walk to the end of the partition, find how many rows there are. */
while (!cursor.get_next())
{
if (bound_tracker.check_if_next_group())
break;
num_rows_in_partition++;
}
Item_sum_window_with_row_count* item_with_row_count =
static_cast<Item_sum_window_with_row_count *>(item);
item_with_row_count->set_row_count(num_rows_in_partition);
}
};
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// ROWS-type frame bounds // ROWS-type frame bounds
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
...@@ -1212,6 +1244,47 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound) ...@@ -1212,6 +1244,47 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
return NULL; return NULL;
} }
void add_extra_frame_cursors(List<Frame_cursor> *cursors,
const Item_sum *window_func)
{
switch (window_func->sum_func())
{
case Item_sum::CUME_DIST_FUNC:
cursors->push_back(new Frame_unbounded_preceding);
cursors->push_back(new Frame_range_current_row_bottom);
break;
default:
cursors->push_back(new Frame_unbounded_preceding);
cursors->push_back(new Frame_rows_current_row_bottom);
}
}
List<Frame_cursor> get_window_func_required_cursors(
const Item_window_func* item_win)
{
List<Frame_cursor> result;
if (item_win->requires_partition_size())
result.push_back(new Frame_unbounded_following_set_count);
/*
If it is not a regular window function that follows frame specifications,
specific cursors are required.
*/
if (item_win->is_frame_prohibited())
{
add_extra_frame_cursors(&result, item_win->window_func());
return result;
}
/* A regular window function follows the frame specification. */
result.push_back(get_frame_cursor(item_win->window_spec->window_frame,
false));
result.push_back(get_frame_cursor(item_win->window_spec->window_frame,
true));
return result;
}
/* /*
Streamed window function computation with window frames. Streamed window function computation with window frames.
...@@ -1254,21 +1327,20 @@ bool compute_window_func_with_frames(Item_window_func *item_win, ...@@ -1254,21 +1327,20 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
{ {
THD *thd= current_thd; THD *thd= current_thd;
int err= 0; int err= 0;
Frame_cursor *top_bound;
Frame_cursor *bottom_bound;
Item_sum *sum_func= item_win->window_func(); Item_sum *sum_func= item_win->window_func();
/* This algorithm doesn't support DISTINCT aggregator */ /* This algorithm doesn't support DISTINCT aggregator */
sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR); sum_func->set_aggregator(Aggregator::SIMPLE_AGGREGATOR);
Window_frame *window_frame= item_win->window_spec->window_frame; List<Frame_cursor> cursors= get_window_func_required_cursors(item_win);
top_bound= get_frame_cursor(window_frame, true);
bottom_bound= get_frame_cursor(window_frame, false);
top_bound->init(thd, info, item_win->window_spec->partition_list, List_iterator_fast<Frame_cursor> it(cursors);
item_win->window_spec->order_list); Frame_cursor *c;
bottom_bound->init(thd, info, item_win->window_spec->partition_list, while((c= it++))
{
c->init(thd, info, item_win->window_spec->partition_list,
item_win->window_spec->order_list); item_win->window_spec->order_list);
}
bool is_error= false; bool is_error= false;
longlong rownum= 0; longlong rownum= 0;
...@@ -1293,24 +1365,28 @@ bool compute_window_func_with_frames(Item_window_func *item_win, ...@@ -1293,24 +1365,28 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
pre_XXX functions assume that tbl->record[0] contains current_row, and pre_XXX functions assume that tbl->record[0] contains current_row, and
they may not change it. they may not change it.
*/ */
bottom_bound->pre_next_partition(rownum, sum_func); it.rewind();
top_bound->pre_next_partition(rownum, sum_func); while ((c= it++))
c->pre_next_partition(rownum, sum_func);
/* /*
We move bottom_bound first, because we want rows to be added into the We move bottom_bound first, because we want rows to be added into the
aggregate before top_bound attempts to remove them. aggregate before top_bound attempts to remove them.
*/ */
bottom_bound->next_partition(rownum, sum_func); it.rewind();
top_bound->next_partition(rownum, sum_func); while ((c= it++))
c->next_partition(rownum, sum_func);
} }
else else
{ {
/* Again, both pre_XXX function can find current_row in tbl->record[0] */ /* Again, both pre_XXX function can find current_row in tbl->record[0] */
bottom_bound->pre_next_row(sum_func); it.rewind();
top_bound->pre_next_row(sum_func); while ((c= it++))
c->pre_next_row(sum_func);
/* These make no assumptions about tbl->record[0] and may change it */ /* These make no assumptions about tbl->record[0] and may change it */
bottom_bound->next_row(sum_func); it.rewind();
top_bound->next_row(sum_func); while ((c= it++))
c->next_row(sum_func);
} }
rownum++; rownum++;
...@@ -1331,115 +1407,11 @@ bool compute_window_func_with_frames(Item_window_func *item_win, ...@@ -1331,115 +1407,11 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
} }
my_free(rowid_buf); my_free(rowid_buf);
delete top_bound; cursors.delete_elements();
delete bottom_bound;
return is_error? true: false; return is_error? true: false;
} }
bool compute_two_pass_window_functions(Item_window_func *item_win,
TABLE *table, READ_RECORD *info)
{
/* Perform first pass. */
// TODO-cvicentiu why not initialize the record for when we need, _in_
// this function.
READ_RECORD *info2= new READ_RECORD();
int err;
bool is_error = false;
bool first_row= true;
clone_read_record(info, info2);
Item_sum_window_with_context *window_func=
static_cast<Item_sum_window_with_context *>(item_win->window_func());
uchar *rowid_buf= (uchar*) my_malloc(table->file->ref_length, MYF(0));
is_error= window_func->create_window_context();
/* Unable to allocate a new context. */
if (is_error)
return true;
Window_context *context = window_func->get_window_context();
/*
The two pass algorithm is as follows:
We have a sorted table according to the partition and order by clauses.
1. Scan through the table till we reach a partition boundary.
2. For each row that we scan, add it to the context.
3. Once the partition boundary is met, do a second scan through the
current partition and use the context information to compute the value for
the window function for that partition.
4. Reset the context.
5. Repeat from 1 till end of table.
*/
bool done = false;
longlong rows_in_current_partition = 0;
// TODO handle end of table updating.
while (!done)
{
if ((err= info->read_record(info)))
{
done = true;
}
bool partition_changed= done || item_win->check_if_partition_changed();
// The first time we always have a partition changed. Ignore it.
if (first_row)
{
partition_changed= false;
first_row= false;
}
if (partition_changed)
{
/*
We are now looking at the first row for the next partition, or at the
end of the table. Either way, we must remember this position for when
we finish doing the second pass.
*/
table->file->position(table->record[0]);
memcpy(rowid_buf, table->file->ref, table->file->ref_length);
for (longlong row_number = 0; row_number < rows_in_current_partition;
row_number++)
{
if ((err= info2->read_record(info2)))
{
is_error= true;
break;
}
window_func->add();
// Save the window function into the table.
item_win->save_in_field(item_win->result_field, true);
err= table->file->ha_update_row(table->record[1], table->record[0]);
if (err && err != HA_ERR_RECORD_IS_THE_SAME)
{
is_error= true;
break;
}
}
if (is_error)
break;
rows_in_current_partition= 0;
window_func->clear();
context->reset();
// Return to the beginning of the new partition.
table->file->ha_rnd_pos(table->record[0], rowid_buf);
}
rows_in_current_partition++;
context->add_field_to_context(item_win->result_field);
}
window_func->delete_window_context();
delete info2;
my_free(rowid_buf);
return is_error;
}
/* Make a list that is a concation of two lists of ORDER elements */ /* Make a list that is a concation of two lists of ORDER elements */
static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2) static ORDER* concat_order_lists(MEM_ROOT *mem_root, ORDER *list1, ORDER *list2)
...@@ -1502,16 +1474,12 @@ bool Window_func_runner::setup(THD *thd) ...@@ -1502,16 +1474,12 @@ bool Window_func_runner::setup(THD *thd)
compute_func= compute_window_func_values; compute_func= compute_window_func_values;
break; break;
} }
case Item_sum::PERCENT_RANK_FUNC:
case Item_sum::CUME_DIST_FUNC:
{
compute_func= compute_two_pass_window_functions;
break;
}
case Item_sum::COUNT_FUNC: case Item_sum::COUNT_FUNC:
case Item_sum::SUM_BIT_FUNC: case Item_sum::SUM_BIT_FUNC:
case Item_sum::SUM_FUNC: case Item_sum::SUM_FUNC:
case Item_sum::AVG_FUNC: case Item_sum::AVG_FUNC:
case Item_sum::PERCENT_RANK_FUNC:
case Item_sum::CUME_DIST_FUNC:
{ {
/* /*
Frame-aware window function computation. It does one pass, but Frame-aware window function computation. It does one pass, but
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment