Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
de35787a
Commit
de35787a
authored
Mar 28, 2016
by
Vicențiu Ciorbaru
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'cume_dist' into bb-10.2-mdev9543
parents
4fe6fbbb
3544fe01
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
276 additions
and
180 deletions
+276
-180
mysql-test/r/win_percent_cume.result
mysql-test/r/win_percent_cume.result
+62
-0
mysql-test/t/win_percent_cume.test
mysql-test/t/win_percent_cume.test
+36
-0
sql/item_windowfunc.h
sql/item_windowfunc.h
+79
-49
sql/sql_window.cc
sql/sql_window.cc
+99
-131
No files found.
mysql-test/r/win_percent_cume.result
0 → 100644
View file @
de35787a
create table t1 (
pk int primary key,
a int,
b int
);
insert into t1 values
( 1 , 0, 10),
( 2 , 0, 10),
( 3 , 1, 10),
( 4 , 1, 10),
( 8 , 2, 10),
( 5 , 2, 20),
( 6 , 2, 20),
( 7 , 2, 20),
( 9 , 4, 20),
(10 , 4, 20);
select pk, a, b,
percent_rank() over (order by a),
cume_dist() over (order by a)
from t1;
pk a b percent_rank() over (order by a) cume_dist() over (order by a)
1 0 10 0.0000000000 0.2000000000
2 0 10 0.0000000000 0.2000000000
3 1 10 0.2222222222 0.4000000000
4 1 10 0.2222222222 0.4000000000
8 2 10 0.4444444444 0.8000000000
5 2 20 0.4444444444 0.8000000000
6 2 20 0.4444444444 0.8000000000
7 2 20 0.4444444444 0.8000000000
9 4 20 0.8888888889 1.0000000000
10 4 20 0.8888888889 1.0000000000
select pk, a, b,
percent_rank() over (order by pk),
cume_dist() over (order by pk)
from t1 order by pk;
pk a b percent_rank() over (order by pk) cume_dist() over (order by pk)
1 0 10 0.0000000000 0.1000000000
2 0 10 0.1111111111 0.2000000000
3 1 10 0.2222222222 0.3000000000
4 1 10 0.3333333333 0.4000000000
5 2 20 0.4444444444 0.5000000000
6 2 20 0.5555555556 0.6000000000
7 2 20 0.6666666667 0.7000000000
8 2 10 0.7777777778 0.8000000000
9 4 20 0.8888888889 0.9000000000
10 4 20 1.0000000000 1.0000000000
select pk, a, b,
percent_rank() over (partition by a order by a),
cume_dist() over (partition by a order by a)
from t1;
pk a b percent_rank() over (partition by a order by a) cume_dist() over (partition by a order by a)
1 0 10 0.0000000000 1.0000000000
2 0 10 0.0000000000 1.0000000000
3 1 10 0.0000000000 1.0000000000
4 1 10 0.0000000000 1.0000000000
8 2 10 0.0000000000 1.0000000000
5 2 20 0.0000000000 1.0000000000
6 2 20 0.0000000000 1.0000000000
7 2 20 0.0000000000 1.0000000000
9 4 20 0.0000000000 1.0000000000
10 4 20 0.0000000000 1.0000000000
drop table t1;
mysql-test/t/win_percent_cume.test
0 → 100644
View file @
de35787a
create
table
t1
(
pk
int
primary
key
,
a
int
,
b
int
);
insert
into
t1
values
(
1
,
0
,
10
),
(
2
,
0
,
10
),
(
3
,
1
,
10
),
(
4
,
1
,
10
),
(
8
,
2
,
10
),
(
5
,
2
,
20
),
(
6
,
2
,
20
),
(
7
,
2
,
20
),
(
9
,
4
,
20
),
(
10
,
4
,
20
);
select
pk
,
a
,
b
,
percent_rank
()
over
(
order
by
a
),
cume_dist
()
over
(
order
by
a
)
from
t1
;
select
pk
,
a
,
b
,
percent_rank
()
over
(
order
by
pk
),
cume_dist
()
over
(
order
by
pk
)
from
t1
order
by
pk
;
select
pk
,
a
,
b
,
percent_rank
()
over
(
partition
by
a
order
by
a
),
cume_dist
()
over
(
partition
by
a
order
by
a
)
from
t1
;
drop
table
t1
;
sql/item_windowfunc.h
View file @
de35787a
...
...
@@ -317,12 +317,18 @@ class Item_context
NOTE: All two pass window functions need to implement
this interface.
*/
class
Item_sum_window_with_context
:
public
Item_sum_num
,
public
Item_context
class
Item_sum_window_with_row_count
:
public
Item_sum_num
{
public:
Item_sum_window_with_context
(
THD
*
thd
)
:
Item_sum_num
(
thd
),
Item_context
()
{}
Item_sum_window_with_row_count
(
THD
*
thd
)
:
Item_sum_num
(
thd
),
partition_row_count_
(
0
){}
void
set_row_count
(
ulonglong
count
)
{
partition_row_count_
=
count
;
}
protected:
longlong
get_row_count
()
{
return
partition_row_count_
;
}
private:
ulonglong
partition_row_count_
;
};
/*
...
...
@@ -336,12 +342,11 @@ class Item_sum_window_with_context : public Item_sum_num,
This is held within the row_count context.
- Second pass to compute rank of current row and the value of the function
*/
class
Item_sum_percent_rank
:
public
Item_sum_window_with_context
,
public
Window_context_row_count
class
Item_sum_percent_rank
:
public
Item_sum_window_with_row_count
{
public:
Item_sum_percent_rank
(
THD
*
thd
)
:
Item_sum_window_with_
contex
t
(
thd
),
cur_rank
(
1
)
{}
:
Item_sum_window_with_
row_coun
t
(
thd
),
cur_rank
(
1
)
{}
longlong
val_int
()
{
...
...
@@ -359,14 +364,9 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
We can not get the real value without knowing the number of rows
in the partition. Don't divide by 0.
*/
if
(
!
get_context_
())
{
// Calling this kind of function with a context makes no sense.
DBUG_ASSERT
(
0
);
return
0
;
}
ulonglong
partition_rows
=
get_row_count
();
null_value
=
partition_rows
>
0
?
false
:
true
;
longlong
partition_rows
=
get_context_
()
->
get_field_context
(
result_field
);
return
partition_rows
>
1
?
static_cast
<
double
>
(
cur_rank
-
1
)
/
(
partition_rows
-
1
)
:
0
;
}
...
...
@@ -381,25 +381,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
return
"percent_rank"
;
}
bool
create_window_context
()
{
// TODO-cvicentiu: Currently this means we must make sure to delete
// the window context. We can potentially allocate this on the THD memroot.
// At the same time, this is only necessary for a small portion of the
// query execution and it does not make sense to keep it for all of it.
context_
=
new
Window_context_row_count
();
if
(
context_
==
NULL
)
return
true
;
return
false
;
}
void
delete_window_context
()
{
if
(
context_
)
delete
get_context_
();
context_
=
NULL
;
}
void
update_field
()
{}
void
clear
()
...
...
@@ -428,13 +409,6 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
void
cleanup
()
{
peer_tracker
.
cleanup
();
Item_sum_window_with_context
::
cleanup
();
}
/* Helper function so that we don't cast the context every time. */
Window_context_row_count
*
get_context_
()
{
return
static_cast
<
Window_context_row_count
*>
(
context_
);
}
};
...
...
@@ -448,27 +422,62 @@ class Item_sum_percent_rank: public Item_sum_window_with_context,
window ordering of the window partition of R
- NR is defined to be the number of rows in the window partition of R.
Just like with Item_sum_percent_rank, compuation of this function requires
Just like with Item_sum_percent_rank, compu
t
ation of this function requires
two passes.
*/
class
Item_sum_cume_dist
:
public
Item_sum_
percent_rank
class
Item_sum_cume_dist
:
public
Item_sum_
window_with_row_count
{
public:
Item_sum_cume_dist
(
THD
*
thd
)
:
Item_sum_percent_rank
(
thd
)
{}
Item_sum_cume_dist
(
THD
*
thd
)
:
Item_sum_window_with_row_count
(
thd
),
current_row_count_
(
0
)
{}
double
val_real
()
{
if
(
get_row_count
()
==
0
)
{
null_value
=
true
;
return
0
;
}
ulonglong
partition_row_count
=
get_row_count
();
null_value
=
false
;
return
static_cast
<
double
>
(
current_row_count_
)
/
partition_row_count
;
}
double
val_real
()
{
return
0
;
}
bool
add
()
{
current_row_count_
++
;
return
false
;
}
enum
Sumfunctype
sum_func
()
const
{
return
CUME_DIST_FUNC
;
}
void
clear
()
{
current_row_count_
=
0
;
set_row_count
(
0
);
}
const
char
*
func_name
()
const
{
return
"cume_dist"
;
}
void
update_field
()
{}
enum
Item_result
result_type
()
const
{
return
REAL_RESULT
;
}
enum_field_types
field_type
()
const
{
return
MYSQL_TYPE_DOUBLE
;
}
void
fix_length_and_dec
()
{
decimals
=
10
;
// TODO-cvicentiu find out how many decimals the standard
// requires.
}
private:
ulonglong
current_row_count_
;
};
...
...
@@ -499,11 +508,11 @@ class Item_window_func : public Item_func_or_sum
force_return_blank
(
true
),
read_value_from_result_field
(
false
)
{}
Item_sum
*
window_func
()
{
return
(
Item_sum
*
)
args
[
0
];
}
Item_sum
*
window_func
()
const
{
return
(
Item_sum
*
)
args
[
0
];
}
void
update_used_tables
();
bool
is_frame_prohibited
()
bool
is_frame_prohibited
()
const
{
switch
(
window_func
()
->
sum_func
())
{
case
Item_sum
:
:
ROW_NUMBER_FUNC
:
...
...
@@ -517,7 +526,28 @@ class Item_window_func : public Item_func_or_sum
}
}
bool
is_order_list_mandatory
()
bool
requires_partition_size
()
const
{
switch
(
window_func
()
->
sum_func
())
{
case
Item_sum
:
:
PERCENT_RANK_FUNC
:
case
Item_sum
:
:
CUME_DIST_FUNC
:
return
true
;
default:
return
false
;
}
}
bool
requires_peer_size
()
const
{
switch
(
window_func
()
->
sum_func
())
{
case
Item_sum
:
:
CUME_DIST_FUNC
:
return
true
;
default:
return
false
;
}
}
bool
is_order_list_mandatory
()
const
{
switch
(
window_func
()
->
sum_func
())
{
case
Item_sum
:
:
RANK_FUNC
:
...
...
sql/sql_window.cc
View file @
de35787a
...
...
@@ -865,15 +865,18 @@ class Frame_unbounded_preceding : public Frame_cursor
}
};
/*
UNBOUNDED FOLLOWING frame bound
*/
class
Frame_unbounded_following
:
public
Frame_cursor
{
Table_read_cursor
cursor
;
protected:
Table_read_cursor
cursor
;
Group_bound_tracker
bound_tracker
;
public:
void
init
(
THD
*
thd
,
READ_RECORD
*
info
,
SQL_I_List
<
ORDER
>
*
partition_list
,
SQL_I_List
<
ORDER
>
*
order_list
)
...
...
@@ -910,6 +913,35 @@ class Frame_unbounded_following : public Frame_cursor
};
class
Frame_unbounded_following_set_count
:
public
Frame_unbounded_following
{
void
next_partition
(
longlong
rownum
,
Item_sum
*
item
)
{
ulonglong
num_rows_in_partition
=
0
;
if
(
!
rownum
)
{
/* Read the first row */
if
(
cursor
.
get_next
())
return
;
}
num_rows_in_partition
++
;
/* Remember which partition we are in */
bound_tracker
.
check_if_next_group
();
/* Walk to the end of the partition, find how many rows there are. */
while
(
!
cursor
.
get_next
())
{
if
(
bound_tracker
.
check_if_next_group
())
break
;
num_rows_in_partition
++
;
}
Item_sum_window_with_row_count
*
item_with_row_count
=
static_cast
<
Item_sum_window_with_row_count
*>
(
item
);
item_with_row_count
->
set_row_count
(
num_rows_in_partition
);
}
};
/////////////////////////////////////////////////////////////////////////////
// ROWS-type frame bounds
/////////////////////////////////////////////////////////////////////////////
...
...
@@ -1212,6 +1244,47 @@ Frame_cursor *get_frame_cursor(Window_frame *frame, bool is_top_bound)
return
NULL
;
}
void
add_extra_frame_cursors
(
List
<
Frame_cursor
>
*
cursors
,
const
Item_sum
*
window_func
)
{
switch
(
window_func
->
sum_func
())
{
case
Item_sum
:
:
CUME_DIST_FUNC
:
cursors
->
push_back
(
new
Frame_unbounded_preceding
);
cursors
->
push_back
(
new
Frame_range_current_row_bottom
);
break
;
default:
cursors
->
push_back
(
new
Frame_unbounded_preceding
);
cursors
->
push_back
(
new
Frame_rows_current_row_bottom
);
}
}
List
<
Frame_cursor
>
get_window_func_required_cursors
(
const
Item_window_func
*
item_win
)
{
List
<
Frame_cursor
>
result
;
if
(
item_win
->
requires_partition_size
())
result
.
push_back
(
new
Frame_unbounded_following_set_count
);
/*
If it is not a regular window function that follows frame specifications,
specific cursors are required.
*/
if
(
item_win
->
is_frame_prohibited
())
{
add_extra_frame_cursors
(
&
result
,
item_win
->
window_func
());
return
result
;
}
/* A regular window function follows the frame specification. */
result
.
push_back
(
get_frame_cursor
(
item_win
->
window_spec
->
window_frame
,
false
));
result
.
push_back
(
get_frame_cursor
(
item_win
->
window_spec
->
window_frame
,
true
));
return
result
;
}
/*
Streamed window function computation with window frames.
...
...
@@ -1254,21 +1327,20 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
{
THD
*
thd
=
current_thd
;
int
err
=
0
;
Frame_cursor
*
top_bound
;
Frame_cursor
*
bottom_bound
;
Item_sum
*
sum_func
=
item_win
->
window_func
();
/* This algorithm doesn't support DISTINCT aggregator */
sum_func
->
set_aggregator
(
Aggregator
::
SIMPLE_AGGREGATOR
);
Window_frame
*
window_frame
=
item_win
->
window_spec
->
window_frame
;
top_bound
=
get_frame_cursor
(
window_frame
,
true
);
bottom_bound
=
get_frame_cursor
(
window_frame
,
false
);
List
<
Frame_cursor
>
cursors
=
get_window_func_required_cursors
(
item_win
);
top_bound
->
init
(
thd
,
info
,
item_win
->
window_spec
->
partition_list
,
item_win
->
window_spec
->
order_list
);
bottom_bound
->
init
(
thd
,
info
,
item_win
->
window_spec
->
partition_list
,
List_iterator_fast
<
Frame_cursor
>
it
(
cursors
);
Frame_cursor
*
c
;
while
((
c
=
it
++
))
{
c
->
init
(
thd
,
info
,
item_win
->
window_spec
->
partition_list
,
item_win
->
window_spec
->
order_list
);
}
bool
is_error
=
false
;
longlong
rownum
=
0
;
...
...
@@ -1293,24 +1365,28 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
pre_XXX functions assume that tbl->record[0] contains current_row, and
they may not change it.
*/
bottom_bound
->
pre_next_partition
(
rownum
,
sum_func
);
top_bound
->
pre_next_partition
(
rownum
,
sum_func
);
it
.
rewind
();
while
((
c
=
it
++
))
c
->
pre_next_partition
(
rownum
,
sum_func
);
/*
We move bottom_bound first, because we want rows to be added into the
aggregate before top_bound attempts to remove them.
*/
bottom_bound
->
next_partition
(
rownum
,
sum_func
);
top_bound
->
next_partition
(
rownum
,
sum_func
);
it
.
rewind
();
while
((
c
=
it
++
))
c
->
next_partition
(
rownum
,
sum_func
);
}
else
{
/* Again, both pre_XXX function can find current_row in tbl->record[0] */
bottom_bound
->
pre_next_row
(
sum_func
);
top_bound
->
pre_next_row
(
sum_func
);
it
.
rewind
();
while
((
c
=
it
++
))
c
->
pre_next_row
(
sum_func
);
/* These make no assumptions about tbl->record[0] and may change it */
bottom_bound
->
next_row
(
sum_func
);
top_bound
->
next_row
(
sum_func
);
it
.
rewind
();
while
((
c
=
it
++
))
c
->
next_row
(
sum_func
);
}
rownum
++
;
...
...
@@ -1331,115 +1407,11 @@ bool compute_window_func_with_frames(Item_window_func *item_win,
}
my_free
(
rowid_buf
);
delete
top_bound
;
delete
bottom_bound
;
cursors
.
delete_elements
();
return
is_error
?
true
:
false
;
}
bool
compute_two_pass_window_functions
(
Item_window_func
*
item_win
,
TABLE
*
table
,
READ_RECORD
*
info
)
{
/* Perform first pass. */
// TODO-cvicentiu why not initialize the record for when we need, _in_
// this function.
READ_RECORD
*
info2
=
new
READ_RECORD
();
int
err
;
bool
is_error
=
false
;
bool
first_row
=
true
;
clone_read_record
(
info
,
info2
);
Item_sum_window_with_context
*
window_func
=
static_cast
<
Item_sum_window_with_context
*>
(
item_win
->
window_func
());
uchar
*
rowid_buf
=
(
uchar
*
)
my_malloc
(
table
->
file
->
ref_length
,
MYF
(
0
));
is_error
=
window_func
->
create_window_context
();
/* Unable to allocate a new context. */
if
(
is_error
)
return
true
;
Window_context
*
context
=
window_func
->
get_window_context
();
/*
The two pass algorithm is as follows:
We have a sorted table according to the partition and order by clauses.
1. Scan through the table till we reach a partition boundary.
2. For each row that we scan, add it to the context.
3. Once the partition boundary is met, do a second scan through the
current partition and use the context information to compute the value for
the window function for that partition.
4. Reset the context.
5. Repeat from 1 till end of table.
*/
bool
done
=
false
;
longlong
rows_in_current_partition
=
0
;
// TODO handle end of table updating.
while
(
!
done
)
{
if
((
err
=
info
->
read_record
(
info
)))
{
done
=
true
;
}
bool
partition_changed
=
done
||
item_win
->
check_if_partition_changed
();
// The first time we always have a partition changed. Ignore it.
if
(
first_row
)
{
partition_changed
=
false
;
first_row
=
false
;
}
if
(
partition_changed
)
{
/*
We are now looking at the first row for the next partition, or at the
end of the table. Either way, we must remember this position for when
we finish doing the second pass.
*/
table
->
file
->
position
(
table
->
record
[
0
]);
memcpy
(
rowid_buf
,
table
->
file
->
ref
,
table
->
file
->
ref_length
);
for
(
longlong
row_number
=
0
;
row_number
<
rows_in_current_partition
;
row_number
++
)
{
if
((
err
=
info2
->
read_record
(
info2
)))
{
is_error
=
true
;
break
;
}
window_func
->
add
();
// Save the window function into the table.
item_win
->
save_in_field
(
item_win
->
result_field
,
true
);
err
=
table
->
file
->
ha_update_row
(
table
->
record
[
1
],
table
->
record
[
0
]);
if
(
err
&&
err
!=
HA_ERR_RECORD_IS_THE_SAME
)
{
is_error
=
true
;
break
;
}
}
if
(
is_error
)
break
;
rows_in_current_partition
=
0
;
window_func
->
clear
();
context
->
reset
();
// Return to the beginning of the new partition.
table
->
file
->
ha_rnd_pos
(
table
->
record
[
0
],
rowid_buf
);
}
rows_in_current_partition
++
;
context
->
add_field_to_context
(
item_win
->
result_field
);
}
window_func
->
delete_window_context
();
delete
info2
;
my_free
(
rowid_buf
);
return
is_error
;
}
/* Make a list that is a concation of two lists of ORDER elements */
static
ORDER
*
concat_order_lists
(
MEM_ROOT
*
mem_root
,
ORDER
*
list1
,
ORDER
*
list2
)
...
...
@@ -1502,16 +1474,12 @@ bool Window_func_runner::setup(THD *thd)
compute_func
=
compute_window_func_values
;
break
;
}
case
Item_sum
:
:
PERCENT_RANK_FUNC
:
case
Item_sum
:
:
CUME_DIST_FUNC
:
{
compute_func
=
compute_two_pass_window_functions
;
break
;
}
case
Item_sum
:
:
COUNT_FUNC
:
case
Item_sum
:
:
SUM_BIT_FUNC
:
case
Item_sum
:
:
SUM_FUNC
:
case
Item_sum
:
:
AVG_FUNC
:
case
Item_sum
:
:
PERCENT_RANK_FUNC
:
case
Item_sum
:
:
CUME_DIST_FUNC
:
{
/*
Frame-aware window function computation. It does one pass, but
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment