Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
7fdf6740
Commit
7fdf6740
authored
Dec 19, 2003
by
konstantin@oak.local
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implementation of SUM(DISTINCT), tests cases
parent
89b2ddcf
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
834 additions
and
35 deletions
+834
-35
mysql-test/r/sum_distinct.result
mysql-test/r/sum_distinct.result
+203
-0
mysql-test/t/sum_distinct.test
mysql-test/t/sum_distinct.test
+188
-0
sql/filesort.cc
sql/filesort.cc
+34
-22
sql/item_sum.cc
sql/item_sum.cc
+116
-5
sql/item_sum.h
sql/item_sum.h
+36
-3
sql/sql_class.h
sql/sql_class.h
+12
-4
sql/sql_sort.h
sql/sql_sort.h
+1
-0
sql/sql_yacc.yy
sql/sql_yacc.yy
+11
-0
sql/uniques.cc
sql/uniques.cc
+233
-1
No files found.
mysql-test/r/sum_distinct.result
0 → 100644
View file @
7fdf6740
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (
id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT,
gender CHAR(1),
name VARCHAR(20)
);
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
s1
NULL
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
INSERT INTO t1 (gender, name) VALUES (NULL, NULL);
SELECT SUM(DISTINCT LENGTH(name)) s1 FROM t1;
s1
NULL
INSERT INTO t1 (gender, name) VALUES ('F', 'Helen'), ('F', 'Anastasia'),
('F', 'Katherine'), ('F', 'Margo'), ('F', 'Magdalene'), ('F', 'Mary');
CREATE TABLE t2 SELECT name FROM t1;
SELECT (SELECT SUM(DISTINCT LENGTH(name)) FROM t1) FROM t2;
(SELECT SUM(DISTINCT LENGTH(name)) FROM t1)
18
18
18
18
18
18
18
18
18
DROP TABLE t2;
INSERT INTO t1 (gender, name) VALUES ('F', 'Eva'), ('F', 'Sofia'),
('F', 'Sara'), ('F', 'Golda'), ('F', 'Toba'), ('F', 'Victory'),
('F', 'Faina'), ('F', 'Miriam'), ('F', 'Beki'), ('F', 'America'),
('F', 'Susan'), ('F', 'Glory'), ('F', 'Priscilla'), ('F', 'Rosmary'),
('F', 'Rose'), ('F', 'Margareth'), ('F', 'Elizabeth'), ('F', 'Meredith'),
('F', 'Julie'), ('F', 'Xenia'), ('F', 'Zena'), ('F', 'Olga'),
('F', 'Brunhilda'), ('F', 'Nataly'), ('F', 'Lara'), ('F', 'Svetlana'),
('F', 'Grethem'), ('F', 'Irene');
SELECT
SUM(DISTINCT LENGTH(name)) s1,
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
FROM t1;
s1 s2 s3
42 0 7
SELECT
SUM(DISTINCT LENGTH(g1.name)) s1,
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
FROM t1 g1, t1 g2, t1 g3;
s1 s2 s3
42 0 7
SELECT
SUM(DISTINCT LENGTH(g1.name)) s1,
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
s1 s2 s3
42 0 NULL
42 0 7
42 0 4
42 0 4
42 0 4
42 0 4
42 0 4
SELECT SQL_BUFFER_RESULT
SUM(DISTINCT LENGTH(name)) s1,
SUM(DISTINCT SUBSTRING(NAME, 1, 3)) s2,
SUM(DISTINCT LENGTH(SUBSTRING(name, 1, 4))) s3
FROM t1;
s1 s2 s3
42 0 7
SELECT SQL_BUFFER_RESULT
SUM(DISTINCT LENGTH(g1.name)) s1,
SUM(DISTINCT SUBSTRING(g2.name, 1, 3)) s2,
SUM(DISTINCT LENGTH(SUBSTRING(g3.name, 1, 4))) s3
FROM t1 g1, t1 g2, t1 g3 GROUP BY LENGTH(SUBSTRING(g3.name, 5, 10));
s1 s2 s3
42 0 NULL
42 0 7
42 0 4
42 0 4
42 0 4
42 0 4
42 0 4
SET @l=1;
UPDATE t1 SET name=CONCAT(name, @l:=@l+1);
SELECT SUM(DISTINCT RIGHT(name, 1)) FROM t1;
SUM(DISTINCT RIGHT(name, 1))
45
SELECT SUM(DISTINCT id) FROM t1;
SUM(DISTINCT id)
703
SELECT SUM(DISTINCT id % 11) FROM t1;
SUM(DISTINCT id % 11)
55
DROP TABLE t1;
CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2;
/* 8 */
INSERT INTO t1 (id) SELECT id FROM t2;
/* 12 */
INSERT INTO t1 (id) SELECT id FROM t2;
/* 16 */
INSERT INTO t1 (id) SELECT id FROM t2;
/* 20 */
INSERT INTO t1 (id) SELECT id FROM t2;
/* 24 */
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT SUM(DISTINCT id) sm FROM t1;
sm
134225920
SELECT SUM(DISTINCT id) sm FROM t2;
sm
134225920
SELECT SUM(DISTINCT id) sm FROM t1 group by id % 13;
sm
10327590
10328851
10330112
10331373
10332634
10317510
10318770
10320030
10321290
10322550
10323810
10325070
10326330
SET max_heap_table_size=16384;
SHOW variables LIKE 'max_heap_table_size';
Variable_name Value
max_heap_table_size 16384
SELECT SUM(DISTINCT id) sm FROM t1;
sm
134225920
SELECT SUM(DISTINCT id) sm FROM t2;
sm
134225920
SELECT SUM(DISTINCT id) sm FROM t1 GROUP BY id % 13;
sm
10327590
10328851
10330112
10331373
10332634
10317510
10318770
10320030
10321290
10322550
10323810
10325070
10326330
DROP TABLE t1;
DROP TABLE t2;
mysql-test/t/sum_distinct.test
0 → 100644
View file @
7fdf6740
#
# Various tests for SUM(DISTINCT ...)
#
--
disable_warnings
DROP
TABLE
IF
EXISTS
t1
;
--
enable_warnings
CREATE
TABLE
t1
(
id
INTEGER
NOT
NULL
PRIMARY
KEY
AUTO_INCREMENT
,
gender
CHAR
(
1
),
name
VARCHAR
(
20
)
);
# According to ANSI SQL, SUM(DISTINCT ...) should return NULL for empty
# record set
SELECT
SUM
(
DISTINCT
LENGTH
(
name
))
s1
FROM
t1
;
# According to ANSI SQL, SUM(DISTINCT ...) should return NULL for records sets
# entirely consisting of NULLs
INSERT
INTO
t1
(
gender
,
name
)
VALUES
(
NULL
,
NULL
);
INSERT
INTO
t1
(
gender
,
name
)
VALUES
(
NULL
,
NULL
);
INSERT
INTO
t1
(
gender
,
name
)
VALUES
(
NULL
,
NULL
);
SELECT
SUM
(
DISTINCT
LENGTH
(
name
))
s1
FROM
t1
;
# Filling table with t1
INSERT
INTO
t1
(
gender
,
name
)
VALUES
(
'F'
,
'Helen'
),
(
'F'
,
'Anastasia'
),
(
'F'
,
'Katherine'
),
(
'F'
,
'Margo'
),
(
'F'
,
'Magdalene'
),
(
'F'
,
'Mary'
);
CREATE
TABLE
t2
SELECT
name
FROM
t1
;
SELECT
(
SELECT
SUM
(
DISTINCT
LENGTH
(
name
))
FROM
t1
)
FROM
t2
;
DROP
TABLE
t2
;
INSERT
INTO
t1
(
gender
,
name
)
VALUES
(
'F'
,
'Eva'
),
(
'F'
,
'Sofia'
),
(
'F'
,
'Sara'
),
(
'F'
,
'Golda'
),
(
'F'
,
'Toba'
),
(
'F'
,
'Victory'
),
(
'F'
,
'Faina'
),
(
'F'
,
'Miriam'
),
(
'F'
,
'Beki'
),
(
'F'
,
'America'
),
(
'F'
,
'Susan'
),
(
'F'
,
'Glory'
),
(
'F'
,
'Priscilla'
),
(
'F'
,
'Rosmary'
),
(
'F'
,
'Rose'
),
(
'F'
,
'Margareth'
),
(
'F'
,
'Elizabeth'
),
(
'F'
,
'Meredith'
),
(
'F'
,
'Julie'
),
(
'F'
,
'Xenia'
),
(
'F'
,
'Zena'
),
(
'F'
,
'Olga'
),
(
'F'
,
'Brunhilda'
),
(
'F'
,
'Nataly'
),
(
'F'
,
'Lara'
),
(
'F'
,
'Svetlana'
),
(
'F'
,
'Grethem'
),
(
'F'
,
'Irene'
);
SELECT
SUM
(
DISTINCT
LENGTH
(
name
))
s1
,
SUM
(
DISTINCT
SUBSTRING
(
NAME
,
1
,
3
))
s2
,
SUM
(
DISTINCT
LENGTH
(
SUBSTRING
(
name
,
1
,
4
)))
s3
FROM
t1
;
SELECT
SUM
(
DISTINCT
LENGTH
(
g1
.
name
))
s1
,
SUM
(
DISTINCT
SUBSTRING
(
g2
.
name
,
1
,
3
))
s2
,
SUM
(
DISTINCT
LENGTH
(
SUBSTRING
(
g3
.
name
,
1
,
4
)))
s3
FROM
t1
g1
,
t1
g2
,
t1
g3
;
SELECT
SUM
(
DISTINCT
LENGTH
(
g1
.
name
))
s1
,
SUM
(
DISTINCT
SUBSTRING
(
g2
.
name
,
1
,
3
))
s2
,
SUM
(
DISTINCT
LENGTH
(
SUBSTRING
(
g3
.
name
,
1
,
4
)))
s3
FROM
t1
g1
,
t1
g2
,
t1
g3
GROUP
BY
LENGTH
(
SUBSTRING
(
g3
.
name
,
5
,
10
));
# here we explicitly request summing through temporary table (so
# Item_sum_sum_distinct::copy_or_same() is called)
SELECT
SQL_BUFFER_RESULT
SUM
(
DISTINCT
LENGTH
(
name
))
s1
,
SUM
(
DISTINCT
SUBSTRING
(
NAME
,
1
,
3
))
s2
,
SUM
(
DISTINCT
LENGTH
(
SUBSTRING
(
name
,
1
,
4
)))
s3
FROM
t1
;
SELECT
SQL_BUFFER_RESULT
SUM
(
DISTINCT
LENGTH
(
g1
.
name
))
s1
,
SUM
(
DISTINCT
SUBSTRING
(
g2
.
name
,
1
,
3
))
s2
,
SUM
(
DISTINCT
LENGTH
(
SUBSTRING
(
g3
.
name
,
1
,
4
)))
s3
FROM
t1
g1
,
t1
g2
,
t1
g3
GROUP
BY
LENGTH
(
SUBSTRING
(
g3
.
name
,
5
,
10
));
# this test demonstrates that strings are automatically converted to numbers
# before summing
SET
@
l
=
1
;
UPDATE
t1
SET
name
=
CONCAT
(
name
,
@
l
:=@
l
+
1
);
SELECT
SUM
(
DISTINCT
RIGHT
(
name
,
1
))
FROM
t1
;
# this is a test case for ordinary t1
SELECT
SUM
(
DISTINCT
id
)
FROM
t1
;
SELECT
SUM
(
DISTINCT
id
%
11
)
FROM
t1
;
DROP
TABLE
t1
;
#
# Test the case when distinct values doesn't fit in memory and
# filesort is used (see uniques.cc:merge_walk)
#
CREATE
TABLE
t1
(
id
INTEGER
);
CREATE
TABLE
t2
(
id
INTEGER
);
INSERT
INTO
t1
(
id
)
VALUES
(
1
),
(
1
),
(
1
),(
1
);
INSERT
INTO
t2
(
id
)
SELECT
id
FROM
t1
;
INSERT
INTO
t1
(
id
)
SELECT
id
FROM
t2
;
/* 8 */
INSERT
INTO
t1
(
id
)
SELECT
id
FROM
t2
;
/* 12 */
INSERT
INTO
t1
(
id
)
SELECT
id
FROM
t2
;
/* 16 */
INSERT
INTO
t1
(
id
)
SELECT
id
FROM
t2
;
/* 20 */
INSERT
INTO
t1
(
id
)
SELECT
id
FROM
t2
;
/* 24 */
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
1
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
2
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
4
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
8
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
16
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
32
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
64
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
128
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
256
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
512
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
1024
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
2048
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
4096
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
INSERT
INTO
t2
(
id
)
SELECT
id
+
8192
FROM
t1
;
INSERT
INTO
t1
SELECT
id
FROM
t2
;
DELETE
FROM
t2
;
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
INSERT
INTO
t2
SELECT
id
FROM
t1
ORDER
BY
id
*
rand
();
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t1
;
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t2
;
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t1
group
by
id
%
13
;
# this limit for max_heap_table_size is set to force testing the case, when
# all distinct sum values can not fit in memory and must be stored in a
# temporary table
SET
max_heap_table_size
=
16384
;
# to check that max_heap_table_size was actually set (hard limit for minimum
# max_heap_table_size is set in mysqld.cc):
SHOW
variables
LIKE
'max_heap_table_size'
;
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t1
;
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t2
;
SELECT
SUM
(
DISTINCT
id
)
sm
FROM
t1
GROUP
BY
id
%
13
;
DROP
TABLE
t1
;
DROP
TABLE
t2
;
sql/filesort.cc
View file @
7fdf6740
...
...
@@ -756,6 +756,39 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
}
/* read_to_buffer */
/*
Put all room used by freed buffer to use in adjacent buffer. Note, that
we can't simply distribute memory evenly between all buffers, because
new areas must not overlap with old ones.
SYNOPSYS
reuse_freed_buff()
queue IN list of non-empty buffers, without freed buffer
reuse IN empty buffer
key_length IN key length
*/
void
reuse_freed_buff
(
QUEUE
*
queue
,
BUFFPEK
*
reuse
,
uint
key_length
)
{
uchar
*
reuse_end
=
reuse
->
base
+
reuse
->
max_keys
*
key_length
;
for
(
uint
i
=
0
;
i
<
queue
->
elements
;
++
i
)
{
BUFFPEK
*
bp
=
(
BUFFPEK
*
)
queue_element
(
queue
,
i
);
if
(
bp
->
base
+
bp
->
max_keys
*
key_length
==
reuse
->
base
)
{
bp
->
max_keys
+=
reuse
->
max_keys
;
return
;
}
else
if
(
bp
->
base
==
reuse_end
)
{
bp
->
base
=
reuse
->
base
;
bp
->
max_keys
+=
reuse
->
max_keys
;
return
;
}
}
DBUG_ASSERT
(
0
);
}
/*
Merge buffers to one buffer
*/
...
...
@@ -881,29 +914,8 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
if
(
!
(
error
=
(
int
)
read_to_buffer
(
from_file
,
buffpek
,
rec_length
)))
{
uchar
*
base
=
buffpek
->
base
;
ulong
max_keys
=
buffpek
->
max_keys
;
VOID
(
queue_remove
(
&
queue
,
0
));
/* Put room used by buffer to use in other buffer */
for
(
refpek
=
(
BUFFPEK
**
)
&
queue_top
(
&
queue
);
refpek
<=
(
BUFFPEK
**
)
&
queue_end
(
&
queue
);
refpek
++
)
{
buffpek
=
*
refpek
;
if
(
buffpek
->
base
+
buffpek
->
max_keys
*
rec_length
==
base
)
{
buffpek
->
max_keys
+=
max_keys
;
break
;
}
else
if
(
base
+
max_keys
*
rec_length
==
buffpek
->
base
)
{
buffpek
->
base
=
base
;
buffpek
->
max_keys
+=
max_keys
;
break
;
}
}
reuse_freed_buff
(
&
queue
,
buffpek
,
rec_length
);
break
;
/* One buffer have been removed */
}
else
if
(
error
==
-
1
)
...
...
sql/item_sum.cc
View file @
7fdf6740
...
...
@@ -262,6 +262,122 @@ double Item_sum_sum::val()
}
/* Item_sum_sum_distinct */
Item_sum_sum_distinct
::
Item_sum_sum_distinct
(
Item
*
item
)
:
Item_sum_num
(
item
),
sum
(
0.0
),
tree
(
0
)
{
/*
quick_group is an optimizer hint, which means that GROUP BY can be
handled with help of index on grouped columns.
By setting quick_group to zero we force creation of temporary table
to perform GROUP BY.
*/
quick_group
=
0
;
}
Item_sum_sum_distinct
::
Item_sum_sum_distinct
(
THD
*
thd
,
Item_sum_sum_distinct
&
original
)
:
Item_sum_num
(
thd
,
original
),
sum
(
0.0
),
tree
(
0
)
{
quick_group
=
0
;
}
Item_sum_sum_distinct
::~
Item_sum_sum_distinct
()
{
delete
tree
;
}
Item
*
Item_sum_sum_distinct
::
copy_or_same
(
THD
*
thd
)
{
return
new
(
&
thd
->
mem_root
)
Item_sum_sum_distinct
(
thd
,
*
this
);
}
C_MODE_START
static
int
simple_raw_key_cmp
(
void
*
arg
,
const
void
*
key1
,
const
void
*
key2
)
{
return
memcmp
(
key1
,
key2
,
*
(
uint
*
)
arg
);
}
C_MODE_END
bool
Item_sum_sum_distinct
::
setup
(
THD
*
thd
)
{
SELECT_LEX
*
select_lex
=
thd
->
lex
->
current_select
;
/* what does it mean??? */
if
(
select_lex
->
linkage
==
GLOBAL_OPTIONS_TYPE
)
return
1
;
DBUG_ASSERT
(
tree
==
0
);
/* setup can not be called twice */
/*
Uniques handles all unique elements in a tree until they can't fit in.
Then thee tree is dumped to the temporary file.
See class Unique for details.
*/
null_value
=
maybe_null
=
1
;
/*
TODO: if underlying item result fits in 4 bytes we can take advantage
of it and have tree of long/ulong. It gives 10% performance boost
*/
static
uint
key_length
=
sizeof
(
double
);
tree
=
new
Unique
(
simple_raw_key_cmp
,
&
key_length
,
key_length
,
thd
->
variables
.
max_heap_table_size
);
return
tree
==
0
;
}
void
Item_sum_sum_distinct
::
clear
()
{
DBUG_ASSERT
(
tree
);
/* we always have a tree */
null_value
=
1
;
tree
->
reset
();
}
bool
Item_sum_sum_distinct
::
add
()
{
/* args[0]->val() may reset args[0]->null_value */
double
val
=
args
[
0
]
->
val
();
if
(
!
args
[
0
]
->
null_value
)
{
DBUG_ASSERT
(
tree
);
null_value
=
0
;
if
(
val
)
return
tree
->
unique_add
(
&
val
);
}
return
0
;
}
C_MODE_START
static
int
sum_sum_distinct
(
void
*
element
,
element_count
num_of_dups
,
void
*
item_sum_sum_distinct
)
{
((
Item_sum_sum_distinct
*
)
(
item_sum_sum_distinct
))
->
add
(
*
(
double
*
)
element
);
return
0
;
}
C_MODE_END
double
Item_sum_sum_distinct
::
val
()
{
/*
We don't have a tree only if 'setup()' hasn't been called;
this is the case of sql_select.cc:return_zero_rows.
*/
sum
=
0.0
;
if
(
tree
)
tree
->
walk
(
sum_sum_distinct
,
(
void
*
)
this
);
return
sum
;
}
/* end of Item_sum_sum_distinct */
Item
*
Item_sum_count
::
copy_or_same
(
THD
*
thd
)
{
return
new
(
&
thd
->
mem_root
)
Item_sum_count
(
thd
,
*
this
);
...
...
@@ -1036,11 +1152,6 @@ String *Item_variance_field::val_str(String *str)
#include "sql_select.h"
int
simple_raw_key_cmp
(
void
*
arg
,
byte
*
key1
,
byte
*
key2
)
{
return
memcmp
(
key1
,
key2
,
*
(
uint
*
)
arg
);
}
int
simple_str_key_cmp
(
void
*
arg
,
byte
*
key1
,
byte
*
key2
)
{
Item_sum_count_distinct
*
item
=
(
Item_sum_count_distinct
*
)
arg
;
...
...
sql/item_sum.h
View file @
7fdf6740
...
...
@@ -27,9 +27,9 @@ class Item_sum :public Item_result_field
{
public:
enum
Sumfunctype
{
COUNT_FUNC
,
COUNT_DISTINCT_FUNC
,
SUM_FUNC
,
AVG_FUNC
,
MIN
_FUNC
,
M
AX_FUNC
,
UNIQUE_USERS_FUNC
,
STD_FUNC
,
VARIANCE_FUNC
,
SUM_BIT
_FUNC
,
UDF_SUM_FUNC
,
GROUP_CONCAT_FUNC
{
COUNT_FUNC
,
COUNT_DISTINCT_FUNC
,
SUM_FUNC
,
SUM_DISTINCT_FUNC
,
AVG
_FUNC
,
M
IN_FUNC
,
MAX_FUNC
,
UNIQUE_USERS_FUNC
,
STD_FUNC
,
VARIANCE
_FUNC
,
SUM_BIT_FUNC
,
UDF_SUM_FUNC
,
GROUP_CONCAT_FUNC
};
Item
**
args
,
*
tmp_args
[
2
];
...
...
@@ -138,6 +138,39 @@ class Item_sum_sum :public Item_sum_num
};
/*
Item_sum_sum_distinct - SELECT SUM(DISTINCT expr) FROM ...
support. See also: MySQL manual, chapter 'Adding New Functions To MySQL'
and comments in item_sum.cc.
*/
class
Unique
;
class
Item_sum_sum_distinct
:
public
Item_sum_num
{
double
sum
;
Unique
*
tree
;
private:
Item_sum_sum_distinct
(
THD
*
thd
,
Item_sum_sum_distinct
&
item
);
public:
Item_sum_sum_distinct
(
Item
*
item_par
);
~
Item_sum_sum_distinct
();
bool
setup
(
THD
*
thd
);
void
clear
();
bool
add
();
double
val
();
inline
void
add
(
double
val
)
{
sum
+=
val
;
}
enum
Sumfunctype
sum_func
()
const
{
return
SUM_DISTINCT_FUNC
;
}
void
reset_field
()
{}
// not used
void
update_field
()
{}
// not used
const
char
*
func_name
()
const
{
return
"sum_distinct"
;
}
Item
*
copy_or_same
(
THD
*
thd
);
virtual
void
no_rows_in_result
()
{}
};
class
Item_sum_count
:
public
Item_sum_int
{
longlong
count
;
...
...
sql/sql_class.h
View file @
7fdf6740
...
...
@@ -1207,8 +1207,13 @@ class user_var_entry
DTCollation
collation
;
};
/* Class for unique (removing of duplicates) */
/*
Unique -- class for unique (removing of duplicates).
Puts all values to the TREE. If the tree becomes too big,
it's dumped to the file. User can request sorted values, or
just iterate through them. In the last case tree merging is performed in
memory simultaneously with iteration, so it should be ~2-3x faster.
*/
class
Unique
:
public
Sql_alloc
{
...
...
@@ -1222,10 +1227,10 @@ class Unique :public Sql_alloc
public:
ulong
elements
;
Unique
(
qsort_cmp2
comp_func
,
void
*
comp_func_fixed_arg
,
Unique
(
qsort_cmp2
comp_func
,
void
*
comp_func_fixed_arg
,
uint
size_arg
,
ulong
max_in_memory_size_arg
);
~
Unique
();
inline
bool
unique_add
(
gptr
ptr
)
inline
bool
unique_add
(
void
*
ptr
)
{
if
(
tree
.
elements_in_tree
>
max_elements
&&
flush
())
return
1
;
...
...
@@ -1234,6 +1239,9 @@ class Unique :public Sql_alloc
bool
get
(
TABLE
*
table
);
void
reset
();
bool
walk
(
tree_walk_action
action
,
void
*
walk_action_arg
);
friend
int
unique_write_to_file
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
friend
int
unique_write_to_ptrs
(
gptr
key
,
element_count
count
,
Unique
*
unique
);
};
...
...
sql/sql_sort.h
View file @
7fdf6740
...
...
@@ -78,3 +78,4 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
IO_CACHE
*
to_file
,
uchar
*
sort_buffer
,
BUFFPEK
*
lastbuff
,
BUFFPEK
*
Fb
,
BUFFPEK
*
Tb
,
int
flag
);
void
reuse_freed_buff
(
QUEUE
*
queue
,
BUFFPEK
*
reuse
,
uint
key_length
);
sql/sql_yacc.yy
View file @
7fdf6740
...
...
@@ -4055,14 +4055,25 @@ sum_expr:
{ $$= new Item_sum_unique_users($3,atoi($5.str),atoi($7.str),$9); }
| MIN_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_min($3); }
/*
According to ANSI SQL, DISTINCT is allowed and has
no sence inside MIN and MAX grouping functions; so MIN|MAX(DISTINCT ...)
is processed like an ordinary MIN | MAX()
*/
| MIN_SYM '(' DISTINCT in_sum_expr ')'
{ $$=new Item_sum_min($4); }
| MAX_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_max($3); }
| MAX_SYM '(' DISTINCT in_sum_expr ')'
{ $$=new Item_sum_max($4); }
| STD_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_std($3); }
| VARIANCE_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_variance($3); }
| SUM_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_sum($3); }
| SUM_SYM '(' DISTINCT in_sum_expr ')'
{ $$=new Item_sum_sum_distinct($4); }
| GROUP_CONCAT_SYM '(' opt_distinct expr_list opt_gorder_clause
opt_gconcat_separator ')'
{
...
...
sql/uniques.cc
View file @
7fdf6740
...
...
@@ -84,6 +84,7 @@ bool Unique::flush()
elements
+=
tree
.
elements_in_tree
;
file_ptr
.
count
=
tree
.
elements_in_tree
;
file_ptr
.
file_pos
=
my_b_tell
(
&
file
);
if
(
tree_walk
(
&
tree
,
(
tree_walk_action
)
unique_write_to_file
,
(
void
*
)
this
,
left_root_right
)
||
insert_dynamic
(
&
file_ptrs
,
(
gptr
)
&
file_ptr
))
...
...
@@ -93,6 +94,237 @@ bool Unique::flush()
}
/*
Clear the tree and the file.
You must call reset() if you want to reuse Unique after walk().
*/
void
Unique
::
reset
()
{
reset_tree
(
&
tree
);
/*
If elements != 0, some trees were stored in the file (see how
flush() works). Note, that we can not count on my_b_tell(&file) == 0
here, because it can return 0 right after walk(), and walk() does not
reset any Unique member.
*/
if
(
elements
)
{
reset_dynamic
(
&
file_ptrs
);
reinit_io_cache
(
&
file
,
WRITE_CACHE
,
0L
,
0
,
1
);
}
elements
=
0
;
}
/*
The comparison function, passed to queue_init() in merge_walk() must
use comparison function of Uniques::tree, but compare members of struct
BUFFPEK.
*/
struct
BUFFPEK_COMPARE_CONTEXT
{
qsort_cmp2
key_compare
;
void
*
key_compare_arg
;
};
C_MODE_START
static
int
buffpek_compare
(
void
*
arg
,
byte
*
key_ptr1
,
byte
*
key_ptr2
)
{
BUFFPEK_COMPARE_CONTEXT
*
ctx
=
(
BUFFPEK_COMPARE_CONTEXT
*
)
arg
;
return
ctx
->
key_compare
(
ctx
->
key_compare_arg
,
*
((
byte
**
)
key_ptr1
),
*
((
byte
**
)
key_ptr2
));
}
C_MODE_END
/*
DESCRIPTION
Function is very similar to merge_buffers, but instead of writing sorted
unique keys to the output file, it invokes walk_action for each key.
This saves I/O if you need to pass through all unique keys only once.
SYNOPSIS
merge_walk()
All params are 'IN' (but see comment for begin, end):
merge_buffer buffer to perform cached piece-by-piece loading
of trees; initially the buffer is empty
merge_buffer_size size of merge_buffer. Must be aligned with
key_length
key_length size of tree element; key_length * (end - begin)
must be less or equal than merge_buffer_size.
begin pointer to BUFFPEK struct for the first tree.
end pointer to BUFFPEK struct for the last tree;
end > begin and [begin, end) form a consecutive
range. BUFFPEKs structs in that range are used and
overwritten in merge_walk().
walk_action element visitor. Action is called for each unique
key.
walk_action_arg argument to walk action. Passed to it on each call.
compare elements comparison function
compare_arg comparison function argument
file file with all trees dumped. Trees in the file
must contain sorted unique values. Cache must be
initialized in read mode.
RETURN VALUE
0 ok
<> 0 error
*/
static
bool
merge_walk
(
uchar
*
merge_buffer
,
uint
merge_buffer_size
,
uint
key_length
,
BUFFPEK
*
begin
,
BUFFPEK
*
end
,
tree_walk_action
walk_action
,
void
*
walk_action_arg
,
qsort_cmp2
compare
,
void
*
compare_arg
,
IO_CACHE
*
file
)
{
BUFFPEK_COMPARE_CONTEXT
compare_context
=
{
compare
,
compare_arg
};
QUEUE
queue
;
if
(
end
<=
begin
||
merge_buffer_size
<
key_length
*
(
end
-
begin
+
1
)
||
init_queue
(
&
queue
,
end
-
begin
,
offsetof
(
BUFFPEK
,
key
),
0
,
buffpek_compare
,
&
compare_context
))
return
1
;
/* we need space for one key when a piece of merge buffer is re-read */
merge_buffer_size
-=
key_length
;
uchar
*
save_key_buff
=
merge_buffer
+
merge_buffer_size
;
uint
max_key_count_per_piece
=
merge_buffer_size
/
(
end
-
begin
)
/
key_length
;
/* if piece_size is aligned reuse_freed_buffer will always hit */
uint
piece_size
=
max_key_count_per_piece
*
key_length
;
uint
bytes_read
;
/* to hold return value of read_to_buffer */
BUFFPEK
*
top
;
int
res
=
1
;
/*
Invariant: queue must contain top element from each tree, until a tree
is not completely walked through.
Here we're forcing the invariant, inserting one element from each tree
to the queue.
*/
for
(
top
=
begin
;
top
!=
end
;
++
top
)
{
top
->
base
=
merge_buffer
+
(
top
-
begin
)
*
piece_size
;
top
->
max_keys
=
max_key_count_per_piece
;
bytes_read
=
read_to_buffer
(
file
,
top
,
key_length
);
if
(
bytes_read
==
(
uint
)
(
-
1
))
goto
end
;
DBUG_ASSERT
(
bytes_read
);
queue_insert
(
&
queue
,
(
byte
*
)
top
);
}
top
=
(
BUFFPEK
*
)
queue_top
(
&
queue
);
while
(
queue
.
elements
>
1
)
{
/*
Every iteration one element is removed from the queue, and one is
inserted by the rules of the invariant. If two adjacent elements on
the top of the queue are not equal, biggest one is unique, because all
elements in each tree are unique. Action is applied only to unique
elements.
*/
void
*
old_key
=
top
->
key
;
/*
read next key from the cache or from the file and push it to the
queue; this gives new top.
*/
top
->
key
+=
key_length
;
if
(
--
top
->
mem_count
)
queue_replaced
(
&
queue
);
else
/* next piece should be read */
{
/* save old_key not to overwrite it in read_to_buffer */
memcpy
(
save_key_buff
,
old_key
,
key_length
);
old_key
=
save_key_buff
;
bytes_read
=
read_to_buffer
(
file
,
top
,
key_length
);
if
(
bytes_read
==
(
uint
)
(
-
1
))
goto
end
;
else
if
(
bytes_read
>
0
)
/* top->key, top->mem_count are reset */
queue_replaced
(
&
queue
);
/* in read_to_buffer */
else
{
/*
Tree for old 'top' element is empty: remove it from the queue and
give all its memory to the nearest tree.
*/
queue_remove
(
&
queue
,
0
);
reuse_freed_buff
(
&
queue
,
top
,
key_length
);
}
}
top
=
(
BUFFPEK
*
)
queue_top
(
&
queue
);
/* new top has been obtained; if old top is unique, apply the action */
if
(
compare
(
compare_arg
,
old_key
,
top
->
key
))
{
if
(
walk_action
(
old_key
,
1
,
walk_action_arg
))
goto
end
;
}
}
/*
Applying walk_action to the tail of the last tree: this is safe because
either we had only one tree in the beginning, either we work with the
last tree in the queue.
*/
do
{
do
{
if
(
walk_action
(
top
->
key
,
1
,
walk_action_arg
))
goto
end
;
top
->
key
+=
key_length
;
}
while
(
--
top
->
mem_count
);
bytes_read
=
read_to_buffer
(
file
,
top
,
key_length
);
if
(
bytes_read
==
(
uint
)
(
-
1
))
goto
end
;
}
while
(
bytes_read
);
res
=
0
;
end:
delete_queue
(
&
queue
);
return
res
;
}
/*
DESCRIPTION
Walks consecutively through all unique elements:
if all elements are in memory, then it simply invokes 'tree_walk', else
all flushed trees are loaded to memory piece-by-piece, pieces are
sorted, and action is called for each unique value.
Note: so as merging resets file_ptrs state, this method can change
internal Unique state to undefined: if you want to reuse Unique after
walk() you must call reset() first!
SYNOPSIS
Unique:walk()
All params are 'IN':
action function-visitor, typed in include/my_tree.h
function is called for each unique element
arg argument for visitor, which is passed to it on each call
RETURN VALUE
0 OK
<> 0 error
*/
bool
Unique
::
walk
(
tree_walk_action
action
,
void
*
walk_action_arg
)
{
if
(
elements
==
0
)
/* the whole tree is in memory */
return
tree_walk
(
&
tree
,
action
,
walk_action_arg
,
left_root_right
);
/* flush current tree to the file to have some memory for merge buffer */
if
(
flush
())
return
1
;
if
(
flush_io_cache
(
&
file
)
||
reinit_io_cache
(
&
file
,
READ_CACHE
,
0L
,
0
,
0
))
return
1
;
uchar
*
merge_buffer
=
(
uchar
*
)
my_malloc
(
max_in_memory_size
,
MYF
(
0
));
if
(
merge_buffer
==
0
)
return
1
;
int
res
=
merge_walk
(
merge_buffer
,
max_in_memory_size
,
size
,
(
BUFFPEK
*
)
file_ptrs
.
buffer
,
(
BUFFPEK
*
)
file_ptrs
.
buffer
+
file_ptrs
.
elements
,
action
,
walk_action_arg
,
tree
.
compare
,
tree
.
custom_arg
,
&
file
);
x_free
(
merge_buffer
);
return
res
;
}
/*
Modify the TABLE element so that when one calls init_records()
the rows will be read in priority order.
...
...
@@ -114,7 +346,7 @@ bool Unique::get(TABLE *table)
return
0
;
}
}
/* Not enough memory; Save the result to file */
/* Not enough memory; Save the result to file
&& free memory used by tree
*/
if
(
flush
())
return
1
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment