Commit 897849e5 authored by unknown's avatar unknown

WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up

Item_sum_count_distinct, and
deploy Unique for use with COUNT(DISTINCT) if there is no blob
column in the list of DISTINCT arguments.


mysql-test/r/count_distinct2.result:
  Test results fixed.
mysql-test/r/func_group.result:
  Updated.
mysql-test/r/sum_distinct.result:
  Updated.
mysql-test/t/func_group.test:
  Add a test for COUNT(DISTINCT) and true varchar and case-insensitive
  collation. The table in the test contains only two distinct values.
mysql-test/t/sum_distinct.test:
  Since now we support INSERT INTO t1 (a) SELECT a+1 FROM t1, shorten
  the test.
  Add a nominal test for AVG(DISTINCT)
sql/item_sum.cc:
  Implementation of cleaned up Item_sum_count_distinct.
  Fixed a bug with COUNT(DISTINCT) and new VARCHAR and collations.
  Fixed a bug wiht AVG(DISTINCT) and wrong number of output digits
  after decimal point.
sql/item_sum.h:
  Cleanup for Item_sum_count_distinct.
  Now if the list of distinct arguments doesn't contain a blob column,
  we always use Unique and merge-sort to find distinct values.
sql/sql_class.h:
  Added a short-cut to find number of elements in Unique if all elements fit
  into memory.
parent ce4f43cb
......@@ -116,7 +116,7 @@ count(distinct n)
5000
show status like 'Created_tmp_disk_tables';
Variable_name Value
Created_tmp_disk_tables 1
Created_tmp_disk_tables 0
drop table t1;
create table t1 (s text);
flush status;
......
......@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
MAX(id)
NULL
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
COUNT(DISTINCT a)
2
DROP TABLE t1;
......@@ -98,60 +98,60 @@ DROP TABLE t1;
CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 8 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 12 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 16 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 20 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 24 */
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
AVG(DISTINCT id)
513.5000
508.0000
509.0000
510.0000
511.0000
512.0000
513.0000
514.0000
515.0000
516.0000
517.0000
511.5000
512.5000
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
SUM(DISTINCT id)/COUNT(DISTINCT id)
513.50000
508.00000
509.00000
510.00000
511.00000
512.00000
513.00000
514.00000
515.00000
516.00000
517.00000
511.50000
512.50000
INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT SUM(DISTINCT id) sm FROM t1;
sm
......
......@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
DROP TABLE t1;
#
# Test that new VARCHAR correctly works with COUNT(DISTINCT)
#
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
DROP TABLE t1;
......@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
# Just test that AVG(DISTINCT) is there
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
......
This diff is collapsed.
......@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct
public:
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
void fix_length_and_dec();
virtual void calculate_val_and_count();
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
const char *func_name() const { return "avg_distinct"; }
......@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
class Item_sum_count_distinct :public Item_sum_int
{
TABLE *table;
table_map used_table_cache;
uint32 *field_lengths;
TMP_TABLE_PARAM *tmp_table_param;
TREE tree_base;
TREE *tree;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
Unique *tree;
/*
Following is 0 normal object and pointer to original one for copy
(to correctly free resources)
*/
Item_sum_count_distinct *original;
uint tree_key_length;
uint key_length;
CHARSET_INFO *key_charset;
/*
Calculated based on max_heap_table_size. If reached,
walk the tree and dump it into MyISAM table
*/
uint max_elements_in_tree;
/*
The first few bytes of record ( at least one)
are just markers for deleted and NULLs. We want to skip them since
they will just bloat the tree without providing any valuable info
*/
int rec_offset;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
bool use_tree;
bool always_null; // Set to 1 if the result is always NULL
int tree_to_myisam();
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item);
public:
public:
Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list), table(0), used_table_cache(~(table_map) 0),
tmp_table_param(0), tree(&tree_base), original(0), use_tree(0),
always_null(0)
:Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
tree(0), original(0), always_null(FALSE)
{ quick_group= 0; }
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
:Item_sum_int(thd, item), table(item->table),
used_table_cache(item->used_table_cache),
field_lengths(item->field_lengths),
tmp_table_param(item->tmp_table_param),
tree(item->tree), original(item), key_length(item->key_length),
max_elements_in_tree(item->max_elements_in_tree),
rec_offset(item->rec_offset), use_tree(item->use_tree),
tree(item->tree), original(item), tree_key_length(item->tree_key_length),
always_null(item->always_null)
{}
~Item_sum_count_distinct();
void cleanup();
table_map used_tables() const { return used_table_cache; }
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
void clear();
bool add();
......
......@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
uint size_arg, ulong max_in_memory_size_arg);
~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
inline bool unique_add(void *ptr)
{
DBUG_ENTER("unique_add");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment