Commit 97816c1f authored by unknown's avatar unknown

WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up

Item_sum_count_distinct, and
deploy Unique for use with COUNT(DISTINCT) if there is no blob
column in the list of DISTINCT arguments.


mysql-test/r/count_distinct2.result:
  Test results fixed.
mysql-test/r/func_group.result:
  Updated.
mysql-test/r/sum_distinct.result:
  Updated.
mysql-test/t/func_group.test:
  Add a test for COUNT(DISTINCT) and true varchar and case-insensitive
  collation. The table in the test contains only two distinct values.
mysql-test/t/sum_distinct.test:
  Since now we support INSERT INTO t1 (a) SELECT a+1 FROM t1, shorten
  the test.
  Add a nominal test for AVG(DISTINCT)
sql/item_sum.cc:
  Implementation of cleaned up Item_sum_count_distinct.
  Fixed a bug with COUNT(DISTINCT) and new VARCHAR and collations.
  Fixed a bug wiht AVG(DISTINCT) and wrong number of output digits
  after decimal point.
sql/item_sum.h:
  Cleanup for Item_sum_count_distinct.
  Now if the list of distinct arguments doesn't contain a blob column,
  we always use Unique and merge-sort to find distinct values.
sql/sql_class.h:
  Added a short-cut to find number of elements in Unique if all elements fit
  into memory.
parent ba133947
......@@ -116,7 +116,7 @@ count(distinct n)
5000
show status like 'Created_tmp_disk_tables';
Variable_name Value
Created_tmp_disk_tables 1
Created_tmp_disk_tables 0
drop table t1;
create table t1 (s text);
flush status;
......
......@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
MAX(id)
NULL
DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
COUNT(DISTINCT a)
2
DROP TABLE t1;
......@@ -98,60 +98,60 @@ DROP TABLE t1;
CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 8 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 12 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 16 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 20 */
INSERT INTO t1 (id) SELECT id FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1;
/* 24 */
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
AVG(DISTINCT id)
513.5000
508.0000
509.0000
510.0000
511.0000
512.0000
513.0000
514.0000
515.0000
516.0000
517.0000
511.5000
512.5000
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
SUM(DISTINCT id)/COUNT(DISTINCT id)
513.50000
508.00000
509.00000
510.00000
511.00000
512.00000
513.00000
514.00000
515.00000
516.00000
517.00000
511.50000
512.50000
INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT SUM(DISTINCT id) sm FROM t1;
sm
......
......@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
DROP TABLE t1;
#
# Test that new VARCHAR correctly works with COUNT(DISTINCT)
#
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
DROP TABLE t1;
......@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
# Just test that AVG(DISTINCT) is there
SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
......
......@@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str)
/* Item_sum_avg_distinct */
void
Item_sum_avg_distinct::fix_length_and_dec()
{
Item_sum_distinct::fix_length_and_dec();
/*
AVG() will divide val by count. We need to reserve digits
after decimal point as the result can be fractional.
*/
decimals+= 4;
}
void
Item_sum_avg_distinct::calculate_val_and_count()
{
......@@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
{
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
CHARSET_INFO *cs=item->key_charset;
uint len=item->key_length;
return cs->coll->strnncollsp(cs,
(const uchar*) key1, len,
(const uchar*) key2, len, 0);
Field *f= (Field*) arg;
return f->cmp(key1, key2);
}
/*
......@@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
return 0;
}
/*
helper function for walking the tree when we dump it to MyISAM -
tree_walk will call it for each leaf
*/
int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item)
C_MODE_START
static int count_distinct_walk(void *elem, unsigned int count, void *arg)
{
byte* buf = item->table->record[0];
int error;
/*
The first item->rec_offset bytes are taken care of with
restore_record(table,default_values) in setup()
*/
memcpy(buf + item->rec_offset, key, item->tree->size_of_element);
if ((error = item->table->file->write_row(buf)))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return 1;
}
(*((ulonglong*)arg))++;
return 0;
}
C_MODE_END
void Item_sum_count_distinct::cleanup()
{
DBUG_ENTER("Item_sum_count_distinct::cleanup");
Item_sum_int::cleanup();
/*
Free table and tree if they belong to this item (if item have not pointer
to original item from which was made copy => it own its objects )
*/
/* Free objects only if we own them. */
if (!original)
{
/*
We need to delete the table and the tree in cleanup() as
they were allocated in the runtime memroot. Using the runtime
memroot reduces memory footprint for PS/SP and simplifies setup().
*/
delete tree;
tree= 0;
if (table)
{
free_tmp_table(current_thd, table);
free_tmp_table(table->in_use, table);
table= 0;
}
delete tmp_table_param;
tmp_table_param= 0;
if (use_tree)
{
delete_tree(tree);
use_tree= 0;
}
}
always_null= FALSE;
DBUG_VOID_RETURN;
}
......@@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique()
{
table=0;
original= 0;
use_tree= 0; // to prevent delete_tree call on uninitialized tree
tree= &tree_base;
tree= 0;
tmp_table_param= 0;
always_null= FALSE;
}
Item_sum_count_distinct::~Item_sum_count_distinct()
{
cleanup();
}
......@@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
{
List<Item> list;
SELECT_LEX *select_lex= thd->lex->current_select;
if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
return 1;
/*
Setup can be called twice for ROLLUP items. This is a bug.
Please add DBUG_ASSERT(tree == 0) here when it's fixed.
*/
if (tree || table || tmp_table_param)
return FALSE;
if (!(tmp_table_param= new TMP_TABLE_PARAM))
return 1;
......@@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd)
if (always_null)
return 0;
count_field_types(tmp_table_param,list,0);
if (table)
{
free_tmp_table(thd, table);
tmp_table_param->cleanup();
}
DBUG_ASSERT(table == 0);
if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
0,
select_lex->options | thd->options,
......@@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd)
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
table->no_rows=1;
// no blobs, otherwise it would be MyISAM
if (table->s->db_type == DB_TYPE_HEAP)
{
/*
No blobs, otherwise it would have been MyISAM: set up a compare
function and its arguments to use with Unique.
*/
qsort_cmp2 compare_key;
void* cmp_arg;
Field **field= table->field;
Field **field_end= field + table->s->fields;
bool all_binary= TRUE;
// to make things easier for dump_leaf if we ever have to dump to MyISAM
restore_record(table,s->default_values);
if (table->s->fields == 1)
for (tree_key_length= 0; field < field_end; ++field)
{
/*
If we have only one field, which is the most common use of
count(distinct), it is much faster to use a simpler key
compare method that can take advantage of not having to worry
about other fields
*/
Field* field = table->field[0];
switch (field->type()) {
case MYSQL_TYPE_STRING:
case MYSQL_TYPE_VAR_STRING:
if (field->binary())
{
compare_key = (qsort_cmp2)simple_raw_key_cmp;
cmp_arg = (void*) &key_length;
}
else
{
/*
If we have a string, we must take care of charsets and case
sensitivity
*/
compare_key = (qsort_cmp2)simple_str_key_cmp;
cmp_arg = (void*) this;
}
break;
default:
/*
Since at this point we cannot have blobs anything else can
be compared with memcmp
*/
compare_key = (qsort_cmp2)simple_raw_key_cmp;
cmp_arg = (void*) &key_length;
break;
Field *f= *field;
enum enum_field_types type= f->type();
tree_key_length+= f->pack_length();
if (!f->binary() && (type == MYSQL_TYPE_STRING ||
type == MYSQL_TYPE_VAR_STRING ||
type == MYSQL_TYPE_VARCHAR))
{
all_binary= FALSE;
break;
}
key_charset = field->charset();
key_length = field->pack_length();
rec_offset = 1;
}
else // too bad, cannot cheat - there is more than one field
if (all_binary)
{
bool all_binary = 1;
Field** field, **field_end;
field_end = (field = table->field) + table->s->fields;
uint32 *lengths;
if (!(field_lengths=
(uint32*) thd->alloc(sizeof(uint32) * table->s->fields)))
return 1;
for (key_length = 0, lengths=field_lengths; field < field_end; ++field)
{
uint32 length= (*field)->pack_length();
key_length += length;
*lengths++ = length;
if (!(*field)->binary())
all_binary = 0; // Can't break loop here
}
rec_offset= table->s->reclength - key_length;
if (all_binary)
cmp_arg= (void*) &tree_key_length;
compare_key= (qsort_cmp2) simple_raw_key_cmp;
}
else
{
if (table->s->fields == 1)
{
compare_key = (qsort_cmp2)simple_raw_key_cmp;
cmp_arg = (void*) &key_length;
/*
If we have only one field, which is the most common use of
count(distinct), it is much faster to use a simpler key
compare method that can take advantage of not having to worry
about other fields.
*/
compare_key= (qsort_cmp2) simple_str_key_cmp;
cmp_arg= (void*) table->field[0];
/* tree_key_length has been set already */
}
else
{
compare_key = (qsort_cmp2) composite_key_cmp ;
cmp_arg = (void*) this;
uint32 *length;
compare_key= (qsort_cmp2) composite_key_cmp;
cmp_arg= (void*) this;
field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
for (tree_key_length= 0, length= field_lengths, field= table->field;
field < field_end; ++field, ++length)
{
*length= (*field)->pack_length();
tree_key_length+= *length;
}
}
}
if (use_tree)
delete_tree(tree);
init_tree(tree, min(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0,
key_length, compare_key, 0, NULL, cmp_arg);
use_tree = 1;
DBUG_ASSERT(tree == 0);
tree= new Unique(compare_key, cmp_arg, tree_key_length,
thd->variables.max_heap_table_size);
/*
The only time key_length could be 0 is if someone does
The only time tree_key_length could be 0 is if someone does
count(distinct) on a char(0) field - stupid thing to do,
but this has to be handled - otherwise someone can crash
the server with a DoS attack
*/
max_elements_in_tree = ((key_length) ?
thd->variables.max_heap_table_size/key_length : 1);
}
if (original)
{
original->table= table;
original->use_tree= use_tree;
if (! tree)
return TRUE;
}
return 0;
}
int Item_sum_count_distinct::tree_to_myisam()
{
if (create_myisam_from_heap(current_thd, table, tmp_table_param,
HA_ERR_RECORD_FILE_FULL, 1) ||
tree_walk(tree, (tree_walk_action)&dump_leaf, (void*)this,
left_root_right))
return 1;
delete_tree(tree);
use_tree = 0;
return 0;
return FALSE;
}
......@@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd)
void Item_sum_count_distinct::clear()
{
if (use_tree)
reset_tree(tree);
/* tree and table can be both null only if always_null */
if (tree)
tree->reset();
else if (table)
{
table->file->extra(HA_EXTRA_NO_CACHE);
......@@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add()
if ((*field)->is_real_null(0))
return 0; // Don't count NULL
if (use_tree)
if (tree)
{
/*
If the tree got too big, convert to MyISAM, otherwise insert into the
tree.
The first few bytes of record (at least one) are just markers
for deleted and NULLs. We want to skip them since they will
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/
if (tree->elements_in_tree > max_elements_in_tree)
{
if (tree_to_myisam())
return 1;
}
else if (!tree_insert(tree, table->record[0] + rec_offset, 0,
tree->custom_arg))
return 1;
}
else if ((error=table->file->write_row(table->record[0])))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
{
if (create_myisam_from_heap(current_thd, table, tmp_table_param, error,
1))
return 1; // Not a table_is_full error
}
return tree->unique_add(table->record[0] + table->s->null_bytes);
}
return 0;
if ((error= table->file->write_row(table->record[0])) &&
error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return TRUE;
return FALSE;
}
......@@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int()
DBUG_ASSERT(fixed == 1);
if (!table) // Empty query
return LL(0);
if (use_tree)
return tree->elements_in_tree;
if (tree)
{
ulonglong count;
if (tree->elements == 0)
return (longlong) tree->elements_in_tree(); // everything fits in memory
count= 0;
tree->walk(count_distinct_walk, (void*) &count);
return (longlong) count;
}
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
return table->file->records;
}
......
......@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct
public:
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
void fix_length_and_dec();
virtual void calculate_val_and_count();
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
const char *func_name() const { return "avg_distinct"; }
......@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
class Item_sum_count_distinct :public Item_sum_int
{
TABLE *table;
table_map used_table_cache;
uint32 *field_lengths;
TMP_TABLE_PARAM *tmp_table_param;
TREE tree_base;
TREE *tree;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
Unique *tree;
/*
Following is 0 normal object and pointer to original one for copy
(to correctly free resources)
*/
Item_sum_count_distinct *original;
uint tree_key_length;
uint key_length;
CHARSET_INFO *key_charset;
/*
Calculated based on max_heap_table_size. If reached,
walk the tree and dump it into MyISAM table
*/
uint max_elements_in_tree;
/*
The first few bytes of record ( at least one)
are just markers for deleted and NULLs. We want to skip them since
they will just bloat the tree without providing any valuable info
*/
int rec_offset;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
bool use_tree;
bool always_null; // Set to 1 if the result is always NULL
int tree_to_myisam();
friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item);
public:
public:
Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list), table(0), used_table_cache(~(table_map) 0),
tmp_table_param(0), tree(&tree_base), original(0), use_tree(0),
always_null(0)
:Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
tree(0), original(0), always_null(FALSE)
{ quick_group= 0; }
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
:Item_sum_int(thd, item), table(item->table),
used_table_cache(item->used_table_cache),
field_lengths(item->field_lengths),
tmp_table_param(item->tmp_table_param),
tree(item->tree), original(item), key_length(item->key_length),
max_elements_in_tree(item->max_elements_in_tree),
rec_offset(item->rec_offset), use_tree(item->use_tree),
tree(item->tree), original(item), tree_key_length(item->tree_key_length),
always_null(item->always_null)
{}
~Item_sum_count_distinct();
void cleanup();
table_map used_tables() const { return used_table_cache; }
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
void clear();
bool add();
......
......@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
uint size_arg, ulong max_in_memory_size_arg);
~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
inline bool unique_add(void *ptr)
{
DBUG_ENTER("unique_add");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment