Commit 69dd773b authored by Igor Babaev's avatar Igor Babaev

An implementation of index intersect via a modified Unique class.

This code is planned to be used for mwl#21.
parent 5719ca5c
...@@ -31,6 +31,7 @@ extern "C" { ...@@ -31,6 +31,7 @@ extern "C" {
#define tree_set_pointer(element,ptr) *((uchar **) (element+1))=((uchar*) (ptr)) #define tree_set_pointer(element,ptr) *((uchar **) (element+1))=((uchar*) (ptr))
#define TREE_NO_DUPS 1 #define TREE_NO_DUPS 1
#define TREE_ONLY_DUPS 2
typedef enum { left_root_right, right_root_left } TREE_WALK; typedef enum { left_root_right, right_root_left } TREE_WALK;
typedef uint32 element_count; typedef uint32 element_count;
......
...@@ -221,6 +221,8 @@ TREE_ELEMENT *tree_insert(TREE *tree, void *key, uint key_size, ...@@ -221,6 +221,8 @@ TREE_ELEMENT *tree_insert(TREE *tree, void *key, uint key_size,
} }
if (element == &tree->null_element) if (element == &tree->null_element)
{ {
if (tree->flag & TREE_ONLY_DUPS)
return((TREE_ELEMENT *) 1);
uint alloc_size=sizeof(TREE_ELEMENT)+key_size+tree->size_of_element; uint alloc_size=sizeof(TREE_ELEMENT)+key_size+tree->size_of_element;
tree->allocated+=alloc_size; tree->allocated+=alloc_size;
......
...@@ -50,10 +50,6 @@ static int write_keys(SORTPARAM *param,uchar * *sort_keys, ...@@ -50,10 +50,6 @@ static int write_keys(SORTPARAM *param,uchar * *sort_keys,
uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile); uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos); static void make_sortkey(SORTPARAM *param,uchar *to, uchar *ref_pos);
static void register_used_fields(SORTPARAM *param); static void register_used_fields(SORTPARAM *param);
static int merge_index(SORTPARAM *param,uchar *sort_buffer,
BUFFPEK *buffpek,
uint maxbuffer,IO_CACHE *tempfile,
IO_CACHE *outfile);
static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count, static bool save_index(SORTPARAM *param,uchar **sort_keys, uint count,
FILESORT_INFO *table_sort); FILESORT_INFO *table_sort);
static uint suffix_length(ulong string_length); static uint suffix_length(ulong string_length);
...@@ -145,6 +141,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, ...@@ -145,6 +141,7 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
/* filesort cannot handle zero-length records. */ /* filesort cannot handle zero-length records. */
DBUG_ASSERT(param.sort_length); DBUG_ASSERT(param.sort_length);
param.ref_length= table->file->ref_length; param.ref_length= table->file->ref_length;
param.min_dupl_count= 0;
param.addon_field= 0; param.addon_field= 0;
param.addon_length= 0; param.addon_length= 0;
if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) && if (!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
...@@ -1216,7 +1213,13 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ...@@ -1216,7 +1213,13 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
rec_length= param->rec_length; rec_length= param->rec_length;
res_length= param->res_length; res_length= param->res_length;
sort_length= param->sort_length; sort_length= param->sort_length;
offset= rec_length-res_length; element_count dupl_count;
uchar *src;
uint dupl_count_ofs= rec_length-sizeof(element_count);
uint min_dupl_count= param->min_dupl_count;
offset= rec_length-
(flag && min_dupl_count ? sizeof(dupl_count) : 0)-res_length;
uint wr_len= flag ? res_length : rec_length;
maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1)); maxcount= (ulong) (param->keys/((uint) (Tb-Fb) +1));
to_start_filepos= my_b_tell(to_file); to_start_filepos= my_b_tell(to_file);
strpos= sort_buffer; strpos= sort_buffer;
...@@ -1262,16 +1265,20 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ...@@ -1262,16 +1265,20 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
*/ */
buffpek= (BUFFPEK*) queue_top(&queue); buffpek= (BUFFPEK*) queue_top(&queue);
memcpy(param->unique_buff, buffpek->key, rec_length); memcpy(param->unique_buff, buffpek->key, rec_length);
if (my_b_write(to_file, (uchar*) buffpek->key, rec_length)) if (min_dupl_count)
{ memcpy(&dupl_count, param->unique_buff+dupl_count_ofs,
error=1; goto err; /* purecov: inspected */ sizeof(dupl_count));
}
buffpek->key+= rec_length; buffpek->key+= rec_length;
buffpek->mem_count--; if (! --buffpek->mem_count)
if (!--max_rows)
{ {
error= 0; /* purecov: inspected */ if (!(error= (int) read_to_buffer(from_file,buffpek,
goto end; /* purecov: inspected */ rec_length)))
{
VOID(queue_remove(&queue,0));
reuse_freed_buff(&queue, buffpek, rec_length);
}
else if (error == -1)
goto err; /* purecov: inspected */
} }
queue_replaced(&queue); // Top element has been used queue_replaced(&queue); // Top element has been used
} }
...@@ -1287,27 +1294,42 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ...@@ -1287,27 +1294,42 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
for (;;) for (;;)
{ {
buffpek= (BUFFPEK*) queue_top(&queue); buffpek= (BUFFPEK*) queue_top(&queue);
src= buffpek->key;
if (cmp) // Remove duplicates if (cmp) // Remove duplicates
{ {
if (!(*cmp)(first_cmp_arg, &(param->unique_buff), if (!(*cmp)(first_cmp_arg, &(param->unique_buff),
(uchar**) &buffpek->key)) (uchar**) &buffpek->key))
goto skip_duplicate; {
memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length); if (min_dupl_count)
} {
if (flag == 0) element_count cnt;
{ memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
if (my_b_write(to_file,(uchar*) buffpek->key, rec_length)) dupl_count+= cnt;
{ }
error=1; goto err; /* purecov: inspected */ goto skip_duplicate;
} }
if (min_dupl_count)
{
memcpy(param->unique_buff+dupl_count_ofs, &dupl_count,
sizeof(dupl_count));
}
src= param->unique_buff;
} }
else
if (!flag || !min_dupl_count || dupl_count >= min_dupl_count)
{ {
if (my_b_write(to_file, (uchar*) buffpek->key+offset, res_length)) if (my_b_write(to_file, src+(flag ? offset : 0), wr_len))
{ {
error=1; goto err; /* purecov: inspected */ error=1; goto err; /* purecov: inspected */
} }
} }
if (cmp)
{
memcpy(param->unique_buff, (uchar*) buffpek->key, rec_length);
if (min_dupl_count)
memcpy(&dupl_count, param->unique_buff+dupl_count_ofs,
sizeof(dupl_count));
}
if (!--max_rows) if (!--max_rows)
{ {
error= 0; /* purecov: inspected */ error= 0; /* purecov: inspected */
...@@ -1343,9 +1365,33 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ...@@ -1343,9 +1365,33 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
{ {
if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key)) if (!(*cmp)(first_cmp_arg, &(param->unique_buff), (uchar**) &buffpek->key))
{ {
buffpek->key+= rec_length; // Remove duplicate if (min_dupl_count)
{
element_count cnt;
memcpy(&cnt, (uchar *) buffpek->key+dupl_count_ofs, sizeof(cnt));
dupl_count+= cnt;
}
buffpek->key+= rec_length;
--buffpek->mem_count; --buffpek->mem_count;
} }
if (min_dupl_count)
memcpy(param->unique_buff+dupl_count_ofs, &dupl_count,
sizeof(dupl_count));
if (!flag || !min_dupl_count || dupl_count >= min_dupl_count)
{
src= param->unique_buff;
if (my_b_write(to_file, src+(flag ? offset : 0), wr_len))
{
error=1; goto err; /* purecov: inspected */
}
if (!--max_rows)
{
error= 0;
goto end;
}
}
} }
do do
...@@ -1367,12 +1413,17 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, ...@@ -1367,12 +1413,17 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
else else
{ {
register uchar *end; register uchar *end;
strpos= buffpek->key+offset; src= buffpek->key+offset;
for (end= strpos+buffpek->mem_count*rec_length ; for (end= src+buffpek->mem_count*rec_length ;
strpos != end ; src != end ;
strpos+= rec_length) src+= rec_length)
{ {
if (my_b_write(to_file, strpos, res_length)) if (flag && min_dupl_count &&
memcmp(&min_dupl_count, src+dupl_count_ofs,
sizeof(dupl_count_ofs))<0)
continue;
if (my_b_write(to_file, src, wr_len))
{ {
error=1; goto err; error=1; goto err;
} }
...@@ -1393,7 +1444,7 @@ err: ...@@ -1393,7 +1444,7 @@ err:
/* Do a merge to output-file (save only positions) */ /* Do a merge to output-file (save only positions) */
static int merge_index(SORTPARAM *param, uchar *sort_buffer, int merge_index(SORTPARAM *param, uchar *sort_buffer,
BUFFPEK *buffpek, uint maxbuffer, BUFFPEK *buffpek, uint maxbuffer,
IO_CACHE *tempfile, IO_CACHE *outfile) IO_CACHE *tempfile, IO_CACHE *outfile)
{ {
......
This diff is collapsed.
...@@ -274,12 +274,13 @@ public: ...@@ -274,12 +274,13 @@ public:
enum { enum {
QS_TYPE_RANGE = 0, QS_TYPE_RANGE = 0,
QS_TYPE_INDEX_MERGE = 1, QS_TYPE_INDEX_INTERSECT = 1,
QS_TYPE_RANGE_DESC = 2, QS_TYPE_INDEX_MERGE = 2,
QS_TYPE_FULLTEXT = 3, QS_TYPE_RANGE_DESC = 3,
QS_TYPE_ROR_INTERSECT = 4, QS_TYPE_FULLTEXT = 4,
QS_TYPE_ROR_UNION = 5, QS_TYPE_ROR_INTERSECT = 5,
QS_TYPE_GROUP_MIN_MAX = 6 QS_TYPE_ROR_UNION = 6,
QS_TYPE_GROUP_MIN_MAX = 7
}; };
/* Get type of this quick select - one of the QS_TYPE_* values */ /* Get type of this quick select - one of the QS_TYPE_* values */
...@@ -393,8 +394,17 @@ protected: ...@@ -393,8 +394,17 @@ protected:
friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx, friend QUICK_RANGE_SELECT *get_quick_select(PARAM*,uint idx,
SEL_ARG *key_tree, SEL_ARG *key_tree,
MEM_ROOT *alloc); MEM_ROOT *alloc);
friend
int read_keys_and_merge_scans(THD *thd, TABLE *head,
List<QUICK_RANGE_SELECT> quick_selects,
QUICK_RANGE_SELECT *pk_quick_select,
READ_RECORD *read_record,
bool intersection,
Unique **unique_ptr);
friend class QUICK_SELECT_DESC; friend class QUICK_SELECT_DESC;
friend class QUICK_INDEX_MERGE_SELECT; friend class QUICK_INDEX_MERGE_SELECT;
friend class QUICK_INDEX_INTERSECT_SELECT;
friend class QUICK_ROR_INTERSECT_SELECT; friend class QUICK_ROR_INTERSECT_SELECT;
friend class QUICK_GROUP_MIN_MAX_SELECT; friend class QUICK_GROUP_MIN_MAX_SELECT;
...@@ -545,6 +555,45 @@ public: ...@@ -545,6 +555,45 @@ public:
READ_RECORD read_record; READ_RECORD read_record;
}; };
class QUICK_INDEX_INTERSECT_SELECT : public QUICK_SELECT_I
{
Unique *unique;
public:
QUICK_INDEX_INTERSECT_SELECT(THD *thd, TABLE *table);
~QUICK_INDEX_INTERSECT_SELECT();
int init();
int reset(void);
int get_next();
bool reverse_sorted() { return false; }
bool unique_key_range() { return false; }
int get_type() { return QS_TYPE_INDEX_INTERSECT; }
void add_keys_and_lengths(String *key_names, String *used_lengths);
void add_info_string(String *str);
bool is_keys_used(const MY_BITMAP *fields);
#ifndef DBUG_OFF
void dbug_dump(int indent, bool verbose);
#endif
bool push_quick_back(QUICK_RANGE_SELECT *quick_sel_range);
/* range quick selects this index_merge read consists of */
List<QUICK_RANGE_SELECT> quick_selects;
/* quick select that uses clustered primary key (NULL if none) */
QUICK_RANGE_SELECT* pk_quick_select;
/* true if this select is currently doing a clustered PK scan */
bool doing_pk_scan;
MEM_ROOT alloc;
THD *thd;
int read_keys_and_merge();
/* used to get rows collected in Unique */
READ_RECORD read_record;
};
/* /*
Rowid-Ordered Retrieval (ROR) index intersection quick select. Rowid-Ordered Retrieval (ROR) index intersection quick select.
......
...@@ -2949,6 +2949,7 @@ class user_var_entry ...@@ -2949,6 +2949,7 @@ class user_var_entry
DTCollation collation; DTCollation collation;
}; };
/* /*
Unique -- class for unique (removing of duplicates). Unique -- class for unique (removing of duplicates).
Puts all values to the TREE. If the tree becomes too big, Puts all values to the TREE. If the tree becomes too big,
...@@ -2967,11 +2968,14 @@ class Unique :public Sql_alloc ...@@ -2967,11 +2968,14 @@ class Unique :public Sql_alloc
uchar *record_pointers; uchar *record_pointers;
bool flush(); bool flush();
uint size; uint size;
uint full_size;
uint min_dupl_count;
public: public:
ulong elements; ulong elements;
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
uint size_arg, ulonglong max_in_memory_size_arg); uint size_arg, ulonglong max_in_memory_size_arg,
uint min_dupl_count_arg= 0);
~Unique(); ~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; } ulong elements_in_tree() { return tree.elements_in_tree; }
inline bool unique_add(void *ptr) inline bool unique_add(void *ptr)
...@@ -2983,6 +2987,9 @@ public: ...@@ -2983,6 +2987,9 @@ public:
DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg)); DBUG_RETURN(!tree_insert(&tree, ptr, 0, tree.custom_arg));
} }
bool is_in_memory() { return (my_b_tell(&file) == 0); }
void close_for_expansion() { tree.flag= TREE_ONLY_DUPS; }
bool get(TABLE *table); bool get(TABLE *table);
static double get_use_cost(uint *buffer, uint nkeys, uint key_size, static double get_use_cost(uint *buffer, uint nkeys, uint key_size,
ulonglong max_in_memory_size); ulonglong max_in_memory_size);
...@@ -3002,6 +3009,11 @@ public: ...@@ -3002,6 +3009,11 @@ public:
friend int unique_write_to_file(uchar* key, element_count count, Unique *unique); friend int unique_write_to_file(uchar* key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique); friend int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique);
friend int unique_write_to_file_with_count(uchar* key, element_count count,
Unique *unique);
friend int unique_intersect_write_to_ptrs(uchar* key, element_count count,
Unique *unique);
}; };
......
...@@ -13636,7 +13636,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit, ...@@ -13636,7 +13636,8 @@ test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
by clustered PK values. by clustered PK values.
*/ */
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
DBUG_RETURN(0); DBUG_RETURN(0);
...@@ -14042,6 +14043,7 @@ check_reverse_order: ...@@ -14042,6 +14043,7 @@ check_reverse_order:
QUICK_SELECT_DESC *tmp; QUICK_SELECT_DESC *tmp;
int quick_type= select->quick->get_type(); int quick_type= select->quick->get_type();
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE || if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
...@@ -16810,6 +16812,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, ...@@ -16810,6 +16812,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
{ {
quick_type= tab->select->quick->get_type(); quick_type= tab->select->quick->get_type();
if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) || if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
(quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
(quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) || (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
(quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION)) (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
tab->type = JT_INDEX_MERGE; tab->type = JT_INDEX_MERGE;
...@@ -17015,6 +17018,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order, ...@@ -17015,6 +17018,7 @@ static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
{ {
if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION || if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT || quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
{ {
extra.append(STRING_WITH_LEN("; Using ")); extra.append(STRING_WITH_LEN("; Using "));
......
...@@ -57,6 +57,7 @@ typedef struct st_sort_param { ...@@ -57,6 +57,7 @@ typedef struct st_sort_param {
uint addon_length; /* Length of added packed fields */ uint addon_length; /* Length of added packed fields */
uint res_length; /* Length of records in final sorted file/buffer */ uint res_length; /* Length of records in final sorted file/buffer */
uint keys; /* Max keys / buffer */ uint keys; /* Max keys / buffer */
element_count min_dupl_count;
ha_rows max_rows,examined_rows; ha_rows max_rows,examined_rows;
TABLE *sort_form; /* For quicker make_sortkey */ TABLE *sort_form; /* For quicker make_sortkey */
SORT_FIELD *local_sortorder; SORT_FIELD *local_sortorder;
...@@ -80,4 +81,9 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file, ...@@ -80,4 +81,9 @@ int merge_buffers(SORTPARAM *param,IO_CACHE *from_file,
IO_CACHE *to_file, uchar *sort_buffer, IO_CACHE *to_file, uchar *sort_buffer,
BUFFPEK *lastbuff,BUFFPEK *Fb, BUFFPEK *lastbuff,BUFFPEK *Fb,
BUFFPEK *Tb,int flag); BUFFPEK *Tb,int flag);
int merge_index(SORTPARAM *param, uchar *sort_buffer,
BUFFPEK *buffpek, uint maxbuffer,
IO_CACHE *tempfile, IO_CACHE *outfile);
void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length); void reuse_freed_buff(QUEUE *queue, BUFFPEK *reuse, uint key_length);
...@@ -33,7 +33,6 @@ ...@@ -33,7 +33,6 @@
#include "mysql_priv.h" #include "mysql_priv.h"
#include "sql_sort.h" #include "sql_sort.h"
int unique_write_to_file(uchar* key, element_count count, Unique *unique) int unique_write_to_file(uchar* key, element_count count, Unique *unique)
{ {
/* /*
...@@ -45,6 +44,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique) ...@@ -45,6 +44,12 @@ int unique_write_to_file(uchar* key, element_count count, Unique *unique)
return my_b_write(&unique->file, key, unique->size) ? 1 : 0; return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
} }
int unique_write_to_file_with_count(uchar* key, element_count count, Unique *unique)
{
return my_b_write(&unique->file, key, unique->size) ||
my_b_write(&unique->file, &count, sizeof(element_count)) ? 1 : 0;
}
int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
{ {
memcpy(unique->record_pointers, key, unique->size); memcpy(unique->record_pointers, key, unique->size);
...@@ -52,10 +57,26 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique) ...@@ -52,10 +57,26 @@ int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
return 0; return 0;
} }
int unique_intersect_write_to_ptrs(uchar* key, element_count count, Unique *unique)
{
if (count >= unique->min_dupl_count)
{
memcpy(unique->record_pointers, key, unique->size);
unique->record_pointers+=unique->size;
}
return 0;
}
Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
uint size_arg, ulonglong max_in_memory_size_arg) uint size_arg, ulonglong max_in_memory_size_arg,
uint min_dupl_count_arg)
:max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0) :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
{ {
min_dupl_count= min_dupl_count_arg;
full_size= size;
if (min_dupl_count_arg)
full_size+= sizeof(element_count);
my_b_clear(&file); my_b_clear(&file);
init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0, init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0,
NULL, comp_func_fixed_arg); NULL, comp_func_fixed_arg);
...@@ -276,7 +297,11 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size, ...@@ -276,7 +297,11 @@ double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
result= 2*log2_n_fact(last_tree_elems + 1.0); result= 2*log2_n_fact(last_tree_elems + 1.0);
if (n_full_trees) if (n_full_trees)
result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0); result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
#if 1
result /= TIME_FOR_COMPARE_ROWID; result /= TIME_FOR_COMPARE_ROWID;
#else
result /= TIME_FOR_COMPARE_ROWID * 10;
#endif
DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys, DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
n_full_trees, n_full_trees?max_elements_in_tree:0, n_full_trees, n_full_trees?max_elements_in_tree:0,
...@@ -327,7 +352,10 @@ bool Unique::flush() ...@@ -327,7 +352,10 @@ bool Unique::flush()
file_ptr.count=tree.elements_in_tree; file_ptr.count=tree.elements_in_tree;
file_ptr.file_pos=my_b_tell(&file); file_ptr.file_pos=my_b_tell(&file);
if (tree_walk(&tree, (tree_walk_action) unique_write_to_file, tree_walk_action action= min_dupl_count ?
(tree_walk_action) unique_write_to_file_with_count :
(tree_walk_action) unique_write_to_file;
if (tree_walk(&tree, action,
(void*) this, left_root_right) || (void*) this, left_root_right) ||
insert_dynamic(&file_ptrs, (uchar*) &file_ptr)) insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
return 1; return 1;
...@@ -357,6 +385,7 @@ Unique::reset() ...@@ -357,6 +385,7 @@ Unique::reset()
reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1); reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
} }
elements= 0; elements= 0;
tree.flag= 0;
} }
/* /*
...@@ -576,14 +605,16 @@ bool Unique::get(TABLE *table) ...@@ -576,14 +605,16 @@ bool Unique::get(TABLE *table)
{ {
SORTPARAM sort_param; SORTPARAM sort_param;
table->sort.found_records=elements+tree.elements_in_tree; table->sort.found_records=elements+tree.elements_in_tree;
if (my_b_tell(&file) == 0) if (my_b_tell(&file) == 0)
{ {
/* Whole tree is in memory; Don't use disk if you don't need to */ /* Whole tree is in memory; Don't use disk if you don't need to */
if ((record_pointers=table->sort.record_pointers= (uchar*) if ((record_pointers=table->sort.record_pointers= (uchar*)
my_malloc(size * tree.elements_in_tree, MYF(0)))) my_malloc(size * tree.elements_in_tree, MYF(0))))
{ {
(void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs, tree_walk_action action= min_dupl_count ?
(tree_walk_action) unique_intersect_write_to_ptrs :
(tree_walk_action) unique_write_to_ptrs;
(void) tree_walk(&tree, action,
this, left_root_right); this, left_root_right);
return 0; return 0;
} }
...@@ -614,7 +645,10 @@ bool Unique::get(TABLE *table) ...@@ -614,7 +645,10 @@ bool Unique::get(TABLE *table)
sort_param.max_rows= elements; sort_param.max_rows= elements;
sort_param.sort_form=table; sort_param.sort_form=table;
sort_param.rec_length= sort_param.sort_length= sort_param.ref_length= sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
size; sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
full_size;
sort_param.min_dupl_count= min_dupl_count;
sort_param.res_length= 0;
sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length); sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
sort_param.not_killable=1; sort_param.not_killable=1;
...@@ -635,8 +669,9 @@ bool Unique::get(TABLE *table) ...@@ -635,8 +669,9 @@ bool Unique::get(TABLE *table)
if (flush_io_cache(&file) || if (flush_io_cache(&file) ||
reinit_io_cache(&file,READ_CACHE,0L,0,0)) reinit_io_cache(&file,READ_CACHE,0L,0,0))
goto err; goto err;
if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr, sort_param.res_length= sort_param.rec_length-
file_ptr, file_ptr+maxbuffer,0)) (min_dupl_count ? sizeof(min_dupl_count) : 0);
if (merge_index(&sort_param, sort_buffer, file_ptr, maxbuffer, &file, outfile))
goto err; goto err;
error=0; error=0;
err: err:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment