Commit 1534ed8e authored by unknown's avatar unknown

WL#926 "SUM(DISTINCT) and AVG(DISTINCT)": improvement of SUM(DISTINCT) and

 implementation of AVG(DISTINCT) which utilizes the approach with Fields.
The patch implemented in October is ported to the up-to-date tree
containing DECIMAL type.
Tests for AVG(DISTINCT) (although there is not much to test provided
that SUM(DISTINCT) works), cleanups for COUNT(DISTINCT) and GROUP_CONCAT()
will follow in another changeset.


sql/field.cc:
  A handy way to init a create_field for use with virtual tmp tables.
  Feel free to extend it for your own needs.
sql/field.h:
  Declaration for create_field::init_for_tmp_table()
sql/item.cc:
  Implementation for a framework used to easily handle different result
  types of SQL expressions. Instead of having instances of each possible 
  result type (integer, decimal, double) in every item, variables
  of all used types are moved to struct Hybrid_type.
  Hybrid_type can change its dynamic type in runtime, and become,
  for instance, DECIMAL from INTEGER.
  All type-specific Item operations are moved to the class hierarchy
  Hybrid_type_traits. Item::decimals and Item::max_length can
  be moved to Hybrid_type as well.
sql/item.h:
  Declaration for Hybrid_type framework. See also comments for item.cc
  in this changeset.
sql/item_sum.cc:
  Rewritten implementation for Item_sum_sum_distinct (SUM(DISTINCT))
  and added implementation for Item_sum_avg_distinct (AVG(DISTINCT)).
  The classes utilize Hybrid_type class hierarchy and Fields to
  convert SUM/AVG arguments to binary representation and store in a RB-tree.
sql/item_sum.h:
  Declarations for Item_sum_distinct (the new intermediate class used
  for SUM and AVG distinct), Item_sum_sum_distinct, Item_sum_avg_distinct.
sql/sql_select.cc:
  Implementation of create_virtual_tmp_table().
sql/sql_select.h:
  Declaration for create_virtual_tmp_table.
sql/sql_yacc.yy:
  Grammar support for Item_sum_avg_distinct.
parent 08e23eb6
......@@ -7150,6 +7150,24 @@ void create_field::create_length_to_internal_length(void)
}
/*
  Initialize this column definition as a numeric field of a temporary table.

  SYNOPSIS
    init_for_tmp_table()
    sql_type_arg   field type to store in sql_type
    length_arg     value to store in length
    decimals       number of decimals; masked with FIELDFLAG_MAX_DEC and
                   encoded into pack_flag
    maybe_null     if set, FIELDFLAG_MAYBE_NULL is added to pack_flag
    is_unsigned    if NOT set, FIELDFLAG_DECIMAL is added to pack_flag

  NOTES
    The field gets an empty name, the binary character set, no unireg
    check and no interval.
*/
void create_field::init_for_tmp_table(enum_field_types sql_type_arg,
                                      uint32 length_arg, uint32 decimals,
                                      bool maybe_null, bool is_unsigned)
{
  /* Identity and type of the column */
  field_name= "";
  sql_type= sql_type_arg;
  length= length_arg;

  /* Attributes that get fixed values for a tmp table column */
  unireg_check= Field::NONE;
  interval= 0;
  charset= &my_charset_bin;
  geom_type= Field::GEOM_GEOMETRY;

  /* Assemble the pack flags one attribute at a time */
  pack_flag= (FIELDFLAG_NUMBER |
              ((decimals & FIELDFLAG_MAX_DEC) << FIELDFLAG_DEC_SHIFT));
  if (maybe_null)
    pack_flag|= FIELDFLAG_MAYBE_NULL;
  if (!is_unsigned)
    pack_flag|= FIELDFLAG_DECIMAL;
}
enum_field_types get_blob_type_from_length(ulong length)
{
enum_field_types type;
......
......@@ -1340,7 +1340,8 @@ class Field_bit :public Field {
Create field class for CREATE TABLE
*/
class create_field :public Sql_alloc {
class create_field :public Sql_alloc
{
public:
const char *field_name;
const char *change; // If done with alter table
......@@ -1362,6 +1363,11 @@ class create_field :public Sql_alloc {
create_field() :after(0) {}
create_field(Field *field, Field *orig_field);
void create_length_to_internal_length(void);
/*
  Init for a tmp table field. To be extended if need be.
  Fills in this definition as a numeric column: the given type and length
  are stored as is, while decimals, maybe_null and is_unsigned are encoded
  into pack_flag. The name is set to "" and the charset to binary.
  NOTE(review): the second parameter is called max_length here but
  length_arg in the definition in field.cc - same parameter, two names.
*/
void init_for_tmp_table(enum_field_types sql_type_arg,
uint32 max_length, uint32 decimals,
bool maybe_null, bool is_unsigned);
};
......
......@@ -33,6 +33,131 @@ static void mark_as_dependent(THD *thd,
const String my_null_string("NULL", 4, default_charset_info);
/****************************************************************************/
/* Hybrid_type_traits {_real} */
/*
  REAL_RESULT flavour: mark the result item as having a non-fixed number
  of decimals and derive its max_length from the argument's decimals.
*/
void Hybrid_type_traits::fix_length_and_dec(Item *item, Item *arg) const
{
  Item &result_item= *item;
  /* decimals is assigned first; float_length() is evaluated afterwards */
  result_item.decimals= NOT_FIXED_DEC;
  result_item.max_length= result_item.float_length(arg->decimals);
}
/* Return the address of the single function-local REAL_RESULT traits object */
const Hybrid_type_traits *Hybrid_type_traits::instance()
{
  static const Hybrid_type_traits the_real_traits;
  return &the_real_traits;
}
/*
  Convert the real value to decimal.
  The result is written to (and returned as) the first element of
  val->dec_buf; the 'to' argument is not used.
*/
my_decimal *
Hybrid_type_traits::val_decimal(Hybrid_type *val, my_decimal *to) const
{
  my_decimal *converted= val->dec_buf;
  double2my_decimal(E_DEC_FATAL_ERROR, val->real, converted);
  return converted;
}
/*
  Store the real value in 'to' with the requested number of decimals,
  using the binary character set, and return 'to'.
*/
String *
Hybrid_type_traits::val_str(Hybrid_type *val, String *to, uint8 decimals) const
{
  String *out= to;
  out->set(val->real, decimals, &my_charset_bin);
  return out;
}
/* Hybrid_type_traits_decimal */
/* Return the address of the single function-local DECIMAL_RESULT traits object */
const Hybrid_type_traits_decimal *Hybrid_type_traits_decimal::instance()
{
  static const Hybrid_type_traits_decimal the_decimal_traits;
  return &the_decimal_traits;
}
/*
  DECIMAL_RESULT flavour: the result keeps the argument's decimals and
  gets DECIMAL_LONGLONG_DIGITS more characters than the argument, capped
  at DECIMAL_MAX_LENGTH.
*/
void
Hybrid_type_traits_decimal::fix_length_and_dec(Item *item, Item *arg) const
{
  /* The two assignments touch different members and are independent */
  item->max_length= min(arg->max_length + DECIMAL_LONGLONG_DIGITS,
                        DECIMAL_MAX_LENGTH);
  item->decimals= arg->decimals;
}
/* Make buffer 0 the current decimal buffer and set it to zero */
void Hybrid_type_traits_decimal::set_zero(Hybrid_type *val) const
{
  val->used_dec_buf_no= 0;
  my_decimal_set_zero(&val->dec_buf[0]);
}
/*
  Add the decimal value of field f to the accumulated value.
  The sum is written into the buffer that is not current, which then
  becomes the current one; dec_buf[2] receives the field value.
*/
void Hybrid_type_traits_decimal::add(Hybrid_type *val, Field *f) const
{
  my_decimal *current= &val->dec_buf[val->used_dec_buf_no];
  my_decimal *target= &val->dec_buf[val->used_dec_buf_no ^ 1];
  my_decimal *scratch= &val->dec_buf[2];

  my_decimal_add(E_DEC_FATAL_ERROR, target, current, f->val_decimal(scratch));
  /* Swap the roles of buffers 0 and 1 */
  val->used_dec_buf_no^= 1;
}
/*
  Divide the accumulated decimal value by u.
  u is first converted into dec_buf[2]; the quotient goes to the buffer
  that is not current, which then becomes the current one.
*/
void Hybrid_type_traits_decimal::div(Hybrid_type *val, ulonglong u) const
{
  my_decimal *divisor= &val->dec_buf[2];
  int2my_decimal(E_DEC_FATAL_ERROR, u, TRUE, divisor);

  my_decimal *dividend= &val->dec_buf[val->used_dec_buf_no];
  my_decimal *quotient= &val->dec_buf[val->used_dec_buf_no ^ 1];
  /* XXX: what is '4' for scale? */
  my_decimal_div(E_DEC_FATAL_ERROR, quotient, dividend, divisor, 4);
  /* Swap the roles of buffers 0 and 1 */
  val->used_dec_buf_no^= 1;
}
/* Convert the current decimal buffer to an integer and return it */
longlong
Hybrid_type_traits_decimal::val_int(Hybrid_type *val, bool unsigned_flag) const
{
  my_decimal *current= &val->dec_buf[val->used_dec_buf_no];
  longlong as_integer;
  my_decimal2int(E_DEC_FATAL_ERROR, current, unsigned_flag, &as_integer);
  return as_integer;
}
/*
  Convert the current decimal buffer to double.
  The converted value is also left in val->real.
*/
double
Hybrid_type_traits_decimal::val_real(Hybrid_type *val) const
{
  my_decimal *current= &val->dec_buf[val->used_dec_buf_no];
  my_decimal2double(E_DEC_FATAL_ERROR, current, &val->real);
  return val->real;
}
/*
  Round the current decimal buffer to 'decimals' digits into dec_buf[2],
  print the rounded value to 'to' and return 'to'.
*/
String *
Hybrid_type_traits_decimal::val_str(Hybrid_type *val, String *to,
                                    uint8 decimals) const
{
  my_decimal *rounded= &val->dec_buf[2];
  my_decimal_round(E_DEC_FATAL_ERROR, &val->dec_buf[val->used_dec_buf_no],
                   decimals, FALSE, rounded);
  my_decimal2string(E_DEC_FATAL_ERROR, rounded, 0, 0, 0, to);
  return to;
}
/* Hybrid_type_traits_integer */
/* Return the address of the single function-local INT_RESULT traits object */
const Hybrid_type_traits_integer *Hybrid_type_traits_integer::instance()
{
  static const Hybrid_type_traits_integer the_integer_traits;
  return &the_integer_traits;
}
/*
  INT_RESULT flavour: no decimals, a fixed max_length of 21 and a cleared
  unsigned_flag. The argument item is not consulted.
*/
void
Hybrid_type_traits_integer::fix_length_and_dec(Item *item, Item *arg) const
{
  Item &result_item= *item;
  result_item.unsigned_flag= 0;
  result_item.max_length= 21;
  result_item.decimals= 0;
}
/*****************************************************************************
** Item functions
*****************************************************************************/
......
......@@ -106,6 +106,120 @@ class DTCollation {
}
};
/*************************************************************************/
/*
A framework to easily handle different return types for hybrid items
(hybrid item is an item whose operand can be of any type, e.g. integer,
real, decimal).
*/
struct Hybrid_type_traits;
struct Hybrid_type
{
longlong integer;
double real;
/*
Use two decimal buffers interchangeably to speed up += operation
which has no native support in decimal library.
Hybrid_type+= arg is implemented as dec_buf[1]= dec_buf[0] + arg.
The third decimal is used as a handy temporary storage.
*/
my_decimal dec_buf[3];
int used_dec_buf_no;
/*
Traits moved to a separate class to
a) be able to easily change object traits in runtime
b) they work as a differentiator for the union above
*/
const Hybrid_type_traits *traits;
Hybrid_type() {}
/* XXX: add traits->copy() when needed */
Hybrid_type(const Hybrid_type &rhs) :traits(rhs.traits) {}
};
/* Hybrid_type_traits interface + default implementation for REAL_RESULT */
struct Hybrid_type_traits
{
  /*
    User-provided default constructor: instance() defines a const static
    object of this type without an initializer, which requires one on
    compilers that follow the original (pre-CWG 253) rule.
  */
  Hybrid_type_traits() {}
  /* The class is polymorphic, so give it a virtual destructor */
  virtual ~Hybrid_type_traits() {}

  /* Result type handled by this traits object */
  virtual Item_result type() const { return REAL_RESULT; }

  /* Set decimals and max_length of 'item' based on its argument 'arg' */
  virtual void
  fix_length_and_dec(Item *item, Item *arg) const;

  /* Hybrid_type operations. */
  virtual void set_zero(Hybrid_type *val) const { val->real= 0.0; }
  /* val+= value of field f */
  virtual void add(Hybrid_type *val, Field *f) const
  { val->real+= f->val_real(); }
  /* val/= u */
  virtual void div(Hybrid_type *val, ulonglong u) const
  { val->real/= ulonglong2double(u); }

  /* Conversions of the stored value; unsigned_flag is not used here */
  virtual longlong val_int(Hybrid_type *val, bool unsigned_flag) const
  { return (longlong) val->real; }
  virtual double val_real(Hybrid_type *val) const { return val->real; }
  virtual my_decimal *val_decimal(Hybrid_type *val, my_decimal *buf) const;
  virtual String *val_str(Hybrid_type *val, String *buf, uint8 decimals) const;

  /* Access to the shared traits object for this result type */
  static const Hybrid_type_traits *instance();
};
/* Traits for DECIMAL_RESULT: the value lives in Hybrid_type::dec_buf */
struct Hybrid_type_traits_decimal: public Hybrid_type_traits
{
  /*
    User-provided default constructor: instance() defines a const static
    object of this type without an initializer.
  */
  Hybrid_type_traits_decimal() {}

  virtual Item_result type() const { return DECIMAL_RESULT; }

  /*
    Parameter order is (item, arg), the same as in the base class
    declaration and in the definition in item.cc.
  */
  virtual void
  fix_length_and_dec(Item *item, Item *arg) const;

  /* Hybrid_type operations. */
  virtual void set_zero(Hybrid_type *val) const;
  virtual void add(Hybrid_type *val, Field *f) const;
  virtual void div(Hybrid_type *val, ulonglong u) const;

  virtual longlong val_int(Hybrid_type *val, bool unsigned_flag) const;
  virtual double val_real(Hybrid_type *val) const;
  /* Returns the current decimal buffer itself; 'buf' is not used */
  virtual my_decimal *val_decimal(Hybrid_type *val, my_decimal *buf) const
  { return &val->dec_buf[val->used_dec_buf_no]; }
  virtual String *val_str(Hybrid_type *val, String *buf, uint8 decimals) const;

  static const Hybrid_type_traits_decimal *instance();
};
/* Traits for INT_RESULT: the value lives in Hybrid_type::integer */
struct Hybrid_type_traits_integer: public Hybrid_type_traits
{
  /*
    User-provided default constructor: instance() defines a const static
    object of this type without an initializer.
  */
  Hybrid_type_traits_integer() {}

  virtual Item_result type() const { return INT_RESULT; }

  /*
    Parameter order is (item, arg), the same as in the base class
    declaration and in the definition in item.cc.
  */
  virtual void
  fix_length_and_dec(Item *item, Item *arg) const;

  /* Hybrid_type operations. */
  virtual void set_zero(Hybrid_type *val) const
  { val->integer= 0; }
  virtual void add(Hybrid_type *val, Field *f) const
  { val->integer+= f->val_int(); }
  /* Integer division; u is cast to longlong first */
  virtual void div(Hybrid_type *val, ulonglong u) const
  { val->integer/= (longlong) u; }

  virtual longlong val_int(Hybrid_type *val, bool unsigned_flag) const
  { return val->integer; }
  virtual double val_real(Hybrid_type *val) const
  { return (double) val->integer; }
  /* Converts into the scratch buffer dec_buf[2]; 'buf' is not used */
  virtual my_decimal *val_decimal(Hybrid_type *val, my_decimal *buf) const
  {
    int2my_decimal(E_DEC_FATAL_ERROR, val->integer, 0, &val->dec_buf[2]);
    return &val->dec_buf[2];
  }
  virtual String *val_str(Hybrid_type *val, String *buf, uint8 decimals) const
  { buf->set(val->integer, &my_charset_bin); return buf;}

  static const Hybrid_type_traits_integer *instance();
};
/*************************************************************************/
typedef bool (Item::*Item_processor)(byte *arg);
typedef Item* (Item::*Item_transformer) (byte *arg);
......
This diff is collapsed.
......@@ -30,8 +30,8 @@ class Item_sum :public Item_result_field
public:
enum Sumfunctype
{ COUNT_FUNC, COUNT_DISTINCT_FUNC, SUM_FUNC, SUM_DISTINCT_FUNC, AVG_FUNC,
MIN_FUNC, MAX_FUNC, UNIQUE_USERS_FUNC, STD_FUNC, VARIANCE_FUNC,
SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC
AVG_DISTINCT_FUNC, MIN_FUNC, MAX_FUNC, UNIQUE_USERS_FUNC, STD_FUNC,
VARIANCE_FUNC, SUM_BIT_FUNC, UDF_SUM_FUNC, GROUP_CONCAT_FUNC
};
Item **args, *tmp_args[2];
......@@ -68,6 +68,9 @@ class Item_sum :public Item_result_field
a temporary table. Similar to reset(), but must also store value in
result_field. Like reset() it is supposed to reset start value to
default.
This set of methods (result_field(), reset_field(), update_field()) of
Item_sum is used only if quick_group is not null. Otherwise
copy_or_same() is used to obtain a copy of this item.
*/
virtual void reset_field()=0;
/*
......@@ -161,26 +164,28 @@ class Item_sum_sum :public Item_sum_num
};
/*
Item_sum_sum_distinct - SELECT SUM(DISTINCT expr) FROM ...
support. See also: MySQL manual, chapter 'Adding New Functions To MySQL'
and comments in item_sum.cc.
*/
/* Common class for SUM(DISTINCT), AVG(DISTINCT) */
class Unique;
class Item_sum_sum_distinct :public Item_sum_sum
class Item_sum_distinct :public Item_sum_num
{
protected:
/* storage for the summation result */
ulonglong count;
Hybrid_type val;
/* storage for unique elements */
Unique *tree;
byte *dec_bin_buff;
my_decimal tmp_dec;
uint key_length;
private:
Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct *item);
TABLE *table;
enum enum_field_types table_field_type;
uint tree_key_length;
protected:
Item_sum_distinct(THD *thd, Item_sum_distinct *item);
public:
Item_sum_sum_distinct(Item *item_par);
~Item_sum_sum_distinct() {}
Item_sum_distinct(Item *item_par);
~Item_sum_distinct();
bool setup(THD *thd);
void clear();
void cleanup();
......@@ -190,15 +195,54 @@ class Item_sum_sum_distinct :public Item_sum_sum
longlong val_int();
String *val_str(String *str);
void add_real(double val);
void add_decimal(byte *val);
/* XXX: does it need make_unique? */
enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
void reset_field() {} // not used
void update_field() {} // not used
const char *func_name() const { return "sum_distinct"; }
Item *copy_or_same(THD* thd);
virtual void no_rows_in_result() {}
void fix_length_and_dec();
enum Item_result result_type () const { return val.traits->type(); }
virtual void calculate_val_and_count();
virtual bool unique_walk_function(void *elem);
};
/*
  Item_sum_sum_distinct - SUM(DISTINCT expr).
  Everything except the function identity and copying comes from
  Item_sum_distinct.
  See also: MySQL manual, chapter 'Adding New Functions To MySQL'
  and comments in item_sum.cc.
*/
class Item_sum_sum_distinct :public Item_sum_distinct
{
public:
  Item_sum_sum_distinct(Item *item_arg) :Item_sum_distinct(item_arg) {}

  const char *func_name() const { return "sum_distinct"; }
  enum Sumfunctype sum_func () const { return SUM_DISTINCT_FUNC; }
  /* Always creates a new item through the private copying constructor */
  Item *copy_or_same(THD* thd) { return new Item_sum_sum_distinct(thd, this); }

private:
  /* Copying constructor, reachable via copy_or_same() only */
  Item_sum_sum_distinct(THD *thd, Item_sum_sum_distinct *item)
    :Item_sum_distinct(thd, item) {}
};
/*
  Item_sum_avg_distinct - SELECT AVG(DISTINCT expr) FROM ...
  Differs from the common Item_sum_distinct base in its function identity
  and in its own calculate_val_and_count().
*/
class Item_sum_avg_distinct: public Item_sum_distinct
{
public:
  Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}

  const char *func_name() const { return "avg_distinct"; }
  enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
  virtual void calculate_val_and_count();
  /* Always creates a new item through the private copying constructor */
  Item *copy_or_same(THD* thd) { return new Item_sum_avg_distinct(thd, this); }

private:
  /* Copying constructor, reachable via copy_or_same() only */
  Item_sum_avg_distinct(THD *thd, Item_sum_avg_distinct *original)
    :Item_sum_distinct(thd, original) {}
};
......
......@@ -8359,6 +8359,117 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
}
/****************************************************************************/
/*
  Create a reduced TABLE object with properly set up Field list from a
  list of field definitions.

  SYNOPSIS
    create_virtual_tmp_table()
    thd         connection handle
    field_list  list of column definitions

  DESCRIPTION
    The created table doesn't have a table handler associated with
    it, has no keys, no group/distinct, no copy_funcs array.
    The sole purpose of this TABLE object is to use the power of Field
    class to read/write data to/from table->record[0]. Then one can store
    the record in any container (RB tree, hash, etc).
    The table is created in THD mem_root, so are the table's fields.
    Consequently, if you don't use BLOB fields, you don't need to free it.

    Layout of table->record[0]: the NULL bits (one per nullable column,
    rounded up to whole bytes) come first, followed by the field values
    in list order, each occupying its pack_length().

  RETURN
    0 if out of memory or if a Field could not be created,
    TABLE object in case of success
*/
TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list)
{
uint field_count= field_list.elements;
Field **field;
create_field *cdef; /* column definition */
uint record_length= 0;
uint null_count= 0; /* number of columns which may be null */
uint null_pack_length; /* NULL representation array length */
TABLE_SHARE *s;
/*
  Create the table and list of all fields.
  The Field pointer array has one extra slot for the terminating NULL.
*/
TABLE *table= (TABLE*) thd->calloc(sizeof(*table));
field= (Field**) thd->alloc((field_count + 1) * sizeof(Field*));
if (!table || !field)
return 0;
table->field= field;
/* The share used is the one embedded in the TABLE object itself */
table->s= s= &table->share_not_to_be_used;
s->fields= field_count;
/* Create all fields and calculate the total length of record */
List_iterator_fast<create_field> it(field_list);
while ((cdef= it++))
{
/*
  No record buffer exists yet, so the field is created with a 0 data
  pointer. For nullable columns a non-zero placeholder ("") is passed as
  the null pointer; the real data/null positions are assigned with
  move_field() further down.
  NOTE(review): presumably make_field() needs a non-zero null pointer to
  build a nullable Field - confirm against make_field().
*/
*field= make_field(0, cdef->length,
(uchar*) (f_maybe_null(cdef->pack_flag) ? "" : 0),
f_maybe_null(cdef->pack_flag) ? 1 : 0,
cdef->pack_flag, cdef->sql_type, cdef->charset,
cdef->geom_type, cdef->unireg_check,
cdef->interval, cdef->field_name, table);
/* On failure this slot holds 0 and so terminates the cleanup loop */
if (!*field)
goto error;
record_length+= (**field).pack_length();
if (! ((**field).flags & NOT_NULL_FLAG))
++null_count;
++field;
}
*field= NULL; /* mark the end of the list */
/* One bit per nullable column, rounded up to whole bytes */
null_pack_length= (null_count + 7)/8;
s->reclength= record_length + null_pack_length;
s->rec_buff_length= ALIGN_SIZE(s->reclength + 1);
table->record[0]= (byte*) thd->alloc(s->rec_buff_length);
if (!table->record[0])
goto error;
if (null_pack_length)
{
/* The NULL bits live at the very beginning of the record */
table->null_flags= (uchar*) table->record[0];
s->null_fields= null_count;
s->null_bytes= null_pack_length;
}
table->in_use= thd; /* field->reset() may access table->in_use */
{
/* Set up field pointers */
byte *null_pos= table->record[0];
/*
  s->null_bytes is only assigned above when there are nullable columns.
  NOTE(review): otherwise this relies on the TABLE returned by
  thd->calloc() being zero-filled - confirm.
*/
byte *field_pos= null_pos + s->null_bytes;
uint null_bit= 1;
for (field= table->field; *field; ++field)
{
Field *cur_field= *field;
if ((cur_field->flags & NOT_NULL_FLAG))
cur_field->move_field((char*) field_pos);
else
{
cur_field->move_field((char*) field_pos, (uchar*) null_pos, null_bit);
/* Advance to the next NULL bit, moving to the next byte after 8 bits */
null_bit<<= 1;
if (null_bit == (1 << 8))
{
++null_pos;
null_bit= 1;
}
}
cur_field->reset();
field_pos+= cur_field->pack_length();
}
}
return table;
error:
/* Destroy the fields created so far; the list is 0-terminated here */
for (field= table->field; *field; ++field)
delete *field; /* just invokes field destructor */
return 0;
}
static bool open_tmp_table(TABLE *table)
{
int error;
......
......@@ -387,6 +387,7 @@ TABLE *create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
ORDER *group, bool distinct, bool save_sum_fields,
ulong select_options, ha_rows rows_limit,
char* alias);
TABLE *create_virtual_tmp_table(THD *thd, List<create_field> &field_list);
void free_tmp_table(THD *thd, TABLE *entry);
void count_field_types(TMP_TABLE_PARAM *param, List<Item> &fields,
bool reset_with_sum_func);
......
......@@ -4754,6 +4754,8 @@ udf_expr:
sum_expr:
AVG_SYM '(' in_sum_expr ')'
{ $$=new Item_sum_avg($3); }
| AVG_SYM '(' DISTINCT in_sum_expr ')'
{ $$=new Item_sum_avg_distinct($4); }
| BIT_AND '(' in_sum_expr ')'
{ $$=new Item_sum_and($3); }
| BIT_OR '(' in_sum_expr ')'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment