Commit cf50e13f authored by Monty's avatar Monty Committed by Sergei Golubchik

MDEV-6080: Allowing storage engine to shortcut group by queries

This task is to allow storage engines that can execute GROUP BY or
summary queries efficiently to intercept a full query or sub query from
MariaDB and deliver the result either to the client or to a temporary
table for further processing.

- Added code in sql_select.cc to intercept GROUP BY queries.
  Creation of group_by_handler is done after all optimizations to allow
  storage engine to benefit of an optimized WHERE clause and suggested
  indexes to use.
- Added group by handler to sequence engine and a group_by test suite as
  a way to test the new interface.
- Intercept EXPLAIN with a message "Storage engine handles GROUP BY"

libmysqld/CMakeLists.txt:
  Added new group_by_handler files
sql/CMakeLists.txt:
  Added new group_by_handler files
sql/group_by_handler.cc:
  Implementation of group_by_handler functions
sql/group_by_handler.h:
  Definition of group_by_handler class
sql/handler.h:
  Added handlerton function to create a group_by_handler, if the storage
  engine can intercept the query.
sql/item_cmpfunc.cc:
  Allow one to evaluate item_equal any time.
sql/sql_select.cc:
  Added code to intercept GROUP BY queries
  - If all tables are from the same storage engine and the query is
    using sum functions, call create_group_by() to check if the storage
    engine can intercept the query.
    - If yes:
       - create a temporary table to hold a GROUP_BY row or result
       - In do_select() intercept normal query execution by instead
         calling the group_by_handler to get the result
       - Intercept EXPLAIN
sql/sql_select.h:
  Added handling of group_by_handler
  Added caching of the original join tab (needed for cleanup after
  group_by handler)
storage/sequence/mysql-test/sequence/group_by.result:
  Test group_by_handler interface
storage/sequence/mysql-test/sequence/group_by.test:
  Test group_by_handler interface
storage/sequence/sequence.cc:
  Added simple group_by_engine for handling COUNT(*) and
  SUM(primary_key).  This was done as a test of the group_by_handler
  interface
parent d8df2b94
......@@ -75,6 +75,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc libmysqld.c lib_sql.cc
../sql/debug_sync.cc ../sql/opt_table_elimination.cc
../sql/sql_prepare.cc ../sql/sql_rename.cc ../sql/sql_repl.cc
../sql/sql_select.cc ../sql/sql_servers.cc
../sql/group_by_handler.cc
../sql/sql_show.cc ../sql/sql_state.c
../sql/sql_statistics.cc ../sql/sql_string.cc
../sql/sql_tablespace.cc ../sql/sql_table.cc ../sql/sql_test.cc
......
......@@ -108,6 +108,7 @@ SET (SQL_SOURCE
sql_partition.cc sql_plugin.cc sql_prepare.cc sql_rename.cc
debug_sync.cc debug_sync.h
sql_repl.cc sql_select.cc sql_show.cc sql_state.c
group_by_handler.cc
sql_statistics.cc sql_string.cc
sql_table.cc sql_test.cc sql_trigger.cc sql_udf.cc sql_union.cc
sql_update.cc sql_view.cc strfunc.cc table.cc thr_malloc.cc
......
/*
Copyright (c) 2014, SkySQL Ab & MariaDB Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
This file implements the group_by_handler code. This interface
can be used by storage handlers that can intercept summary or GROUP
BY queries from MariaDB and itself return the result to the user or
upper level.
*/
#ifdef USE_PRAGMA_IMPLEMENTATION
#pragma implementation // gcc: Class implementation
#endif
#include "sql_priv.h"
#include "sql_select.h"
/*
Same return values as do_select();
@retval
0 if ok
@retval
1 if error is sent
@retval
-1 if error should be sent
*/
int group_by_handler::execute(JOIN *join)
{
int err;
ha_rows max_limit;
ha_rows *reset_limit= 0;
Item **reset_item= 0;
DBUG_ENTER("group_by_handler");
if ((err= init_scan()))
goto error;
if (store_data_in_temp_table)
{
max_limit= join->tmp_table_param.end_write_records;
reset_limit= &join->unit->select_limit_cnt;
}
else
{
max_limit= join->unit->select_limit_cnt;
if (join->unit->fake_select_lex)
reset_item= &join->unit->fake_select_lex->select_limit;
}
while (!(err= next_row()))
{
if (thd->check_killed())
{
thd->send_kill_message();
(void) end_scan();
DBUG_RETURN(-1);
}
/* Check if we can accept the row */
if (!having || having->val_bool())
{
if (store_data_in_temp_table)
{
if ((err= table->file->ha_write_tmp_row(table->record[0])))
{
bool is_duplicate;
if (!table->file->is_fatal_error(err, HA_CHECK_DUP))
continue; // Distinct elimination
if (create_internal_tmp_table_from_heap(thd, table,
join->tmp_table_param.
start_recinfo,
&join->tmp_table_param.
recinfo,
err, 1, &is_duplicate))
DBUG_RETURN(1);
if (is_duplicate)
continue;
}
}
else
{
if (join->do_send_rows)
{
int error;
/* result < 0 if row was not accepted and should not be counted */
if ((error= join->result->send_data(*join->fields)))
{
(void) end_scan();
DBUG_RETURN(error < 0 ? 0 : -1);
}
}
}
/* limit handling */
if (++join->send_records >= max_limit && join->do_send_rows)
{
if (!(join->select_options & OPTION_FOUND_ROWS))
break; // LIMIT reached
join->do_send_rows= 0; // Calculate FOUND_ROWS()
if (reset_limit)
*reset_limit= HA_POS_ERROR;
if (reset_item)
*reset_item= 0;
}
}
}
if (err != 0 && err != HA_ERR_END_OF_FILE)
goto error;
if ((err= end_scan()))
goto error_2;
if (!store_data_in_temp_table && join->result->send_eof())
DBUG_RETURN(1); // Don't send error to client
DBUG_RETURN(0);
error:
(void) end_scan();
error_2:
print_error(err, MYF(0));
DBUG_RETURN(-1); // Error not sent to client
}
void group_by_handler::print_error(int error, myf errflag)
{
my_error(ER_GET_ERRNO, MYF(0), error, hton_name(ht)->str);
}
/*
Copyright (c) 2014, SkySQL Ab & MariaDB Foundation
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
This file implements the group_by_handler interface. This interface
can be used by storage handlers that can intercept summary or GROUP
BY queries from MariaDB and itself return the result to the user or
upper level.
Both main and sub queries are supported. Here are some examples of what the
storage engine could intersept:
SELECT count(*) FROM t1;
SELECT a,count(*) FROM t1 group by a;
SELECT a,count(*) as sum FROM t1 where b > 10 group by a, order by sum;
SELECT a,count(*) FROM t1,t2;
SELECT a, (select sum(*) from t2 where t1.a=t2.a) from t2;
See https://mariadb.atlassian.net/browse/MDEV-6080 for more information.
*/
class JOIN;
class group_by_handler
{
public:
/* Arguments for group_by_handler, for usage later */
THD *thd;
SELECT_LEX *select_lex;
List<Item> *fields;
TABLE_LIST *table_list;
ORDER *group_by, *order_by;
Item *where, *having;
handlerton *ht; /* storage engine of this handler */
/*
Bit's of things the storage engine can do for this query.
Should be initialized on object creation.
*/
/* Temporary table where all results should be stored in record[0] */
TABLE *table;
bool store_data_in_temp_table; /* Set by mariadb */
group_by_handler(THD *thd_arg, SELECT_LEX *select_lex_arg,
List<Item> *fields_arg,
TABLE_LIST *table_list_arg, ORDER *group_by_arg,
ORDER *order_by_arg, Item *where_arg,
Item *having_arg, handlerton *ht_arg)
: thd(thd_arg), select_lex(select_lex_arg), fields(fields_arg),
table_list(table_list_arg), group_by(group_by_arg),
order_by(order_by_arg), where(where_arg), having(having_arg),
ht(ht_arg), table(0), store_data_in_temp_table(0)
{}
virtual ~group_by_handler() {}
/*
Store pointer to temporary table and objects modified to point to
the temporary table. This will happen during the optimize phase.
We provide new 'having' and 'order_by' elements here. The differ from the
original ones in that these are modified to point to fields in the
temporary table 'table'.
Return 1 if the storage handler cannot handle the GROUP BY after all,
in which case we have to give an error to the end user for the query.
This is becasue we can't revert back the old having and order_by elements.
*/
virtual bool init(TABLE *temporary_table, Item *having_arg,
ORDER *order_by_arg)
{
table= temporary_table;
having= having_arg;
order_by= order_by_arg;
return 0;
}
/*
Result data is sorted by the storage engine according to order_by (if it
exists) else according to the group_by. If this is not specified,
MariaDB will store the result set into the temporary table and sort the
result.
*/
#define GROUP_BY_ORDER_BY 1
/* The storage engine can handle DISTINCT */
#define GROUP_BY_DISTINCT 2
virtual uint flags() { return 0; }
/*
Functions to scan data. All these returns 0 if ok, error code in case
of error
*/
/*
Initialize group_by scan, prepare for next_row().
If this is a sub query with group by, this can be called many times for
a query.
*/
virtual int init_scan()= 0;
/*
Return next group by result in table->record[0].
Return 0 if row found, HA_ERR_END_OF_FILE if last row and other error
number in case of fatal error.
*/
virtual int next_row()= 0;
/* End scanning */
virtual int end_scan()=0;
/* Information for optimizer (used by EXPLAIN) */
virtual int info(uint flag, ha_statistics *stats)= 0;
/* Function that calls the above scan functions */
int execute(JOIN *join);
/* Report errors */
virtual void print_error(int error, myf errflag);
};
......@@ -949,6 +949,10 @@ struct handler_iterator {
};
class handler;
class group_by_handler;
typedef class st_select_lex SELECT_LEX;
typedef struct st_order ORDER;
/*
handlerton is a singleton structure - one instance per storage engine -
to provide access to storage engine functionality that works on the
......@@ -1251,6 +1255,24 @@ struct handlerton
*/
const char **tablefile_extensions; // by default - empty list
/**********************************************************************
Functions to intercept queries
**********************************************************************/
/*
Create and return a group_by_handler, if the storage engine can execute
the summary / group by query.
If the storage engine can't do that, return NULL.
This is only called for SELECT's where all tables are from the same
storage engine.
*/
group_by_handler *(*create_group_by)(THD *thd, SELECT_LEX *select_lex,
List<Item> *fields,
TABLE_LIST *table_list, ORDER *group_by,
ORDER *order_by, Item *where,
Item *having);
/*********************************************************************
Table discovery API.
It allows the server to "discover" tables that exist in the storage
......@@ -4080,6 +4102,7 @@ class handler :public Sql_alloc
};
#include "multi_range_read.h"
#include "group_by_handler.h"
bool key_uses_partial_cols(TABLE_SHARE *table, uint keyno);
......
......@@ -759,6 +759,12 @@ JOIN::prepare(Item ***rref_pointer_array,
TABLE_LIST *tbl;
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
/*
If all tables comes from the same storage engine, one_storge_engine will
be set to point to the handlerton of this engine.
*/
one_storage_engine= 0;
uint table_loop_count= 0;
while ((tbl= li++))
{
/*
......@@ -770,8 +776,17 @@ JOIN::prepare(Item ***rref_pointer_array,
Note: this loop doesn't touch tables inside merged semi-joins, because
subquery-to-semijoin conversion has not been done yet. This is intended.
*/
if (mixed_implicit_grouping && tbl->table)
tbl->table->maybe_null= 1;
if (tbl->table)
{
if (mixed_implicit_grouping)
tbl->table->maybe_null= 1;
if (!table_loop_count++)
one_storage_engine= tbl->table->file->ht;
else if (one_storage_engine != tbl->table->file->ht)
one_storage_engine= 0;
}
else
one_storage_engine= 0;
}
if ((wild_num && setup_wild(thd, tables_list, fields_list, &all_fields,
......@@ -1893,6 +1908,102 @@ TODO: make view to decide if it is possible to write to WHERE directly or make S
if ((select_lex->options & OPTION_SCHEMA_TABLE))
optimize_schema_tables_reads(this);
/*
All optimization is done. Check if we can use the storage engines
group by handler to evaluate the group by
*/
if ((tmp_table_param.sum_func_count || group_list) && !procedure &&
(one_storage_engine && one_storage_engine->create_group_by))
{
/* Check if the storage engine can intercept the query */
if ((storage_handler_for_group_by=
(one_storage_engine->create_group_by)(thd, select_lex,
&all_fields,
tables_list,
group_list, order,
conds, having)))
{
uint handler_flags= storage_handler_for_group_by->flags();
int err;
/*
We must store rows in the tmp table if we need to do an ORDER BY
or DISTINCT and the storage handler can't handle it.
*/
need_tmp= ((!(handler_flags & GROUP_BY_ORDER_BY) &&
(order || group_list)) ||
(!(handler_flags & GROUP_BY_DISTINCT) && select_distinct));
tmp_table_param.hidden_field_count= (all_fields.elements -
fields_list.elements);
if (!(exec_tmp_table1=
create_tmp_table(thd, &tmp_table_param, all_fields,
0, handler_flags & GROUP_BY_DISTINCT ?
0 : select_distinct, 1,
select_options, HA_POS_ERROR, "",
!need_tmp,
(!order ||
(handler_flags & GROUP_BY_ORDER_BY)))))
DBUG_RETURN(1);
/*
Setup reference fields, used by summary functions and group by fields,
to point to the temporary table.
The actual switching to the temporary tables fields for HAVING
and ORDER BY is done in do_select() by calling
set_items_ref_array(items1).
*/
init_items_ref_array();
items1= items0 + all_fields.elements;
if (change_to_use_tmp_fields(thd, items1,
tmp_fields_list1, tmp_all_fields1,
fields_list.elements, all_fields))
DBUG_RETURN(1);
/* Give storage engine access to temporary table */
if ((err= storage_handler_for_group_by->init(exec_tmp_table1,
having, order)))
{
storage_handler_for_group_by->print_error(err, MYF(0));
DBUG_RETURN(1);
}
storage_handler_for_group_by->store_data_in_temp_table= need_tmp;
/*
If there is not specified ORDER BY, we should sort things according
to the group_by
*/
if (!order)
order= group_list;
/*
Group by and having is calculated by the group_by handler.
Reset the group by and having
*/
group= 0; group_list= 0;
having= tmp_having= 0;
/*
Select distinct is handled by handler or by creating an unique index
over all fields in the temporary table
*/
select_distinct= 0;
if (handler_flags & GROUP_BY_ORDER_BY)
order= 0;
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
tmp_table_param.sum_func_count= 0;
/* Remember information about the original join */
original_join_tab= join_tab;
original_table_count= table_count;
/* Set up one join tab to get sorting to work */
const_tables= 0;
table_count= 1;
join_tab= (JOIN_TAB*) thd->calloc(sizeof(JOIN_TAB));
join_tab[0].table= exec_tmp_table1;
DBUG_RETURN(thd->is_fatal_error);
}
}
/*
The loose index scan access method guarantees that all grouping or
duplicate row elimination (for distinct) is already performed
......@@ -1970,7 +2081,7 @@ int JOIN::init_execution()
thd->lex->set_limit_rows_examined();
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
if (need_tmp && ! storage_handler_for_group_by)
{
DBUG_PRINT("info",("Creating tmp table"));
THD_STAGE_INFO(thd, stage_copying_to_tmp_table);
......@@ -2632,6 +2743,8 @@ void JOIN::exec_inner()
*/
curr_join->join_examined_rows= 0;
curr_join->do_select_call_count= 0;
/* Create a tmp table if distinct or if the sort is too complicated */
if (need_tmp)
{
......@@ -11823,6 +11936,14 @@ void JOIN::cleanup(bool full)
if (full)
have_query_plan= QEP_DELETED;
if (original_join_tab)
{
/* Free the original optimized join created for the group_by_handler */
join_tab= original_join_tab;
original_join_tab= 0;
table_count= original_table_count;
}
if (table)
{
JOIN_TAB *tab;
......@@ -11931,6 +12052,9 @@ void JOIN::cleanup(bool full)
}
tmp_table_param.cleanup();
delete storage_handler_for_group_by;
storage_handler_for_group_by= 0;
if (!join_tab)
{
List_iterator<TABLE_LIST> li(*join_list);
......@@ -17727,6 +17851,18 @@ do_select(JOIN *join,List<Item> *fields,TABLE *table,Procedure *procedure)
join->procedure=procedure;
join->tmp_table= table; /* Save for easy recursion */
join->fields= fields;
join->do_select_call_count++;
if (join->storage_handler_for_group_by &&
join->do_select_call_count == 1)
{
/* Select fields are in the temporary table */
join->fields= &join->tmp_fields_list1;
/* Setup HAVING to work with fields in temporary table */
join->set_items_ref_array(join->items1);
/* The storage engine will take care of the group by query result */
DBUG_RETURN(join->storage_handler_for_group_by->execute(join));
}
if (table)
{
......@@ -24192,6 +24328,22 @@ int JOIN::save_explain_data_intern(Explain_query *output, bool need_tmp_table,
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
output->add_node(explain);
}
else if (storage_handler_for_group_by)
{
explain= new (output->mem_root) Explain_select(output->mem_root,
thd->lex->analyze_stmt);
select_lex->set_explain_type(true);
explain->select_id= select_lex->select_number;
explain->select_type= select_lex->type;
explain->using_temporary= need_tmp;
explain->using_filesort= need_order;
explain->message= "Storage engine handles GROUP BY";
if (select_lex->master_unit()->derived)
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
output->add_node(explain);
}
else
{
Explain_select *xpl_sel;
......
......@@ -1011,6 +1011,7 @@ class JOIN :public Sql_alloc
*/
uint top_join_tab_count;
uint send_group_parts;
uint do_select_call_count;
/*
True if the query has GROUP BY.
(that is, if group_by != NULL. when DISTINCT is converted into GROUP BY, it
......@@ -1080,6 +1081,12 @@ class JOIN :public Sql_alloc
/* Finally picked QEP. This is result of join optimization */
POSITION *best_positions;
/* points to a storage engine if all tables comes from the storage engine */
handlerton *one_storage_engine;
group_by_handler *storage_handler_for_group_by;
JOIN_TAB *original_join_tab;
uint original_table_count;
/******* Join optimization state members start *******/
/*
pointer - we're doing optimization for a semi-join materialization nest.
......@@ -1378,6 +1385,10 @@ class JOIN :public Sql_alloc
group_optimized_away= 0;
no_rows_in_result_called= 0;
positions= best_positions= 0;
one_storage_engine= 0;
storage_handler_for_group_by= 0;
original_join_tab= 0;
do_select_call_count= 0;
explain= NULL;
......
show create table seq_1_to_15_step_2;
Table Create Table
seq_1_to_15_step_2 CREATE TABLE `seq_1_to_15_step_2` (
`seq` bigint(20) unsigned NOT NULL,
PRIMARY KEY (`seq`)
) ENGINE=SEQUENCE DEFAULT CHARSET=latin1
select count(seq),sum(seq),1 from seq_1_to_15_step_2;
count(seq) sum(seq) 1
8 64 1
#
# The engine should be able to optimize the following requests
#
select count(*) from seq_1_to_15_step_2;
count(*)
8
explain select count(*) from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select count(seq) from seq_1_to_15_step_2;
count(seq)
8
explain select count(seq) from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select sum(seq) from seq_1_to_15_step_2;
sum(seq)
64
explain select sum(seq) from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select count(seq),sum(seq) from seq_1_to_15_step_2;
count(seq) sum(seq)
8 64
explain select count(seq),sum(seq) from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select count(seq) as c from seq_1_to_15_step_2 having c > 5;
c
8
explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
c
explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select distinct count(*) from seq_1_to_15_step_2;
count(*)
8
explain select distinct count(*) from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE NULL NULL NULL NULL NULL NULL NULL Storage engine handles GROUP BY
select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
seq count(*)
1 8
3 8
5 8
7 8
9 8
11 8
13 8
15 8
explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY seq_1_to_15_step_2 index NULL PRIMARY 8 NULL # Using index
1 PRIMARY <derived2> ALL NULL NULL NULL NULL # Using join buffer (flat, BNL join)
2 DERIVED NULL NULL NULL NULL NULL NULL # Storage engine handles GROUP BY
#
# The engine can't optimize the following queries
#
select count(seq),sum(seq),1 from seq_1_to_15_step_2;
count(seq) sum(seq) 1
8 64 1
explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index
explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index
1 SIMPLE t2 index NULL PRIMARY 8 NULL 8 Using index; Using join buffer (flat, BNL join)
explain select count(*) from seq_1_to_15_step_2 where seq > 0;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE seq_1_to_15_step_2 index PRIMARY PRIMARY 8 NULL 8 Using where; Using index
explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE seq_1_to_15_step_2 index NULL PRIMARY 8 NULL 8 Using index; Using temporary; Using filesort
drop table seq_1_to_15_step_2;
--source inc.inc
# Check that group by handler forks for the sequence engine.
# The sequence engine can only optimize queries with COUNT(primary_key) or
# SUM(primary_key) when there is no GROUP BY.
show create table seq_1_to_15_step_2;
# Get the correct results
select count(seq),sum(seq),1 from seq_1_to_15_step_2;
--echo #
--echo # The engine should be able to optimize the following requests
--echo #
select count(*) from seq_1_to_15_step_2;
explain select count(*) from seq_1_to_15_step_2;
select count(seq) from seq_1_to_15_step_2;
explain select count(seq) from seq_1_to_15_step_2;
select sum(seq) from seq_1_to_15_step_2;
explain select sum(seq) from seq_1_to_15_step_2;
select count(seq),sum(seq) from seq_1_to_15_step_2;
explain select count(seq),sum(seq) from seq_1_to_15_step_2;
select count(seq) as c from seq_1_to_15_step_2 having c > 5;
explain select count(seq) as c from seq_1_to_15_step_2 having c > 5;
select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
explain select count(seq) as c from seq_1_to_15_step_2 having c > 1000;
select distinct count(*) from seq_1_to_15_step_2;
explain select distinct count(*) from seq_1_to_15_step_2;
select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
--replace_column 9 #
explain select * from seq_1_to_15_step_2, (select count(*) from seq_1_to_15_step_2) as t1;
--echo #
--echo # The engine can't optimize the following queries
--echo #
select count(seq),sum(seq),1 from seq_1_to_15_step_2;
explain select count(seq),sum(seq),1 from seq_1_to_15_step_2;
explain select count(*) from seq_1_to_15_step_2, seq_1_to_15_step_2 as t2;
explain select count(*) from seq_1_to_15_step_2 where seq > 0;
explain select count(*) from seq_1_to_15_step_2 group by mod(seq,2);
drop table seq_1_to_15_step_2;
......@@ -23,10 +23,14 @@
#include <my_config.h>
#include <ctype.h>
#include <mysql_version.h>
#include <item.h>
#include <item_sum.h>
#include <handler.h>
#include <table.h>
#include <field.h>
handlerton *sequence_hton;
class Sequence_share : public Handler_share {
public:
const char *name;
......@@ -52,11 +56,11 @@ class ha_seq: public handler
{
private:
THR_LOCK_DATA lock;
Sequence_share *seqs;
Sequence_share *get_share();
ulonglong cur;
public:
Sequence_share *seqs;
ha_seq(handlerton *hton, TABLE_SHARE *table_arg)
: handler(hton, table_arg), seqs(0) { }
ulonglong table_flags() const
......@@ -346,9 +350,166 @@ static int discover_table_existence(handlerton *hton, const char *db,
static int dummy_ret_int() { return 0; }
/*****************************************************************************
Example of a simple group by handler for queries like:
SELECT SUM(seq) from sequence_table;
This implementation supports SUM() and COUNT() on primary key.
*****************************************************************************/
class ha_seq_group_by_handler: public group_by_handler
{
bool first_row;
public:
ha_seq_group_by_handler(THD *thd, SELECT_LEX *select_lex,
List<Item> *fields,
TABLE_LIST *table_list, ORDER *group_by,
ORDER *order_by, Item *where,
Item *having)
:group_by_handler(thd, select_lex, fields, table_list, group_by,
order_by, where, having, sequence_hton)
{
}
~ha_seq_group_by_handler() {}
bool init(TABLE *temporary_table, Item *having_arg,
ORDER *order_by_arg);
int init_scan() { first_row= 1 ; return 0; }
int next_row();
int end_scan() { return 0; }
int info(uint flag, ha_statistics *stats);
};
static group_by_handler *
create_group_by_handler(THD *thd, SELECT_LEX *select_lex,
List<Item> *fields,
TABLE_LIST *table_list, ORDER *group_by,
ORDER *order_by, Item *where,
Item *having)
{
ha_seq_group_by_handler *handler;
Item *item;
List_iterator_fast<Item> it(*fields);
/* check that only one table is used in FROM clause and no sub queries */
if (table_list->next_local != 0)
return 0;
/* check that there is no where clause and no group_by */
if (where != 0 || group_by != 0)
return 0;
/*
Check that all fields are sum(primary_key) or count(primary_key)
For more ways to work with the field list and sum functions, see
opt_sum.cc::opt_sum_query().
*/
while ((item= it++))
{
Item *arg0;
Field *field;
if (item->type() != Item::SUM_FUNC_ITEM ||
(((Item_sum*) item)->sum_func() != Item_sum::SUM_FUNC &&
((Item_sum*) item)->sum_func() != Item_sum::COUNT_FUNC))
return 0; // Not a SUM() function
arg0= ((Item_sum*) item)->get_arg(0);
if (arg0->type() != Item::FIELD_ITEM)
{
if ((((Item_sum*) item)->sum_func() == Item_sum::COUNT_FUNC) &&
arg0->basic_const_item())
continue; // Allow count(1)
return 0;
}
field= ((Item_field*) arg0)->field;
/*
Check that we are using the sequence table (the only table in the FROM
clause) and not an outer table.
*/
if (field->table != table_list->table)
return 0;
/* Check that we are using a SUM() on the primary key */
if (strcmp(field->field_name, "seq"))
return 0;
}
/* Create handler and return it */
handler= new ha_seq_group_by_handler(thd, select_lex, fields, table_list,
group_by,
order_by, where, having);
return handler;
}
bool ha_seq_group_by_handler::init(TABLE *temporary_table, Item *having_arg,
ORDER *order_by_arg)
{
/*
Here we could add checks if the temporary table was created correctly
*/
return group_by_handler::init(temporary_table, having_arg, order_by_arg);
}
int ha_seq_group_by_handler::info(uint flag, ha_statistics *stats)
{
bzero(stats, sizeof(*stats));
/* We only return one records for a SUM(*) without a group by */
stats->records= 1;
return 0;
}
int ha_seq_group_by_handler::next_row()
{
List_iterator_fast<Item> it(*fields);
Item_sum *item_sum;
Sequence_share *seqs= ((ha_seq*) table_list->table->file)->seqs;
DBUG_ENTER("ha_seq_group_by_handler");
/*
Check if this is the first call to the function. If not, we have already
returned all data.
*/
if (!first_row)
DBUG_RETURN(HA_ERR_END_OF_FILE);
first_row= 0;
/* Pointer to first field in temporary table where we should store summary*/
Field **field_ptr= table->field;
ulonglong elements= (seqs->to - seqs->from + seqs->step - 1) / seqs->step;
while ((item_sum= (Item_sum*) it++))
{
Field *field= *(field_ptr++);
switch (item_sum->sum_func()) {
case Item_sum::COUNT_FUNC:
{
field->store((longlong) elements, 1);
break;
}
case Item_sum::SUM_FUNC:
{
/* Calculate SUM(f, f+step, f+step*2 ... to) */
ulonglong sum;
sum= seqs->from * elements + seqs->step * (elements*elements-elements)/2;
field->store((longlong) sum, 1);
break;
}
default:
DBUG_ASSERT(0);
}
field->set_notnull();
}
DBUG_RETURN(0);
}
/*****************************************************************************
Initialize the interface between the sequence engine and MariaDB
*****************************************************************************/
static int init(void *p)
{
handlerton *hton= (handlerton *)p;
sequence_hton= hton;
hton->create= create_handler;
hton->discover_table= discover_table;
hton->discover_table_existence= discover_table_existence;
......@@ -356,7 +517,7 @@ static int init(void *p)
(int (*)(handlerton *, THD *, bool)) &dummy_ret_int;
hton->savepoint_set= hton->savepoint_rollback= hton->savepoint_release=
(int (*)(handlerton *, THD *, void *)) &dummy_ret_int;
hton->create_group_by= create_group_by_handler;
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment