Commit 884b83e2 authored by Jacob Mathew's avatar Jacob Mathew

MDEV-14500: Support engines without rnd_pos() and

            engines with inefficient rnd_pos()

Some engines have not implemented rnd_pos().  There are other engines whose
implementation of rnd_pos() is inherently inefficient.  Spider is such an
engine, whose implementation of rnd_pos() needs to access a table on a remote
data node to retrieve a single table row.

To address these limitations, a new temporary table has been added to filesort.
When filesort sequentially reads the table being sorted, each row is written to
the filesort temp table in addition to being copied to the sort buffer.
Subsequent calls to rnd_pos() will then access the table row in the filesort
temp table instead of in the table being sorted.

The following logic changes incorporate the new filesort temp table into the
server:
- A new handler method to determine whether a call to the engine's
  rnd_pos() is expensive.  The default return value is FALSE.  Engines without
  rnd_pos() or with an inefficient rnd_pos() should return TRUE.
- Create the filesort temp table only if:
  - There are no add-on columns for filesort; and
  - The engine's implementation of rnd_pos() is expensive.
- Write to the temp table each row that is read from the table being sorted.
- Do subsequent row retrievals that use rnd_pos() on the temp table instead of
  on the table being sorted.  Upon retrieving a row from the temp table, copy
  its column values to the record of the table being sorted.
- Upon completion of retrieval of the sorted result rows, delete the filesort
  temp table and free the memory allocated for using it.

The logic changes are in the following areas:
- Table handler.
- Partition engine.
- Spider engine.
- Filesort.
- Read record manager.

Note that these changes only address the use of rnd_pos() by filesort.  They do
not address the use of rnd_pos() in other areas such as:
- Quick select.
- Insert.
- Update.
- Window functions.
- Multi Range Read.

Author:
  Jacob Mathew.

Reviewer:
  Sergei Golubchik.
parent d23fcc42
This diff is collapsed.
......@@ -27,6 +27,7 @@ class Filesort_tracker;
struct SORT_FIELD;
typedef struct st_order ORDER;
class JOIN;
class Copy_field;
/**
......@@ -87,7 +88,8 @@ class SORT_INFO
public:
SORT_INFO()
:addon_field(0), record_pointers(0)
:addon_field(0), record_pointers(0),
fs_tmp_table(NULL), tmp_field(NULL), tmp_fields(0)
{
buffpek.str= 0;
my_b_clear(&io_cache);
......@@ -101,6 +103,9 @@ class SORT_INFO
my_free(record_pointers);
my_free(buffpek.str);
my_free(addon_field);
fs_tmp_table= NULL; // Freed in end_read_record()
tmp_field= NULL; // Freed in end_read_record()
tmp_fields= 0;
}
void reset()
......@@ -119,6 +124,11 @@ class SORT_INFO
/* To unpack back */
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
uchar *record_pointers; /* If sorted in memory */
TABLE *fs_tmp_table; /* Optional temp table used by filesort to */
/* eliminate rnd_pos() calls to the table */
/* being sorted */
Copy_field *tmp_field; /* Filesort temp table field array */
uint tmp_fields; /* Number of filesort temp table fields */
/*
How many rows in final result.
Also how many rows in record_pointers, if used
......
......@@ -5185,6 +5185,36 @@ int ha_partition::rnd_pos_by_record(uchar *record)
}
/*
Determine whether a call to rnd_pos() is expensive
SYNOPSIS
is_rnd_pos_expensive()
RETURN VALUE
FALSE No inherent inefficiencies in rnd_pos()
TRUE rnd_pos() call is inefficient
DESCRIPTION
Some engines, such as Spider, have an inefficient implementation of
rnd_pos(), because they need to do a remote access to fetch the
single table row. Determine whether the rnd_pos() implementation
for any of the partitions is expensive.
*/
bool ha_partition::is_rnd_pos_expensive()
{
DBUG_ENTER("ha_partition::is_rnd_pos_expensive");
uint i;
for (i= 0; i < m_tot_parts; i++)
if (m_file[i]->ha_is_rnd_pos_expensive())
DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE);
}
/****************************************************************************
MODULE index scan
****************************************************************************/
......
......@@ -691,6 +691,7 @@ class ha_partition :public handler
virtual int rnd_next(uchar * buf);
virtual int rnd_pos(uchar * buf, uchar * pos);
virtual int rnd_pos_by_record(uchar *record);
virtual bool is_rnd_pos_expensive();
virtual void position(const uchar * record);
/*
......
......@@ -3536,6 +3536,17 @@ class handler :public Sql_alloc
position(record);
return rnd_pos(record, ref);
}
/**
  Tell the caller whether this engine's rnd_pos() is expensive.

  Some engines are unable to provide an efficient implementation
  for rnd_pos().  Spider is such an engine, as a call to rnd_pos()
  needs to access a table on a remote data node to retrieve the
  single table row.

  @return FALSE in this default implementation.  An engine without
          rnd_pos(), or with an inefficient rnd_pos(), should override
          this method and return TRUE.
*/
virtual bool is_rnd_pos_expensive()
{
/* Engine's rnd_pos() implementation has no inherent inefficiencies */
return FALSE;
}
virtual int read_first_row(uchar *buf, uint primary_key);
public:
......@@ -3545,6 +3556,10 @@ class handler :public Sql_alloc
int ha_rnd_next(uchar *buf);
int ha_rnd_pos(uchar *buf, uchar *pos);
inline int ha_rnd_pos_by_record(uchar *buf);
/**
  Non-virtual wrapper that forwards to the virtual is_rnd_pos_expensive().

  @return the value returned by is_rnd_pos_expensive()
*/
inline bool ha_is_rnd_pos_expensive()
{
return is_rnd_pos_expensive();
}
inline int ha_read_first_row(uchar *buf, uint primary_key);
/**
......
This diff is collapsed.
......@@ -27,6 +27,8 @@ class SQL_SELECT;
class Copy_field;
class SORT_INFO;
#include "my_bitmap.h"
struct READ_RECORD;
void end_read_record(READ_RECORD *info);
......@@ -53,7 +55,9 @@ struct READ_RECORD
TABLE *table; /* Head-form */
//handler *file;
TABLE **forms; /* head and ref forms */
TABLE *copy_table; /* Original table that a */
/* filesort temp table */
/* row is copied to */
Unlock_row_func unlock_row;
Read_func read_record_func;
THD *thd;
......@@ -61,25 +65,38 @@ struct READ_RECORD
uint cache_records;
uint ref_length,struct_length,reclength,rec_cache_size,error_offset;
uint index;
uchar *ref_pos; /* pointer to form->refpos */
uint tmp_fields; /* Number of filesort temp
table fields */
MY_BITMAP *save_write_set; /* Original write set when */
/* records are read from */
/* the filesort temp table */
/* and copied to the */
/* original table */
uchar *ref_pos; /* Pointer to form->refpos */
uchar *record;
uchar *rec_buf; /* to read field values after filesort */
uchar *cache,*cache_pos,*cache_end,*read_positions;
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
Copy_field *tmp_field; /* Filesort temp table */
/* field array */
struct st_io_cache *io_cache;
bool print_error, ignore_not_found_rows;
bool print_error, ignore_not_found_rows, free_tmp_table;
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
int read_record() { return read_record_func(this); }
/*
/*
SJ-Materialization runtime may need to read fields from the materialized
table and unpack them into original table fields:
table and unpack them into original table fields.
Read following a filesort may need to read fields from its temp table
and unpack them into the corresponding original table fields.
*/
Copy_field *copy_field;
Copy_field *copy_field_end;
public:
READ_RECORD() : table(NULL), cache(NULL) {}
READ_RECORD()
: table(NULL), cache(NULL), copy_field(NULL), copy_field_end(NULL) {}
~READ_RECORD() { end_read_record(this); }
};
......
......@@ -5767,7 +5767,6 @@ class user_var_entry
user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name,
bool create_if_not_exists);
class SORT_INFO;
class multi_delete :public select_result_interceptor
{
TABLE_LIST *delete_tables, *table_being_deleted;
......
......@@ -93,7 +93,24 @@ class Sort_param {
}
void init_for_filesort(uint sortlen, TABLE *table,
ulong max_length_for_sort_data,
ha_rows maxrows, bool sort_positions);
ha_rows maxrows, bool sort_positions,
uint *tmp_fields);
/**
  Replace the row reference length accounted for in the sort lengths.

  Does nothing when addon fields are in use.  Otherwise the current
  ref_length (if non-zero) is subtracted from res_length, sort_length and
  rec_length, new_ref_length is added to each of them, and ref_length is
  set to new_ref_length.

  @param new_ref_length  Reference length to account for from now on
*/
void update_ref_length(uint new_ref_length)
{
  /*
    NOTE(review): presumably the lengths carry no row reference when addon
    fields are used, hence nothing to adjust - confirm against
    init_for_filesort().
  */
  if (addon_field)
    return;

  /* Back out the previously accounted reference length, if there was one */
  if (ref_length != 0)
  {
    rec_length-= ref_length;
    sort_length-= ref_length;
    res_length-= ref_length;
  }

  /* Account for the new reference length and remember it */
  rec_length+= new_ref_length;
  sort_length+= new_ref_length;
  res_length+= new_ref_length;
  ref_length= new_ref_length;
}
};
......
......@@ -429,6 +429,15 @@ class ha_spider: public handler
KEY_MULTI_RANGE **found_range_p
);
#endif
/**
  Report that Spider's rnd_pos() is expensive.

  Spider's implementation of rnd_pos() is inherently inefficient.
  A call to rnd_pos() needs to access a table on a remote data node
  to retrieve the single table row.

  @return always TRUE
*/
virtual bool is_rnd_pos_expensive()
{
return TRUE;
}
int rnd_init(
bool scan
);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment