Commit 7e9ac7b8 authored by Sergey Vojtovich

MDEV-10296 - Multi-instance table cache

Improve scalability by implementing multi-instance table cache.
parent 6c1c27ea
......@@ -1091,6 +1091,8 @@ The following options may be given as the first argument:
The number of cached table definitions
--table-open-cache=#
The number of cached open tables
--table-open-cache-instances=#
The number of table cache instances
--tc-heuristic-recover=name
Decision to use in heuristic recover process. One of: OFF,
COMMIT, ROLLBACK
......@@ -1457,6 +1459,7 @@ sysdate-is-now FALSE
table-cache 431
table-definition-cache 400
table-open-cache 431
table-open-cache-instances 1
tc-heuristic-recover OFF
thread-cache-size 151
thread-pool-idle-timeout 60
......
......@@ -3887,6 +3887,20 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME TABLE_OPEN_CACHE_INSTANCES
SESSION_VALUE NULL
GLOBAL_VALUE 1
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE 1
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The number of table cache instances
NUMERIC_MIN_VALUE 1
NUMERIC_MAX_VALUE 64
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME THREAD_CACHE_SIZE
SESSION_VALUE NULL
GLOBAL_VALUE 151
......
......@@ -4657,6 +4657,20 @@ NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME TABLE_OPEN_CACHE_INSTANCES
SESSION_VALUE NULL
GLOBAL_VALUE 1
GLOBAL_VALUE_ORIGIN COMPILE-TIME
DEFAULT_VALUE 1
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT The number of table cache instances
NUMERIC_MIN_VALUE 1
NUMERIC_MAX_VALUE 64
NUMERIC_BLOCK_SIZE 1
ENUM_VALUE_LIST NULL
READ_ONLY YES
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME THREAD_CACHE_SIZE
SESSION_VALUE NULL
GLOBAL_VALUE 151
......
......@@ -4974,8 +4974,7 @@ static int init_server_components()
all things are initialized so that unireg_abort() doesn't fail
*/
mdl_init();
tdc_init();
if (hostname_cache_init())
if (tdc_init() || hostname_cache_init())
unireg_abort(1);
query_cache_set_min_res_unit(query_cache_min_res_unit);
......@@ -7643,7 +7642,6 @@ struct my_option my_long_options[]=
MYSQL_TO_BE_IMPLEMENTED_OPTION("eq-range-index-dive-limit"),
MYSQL_COMPATIBILITY_OPTION("server-id-bits"),
MYSQL_TO_BE_IMPLEMENTED_OPTION("slave-rows-search-algorithms"), // HAVE_REPLICATION
MYSQL_COMPATIBILITY_OPTION("table-open-cache-instances"),
MYSQL_TO_BE_IMPLEMENTED_OPTION("slave-allow-batching"), // HAVE_REPLICATION
MYSQL_COMPATIBILITY_OPTION("slave-checkpoint-period"), // HAVE_REPLICATION
MYSQL_COMPATIBILITY_OPTION("slave-checkpoint-group"), // HAVE_REPLICATION
......
......@@ -878,8 +878,7 @@ void close_thread_table(THD *thd, TABLE **table_ptr)
Do this *before* entering the TABLE_SHARE::tdc.LOCK_table_share
critical section.
*/
if (table->file != NULL)
MYSQL_UNBIND_TABLE(table->file);
MYSQL_UNBIND_TABLE(table->file);
tc_release_table(table);
DBUG_VOID_RETURN;
......
......@@ -3227,6 +3227,11 @@ static Sys_var_ulong Sys_table_cache_size(
BLOCK_SIZE(1), NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
ON_UPDATE(fix_table_open_cache));
/*
  table_open_cache_instances: read-only global system variable stored in
  tc_instances. It takes a required command-line argument, accepts values
  in the range 1..64 and defaults to 1.
*/
static Sys_var_ulong Sys_table_cache_instances(
"table_open_cache_instances", "The number of table cache instances",
READ_ONLY GLOBAL_VAR(tc_instances), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(1, 64), DEFAULT(1), BLOCK_SIZE(1));
static Sys_var_ulong Sys_thread_cache_size(
"thread_cache_size",
"How many threads we should keep in a cache for reuse. These are freed after 5 minutes of idle time",
......
......@@ -1021,13 +1021,13 @@ struct TABLE
One should use methods of I_P_List template instead.
*/
TABLE *share_all_next, **share_all_prev;
TABLE *global_free_next, **global_free_prev;
friend struct All_share_tables;
friend class Table_cache_instance;
public:
THD *in_use; /* Which thread uses this */
/* Time when table was released to table cache. Valid for unused tables. */
ulonglong tc_time;
Field **field; /* Pointer to fields */
uchar *record[2]; /* Pointer to records */
......
......@@ -55,10 +55,12 @@
/** Configuration. */
ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
ulong tc_size; /**< Table cache threshold for LRU eviction. */
ulong tc_instances;
/** Data collections. */
static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */
/** Collection of unused TABLE_SHARE objects. */
static
I_P_List <TDC_element,
I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
I_P_List_null_counter,
......@@ -67,8 +69,6 @@ I_P_List <TDC_element,
static int64 tdc_version; /* Increments on each reload */
static bool tdc_inited;
static int32 tc_count; /**< Number of TABLE objects in table cache. */
/**
Protects unused shares list.
......@@ -81,11 +81,13 @@ static int32 tc_count; /**< Number of TABLE objects in table cache. */
static mysql_mutex_t LOCK_unused_shares;
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share;
static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share,
key_LOCK_table_cache;
static PSI_mutex_info all_tc_mutexes[]=
{
{ &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
{ &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 }
{ &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
{ &key_LOCK_table_cache, "LOCK_table_cache", 0 }
};
static PSI_cond_key key_TABLE_SHARE_COND_release;
......@@ -93,19 +95,6 @@ static PSI_cond_info all_tc_conds[]=
{
{ &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
};
static void init_tc_psi_keys(void)
{
const char *category= "sql";
int count;
count= array_elements(all_tc_mutexes);
mysql_mutex_register(category, all_tc_mutexes, count);
count= array_elements(all_tc_conds);
mysql_cond_register(category, all_tc_conds, count);
}
#endif
......@@ -125,67 +114,88 @@ static int fix_thd_pins(THD *thd)
part of table definition cache.
*/
static void intern_close_table(TABLE *table)
struct Table_cache_instance
{
DBUG_ENTER("intern_close_table");
DBUG_PRINT("tcache", ("table: '%s'.'%s' 0x%lx",
table->s ? table->s->db.str : "?",
table->s ? table->s->table_name.str : "?",
(long) table));
/**
Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
records, Share_free_tables::List (TABLE::prev and TABLE::next),
TABLE::in_use.
*/
mysql_mutex_t LOCK_table_cache;
I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
&TABLE::global_free_prev>,
I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
free_tables;
ulong records;
/** Avoid false sharing between instances */
char pad[CPU_LEVEL1_DCACHE_LINESIZE];
Table_cache_instance(): records(0)
{
mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
MY_MUTEX_INIT_FAST);
}
delete table->triggers;
if (table->file) // Not true if placeholder
~Table_cache_instance()
{
(void) closefrm(table);
tdc_release_share(table->s);
mysql_mutex_destroy(&LOCK_table_cache);
DBUG_ASSERT(free_tables.is_empty());
DBUG_ASSERT(records == 0);
}
table->alias.free();
my_free(table);
DBUG_VOID_RETURN;
}
};
/**
Get number of TABLE objects (used and unused) in table cache.
*/
static Table_cache_instance *tc;
uint tc_records(void)
static void intern_close_table(TABLE *table)
{
return my_atomic_load32_explicit(&tc_count, MY_MEMORY_ORDER_RELAXED);
delete table->triggers;
DBUG_ASSERT(table->file);
closefrm(table);
tdc_release_share(table->s);
my_free(table);
}
/**
Wait for MDL deadlock detector to complete traversing tdc.all_tables.
Must be called before updating TABLE_SHARE::tdc.all_tables.
Get number of TABLE objects (used and unused) in table cache.
*/
static void tc_wait_for_mdl_deadlock_detector(TDC_element *element)
uint tc_records(void)
{
while (element->all_tables_refs)
mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
ulong total= 0;
for (ulong i= 0; i < tc_instances; i++)
{
mysql_mutex_lock(&tc[i].LOCK_table_cache);
total+= tc[i].records;
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
}
return total;
}
/**
Remove TABLE object from table cache.
- decrement tc_count
- remove object from TABLE_SHARE::tdc.all_tables
*/
static void tc_remove_table(TABLE *table)
{
mysql_mutex_assert_owner(&table->s->tdc->LOCK_table_share);
tc_wait_for_mdl_deadlock_detector(table->s->tdc);
my_atomic_add32_explicit(&tc_count, -1, MY_MEMORY_ORDER_RELAXED);
table->s->tdc->all_tables.remove(table);
TDC_element *element= table->s->tdc;
mysql_mutex_lock(&element->LOCK_table_share);
/* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
while (element->all_tables_refs)
mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
element->all_tables.remove(table);
mysql_mutex_unlock(&element->LOCK_table_share);
intern_close_table(table);
}
static void tc_remove_all_unused_tables(TDC_element *element,
TDC_element::TABLE_list *purge_tables,
Share_free_tables::List *purge_tables,
bool mark_flushed)
{
TABLE *table;
......@@ -200,10 +210,18 @@ static void tc_remove_all_unused_tables(TDC_element *element,
*/
if (mark_flushed)
element->flushed= true;
while ((table= element->free_tables.pop_front()))
for (ulong i= 0; i < tc_instances; i++)
{
tc_remove_table(table);
purge_tables->push_front(table);
mysql_mutex_lock(&tc[i].LOCK_table_cache);
while ((table= element->free_tables[i].list.pop_front()))
{
tc[i].records--;
tc[i].free_tables.remove(table);
DBUG_ASSERT(element->all_tables_refs == 0);
element->all_tables.remove(table);
purge_tables->push_front(table);
}
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
}
}
......@@ -225,7 +243,7 @@ static void tc_remove_all_unused_tables(TDC_element *element,
struct tc_purge_arg
{
TDC_element::TABLE_list purge_tables;
Share_free_tables::List purge_tables;
bool mark_flushed;
};
......@@ -251,20 +269,6 @@ void tc_purge(bool mark_flushed)
}
/**
Get last element of free_tables.
*/
static TABLE *tc_free_tables_back(TDC_element *element)
{
TDC_element::TABLE_list::Iterator it(element->free_tables);
TABLE *entry, *last= 0;
while ((entry= it++))
last= entry;
return last;
}
/**
Add new TABLE object to table cache.
......@@ -281,79 +285,33 @@ static TABLE *tc_free_tables_back(TDC_element *element)
- free evicted object
*/
struct tc_add_table_arg
{
char key[MAX_DBKEY_LENGTH];
uint key_length;
ulonglong purge_time;
};
static my_bool tc_add_table_callback(TDC_element *element, tc_add_table_arg *arg)
void tc_add_table(THD *thd, TABLE *table)
{
TABLE *table;
ulong i= thd->thread_id % tc_instances;
TABLE *LRU_table= 0;
TDC_element *element= table->s->tdc;
DBUG_ASSERT(table->in_use == thd);
mysql_mutex_lock(&element->LOCK_table_share);
if ((table= tc_free_tables_back(element)) && table->tc_time < arg->purge_time)
{
memcpy(arg->key, element->m_key, element->m_key_length);
arg->key_length= element->m_key_length;
arg->purge_time= table->tc_time;
}
/* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
while (element->all_tables_refs)
mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
element->all_tables.push_front(table);
mysql_mutex_unlock(&element->LOCK_table_share);
return FALSE;
}
void tc_add_table(THD *thd, TABLE *table)
{
bool need_purge;
DBUG_ASSERT(table->in_use == thd);
mysql_mutex_lock(&table->s->tdc->LOCK_table_share);
tc_wait_for_mdl_deadlock_detector(table->s->tdc);
table->s->tdc->all_tables.push_front(table);
mysql_mutex_unlock(&table->s->tdc->LOCK_table_share);
/* If we have too many TABLE instances around, try to get rid of them */
need_purge= my_atomic_add32_explicit(&tc_count, 1, MY_MEMORY_ORDER_RELAXED) >=
(int32) tc_size;
if (need_purge)
mysql_mutex_lock(&tc[i].LOCK_table_cache);
if (tc[i].records == tc_size && (LRU_table= tc[i].free_tables.pop_front()))
{
tc_add_table_arg argument;
argument.purge_time= ULONGLONG_MAX;
tdc_iterate(thd, (my_hash_walk_action) tc_add_table_callback, &argument);
if (argument.purge_time != ULONGLONG_MAX)
{
TDC_element *element= (TDC_element*) lf_hash_search(&tdc_hash,
thd->tdc_hash_pins,
argument.key,
argument.key_length);
if (element)
{
TABLE *entry;
mysql_mutex_lock(&element->LOCK_table_share);
lf_hash_search_unpin(thd->tdc_hash_pins);
/*
It may happen that oldest table was acquired meanwhile. In this case
just go ahead, number of objects in table cache will normalize
eventually.
*/
if ((entry= tc_free_tables_back(element)) &&
entry->tc_time == argument.purge_time)
{
element->free_tables.remove(entry);
tc_remove_table(entry);
mysql_mutex_unlock(&element->LOCK_table_share);
intern_close_table(entry);
}
else
mysql_mutex_unlock(&element->LOCK_table_share);
}
}
LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
/* Needed if MDL deadlock detector chimes in before tc_remove_table() */
LRU_table->in_use= thd;
}
else
tc[i].records++;
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
if (LRU_table)
tc_remove_table(LRU_table);
}
......@@ -369,10 +327,11 @@ void tc_add_table(THD *thd, TABLE *table)
static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
{
ulong i= thd->thread_id % tc_instances;
TABLE *table;
mysql_mutex_lock(&element->LOCK_table_share);
table= element->free_tables.pop_front();
mysql_mutex_lock(&tc[i].LOCK_table_cache);
table= element->free_tables[i].list.pop_front();
if (table)
{
DBUG_ASSERT(!table->in_use);
......@@ -381,8 +340,9 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
DBUG_ASSERT(table->db_stat && table->file);
/* The children must be detached from the table. */
DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
tc[i].free_tables.remove(table);
}
mysql_mutex_unlock(&element->LOCK_table_share);
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
return table;
}
......@@ -413,40 +373,27 @@ static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
@retval false object released
*/
bool tc_release_table(TABLE *table)
void tc_release_table(TABLE *table)
{
ulong i= table->in_use->thread_id % tc_instances;
DBUG_ASSERT(table->in_use);
DBUG_ASSERT(table->file);
if (table->needs_reopen() || tc_records() > tc_size)
mysql_mutex_lock(&tc[i].LOCK_table_cache);
if (table->needs_reopen() || table->s->tdc->flushed ||
tc[i].records > tc_size)
{
mysql_mutex_lock(&table->s->tdc->LOCK_table_share);
goto purge;
tc[i].records--;
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
tc_remove_table(table);
}
else
{
table->in_use= 0;
table->s->tdc->free_tables[i].list.push_front(table);
tc[i].free_tables.push_back(table);
mysql_mutex_unlock(&tc[i].LOCK_table_cache);
}
table->tc_time= my_interval_timer();
mysql_mutex_lock(&table->s->tdc->LOCK_table_share);
if (table->s->tdc->flushed)
goto purge;
/*
in_use doesn't really need mutex protection, but must be reset after
checking tdc.flushed and before this table appears in free_tables.
Resetting in_use is needed only for print_cached_tables() and
list_open_tables().
*/
table->in_use= 0;
/* Add table to the list of unused TABLE objects for this share. */
table->s->tdc->free_tables.push_front(table);
mysql_mutex_unlock(&table->s->tdc->LOCK_table_share);
return false;
purge:
tc_remove_table(table);
mysql_mutex_unlock(&table->s->tdc->LOCK_table_share);
table->in_use= 0;
intern_close_table(table);
return true;
}
......@@ -456,7 +403,10 @@ static void tdc_assert_clean_share(TDC_element *element)
DBUG_ASSERT(element->ref_count == 0);
DBUG_ASSERT(element->m_flush_tickets.is_empty());
DBUG_ASSERT(element->all_tables.is_empty());
DBUG_ASSERT(element->free_tables.is_empty());
#ifndef DBUG_OFF
for (ulong i= 0; i < tc_instances; i++)
DBUG_ASSERT(element->free_tables[i].list.is_empty());
#endif
DBUG_ASSERT(element->all_tables_refs == 0);
DBUG_ASSERT(element->next == 0);
DBUG_ASSERT(element->prev == 0);
......@@ -527,7 +477,8 @@ static void lf_alloc_constructor(uchar *arg)
mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
element->m_flush_tickets.empty();
element->all_tables.empty();
element->free_tables.empty();
for (ulong i= 0; i < tc_instances; i++)
element->free_tables[i].list.empty();
element->all_tables_refs= 0;
element->share= 0;
element->ref_count= 0;
......@@ -573,23 +524,29 @@ static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
Initialize table definition cache.
*/
void tdc_init(void)
bool tdc_init(void)
{
DBUG_ENTER("tdc_init");
#ifdef HAVE_PSI_INTERFACE
init_tc_psi_keys();
mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
#endif
/* Extra instance is allocated to avoid false sharing */
if (!(tc= new Table_cache_instance[tc_instances + 1]))
DBUG_RETURN(true);
tdc_inited= true;
mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
MY_MUTEX_INIT_FAST);
tdc_version= 1L; /* Increments on each reload */
lf_hash_init(&tdc_hash, sizeof(TDC_element), LF_HASH_UNIQUE, 0, 0,
lf_hash_init(&tdc_hash, sizeof(TDC_element) +
sizeof(Share_free_tables) * (tc_instances - 1),
LF_HASH_UNIQUE, 0, 0,
(my_hash_get_key) tdc_hash_key,
&my_charset_bin);
tdc_hash.alloc.constructor= lf_alloc_constructor;
tdc_hash.alloc.destructor= lf_alloc_destructor;
tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
DBUG_VOID_RETURN;
DBUG_RETURN(false);
}
......@@ -631,6 +588,7 @@ void tdc_deinit(void)
tdc_inited= false;
lf_hash_destroy(&tdc_hash);
mysql_mutex_destroy(&LOCK_unused_shares);
delete [] tc;
}
DBUG_VOID_RETURN;
}
......@@ -1038,7 +996,7 @@ bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type,
const char *db, const char *table_name,
bool kill_delayed_threads)
{
TDC_element::TABLE_list purge_tables;
Share_free_tables::List purge_tables;
TABLE *table;
TDC_element *element;
uint my_refs= 1;
......
......@@ -18,6 +18,15 @@
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
/**
  List of unused TABLE objects of one share that belong to a single table
  cache instance.

  TDC_element holds an array of these (free_tables[]), indexed by table
  cache instance number.
*/
struct Share_free_tables
{
/** List type for TABLE objects, linked via the TABLE_share adapter. */
typedef I_P_List <TABLE, TABLE_share> List;
/** TABLE objects released to the cache and not currently in use. */
List list;
/** Avoid false sharing between instances */
char pad[CPU_LEVEL1_DCACHE_LINESIZE];
};
struct TDC_element
{
uchar m_key[NAME_LEN + 1 + NAME_LEN + 1];
......@@ -26,10 +35,8 @@ struct TDC_element
bool flushed;
TABLE_SHARE *share;
typedef I_P_List <TABLE, TABLE_share> TABLE_list;
/**
Protects ref_count, m_flush_tickets, all_tables, free_tables, flushed,
all_tables_refs.
Protects ref_count, m_flush_tickets, all_tables, flushed, all_tables_refs.
*/
mysql_mutex_t LOCK_table_share;
mysql_cond_t COND_release;
......@@ -45,7 +52,9 @@ struct TDC_element
for this share.
*/
All_share_tables_list all_tables;
TABLE_list free_tables;
/** Avoid false sharing between TDC_element and free_tables */
char pad[CPU_LEVEL1_DCACHE_LINESIZE];
Share_free_tables free_tables[1];
};
......@@ -59,8 +68,9 @@ enum enum_tdc_remove_table_type
extern ulong tdc_size;
extern ulong tc_size;
extern ulong tc_instances;
extern void tdc_init(void);
extern bool tdc_init(void);
extern void tdc_start_shutdown(void);
extern void tdc_deinit(void);
extern ulong tdc_records(void);
......@@ -86,7 +96,7 @@ extern int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
extern uint tc_records(void);
extern void tc_purge(bool mark_flushed= false);
extern void tc_add_table(THD *thd, TABLE *table);
extern bool tc_release_table(TABLE *table);
extern void tc_release_table(TABLE *table);
/**
Create a table cache key for non-temporary table.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment