Commit d231d813 authored by Krunal Bauskar's avatar Krunal Bauskar

MDEV-25029: Reduce dict_sys mutex contention for read-only workload

In theory, read-only workload shouldn't have anything to do with
dict_sys mutex. dict_sys mutex is meant to protect database metadata.

But then why does dict_sys mutex shows up as part of a read-only workload?
This workload needs to fetch stats for query processing and while reading
these stats dict_sys mutex is taken.

Is this really needed?
No. For the traditional reasons, it was the default global mutex used.

Based on 10.6 changes, flow can now use table->lock_mutex to protect
update/access of these stats. table mutexes being table specific
global contention arising out of dict_sys is reduced.

Thanks to Marko Makela for his early suggestion around proposed alternative
and review of the draft patch.
parent 80ac9ec1
......@@ -334,7 +334,9 @@ dict_table_close(
if (last_handle && strchr(table->name.m_name, '/') != NULL
&& dict_stats_is_persistent_enabled(table)) {
table->stats_mutex_lock();
dict_stats_deinit(table);
table->stats_mutex_unlock();
}
MONITOR_DEC(MONITOR_TABLE_REFERENCE);
......
......@@ -406,7 +406,9 @@ dict_stats_table_clone_create(
dict_table_t* t;
t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
t = (dict_table_t*) mem_heap_zalloc(heap, sizeof(*t));
t->stats_mutex_init();
MEM_CHECK_DEFINED(&table->id, sizeof(table->id));
t->id = table->id;
......@@ -434,7 +436,7 @@ dict_stats_table_clone_create(
dict_index_t* idx;
idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
idx = (dict_index_t*) mem_heap_zalloc(heap, sizeof(*idx));
MEM_CHECK_DEFINED(&index->id, sizeof(index->id));
idx->id = index->id;
......@@ -452,7 +454,7 @@ dict_stats_table_clone_create(
idx->n_uniq = index->n_uniq;
idx->fields = (dict_field_t*) mem_heap_alloc(
idx->fields = (dict_field_t*) mem_heap_zalloc(
heap, idx->n_uniq * sizeof(idx->fields[0]));
for (ulint i = 0; i < idx->n_uniq; i++) {
......@@ -463,15 +465,15 @@ dict_stats_table_clone_create(
/* hook idx into t->indexes */
UT_LIST_ADD_LAST(t->indexes, idx);
idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_zalloc(
heap,
idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_zalloc(
heap,
idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_zalloc(
heap,
idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
......@@ -494,6 +496,7 @@ dict_stats_table_clone_free(
/*========================*/
dict_table_t* t) /*!< in: dummy table object to free */
{
t->stats_mutex_destroy();
mem_heap_free(t->heap);
}
......@@ -510,7 +513,7 @@ dict_stats_empty_index(
{
ut_ad(!(index->type & DICT_FTS));
ut_ad(!dict_index_is_ibuf(index));
dict_sys.assert_locked();
ut_ad(index->table->stats_mutex_is_owner());
ulint n_uniq = index->n_uniq;
......@@ -540,7 +543,9 @@ dict_stats_empty_table(
bool empty_defrag_stats)
/*!< in: whether to empty defrag stats */
{
dict_sys.mutex_lock();
/* Initialize table/index level stats is now protected by
table level lock_mutex.*/
table->stats_mutex_lock();
/* Zero the stats members */
table->stat_n_rows = 0;
......@@ -566,7 +571,7 @@ dict_stats_empty_table(
}
table->stat_initialized = TRUE;
dict_sys.mutex_unlock();
table->stats_mutex_unlock();
}
/*********************************************************************//**
......@@ -665,7 +670,8 @@ dict_stats_copy(
to have the same statistics as if
the table was empty */
{
dict_sys.assert_locked();
ut_ad(src->stats_mutex_is_owner());
ut_ad(dst->stats_mutex_is_owner());
dst->stats_last_recalc = src->stats_last_recalc;
dst->stat_n_rows = src->stat_n_rows;
......@@ -790,8 +796,14 @@ dict_stats_snapshot_create(
t = dict_stats_table_clone_create(table);
table->stats_mutex_lock();
ut_d(t->stats_mutex_lock());
dict_stats_copy(t, table, false);
ut_d(t->stats_mutex_unlock());
table->stats_mutex_unlock();
t->stat_persistent = table->stat_persistent;
t->stats_auto_recalc = table->stats_auto_recalc;
t->stats_sample_pages = table->stats_sample_pages;
......@@ -836,14 +848,14 @@ dict_stats_update_transient_for_index(
Initialize some bogus index cardinality
statistics, so that the data can be queried in
various means, also via secondary indexes. */
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
dict_stats_empty_index(index, false);
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
} else if (ibuf_debug && !dict_index_is_clust(index)) {
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
dict_stats_empty_index(index, false);
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
} else {
mtr_t mtr;
......@@ -864,9 +876,9 @@ dict_stats_update_transient_for_index(
switch (size) {
case ULINT_UNDEFINED:
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
dict_stats_empty_index(index, false);
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
return;
case 0:
/* The root node of the tree is a leaf */
......@@ -883,7 +895,7 @@ dict_stats_update_transient_for_index(
index);
if (!stats.empty()) {
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
for (size_t i = 0; i < stats.size(); ++i) {
index->stat_n_diff_key_vals[i]
= stats[i].n_diff_key_vals;
......@@ -892,7 +904,7 @@ dict_stats_update_transient_for_index(
index->stat_n_non_null_key_vals[i]
= stats[i].n_non_null_key_vals;
}
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
}
}
}
......@@ -910,7 +922,7 @@ dict_stats_update_transient(
/*========================*/
dict_table_t* table) /*!< in/out: table */
{
dict_sys.assert_not_locked();
ut_ad(!table->stats_mutex_is_owner());
dict_index_t* index;
ulint sum_of_index_sizes = 0;
......@@ -943,9 +955,9 @@ dict_stats_update_transient(
if (dict_stats_should_ignore_index(index)
|| !index->is_readable()) {
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
dict_stats_empty_index(index, false);
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
continue;
}
......@@ -954,7 +966,7 @@ dict_stats_update_transient(
sum_of_index_sizes += index->stat_index_size;
}
dict_sys.mutex_lock();
table->stats_mutex_lock();
index = dict_table_get_first_index(table);
......@@ -972,7 +984,7 @@ dict_stats_update_transient(
table->stat_initialized = TRUE;
dict_sys.mutex_unlock();
table->stats_mutex_unlock();
}
/* @{ Pseudo code about the relation between the following functions
......@@ -1930,7 +1942,7 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
DBUG_PRINT("info", ("index: %s, online status: %d", index->name(),
dict_index_get_online_status(index)));
dict_sys.assert_not_locked(); // this function is slow
ut_ad(!index->table->stats_mutex_is_owner());
ut_ad(index->table->get_ref_count());
/* Disable update statistic for Rtree */
......@@ -2002,14 +2014,14 @@ static index_stats_t dict_stats_analyze_index(dict_index_t* index)
mtr.commit();
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
for (ulint i = 0; i < n_uniq; i++) {
result.stats[i].n_diff_key_vals = index->stat_n_diff_key_vals[i];
result.stats[i].n_sample_sizes = total_pages;
result.stats[i].n_non_null_key_vals = index->stat_n_non_null_key_vals[i];
}
result.n_leaf_pages = index->stat_n_leaf_pages;
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
DBUG_RETURN(result);
}
......@@ -2243,13 +2255,13 @@ dict_stats_update_persistent(
}
ut_ad(!dict_index_is_ibuf(index));
dict_sys.mutex_lock();
table->stats_mutex_lock();
dict_stats_empty_index(index, false);
dict_sys.mutex_unlock();
table->stats_mutex_unlock();
index_stats_t stats = dict_stats_analyze_index(index);
dict_sys.mutex_lock();
table->stats_mutex_lock();
index->stat_index_size = stats.index_size;
index->stat_n_leaf_pages = stats.n_leaf_pages;
for (size_t i = 0; i < stats.stats.size(); ++i) {
......@@ -2285,9 +2297,9 @@ dict_stats_update_persistent(
}
if (!(table->stats_bg_flag & BG_STAT_SHOULD_QUIT)) {
dict_sys.mutex_unlock();
table->stats_mutex_unlock();
stats = dict_stats_analyze_index(index);
dict_sys.mutex_lock();
table->stats_mutex_lock();
index->stat_index_size = stats.index_size;
index->stat_n_leaf_pages = stats.n_leaf_pages;
......@@ -2313,7 +2325,7 @@ dict_stats_update_persistent(
dict_stats_assert_initialized(table);
dict_sys.mutex_unlock();
table->stats_mutex_unlock();
return(DB_SUCCESS);
}
......@@ -3123,13 +3135,11 @@ dict_stats_update_for_index(
{
DBUG_ENTER("dict_stats_update_for_index");
dict_sys.assert_not_locked();
if (dict_stats_is_persistent_enabled(index->table)) {
if (dict_stats_persistent_storage_check(false)) {
index_stats_t stats = dict_stats_analyze_index(index);
dict_sys.mutex_lock();
index->table->stats_mutex_lock();
index->stat_index_size = stats.index_size;
index->stat_n_leaf_pages = stats.n_leaf_pages;
for (size_t i = 0; i < stats.stats.size(); ++i) {
......@@ -3142,7 +3152,7 @@ dict_stats_update_for_index(
}
index->table->stat_sum_of_other_index_sizes
+= index->stat_index_size;
dict_sys.mutex_unlock();
index->table->stats_mutex_unlock();
dict_stats_save(index->table, &index->id);
DBUG_VOID_RETURN;
......@@ -3183,7 +3193,7 @@ dict_stats_update(
the persistent statistics
storage */
{
dict_sys.assert_not_locked();
ut_ad(!table->stats_mutex_is_owner());
if (!table->is_readable()) {
return (dict_stats_report_error(table));
......@@ -3318,7 +3328,10 @@ dict_stats_update(
switch (err) {
case DB_SUCCESS:
dict_sys.mutex_lock();
table->stats_mutex_lock();
/* t is localized to this thread so no need to
take stats mutex lock (limiting it to debug only) */
ut_d(t->stats_mutex_lock());
/* Pass reset_ignored_indexes=true as parameter
to dict_stats_copy. This will cause statictics
......@@ -3327,7 +3340,8 @@ dict_stats_update(
dict_stats_assert_initialized(table);
dict_sys.mutex_unlock();
ut_d(t->stats_mutex_unlock());
table->stats_mutex_unlock();
dict_stats_table_clone_free(t);
......
......@@ -14044,7 +14044,7 @@ ha_innobase::info_low(
ulint stat_clustered_index_size;
ulint stat_sum_of_other_index_sizes;
dict_sys.mutex_lock();
ib_table->stats_mutex_lock();
ut_a(ib_table->stat_initialized);
......@@ -14056,7 +14056,7 @@ ha_innobase::info_low(
stat_sum_of_other_index_sizes
= ib_table->stat_sum_of_other_index_sizes;
dict_sys.mutex_unlock();
ib_table->stats_mutex_unlock();
/*
The MySQL optimizer seems to assume in a left join that n_rows
......@@ -14172,10 +14172,9 @@ ha_innobase::info_low(
stats.create_time = (ulong) stat_info.ctime;
}
struct Locking {
Locking() { dict_sys.mutex_lock(); }
~Locking() { dict_sys.mutex_unlock(); }
} locking;
ib_table->stats_mutex_lock();
auto _ = make_scope_exit([ib_table]() {
ib_table->stats_mutex_unlock(); });
ut_a(ib_table->stat_initialized);
......
......@@ -56,6 +56,7 @@ Created July 18, 2007 Vasil Dimov
#include "fil0fil.h"
#include "fil0crypt.h"
#include "dict0crea.h"
#include "scope.h"
/** The latest successfully looked up innodb_fts_aux_table */
UNIV_INTERN table_id_t innodb_ft_aux_table_id;
......@@ -5021,11 +5022,9 @@ i_s_dict_fill_sys_tablestats(
table->name.m_name));
{
struct Locking
{
Locking() { dict_sys.mutex_lock(); }
~Locking() { dict_sys.mutex_unlock(); }
} locking;
table->stats_mutex_lock();
auto _ = make_scope_exit([table]() {
table->stats_mutex_unlock(); });
OK(fields[SYS_TABLESTATS_INIT]->store(table->stat_initialized,
true));
......
......@@ -1962,6 +1962,9 @@ struct dict_table_t {
/** @return whether the current thread holds the lock_mutex */
bool lock_mutex_is_owner() const
{ return lock_mutex_owner == os_thread_get_curr_id(); }
/** @return whether the current thread holds the stats_mutex (lock_mutex) */
bool stats_mutex_is_owner() const
{ return lock_mutex_owner == os_thread_get_curr_id(); }
#endif /* UNIV_DEBUG */
void lock_mutex_init() { lock_mutex.init(); }
void lock_mutex_destroy() { lock_mutex.destroy(); }
......@@ -1986,6 +1989,16 @@ struct dict_table_t {
ut_ad(lock_mutex_owner.exchange(0) == os_thread_get_curr_id());
lock_mutex.wr_unlock();
}
/* stats mutex lock currently defaults to lock_mutex but in the future,
there could be a use-case to have separate mutex for stats.
  extra indirection (through inline so no performance hit) should
  help simplify code and increase long-term maintainability */
void stats_mutex_init() { lock_mutex_init(); }
void stats_mutex_destroy() { lock_mutex_destroy(); }
void stats_mutex_lock() { lock_mutex_lock(); }
void stats_mutex_unlock() { lock_mutex_unlock(); }
private:
/** Initialize instant->field_map.
@param[in] table table definition to copy from */
......
......@@ -148,7 +148,7 @@ dict_stats_init(
/*============*/
dict_table_t* table) /*!< in/out: table */
{
dict_sys.assert_not_locked();
ut_ad(!table->stats_mutex_is_owner());
if (table->stat_initialized) {
return;
......@@ -174,7 +174,7 @@ dict_stats_deinit(
/*==============*/
dict_table_t* table) /*!< in/out: table */
{
dict_sys.assert_locked();
ut_ad(table->stats_mutex_is_owner());
ut_a(table->get_ref_count() == 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment