Commit f3160ee4 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-22782 AddressSanitizer race condition in trx_free()

In trx_free() we used to declare the entire trx_t unaccessible
and then declare that some data members are accessible.
This involves a race condition with other threads that may concurrently
access the data members that must remain accessible.
One type of error is "AddressSanitizer: unknown-crash", whose
exact cause we have not determined.

Another type of error (reported in MDEV-23472) is "use-after-poison",
where the reported shadow bytes would in fact be 00, indicating that
the memory was no longer poisoned. The poison-access-unpoison race
condition was confirmed by "rr replay".

We eliminate the race condition by invoking MEM_NOACCESS on each
individual data member of trx_t before freeing the memory to the pool.
The memory would not be unpoisoned until the pool is freed
or the memory is being reused for another allocation.

trx_t::free(): Replaces trx_free().

trx_t::active_commit_ordered: Changed to bool, so that MEM_NOACCESS
can be invoked. Removed some accessor functions.

Pool: Remove all MEM_ instrumentation.

TrxFactory: Move the MEM_ instrumentation from Pool.

TrxFactory::debug(): Removed. Moved to trx_t::free(). Because
the memory was already marked unaccessible in trx_t::free(), the
Factory::debug() call in Pool::putl() would be unable to access it.

trx_allocate_for_background(): Replaces trx_create_low().

trx_t::free(): Perform all consistency checks while avoiding
duplication, and declare most data members unaccessible.
parent a19cb388
......@@ -4234,7 +4234,7 @@ fts_sync_commit(
<< " ins/sec";
}
/* Avoid assertion in trx_free(). */
/* Avoid assertion in trx_t::free(). */
trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
......@@ -4288,7 +4288,7 @@ fts_sync_rollback(
fts_sql_rollback(trx);
/* Avoid assertion in trx_free(). */
/* Avoid assertion in trx_t::free(). */
trx->dict_operation_lock_mode = 0;
trx_free_for_background(trx);
}
......
......@@ -2800,18 +2800,6 @@ trx_is_registered_for_2pc(
return(trx->is_registered == 1);
}
/*********************************************************************//**
Note that innobase_commit_ordered() was run. */
static inline
void
trx_set_active_commit_ordered(
/*==========================*/
trx_t* trx) /* in: transaction */
{
ut_a(trx_is_registered_for_2pc(trx));
trx->active_commit_ordered = 1;
}
/*********************************************************************//**
Note that a transaction has been registered with MySQL 2PC coordinator. */
static inline
......@@ -2821,7 +2809,7 @@ trx_register_for_2pc(
trx_t* trx) /* in: transaction */
{
trx->is_registered = 1;
ut_ad(trx->active_commit_ordered == 0);
ut_ad(!trx->active_commit_ordered);
}
/*********************************************************************//**
......@@ -2832,19 +2820,8 @@ trx_deregister_from_2pc(
/*====================*/
trx_t* trx) /* in: transaction */
{
trx->is_registered = 0;
trx->active_commit_ordered = 0;
}
/*********************************************************************//**
Check whether a transaction has active_commit_ordered set */
static inline
bool
trx_is_active_commit_ordered(
/*=========================*/
const trx_t* trx) /* in: transaction */
{
return(trx->active_commit_ordered == 1);
trx->is_registered= false;
trx->active_commit_ordered= false;
}
/*********************************************************************//**
......@@ -4617,8 +4594,7 @@ innobase_commit_ordered(
(!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
innobase_commit_ordered_2(trx, thd);
trx_set_active_commit_ordered(trx);
trx->active_commit_ordered = true;
DBUG_VOID_RETURN;
}
......@@ -4671,7 +4647,7 @@ innobase_commit(
DBUG_SUICIDE(););
/* Run the fast part of commit if we did not already. */
if (!trx_is_active_commit_ordered(trx)) {
if (!trx->active_commit_ordered) {
innobase_commit_ordered_2(trx, thd);
}
......@@ -5185,12 +5161,12 @@ static void innobase_kill_query(handlerton*, THD* thd, enum thd_kill_levels)
trx_mutex_enter(trx);
/* It is possible that innobase_close_connection() is concurrently
being executed on our victim. In that case, trx->mysql_thd would
be reset before invoking trx_free(). Even if the trx object is later
be reset before invoking trx_t::free(). Even if the trx object is later
reused for another client connection or a background transaction,
its trx->mysql_thd will differ from our thd.
If trx never performed any changes, nothing is really protecting
the trx_free() call or the changes of trx_t::state when the
the trx_t::free() call or the changes of trx_t::state when the
transaction is being rolled back and trx_commit_low() is being
executed.
......
......@@ -73,12 +73,9 @@ Creates a transaction object for MySQL.
trx_t*
trx_allocate_for_mysql(void);
/*========================*/
/********************************************************************//**
Creates a transaction object for background operations by the master thread.
@return own: transaction object */
trx_t*
trx_allocate_for_background(void);
/*=============================*/
/** @return allocated transaction object for internal operations */
trx_t *trx_allocate_for_background();
/** Frees and initialize a transaction object instantinated during recovery.
@param trx trx object to free and initialize during recovery */
......@@ -917,7 +914,8 @@ struct trx_t {
the coordinator using the XA API, and
is set to false after commit or
rollback. */
unsigned active_commit_ordered:1;/* 1 if owns prepare mutex */
/** whether this is holding the prepare mutex */
bool active_commit_ordered;
/*------------------------------*/
bool check_unique_secondary;
/*!< normally TRUE, but if the user
......@@ -1204,6 +1202,9 @@ struct trx_t {
ut_ad(old_n_ref > 0);
}
/** Free the memory to trx_pools */
inline void free();
private:
/** Assign a rollback segment for modifying temporary tables.
......
......@@ -87,15 +87,6 @@ struct Pool {
for (Element* elem = m_start; elem != m_last; ++elem) {
ut_ad(elem->m_pool == this);
#ifdef __SANITIZE_ADDRESS__
/* Unpoison the memory for AddressSanitizer */
MEM_UNDEFINED(&elem->m_type, sizeof elem->m_type);
#endif
#ifdef HAVE_valgrind
/* Declare the contents as initialized for Valgrind;
we checked this in mem_free(). */
MEM_MAKE_DEFINED(&elem->m_type, sizeof elem->m_type);
#endif
Factory::destroy(&elem->m_type);
}
......@@ -130,22 +121,6 @@ struct Pool {
elem = NULL;
}
#if defined HAVE_valgrind || defined __SANITIZE_ADDRESS__
if (elem) {
# ifdef __SANITIZE_ADDRESS__
/* Unpoison the memory for AddressSanitizer */
MEM_UNDEFINED(&elem->m_type, sizeof elem->m_type);
# endif
# ifdef HAVE_valgrind
/* Declare the memory initialized for Valgrind.
The trx_t that are released to the pool are
actually initialized; we checked that by
MEM_CHECK_DEFINED() in mem_free() below. */
# endif
MEM_MAKE_DEFINED(&elem->m_type, sizeof elem->m_type);
}
#endif
m_lock_strategy.exit();
return elem ? &elem->m_type : NULL;
}
......@@ -158,12 +133,10 @@ struct Pool {
byte* p = reinterpret_cast<byte*>(ptr + 1);
elem = reinterpret_cast<Element*>(p - sizeof(*elem));
MEM_CHECK_DEFINED(&elem->m_type, sizeof elem->m_type);
elem->m_pool->m_lock_strategy.enter();
elem->m_pool->putl(elem);
MEM_NOACCESS(&elem->m_type, sizeof elem->m_type);
elem->m_pool->m_lock_strategy.exit();
}
......@@ -186,9 +159,6 @@ struct Pool {
void putl(Element* elem)
{
ut_ad(elem >= m_start && elem < m_last);
ut_ad(Factory::debug(&elem->m_type));
m_pqueue.push(elem);
}
......
......@@ -105,7 +105,7 @@ trx_init(
trx->op_info = "";
trx->active_commit_ordered = 0;
trx->active_commit_ordered = false;
trx->isolation_level = TRX_ISO_REPEATABLE_READ;
......@@ -202,6 +202,15 @@ struct TrxFactory {
@param trx the transaction for which to release resources */
static void destroy(trx_t* trx)
{
#ifdef __SANITIZE_ADDRESS__
/* Unpoison the memory for AddressSanitizer */
MEM_UNDEFINED(trx, sizeof *trx);
#else
/* Declare the contents as initialized for Valgrind;
we checked this in trx_t::free(). */
MEM_MAKE_DEFINED(trx, sizeof *trx);
#endif
ut_a(trx->magic_n == TRX_MAGIC_N);
ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx->in_mysql_trx_list);
......@@ -229,39 +238,6 @@ struct TrxFactory {
trx->lock.table_locks.~lock_list();
}
/** Enforce any invariants here, this is called before the transaction
is added to the pool.
@return true if all OK */
static bool debug(const trx_t* trx)
{
ut_a(trx->error_state == DB_SUCCESS);
ut_a(trx->magic_n == TRX_MAGIC_N);
ut_ad(!trx->read_only);
ut_ad(trx->state == TRX_STATE_NOT_STARTED);
ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
ut_ad(trx->mysql_thd == 0);
ut_ad(!trx->in_rw_trx_list);
ut_ad(!trx->in_mysql_trx_list);
ut_a(trx->lock.wait_thr == NULL);
ut_a(trx->lock.wait_lock == NULL);
ut_a(trx->dict_operation_lock_mode == 0);
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
ut_ad(trx->autoinc_locks == NULL);
ut_ad(trx->lock.table_locks.empty());
return(true);
}
};
/** The lock strategy for TrxPool */
......@@ -338,13 +314,23 @@ trx_pool_close()
trx_pools = 0;
}
/** @return a trx_t instance from trx_pools. */
static
trx_t*
trx_create_low()
/** @return allocated transaction object for internal operations */
trx_t *trx_allocate_for_background()
{
trx_t* trx = trx_pools->get();
#ifdef __SANITIZE_ADDRESS__
/* Unpoison the memory for AddressSanitizer.
It may have been poisoned in trx_t::free().*/
MEM_UNDEFINED(trx, sizeof *trx);
#else
/* Declare the memory initialized for Valgrind.
The trx_t that are released to the pool are
actually initialized; we checked that by
MEM_CHECK_DEFINED() in trx_t::free(). */
MEM_MAKE_DEFINED(trx, sizeof *trx);
#endif
assert_trx_is_free(trx);
mem_heap_t* heap;
......@@ -360,14 +346,9 @@ trx_create_low()
alloc = ib_heap_allocator_create(heap);
/* Remember to free the vector explicitly in trx_free(). */
trx->autoinc_locks = ib_vector_create(alloc, sizeof(void**), 4);
/* Should have been either just initialized or .clear()ed by
trx_free(). */
ut_ad(trx->mod_tables.empty());
ut_ad(trx->lock.table_locks.empty());
ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
ut_ad(trx->lock.n_rec_locks == 0);
ut_ad(trx->lock.table_cached == 0);
ut_ad(trx->lock.rec_cached == 0);
......@@ -379,71 +360,119 @@ trx_create_low()
return(trx);
}
/**
Release a trx_t instance back to the pool.
@param trx the instance to release. */
static
void
trx_free(trx_t*& trx)
/** Free the memory to trx_pools */
inline void trx_t::free()
{
assert_trx_is_free(trx);
assert_trx_is_inactive(this);
trx->mysql_thd = 0;
trx->mysql_log_file_name = 0;
MEM_CHECK_DEFINED(this, sizeof *this);
// FIXME: We need to avoid this heap free/alloc for each commit.
if (trx->autoinc_locks != NULL) {
ut_ad(ib_vector_is_empty(trx->autoinc_locks));
/* We allocated a dedicated heap for the vector. */
ib_vector_free(trx->autoinc_locks);
trx->autoinc_locks = NULL;
}
ut_ad(!read_view);
ut_ad(!will_lock);
ut_ad(error_state == DB_SUCCESS);
ut_ad(magic_n == TRX_MAGIC_N);
ut_ad(!read_only);
ut_ad(!in_mysql_trx_list);
ut_ad(!lock.wait_lock);
trx->mod_tables.clear();
mysql_thd= NULL;
mysql_log_file_name= NULL;
ut_ad(trx->read_view == NULL);
// FIXME: We need to avoid this heap free/alloc for each commit.
if (autoinc_locks)
{
ut_ad(ib_vector_is_empty(autoinc_locks));
/* We allocated a dedicated heap for the vector. */
ib_vector_free(autoinc_locks);
autoinc_locks= NULL;
}
/* trx locking state should have been reset before returning trx
to pool */
ut_ad(trx->will_lock == 0);
mod_tables.clear();
trx_pools->mem_free(trx);
#ifdef __SANITIZE_ADDRESS__
/* Unpoison the memory for innodb_monitor_set_option;
it is operating also on the freed transaction objects. */
MEM_UNDEFINED(&trx->mutex, sizeof trx->mutex);
MEM_UNDEFINED(&trx->undo_mutex, sizeof trx->undo_mutex);
/* For innobase_kill_connection() */
MEM_UNDEFINED(&trx->state, sizeof trx->state);
MEM_UNDEFINED(&trx->mysql_thd, sizeof trx->mysql_thd);
#endif
#ifdef HAVE_valgrind_or_MSAN
/* Unpoison the memory for innodb_monitor_set_option;
it is operating also on the freed transaction objects.
We checked that these were initialized in
trx_pools->mem_free(trx). */
MEM_MAKE_DEFINED(&trx->mutex, sizeof trx->mutex);
MEM_MAKE_DEFINED(&trx->undo_mutex, sizeof trx->undo_mutex);
/* For innobase_kill_connection() */
MEM_MAKE_DEFINED(&trx->state, sizeof trx->state);
MEM_MAKE_DEFINED(&trx->mysql_thd, sizeof trx->mysql_thd);
MEM_NOACCESS(&n_ref, sizeof n_ref);
/* do not poison mutex */
MEM_NOACCESS(&id, sizeof id);
MEM_NOACCESS(&no, sizeof no);
/* state is accessed by innobase_kill_connection() */
MEM_NOACCESS(&is_recovered, sizeof is_recovered);
#ifdef WITH_WSREP
MEM_NOACCESS(&wsrep, sizeof wsrep);
#endif
trx = NULL;
}
/********************************************************************//**
Creates a transaction object for background operations by the master thread.
@return own: transaction object */
trx_t*
trx_allocate_for_background(void)
/*=============================*/
{
trx_t* trx;
trx = trx_create_low();
return(trx);
MEM_NOACCESS(&read_view, sizeof read_view);
MEM_NOACCESS(&trx_list, sizeof trx_list);
MEM_NOACCESS(&no_list, sizeof no_list);
MEM_NOACCESS(&lock, sizeof lock);
MEM_NOACCESS(&op_info, sizeof op_info);
MEM_NOACCESS(&isolation_level, sizeof isolation_level);
MEM_NOACCESS(&check_foreigns, sizeof check_foreigns);
MEM_NOACCESS(&is_registered, sizeof is_registered);
MEM_NOACCESS(&active_commit_ordered, sizeof active_commit_ordered);
MEM_NOACCESS(&check_unique_secondary, sizeof check_unique_secondary);
MEM_NOACCESS(&flush_log_later, sizeof flush_log_later);
MEM_NOACCESS(&must_flush_log_later, sizeof must_flush_log_later);
MEM_NOACCESS(&duplicates, sizeof duplicates);
MEM_NOACCESS(&dict_operation, sizeof dict_operation);
MEM_NOACCESS(&declared_to_be_inside_innodb, sizeof declared_to_be_inside_innodb);
MEM_NOACCESS(&n_tickets_to_enter_innodb, sizeof n_tickets_to_enter_innodb);
MEM_NOACCESS(&dict_operation_lock_mode, sizeof dict_operation_lock_mode);
MEM_NOACCESS(&start_time, sizeof start_time);
MEM_NOACCESS(&start_time_micro, sizeof start_time_micro);
MEM_NOACCESS(&commit_lsn, sizeof commit_lsn);
MEM_NOACCESS(&table_id, sizeof table_id);
/* mysql_thd is accessed by innobase_kill_connection() */
MEM_NOACCESS(&mysql_log_file_name, sizeof mysql_log_file_name);
MEM_NOACCESS(&mysql_log_offset, sizeof mysql_log_offset);
MEM_NOACCESS(&n_mysql_tables_in_use, sizeof n_mysql_tables_in_use);
MEM_NOACCESS(&mysql_n_tables_locked, sizeof mysql_n_tables_locked);
#ifdef UNIV_DEBUG
MEM_NOACCESS(&in_rw_trx_list, sizeof in_rw_trx_list);
#endif /* UNIV_DEBUG */
MEM_NOACCESS(&mysql_trx_list, sizeof mysql_trx_list);
#ifdef UNIV_DEBUG
MEM_NOACCESS(&in_mysql_trx_list, sizeof in_mysql_trx_list);
#endif /* UNIV_DEBUG */
MEM_NOACCESS(&error_state, sizeof error_state);
MEM_NOACCESS(&error_info, sizeof error_info);
MEM_NOACCESS(&error_key_num, sizeof error_key_num);
MEM_NOACCESS(&graph, sizeof graph);
MEM_NOACCESS(&trx_savepoints, sizeof trx_savepoints);
/* do not poison undo_mutex */
MEM_NOACCESS(&undo_no, sizeof undo_no);
MEM_NOACCESS(&undo_rseg_space, sizeof undo_rseg_space);
MEM_NOACCESS(&last_sql_stat_start, sizeof last_sql_stat_start);
MEM_NOACCESS(&rsegs, sizeof rsegs);
MEM_NOACCESS(&roll_limit, sizeof roll_limit);
#ifdef UNIV_DEBUG
MEM_NOACCESS(&in_rollback, sizeof in_rollback);
#endif /* UNIV_DEBUG */
MEM_NOACCESS(&pages_undone, sizeof pages_undone);
MEM_NOACCESS(&n_autoinc_rows, sizeof n_autoinc_rows);
MEM_NOACCESS(&autoinc_locks, sizeof autoinc_locks);
MEM_NOACCESS(&read_only, sizeof read_only);
MEM_NOACCESS(&auto_commit, sizeof auto_commit);
MEM_NOACCESS(&will_lock, sizeof will_lock);
MEM_NOACCESS(&fts_trx, sizeof fts_trx);
MEM_NOACCESS(&fts_next_doc_id, sizeof fts_next_doc_id);
MEM_NOACCESS(&flush_tables, sizeof flush_tables);
MEM_NOACCESS(&ddl, sizeof ddl);
MEM_NOACCESS(&internal, sizeof internal);
#ifdef UNIV_DEBUG
MEM_NOACCESS(&start_line, sizeof start_line);
MEM_NOACCESS(&start_file, sizeof start_file);
#endif /* UNIV_DEBUG */
MEM_NOACCESS(&xid, sizeof xid);
MEM_NOACCESS(&mod_tables, sizeof mod_tables);
MEM_NOACCESS(&detailed_error, sizeof detailed_error);
MEM_NOACCESS(&flush_observer, sizeof flush_observer);
MEM_NOACCESS(&n_rec_lock_waits, sizeof n_rec_lock_waits);
MEM_NOACCESS(&n_table_lock_waits, sizeof n_table_lock_waits);
MEM_NOACCESS(&total_rec_lock_wait_time, sizeof total_rec_lock_wait_time);
MEM_NOACCESS(&total_table_lock_wait_time, sizeof total_table_lock_wait_time);
#ifdef WITH_WSREP
MEM_NOACCESS(&wsrep_event, sizeof wsrep_event);
#endif /* WITH_WSREP */
MEM_NOACCESS(&magic_n, sizeof magic_n);
trx_pools->mem_free(this);
}
/********************************************************************//**
......@@ -517,8 +546,7 @@ trx_free_resurrected(trx_t* trx)
trx_validate_state_before_free(trx);
trx_init(trx);
trx_free(trx);
trx->free();
}
/** Free a transaction that was allocated by background or user threads.
......@@ -527,8 +555,7 @@ void
trx_free_for_background(trx_t* trx)
{
trx_validate_state_before_free(trx);
trx_free(trx);
trx->free();
}
/** Transition to committed state, to release implicit locks. */
......@@ -617,8 +644,7 @@ trx_free_prepared(
trx->state = TRX_STATE_NOT_STARTED;
ut_ad(!UT_LIST_GET_LEN(trx->lock.trx_locks));
trx->id = 0;
trx_free(trx);
trx->free();
}
/** Disconnect a transaction from MySQL and optionally mark it as if
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment