Commit 98e36b29 authored by Monty's avatar Monty

With parallel replication we have had a couple of bugs where DDL's

(like DROP TABLE) has been scheduled before conflicting DDL's (like INSERT)
are commited.

What makes these bugs hard to detect is that in most cases any wrong
schduling are caught by MDL locks. It's only when there are timing issues
that the bugs (usually deadlocks) are noticed.

This patch adds a DBUG_ASSERT() that detects, in parallel replication,
if a DDL is scheduled before any depending DML'S are commited.
It does this be checking if there are any conflicting replication locks
when the DDL is about to wait for getting it's MDL lock.

I also did some minor code cleanups in sql_base.cc to make this code
similar to other related code.
parent 66ac894c
......@@ -17,6 +17,7 @@
#include "sql_class.h"
#include "debug_sync.h"
#include "sql_array.h"
#include "rpl_rli.h"
#include <hash.h>
#include <mysqld_error.h>
#include <mysql/plugin.h>
......@@ -442,6 +443,7 @@ public:
virtual void notify_conflicting_locks(MDL_context *ctx) = 0;
virtual bitmap_t hog_lock_types_bitmap() const = 0;
bool check_if_conflicting_replication_locks(MDL_context *ctx);
/** List of granted tickets for this lock. */
Ticket_list m_granted;
......@@ -2290,6 +2292,44 @@ void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
}
}
/**
Check if there is any conflicting lock that could cause this thread
to wait for another thread which is not ready to commit.
This is always an error, as the upper level of parallel replication
should not allow a scheduling of a conflicting DDL until all earlier
transactions has commited.
This function is only called for a slave using parallel replication
and trying to get an exclusive lock for the table.
*/
bool MDL_lock::check_if_conflicting_replication_locks(MDL_context *ctx)
{
Ticket_iterator it(m_granted);
MDL_ticket *conflicting_ticket;
while ((conflicting_ticket= it++))
{
if (conflicting_ticket->get_ctx() != ctx)
{
MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
/*
If the conflicting thread is another parallel replication
thread for the same master and it's not in commit stage, then
the current transaction has started too early and something is
seriously wrong.
*/
if (conflicting_ctx->get_thd()->rgi_slave &&
conflicting_ctx->get_thd()->rgi_slave->rli ==
ctx->get_thd()->rgi_slave->rli &&
!conflicting_ctx->get_thd()->rgi_slave->did_mark_start_commit)
return 1; // Fatal error
}
}
return 0;
}
/**
Acquire one lock with waiting for conflicting locks to go away if needed.
......@@ -2355,6 +2395,19 @@ MDL_context::acquire_lock(MDL_request *mdl_request, ulong lock_wait_timeout)
if (lock->needs_notification(ticket) && lock_wait_timeout)
lock->notify_conflicting_locks(this);
/*
Ensure that if we are trying to get an exclusive lock for a slave
running parallel replication, then we are not blocked by another
parallel slave thread that is not committed. This should never happen as
the parallel replication scheduler should never schedule a DDL while
DML's are still running.
*/
DBUG_ASSERT((mdl_request->type != MDL_INTENTION_EXCLUSIVE &&
mdl_request->type != MDL_EXCLUSIVE) ||
!(get_thd()->rgi_slave &&
get_thd()->rgi_slave->is_parallel_exec &&
lock->check_if_conflicting_replication_locks(this)));
mysql_prlock_unlock(&lock->m_rwlock);
will_wait_for(ticket);
......
......@@ -910,7 +910,6 @@ private:
*/
MDL_wait_for_subgraph *m_waiting_for;
private:
THD *get_thd() const { return m_owner->get_thd(); }
MDL_ticket *find_ticket(MDL_request *mdl_req,
enum_mdl_duration *duration);
void release_locks_stored_before(enum_mdl_duration duration, MDL_ticket *sentinel);
......@@ -919,6 +918,7 @@ private:
MDL_ticket **out_ticket);
public:
THD *get_thd() const { return m_owner->get_thd(); }
void find_deadlock();
ulong get_thread_id() const { return thd_get_thread_id(get_thd()); }
......
......@@ -648,7 +648,6 @@ bool close_cached_connection_tables(THD *thd, LEX_STRING *connection)
static void mark_temp_tables_as_free_for_reuse(THD *thd)
{
rpl_group_info *rgi_slave;
DBUG_ENTER("mark_temp_tables_as_free_for_reuse");
if (thd->query_id == 0)
......@@ -657,9 +656,7 @@ static void mark_temp_tables_as_free_for_reuse(THD *thd)
DBUG_VOID_RETURN;
}
rgi_slave=thd->rgi_slave;
if ((!rgi_slave && thd->temporary_tables) ||
(rgi_slave && unlikely(rgi_slave->rli->save_temporary_tables)))
if (thd->have_temporary_tables())
{
thd->lock_temporary_tables();
for (TABLE *table= thd->temporary_tables ; table ; table= table->next)
......@@ -667,15 +664,7 @@ static void mark_temp_tables_as_free_for_reuse(THD *thd)
if ((table->query_id == thd->query_id) && ! table->open_by_handler)
mark_tmp_table_for_reuse(table);
}
thd->unlock_temporary_tables();
if (rgi_slave)
{
/*
Temporary tables are shared with other by sql execution threads.
As a safety messure, clear the pointer to the common area.
*/
thd->temporary_tables= 0;
}
thd->unlock_temporary_tables(1);
}
DBUG_VOID_RETURN;
}
......@@ -1643,7 +1632,7 @@ TABLE *find_temporary_table(THD *thd,
break;
}
}
thd->unlock_temporary_tables();
thd->unlock_temporary_tables(0);
return result;
}
......@@ -1746,7 +1735,7 @@ void close_temporary_table(THD *thd, TABLE *table,
thread_safe_decrement32(&slave_open_temp_tables, &thread_running_lock);
table->in_use= 0; // No statistics
}
thd->unlock_temporary_tables();
thd->unlock_temporary_tables(0);
close_temporary(table, free_share, delete_table);
DBUG_VOID_RETURN;
}
......@@ -5705,7 +5694,7 @@ TABLE *open_table_uncached(THD *thd, handlerton *hton,
{
thread_safe_increment32(&slave_open_temp_tables, &thread_running_lock);
}
thd->unlock_temporary_tables();
thd->unlock_temporary_tables(0);
}
tmp_table->pos_in_table_list= 0;
DBUG_PRINT("tmptable", ("opened table: '%s'.'%s' 0x%lx", tmp_table->s->db.str,
......
......@@ -6477,10 +6477,18 @@ void THD::rgi_lock_temporary_tables()
temporary_tables= rgi_slave->rli->save_temporary_tables;
}
void THD::rgi_unlock_temporary_tables()
void THD::rgi_unlock_temporary_tables(bool clear)
{
rgi_slave->rli->save_temporary_tables= temporary_tables;
mysql_mutex_unlock(&rgi_slave->rli->data_lock);
if (clear)
{
/*
Temporary tables are shared with other by sql execution threads.
As a safety messure, clear the pointer to the common area.
*/
temporary_tables= 0;
}
}
bool THD::rgi_have_temporary_tables()
......
......@@ -3778,7 +3778,7 @@ private:
/* Protect against add/delete of temporary tables in parallel replication */
void rgi_lock_temporary_tables();
void rgi_unlock_temporary_tables();
void rgi_unlock_temporary_tables(bool clear);
bool rgi_have_temporary_tables();
public:
/*
......@@ -3802,15 +3802,15 @@ public:
if (rgi_slave)
rgi_lock_temporary_tables();
}
inline void unlock_temporary_tables()
inline void unlock_temporary_tables(bool clear)
{
if (rgi_slave)
rgi_unlock_temporary_tables();
rgi_unlock_temporary_tables(clear);
}
inline bool have_temporary_tables()
{
return (temporary_tables ||
(rgi_slave && rgi_have_temporary_tables()));
(rgi_slave && unlikely(rgi_have_temporary_tables())));
}
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment