Commit 66832b61 authored by Nirbhay Choubey

MDEV-9598: Donor's rsync SST script hangs if FTWRL fails

During SST, wsrep_sst_rsync waits for mysqld to create the
"tables_flushed" file once FTWRL has been executed successfully,
so the script would wait forever if FTWRL failed.
Fixed by introducing a mechanism to report the failure to the script.
parent 0251232f
...@@ -127,7 +127,10 @@ then ...@@ -127,7 +127,10 @@ then
then then
FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed" FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed"
ERROR="$WSREP_SST_OPT_DATA/sst_error"
rm -rf "$FLUSHED" rm -rf "$FLUSHED"
rm -rf "$ERROR"
# Use deltaxfer only for WAN # Use deltaxfer only for WAN
inv=$(basename $0) inv=$(basename $0)
...@@ -137,10 +140,20 @@ then ...@@ -137,10 +140,20 @@ then
echo "flush tables" echo "flush tables"
# Wait for : # Wait for :
# (a) tables to be flushed, and # (a) Tables to be flushed, AND
# (b) state ID & wsrep_gtid_domain_id to be written to the file. # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR
# (c) ERROR file, in case flush tables operation failed.
while [ ! -r "$FLUSHED" ] && ! grep -q ':' "$FLUSHED" >/dev/null 2>&1 while [ ! -r "$FLUSHED" ] && ! grep -q ':' "$FLUSHED" >/dev/null 2>&1
do do
# Check whether ERROR file exists.
if [ -f "$ERROR" ]
then
# Flush tables operation failed.
rm -rf "$ERROR"
exit 255
fi
sleep 0.2 sleep 0.2
done done
......
...@@ -896,6 +896,56 @@ static int sst_donate_mysqldump (const char* addr, ...@@ -896,6 +896,56 @@ static int sst_donate_mysqldump (const char* addr,
wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED; wsrep_seqno_t wsrep_locked_seqno= WSREP_SEQNO_UNDEFINED;
/*
  Create (or replace) a file under the data directory.

  The file is first written as "<name>.tmp" inside mysql_real_data_home and
  then rename()d to its final name, so a process polling for `name` never
  observes a partially written file.

  @param name     File name, relative to mysql_real_data_home.
  @param content  Text to store (a trailing newline is appended), or NULL to
                  create an empty file.

  @return 0 on success, otherwise the errno value of the step that failed.
          On failure the temporary file is removed and `name` is left
          untouched.
*/
static int sst_create_file(const char *name, const char *content)
{
  int err= 0;

  /* +2 covers the '/' separator and the terminating NUL. */
  size_t const len= strlen(mysql_real_data_home) + strlen(name) + 2;

  char *real_name= (char *) alloca(len);
  snprintf(real_name, len, "%s/%s", mysql_real_data_home, name);

  /* The ".tmp" suffix needs four more bytes. */
  char *tmp_name= (char *) alloca(len + 4);
  snprintf(tmp_name, len + 4, "%s.tmp", real_name);

  FILE *file= fopen(tmp_name, "w+");
  if (file == NULL)
  {
    err= errno;
    WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err, strerror(err));
    return err;
  }

  if (content != NULL)
  {
    /*
      fsync() only covers data that has already reached the descriptor, so
      the stdio buffer must be flushed before syncing.
    */
    if (fprintf(file, "%s\n", content) < 0 ||
        fflush(file) != 0 ||
        fsync(fileno(file)) != 0)
    {
      err= errno ? errno : EIO;
    }
  }

  /* Always close the stream; keep the first error if one already occurred. */
  if (fclose(file) != 0 && err == 0)
  {
    err= errno ? errno : EIO;
  }

  if (err)
  {
    WSREP_ERROR("Failed to write '%s': %d (%s)", tmp_name, err, strerror(err));
    /* Do not publish an incomplete file under the final name. */
    remove(tmp_name);
    return err;
  }

  if (rename(tmp_name, real_name) == -1)
  {
    err= errno;
    WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)", tmp_name,
                real_name, err, strerror(err));
    remove(tmp_name);
  }

  return err;
}
static int run_sql_command(THD *thd, const char *query) static int run_sql_command(THD *thd, const char *query)
{ {
thd->set_query((char *)query, strlen(query)); thd->set_query((char *)query, strlen(query));
...@@ -911,7 +961,7 @@ static int run_sql_command(THD *thd, const char *query) ...@@ -911,7 +961,7 @@ static int run_sql_command(THD *thd, const char *query)
if (thd->is_error()) if (thd->is_error())
{ {
int const err= thd->get_stmt_da()->sql_errno(); int const err= thd->get_stmt_da()->sql_errno();
WSREP_WARN ("error executing '%s': %d (%s)%s", WSREP_WARN ("Error executing '%s': %d (%s)%s",
query, err, thd->get_stmt_da()->message(), query, err, thd->get_stmt_da()->message(),
err == ER_UNKNOWN_SYSTEM_VARIABLE ? err == ER_UNKNOWN_SYSTEM_VARIABLE ?
". Was mysqld built with --with-innodb-disallow-writes ?" : ""); ". Was mysqld built with --with-innodb-disallow-writes ?" : "");
...@@ -921,15 +971,21 @@ static int run_sql_command(THD *thd, const char *query) ...@@ -921,15 +971,21 @@ static int run_sql_command(THD *thd, const char *query)
return 0; return 0;
} }
static int sst_flush_tables(THD* thd) static int sst_flush_tables(THD* thd)
{ {
WSREP_INFO("Flushing tables for SST..."); WSREP_INFO("Flushing tables for SST...");
int err; int err;
int not_used; int not_used;
CHARSET_INFO *current_charset; /*
Files created to notify the SST script about the outcome of table flush
operation.
*/
const char *flush_success= "tables_flushed";
const char *flush_error= "sst_error";
current_charset = thd->variables.character_set_client; CHARSET_INFO *current_charset= thd->variables.character_set_client;
if (!is_supported_parser_charset(current_charset)) if (!is_supported_parser_charset(current_charset))
{ {
...@@ -942,61 +998,55 @@ static int sst_flush_tables(THD* thd) ...@@ -942,61 +998,55 @@ static int sst_flush_tables(THD* thd)
if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK")) if (run_sql_command(thd, "FLUSH TABLES WITH READ LOCK"))
{ {
WSREP_ERROR("Failed to flush and lock tables"); err= -1;
err = -1;
} }
else else
{ {
/* make sure logs are flushed after global read lock acquired */ /*
err= reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG, Make sure logs are flushed after global read lock acquired. In case
(TABLE_LIST*) 0, &not_used); reload fails, we must also release the acquired FTWRL.
*/
if (reload_acl_and_cache(thd, REFRESH_ENGINE_LOG | REFRESH_BINARY_LOG,
(TABLE_LIST*) 0, &not_used))
{
thd->global_read_lock.unlock_global_read_lock(thd);
err= -1;
}
} }
thd->variables.character_set_client = current_charset; thd->variables.character_set_client = current_charset;
if (err) if (err)
{ {
WSREP_ERROR("Failed to flush tables: %d (%s)", err, strerror(err)); WSREP_ERROR("Failed to flush and lock tables");
/*
The SST must be aborted as the flush tables failed. Notify this to SST
script by creating the error file.
*/
int tmp;
if ((tmp= sst_create_file(flush_error, NULL))) {
err= tmp;
}
} }
else else
{ {
WSREP_INFO("Tables flushed."); WSREP_INFO("Tables flushed.");
const char base_name[]= "tables_flushed";
ssize_t const full_len= strlen(mysql_real_data_home) + strlen(base_name)+2;
char *real_name= (char *) alloca(full_len);
snprintf(real_name, (size_t) full_len, "%s/%s", mysql_real_data_home,
base_name);
char *tmp_name= (char *) alloca(full_len + 4);
snprintf(tmp_name, (size_t) full_len + 4, "%s.tmp", real_name);
FILE* file= fopen(tmp_name, "w+"); /*
if (0 == file) Tables have been flushed. Create a file with cluster state ID and
{ wsrep_gtid_domain_id.
err= errno; */
WSREP_ERROR("Failed to open '%s': %d (%s)", tmp_name, err,strerror(err)); char content[100];
} snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid,
else (long long)wsrep_locked_seqno, wsrep_gtid_domain_id);
{ err= sst_create_file(flush_success, content);
// Write cluster state ID and wsrep_gtid_domain_id.
fprintf(file, "%s:%lld %d\n",
wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno,
wsrep_gtid_domain_id);
fsync(fileno(file));
fclose(file);
if (rename(tmp_name, real_name) == -1)
{
err= errno;
WSREP_ERROR("Failed to rename '%s' to '%s': %d (%s)",
tmp_name, real_name, err,strerror(err));
}
}
} }
return err; return err;
} }
static void sst_disallow_writes (THD* thd, bool yes) static void sst_disallow_writes (THD* thd, bool yes)
{ {
char query_str[64] = { 0, }; char query_str[64] = { 0, };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment