Commit adedde52 authored by unknown's avatar unknown

WL#1036 (print the db in slave error messages).

I extended the task to cleaning error messages, making them look nicer,
and making the output of SHOW SLAVE STATUS (column Last_error) be as complete
as what's printed on the .err file;
previously we would have, for a failure of a replicated LOAD DATA INFILE:
- in the .err, 2 lines:
"duplicate entry 2708 for key 1"
"failed loading SQL_LOAD-5-2-2.info"
- and in SHOW SLAVE STATUS, only:
"failed loading SQL_LOAD-5-2-2.info".
Now SHOW SLAVE STATUS will contain the concatenation of the 2 messages.


sql/log_event.cc:
  Print the default database when replication stops because of an error. Previously, we had:
  "error "Duplicate entry 87987 for key 1", query 'insert into t values(87987)'", ie the db
  was not mentioned, making it hard for cases where the same table name is used in
  several databases.
  Lengthened some error messages (for failing replication of LOAD DATA: mention the table
  and the db).
  Changes so that SHOW SLAVE STATUS reports as complete errors as the .err file.
sql/slave.cc:
  Removed a useless declaration (the rewrite_db() function is already declared in slave.h).
  Added missing ')' in error messages.
  Tried to make error messages look nicer (previously we had
  "do START SLAVE;, error_code=1062"
  now we'll have
  "do START SLAVE; . Error_code=1062".
  This form has been discussed, I agree it's no panacea, but it's still more readable
  like this. To be improved in the future :)
sql/slave.h:
  declarations.
parent a9a75ed8
......@@ -1753,7 +1753,7 @@ void Unknown_log_event::print(FILE* file, bool short_form, char* last_db)
#ifndef MYSQL_CLIENT
int Query_log_event::exec_event(struct st_relay_log_info* rli)
{
int expected_error,actual_error = 0;
int expected_error, actual_error= 0;
init_sql_alloc(&thd->mem_root, 8192,0);
thd->db = rewrite_db((char*)db);
......@@ -1801,19 +1801,30 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
else if (!strcmp(thd->query,"COMMIT"))
rli->inside_transaction=0;
/*
If we expected a non-zero error code, and we don't get the same error
code, and none of them should be ignored.
*/
if ((expected_error != (actual_error = thd->net.last_errno)) &&
expected_error &&
!ignored_error_code(actual_error) &&
!ignored_error_code(expected_error))
{
const char* errmsg = "Slave: did not get the expected error\
running query from master - expected: '%s' (%d), got '%s' (%d)";
sql_print_error(errmsg, ER_SAFE(expected_error),
expected_error,
actual_error ? thd->net.last_error: "no error",
actual_error);
thd->query_error = 1;
slave_print_error(rli, 0,
"Query '%s' did not get the same error as the query \
got on master - got on master: '%s' (%d), got on slave: '%s' (%d) \
(default database was '%s')",
query,
ER_SAFE(expected_error),
expected_error,
actual_error ? thd->net.last_error: "no error",
actual_error,
print_slave_db_safe((char*)db));
thd->query_error= 1;
}
/*
If we get the same error code as expected, or they should be ignored.
*/
else if (expected_error == actual_error ||
ignored_error_code(actual_error))
{
......@@ -1821,37 +1832,38 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
*rli->last_slave_error = 0;
rli->last_slave_errno = 0;
}
}
else
{
// master could be inconsistent, abort and tell DBA to check/fix it
VOID(pthread_mutex_lock(&LOCK_thread_count));
thd->db = thd->query = 0;
VOID(pthread_mutex_unlock(&LOCK_thread_count));
thd->variables.convert_set = 0;
close_thread_tables(thd);
free_root(&thd->mem_root,0);
return 1;
}
}
thd->db= 0; // prevent db from being freed
/*
Other cases: mostly we expected no error and get one.
*/
else if (thd->query_error || thd->fatal_error)
{
slave_print_error(rli,actual_error, "Error '%s' on query '%s' \
(default database was '%s')",
actual_error ? thd->net.last_error :
"unexpected success or fatal error", query,
print_slave_db_safe((char*)db));
thd->query_error= 1;
}
}
/*
End of sanity check. If the test was wrong, the query got a really bad
error on the master, which could be inconsistent, abort and tell DBA to
check/fix it. check_expected_error() already printed the message to
stderr and rli, and set thd->query_error to 1.
*/
} /* End of if (db_ok(... */
end:
VOID(pthread_mutex_lock(&LOCK_thread_count));
thd->db= 0; // prevent db from being freed
thd->query= 0; // just to be sure
VOID(pthread_mutex_unlock(&LOCK_thread_count));
// assume no convert for next query unless set explictly
thd->variables.convert_set = 0;
close_thread_tables(thd);
if (thd->query_error || thd->fatal_error)
{
slave_print_error(rli,actual_error, "error '%s' on query '%s'",
actual_error ? thd->net.last_error :
"unexpected success or fatal error", query);
free_root(&thd->mem_root,0);
return 1;
}
close_thread_tables(thd);
free_root(&thd->mem_root,0);
return Log_event::exec_event(rli);
return (thd->query_error ? thd->query_error : Log_event::exec_event(rli));
}
/*
......@@ -1972,8 +1984,11 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
event in the master log
*/
sql_print_error("Slave: load data infile at position %s in log \
'%s' produced %d warning(s)", llstr(log_pos,llbuff), RPL_LOG_NAME,
thd->cuted_fields );
'%s' produced %d warning(s) (loaded table was '%s', database was '%s')",
llstr(log_pos,llbuff), RPL_LOG_NAME,
thd->cuted_fields,
(char*)table_name,
print_slave_db_safe((char*)db));
if (net)
net->pkt_nr= thd->net.pkt_nr;
}
......@@ -2005,8 +2020,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
err=ER(sql_errno);
}
slave_print_error(rli,sql_errno,
"Error '%s' running load data infile",
err);
"Error '%s' running load data infile \
(loaded table was '%s', database was '%s')",
err, (char*)table_name, print_slave_db_safe((char*)db));
free_root(&thd->mem_root,0);
return 1;
}
......@@ -2014,7 +2030,10 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
if (thd->fatal_error)
{
sql_print_error("Fatal error running LOAD DATA INFILE ");
slave_print_error(rli,ER_UNKNOWN_ERROR,
"Fatal error running \
LOAD DATA INFILE (loaded table was '%s', database was '%s')",
(char*)table_name, print_slave_db_safe((char*)db));
return 1;
}
......@@ -2311,7 +2330,21 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
*/
if (lev->exec_event(0,rli,1))
{
slave_print_error(rli,my_errno, "Failed executing load from '%s'", fname);
/*
We want to indicate the name of the file that could not be loaded
(SQL_LOADxxx).
But as we are here we are sure the error is in rli->last_slave_error and
rli->last_slave_errno (example of error: duplicate entry for key), so we
don't want to overwrite it with the filename.
What we want instead is add the filename to the current error message.
*/
char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME));
if (!tmp)
goto err;
slave_print_error(rli,rli->last_slave_errno, /* ok to re-use the error code */
"%s. Failed executing load from '%s'",
tmp, fname);
my_free(tmp,MYF(0));
thd->options = save_options;
goto err;
}
......
......@@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
static int create_table_from_dump(THD* thd, NET* net, const char* db,
const char* table_name);
static int check_master_version(MYSQL* mysql, MASTER_INFO* mi);
char* rewrite_db(char* db);
/*
......@@ -290,8 +289,6 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock);
/* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */
pthread_mutex_unlock(log_lock);
DBUG_RETURN ((*errmsg) ? 1 : 0);
}
......@@ -362,7 +359,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
rli->pending= 0;
if (!rli->inited)
{
DBUG_PRINT("info", ("rli->inited == 0"));
DBUG_RETURN(0);
}
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
......@@ -828,6 +828,21 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
return rli->abort_slave || abort_loop || thd->killed;
}
/*
Writes an error message to rli->last_slave_error and rli->last_slave_errno
(which will be displayed by SHOW SLAVE STATUS), and prints it to stderr.
SYNOPSIS
slave_print_error()
rli
err_code The error code
msg The error message (usually related to the error code, but can
contain more information).
... (this is printf-like format, with % symbols in msg)
RETURN VALUES
void
*/
void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
{
......@@ -835,9 +850,16 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
va_start(args,msg);
my_vsnprintf(rli->last_slave_error,
sizeof(rli->last_slave_error), msg, args);
sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
err_code);
rli->last_slave_errno = err_code;
/* If the error string ends with '.', do not add a ',' it would be ugly */
if (rli->last_slave_error[0] &&
(rli->last_slave_error[strlen(rli->last_slave_error)-1] == '.'))
sql_print_error("Slave: %s Error_code=%d", rli->last_slave_error,
err_code);
else
sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
err_code);
}
......@@ -865,6 +887,16 @@ char* rewrite_db(char* db)
return db;
}
/*
From other comments and tests in code, it looks like
sometimes Query_log_event and Load_log_event can have db==0
(see rewrite_db() above for example)
(cases where this happens are unclear; it may be when the master is 3.23).
*/
char* print_slave_db_safe(char* db)
{
return (db ? rewrite_db(db) : (char*) "");
}
/*
Checks whether a db matches some do_db and ignore_db rules
......@@ -1282,7 +1314,7 @@ file '%s')", fname);
&msg))
{
sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \
relay_log_pos=4");
relay_log_pos=4)");
goto err;
}
rli->master_log_name[0]= 0;
......@@ -1346,7 +1378,7 @@ file '%s')", fname);
{
char llbuf[22];
sql_print_error("Failed to open the relay log (relay_log_name='%s', \
relay_log_pos=%s", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
relay_log_pos=%s)", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
goto err;
}
}
......@@ -2142,14 +2174,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
case ER_NET_ERROR_ON_WRITE:
case ER_SERVER_SHUTDOWN:
case ER_NEW_ABORTING_CONNECTION:
my_snprintf(rli->last_slave_error, sizeof(rli->last_slave_error),
"Slave: query '%s' partially completed on the master \
slave_print_error(rli,expected_error,
"query '%s' partially completed on the master \
and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
SLAVE START;", thd->query);
rli->last_slave_errno = expected_error;
sql_print_error("%s",rli->last_slave_error);
SLAVE START; .", thd->query);
thd->query_error= 1;
return 1;
default:
return 0;
......
......@@ -383,9 +383,10 @@ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec);
void init_table_rule_hash(HASH* h, bool* h_inited);
void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited);
char* rewrite_db(char* db);
char* print_slave_db_safe(char* db);
int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code);
void skip_load_data_infile(NET* net);
void slave_print_error(RELAY_LOG_INFO* rli,int err_code, const char* msg, ...);
void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...);
void end_slave(); /* clean up */
int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment