Commit adedde52 authored by unknown's avatar unknown

WL#1036 (print the db in slave error messages).

I extended the task to cleaning error messages, making them look nicer,
and making the output of SHOW SLAVE STATUS (column Last_error) be as complete
as what's printed on the .err file;
previously we would have, for a failure of a replicated LOAD DATA INFILE:
- in the .err, 2 lines:
"duplicate entry 2708 for key 1"
"failed loading SQL_LOAD-5-2-2.info"
- and in SHOW SLAVE STATUS, only:
"failed loading SQL_LOAD-5-2-2.info".
Now SHOW SLAVE STATUS will contain the concatenation of the 2 messages.


sql/log_event.cc:
  Print the default database when replication stops because of an error. Previously, we had:
  "error "Duplicate entry 87987 for key 1", query 'insert into t values(87987)'", ie the db
  was not mentioned, making it hard for cases where the same table name is used in
  several databases.
  Lengthened some error messages (for failing replication of LOAD DATA: mention the table
  and the db).
  Changes so that SHOW SLAVE STATUS reports as complete errors as the .err file.
sql/slave.cc:
  Removed a useless declaration (the rewrite_db() function is already declared in slave.h).
  Added missing ')' in error messages.
  Tried to make error messages look nicer (previously we had
  "do START SLAVE;, error_code=1062"
  now we'll have
  "do START SLAVE; . Error_code=1062".
  This form has been discussed, I agree it's no panacea, but it's still more readable
  like this. To be improved in the future :)
sql/slave.h:
  declarations.
parent a9a75ed8
...@@ -1753,7 +1753,7 @@ void Unknown_log_event::print(FILE* file, bool short_form, char* last_db) ...@@ -1753,7 +1753,7 @@ void Unknown_log_event::print(FILE* file, bool short_form, char* last_db)
#ifndef MYSQL_CLIENT #ifndef MYSQL_CLIENT
int Query_log_event::exec_event(struct st_relay_log_info* rli) int Query_log_event::exec_event(struct st_relay_log_info* rli)
{ {
int expected_error,actual_error = 0; int expected_error, actual_error= 0;
init_sql_alloc(&thd->mem_root, 8192,0); init_sql_alloc(&thd->mem_root, 8192,0);
thd->db = rewrite_db((char*)db); thd->db = rewrite_db((char*)db);
...@@ -1801,19 +1801,30 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) ...@@ -1801,19 +1801,30 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
else if (!strcmp(thd->query,"COMMIT")) else if (!strcmp(thd->query,"COMMIT"))
rli->inside_transaction=0; rli->inside_transaction=0;
/*
If we expected a non-zero error code, and we don't get the same error
code, and none of them should be ignored.
*/
if ((expected_error != (actual_error = thd->net.last_errno)) && if ((expected_error != (actual_error = thd->net.last_errno)) &&
expected_error && expected_error &&
!ignored_error_code(actual_error) && !ignored_error_code(actual_error) &&
!ignored_error_code(expected_error)) !ignored_error_code(expected_error))
{ {
const char* errmsg = "Slave: did not get the expected error\ slave_print_error(rli, 0,
running query from master - expected: '%s' (%d), got '%s' (%d)"; "Query '%s' did not get the same error as the query \
sql_print_error(errmsg, ER_SAFE(expected_error), got on master - got on master: '%s' (%d), got on slave: '%s' (%d) \
expected_error, (default database was '%s')",
actual_error ? thd->net.last_error: "no error", query,
actual_error); ER_SAFE(expected_error),
thd->query_error = 1; expected_error,
actual_error ? thd->net.last_error: "no error",
actual_error,
print_slave_db_safe((char*)db));
thd->query_error= 1;
} }
/*
If we get the same error code as expected, or they should be ignored.
*/
else if (expected_error == actual_error || else if (expected_error == actual_error ||
ignored_error_code(actual_error)) ignored_error_code(actual_error))
{ {
...@@ -1821,37 +1832,38 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) ...@@ -1821,37 +1832,38 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
*rli->last_slave_error = 0; *rli->last_slave_error = 0;
rli->last_slave_errno = 0; rli->last_slave_errno = 0;
} }
} /*
else Other cases: mostly we expected no error and get one.
{ */
// master could be inconsistent, abort and tell DBA to check/fix it else if (thd->query_error || thd->fatal_error)
VOID(pthread_mutex_lock(&LOCK_thread_count)); {
thd->db = thd->query = 0; slave_print_error(rli,actual_error, "Error '%s' on query '%s' \
VOID(pthread_mutex_unlock(&LOCK_thread_count)); (default database was '%s')",
thd->variables.convert_set = 0; actual_error ? thd->net.last_error :
close_thread_tables(thd); "unexpected success or fatal error", query,
free_root(&thd->mem_root,0); print_slave_db_safe((char*)db));
return 1; thd->query_error= 1;
} }
} }
thd->db= 0; // prevent db from being freed /*
End of sanity check. If the test was wrong, the query got a really bad
error on the master, which could be inconsistent, abort and tell DBA to
check/fix it. check_expected_error() already printed the message to
stderr and rli, and set thd->query_error to 1.
*/
} /* End of if (db_ok(... */
end:
VOID(pthread_mutex_lock(&LOCK_thread_count)); VOID(pthread_mutex_lock(&LOCK_thread_count));
thd->db= 0; // prevent db from being freed
thd->query= 0; // just to be sure thd->query= 0; // just to be sure
VOID(pthread_mutex_unlock(&LOCK_thread_count)); VOID(pthread_mutex_unlock(&LOCK_thread_count));
// assume no convert for next query unless set explictly // assume no convert for next query unless set explictly
thd->variables.convert_set = 0; thd->variables.convert_set = 0;
close_thread_tables(thd); close_thread_tables(thd);
if (thd->query_error || thd->fatal_error)
{
slave_print_error(rli,actual_error, "error '%s' on query '%s'",
actual_error ? thd->net.last_error :
"unexpected success or fatal error", query);
free_root(&thd->mem_root,0);
return 1;
}
free_root(&thd->mem_root,0); free_root(&thd->mem_root,0);
return Log_event::exec_event(rli); return (thd->query_error ? thd->query_error : Log_event::exec_event(rli));
} }
/* /*
...@@ -1972,8 +1984,11 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ...@@ -1972,8 +1984,11 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
event in the master log event in the master log
*/ */
sql_print_error("Slave: load data infile at position %s in log \ sql_print_error("Slave: load data infile at position %s in log \
'%s' produced %d warning(s)", llstr(log_pos,llbuff), RPL_LOG_NAME, '%s' produced %d warning(s) (loaded table was '%s', database was '%s')",
thd->cuted_fields ); llstr(log_pos,llbuff), RPL_LOG_NAME,
thd->cuted_fields,
(char*)table_name,
print_slave_db_safe((char*)db));
if (net) if (net)
net->pkt_nr= thd->net.pkt_nr; net->pkt_nr= thd->net.pkt_nr;
} }
...@@ -2005,8 +2020,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ...@@ -2005,8 +2020,9 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
err=ER(sql_errno); err=ER(sql_errno);
} }
slave_print_error(rli,sql_errno, slave_print_error(rli,sql_errno,
"Error '%s' running load data infile", "Error '%s' running load data infile \
err); (loaded table was '%s', database was '%s')",
err, (char*)table_name, print_slave_db_safe((char*)db));
free_root(&thd->mem_root,0); free_root(&thd->mem_root,0);
return 1; return 1;
} }
...@@ -2014,7 +2030,10 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, ...@@ -2014,7 +2030,10 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli,
if (thd->fatal_error) if (thd->fatal_error)
{ {
sql_print_error("Fatal error running LOAD DATA INFILE "); slave_print_error(rli,ER_UNKNOWN_ERROR,
"Fatal error running \
LOAD DATA INFILE (loaded table was '%s', database was '%s')",
(char*)table_name, print_slave_db_safe((char*)db));
return 1; return 1;
} }
...@@ -2311,7 +2330,21 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) ...@@ -2311,7 +2330,21 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
*/ */
if (lev->exec_event(0,rli,1)) if (lev->exec_event(0,rli,1))
{ {
slave_print_error(rli,my_errno, "Failed executing load from '%s'", fname); /*
We want to indicate the name of the file that could not be loaded
(SQL_LOADxxx).
But as we are here we are sure the error is in rli->last_slave_error and
rli->last_slave_errno (example of error: duplicate entry for key), so we
don't want to overwrite it with the filename.
What we want instead is add the filename to the current error message.
*/
char *tmp= my_strdup(rli->last_slave_error,MYF(MY_WME));
if (!tmp)
goto err;
slave_print_error(rli,rli->last_slave_errno, /* ok to re-use the error code */
"%s. Failed executing load from '%s'",
tmp, fname);
my_free(tmp,MYF(0));
thd->options = save_options; thd->options = save_options;
goto err; goto err;
} }
......
...@@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table); ...@@ -74,7 +74,6 @@ static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
static int create_table_from_dump(THD* thd, NET* net, const char* db, static int create_table_from_dump(THD* thd, NET* net, const char* db,
const char* table_name); const char* table_name);
static int check_master_version(MYSQL* mysql, MASTER_INFO* mi); static int check_master_version(MYSQL* mysql, MASTER_INFO* mi);
char* rewrite_db(char* db);
/* /*
...@@ -290,8 +289,6 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, ...@@ -290,8 +289,6 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
if (need_data_lock) if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock); pthread_mutex_unlock(&rli->data_lock);
/* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */
pthread_mutex_unlock(log_lock); pthread_mutex_unlock(log_lock);
DBUG_RETURN ((*errmsg) ? 1 : 0); DBUG_RETURN ((*errmsg) ? 1 : 0);
} }
...@@ -362,7 +359,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, ...@@ -362,7 +359,10 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
rli->pending= 0; rli->pending= 0;
if (!rli->inited) if (!rli->inited)
{
DBUG_PRINT("info", ("rli->inited == 0"));
DBUG_RETURN(0); DBUG_RETURN(0);
}
DBUG_ASSERT(rli->slave_running == 0); DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0); DBUG_ASSERT(rli->mi->slave_running == 0);
...@@ -828,6 +828,21 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli) ...@@ -828,6 +828,21 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
return rli->abort_slave || abort_loop || thd->killed; return rli->abort_slave || abort_loop || thd->killed;
} }
/*
Writes an error message to rli->last_slave_error and rli->last_slave_errno
(which will be displayed by SHOW SLAVE STATUS), and prints it to stderr.
SYNOPSIS
slave_print_error()
rli
err_code The error code
msg The error message (usually related to the error code, but can
contain more information).
... (this is printf-like format, with % symbols in msg)
RETURN VALUES
void
*/
void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
{ {
...@@ -835,9 +850,16 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...) ...@@ -835,9 +850,16 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...)
va_start(args,msg); va_start(args,msg);
my_vsnprintf(rli->last_slave_error, my_vsnprintf(rli->last_slave_error,
sizeof(rli->last_slave_error), msg, args); sizeof(rli->last_slave_error), msg, args);
sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
err_code);
rli->last_slave_errno = err_code; rli->last_slave_errno = err_code;
/* If the error string ends with '.', do not add a ',' it would be ugly */
if (rli->last_slave_error[0] &&
(rli->last_slave_error[strlen(rli->last_slave_error)-1] == '.'))
sql_print_error("Slave: %s Error_code=%d", rli->last_slave_error,
err_code);
else
sql_print_error("Slave: %s, error_code=%d", rli->last_slave_error,
err_code);
} }
...@@ -865,6 +887,16 @@ char* rewrite_db(char* db) ...@@ -865,6 +887,16 @@ char* rewrite_db(char* db)
return db; return db;
} }
/*
From other comments and tests in code, it looks like
sometimes Query_log_event and Load_log_event can have db==0
(see rewrite_db() above for example)
(cases where this happens are unclear; it may be when the master is 3.23).
*/
char* print_slave_db_safe(char* db)
{
return (db ? rewrite_db(db) : (char*) "");
}
/* /*
Checks whether a db matches some do_db and ignore_db rules Checks whether a db matches some do_db and ignore_db rules
...@@ -1282,7 +1314,7 @@ file '%s')", fname); ...@@ -1282,7 +1314,7 @@ file '%s')", fname);
&msg)) &msg))
{ {
sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \ sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \
relay_log_pos=4"); relay_log_pos=4)");
goto err; goto err;
} }
rli->master_log_name[0]= 0; rli->master_log_name[0]= 0;
...@@ -1346,7 +1378,7 @@ file '%s')", fname); ...@@ -1346,7 +1378,7 @@ file '%s')", fname);
{ {
char llbuf[22]; char llbuf[22];
sql_print_error("Failed to open the relay log (relay_log_name='%s', \ sql_print_error("Failed to open the relay log (relay_log_name='%s', \
relay_log_pos=%s", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf)); relay_log_pos=%s)", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf));
goto err; goto err;
} }
} }
...@@ -2142,14 +2174,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error) ...@@ -2142,14 +2174,13 @@ int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int expected_error)
case ER_NET_ERROR_ON_WRITE: case ER_NET_ERROR_ON_WRITE:
case ER_SERVER_SHUTDOWN: case ER_SERVER_SHUTDOWN:
case ER_NEW_ABORTING_CONNECTION: case ER_NEW_ABORTING_CONNECTION:
my_snprintf(rli->last_slave_error, sizeof(rli->last_slave_error), slave_print_error(rli,expected_error,
"Slave: query '%s' partially completed on the master \ "query '%s' partially completed on the master \
and was aborted. There is a chance that your master is inconsistent at this \ and was aborted. There is a chance that your master is inconsistent at this \
point. If you are sure that your master is ok, run this query manually on the\ point. If you are sure that your master is ok, run this query manually on the\
slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\ slave and then restart the slave with SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;\
SLAVE START;", thd->query); SLAVE START; .", thd->query);
rli->last_slave_errno = expected_error; thd->query_error= 1;
sql_print_error("%s",rli->last_slave_error);
return 1; return 1;
default: default:
return 0; return 0;
......
...@@ -383,9 +383,10 @@ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec); ...@@ -383,9 +383,10 @@ int add_wild_table_rule(DYNAMIC_ARRAY* a, const char* table_spec);
void init_table_rule_hash(HASH* h, bool* h_inited); void init_table_rule_hash(HASH* h, bool* h_inited);
void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited); void init_table_rule_array(DYNAMIC_ARRAY* a, bool* a_inited);
char* rewrite_db(char* db); char* rewrite_db(char* db);
char* print_slave_db_safe(char* db);
int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code); int check_expected_error(THD* thd, RELAY_LOG_INFO* rli, int error_code);
void skip_load_data_infile(NET* net); void skip_load_data_infile(NET* net);
void slave_print_error(RELAY_LOG_INFO* rli,int err_code, const char* msg, ...); void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...);
void end_slave(); /* clean up */ void end_slave(); /* clean up */
int init_master_info(MASTER_INFO* mi, const char* master_info_fname, int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment