Commit 746fc2f4 authored by unknown's avatar unknown

BUG#13985 Cluster: ndb_mgm "status" command can return incorrect data node status

Partial fix for this bug. More information on what the other half of the fix
involves is in the bug report.


ndb/src/mgmclient/CommandInterpreter.cpp:
  partial fix for bug13985
  
  Hold a mutex while printing out events.
  Also hold the mutex while printing the output of 'status' commands.
  
  This means we no longer get output such as
  1 started
  2 started
  2 starting
  
  where an event line is interleaved with the status output.
  
  We'll instead get the event before or after the entire status output.
  Because the event arrives before the status is updated,
  we'll pretty much always see 'started' AFTER 'starting'.
parent 48fe5a2d
...@@ -172,8 +172,15 @@ class CommandInterpreter { ...@@ -172,8 +172,15 @@ class CommandInterpreter {
bool rep_connected; bool rep_connected;
#endif #endif
struct NdbThread* m_event_thread; struct NdbThread* m_event_thread;
NdbMutex *m_print_mutex;
}; };
/*
 * Argument bundle handed to event_thread_run() through the thread's
 * void* parameter. event_thread_run() copies the struct on entry and
 * then dereferences both members once to obtain its working values.
 *
 * NOTE(review): connect() fills in a local instance of this struct and
 * passes its address to NdbThread_Create(); confirm that the local is
 * still alive when the new thread makes its copy.
 */
struct event_thread_param {
/* Points at the management-server handle the event thread should use. */
NdbMgmHandle *m;
/* Points at the mutex pointer guarding printed output; the event thread
   holds this mutex (via Guard) while writing an event line. */
NdbMutex **p;
};
NdbMutex* print_mutex;
/* /*
* Facade object for CommandInterpreter * Facade object for CommandInterpreter
...@@ -409,6 +416,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) ...@@ -409,6 +416,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
m_connected= false; m_connected= false;
m_event_thread= 0; m_event_thread= 0;
try_reconnect = 0; try_reconnect = 0;
m_print_mutex= NdbMutex_Create();
#ifdef HAVE_GLOBAL_REPLICATION #ifdef HAVE_GLOBAL_REPLICATION
rep_host = NULL; rep_host = NULL;
m_repserver = NULL; m_repserver = NULL;
...@@ -422,6 +430,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose) ...@@ -422,6 +430,7 @@ CommandInterpreter::CommandInterpreter(const char *_host,int verbose)
CommandInterpreter::~CommandInterpreter() CommandInterpreter::~CommandInterpreter()
{ {
disconnect(); disconnect();
NdbMutex_Destroy(m_print_mutex);
ndb_mgm_destroy_handle(&m_mgmsrv); ndb_mgm_destroy_handle(&m_mgmsrv);
ndb_mgm_destroy_handle(&m_mgmsrv2); ndb_mgm_destroy_handle(&m_mgmsrv2);
} }
...@@ -461,11 +470,13 @@ CommandInterpreter::printError() ...@@ -461,11 +470,13 @@ CommandInterpreter::printError()
static int do_event_thread; static int do_event_thread;
static void* static void*
event_thread_run(void* m) event_thread_run(void* p)
{ {
DBUG_ENTER("event_thread_run"); DBUG_ENTER("event_thread_run");
NdbMgmHandle handle= *(NdbMgmHandle*)m; struct event_thread_param param= *(struct event_thread_param*)p;
NdbMgmHandle handle= *(param.m);
NdbMutex* printmutex= *(param.p);
int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP, int filter[] = { 15, NDB_MGM_EVENT_CATEGORY_BACKUP,
1, NDB_MGM_EVENT_CATEGORY_STARTUP, 1, NDB_MGM_EVENT_CATEGORY_STARTUP,
...@@ -483,8 +494,12 @@ event_thread_run(void* m) ...@@ -483,8 +494,12 @@ event_thread_run(void* m)
{ {
const char ping_token[]= "<PING>"; const char ping_token[]= "<PING>";
if (memcmp(ping_token,tmp,sizeof(ping_token)-1)) if (memcmp(ping_token,tmp,sizeof(ping_token)-1))
if(tmp && strlen(tmp))
{
Guard g(printmutex);
ndbout << tmp; ndbout << tmp;
} }
}
} while(do_event_thread); } while(do_event_thread);
NDB_CLOSE_SOCKET(fd); NDB_CLOSE_SOCKET(fd);
} }
...@@ -516,8 +531,11 @@ CommandInterpreter::connect() ...@@ -516,8 +531,11 @@ CommandInterpreter::connect()
assert(m_event_thread == 0); assert(m_event_thread == 0);
assert(do_event_thread == 0); assert(do_event_thread == 0);
do_event_thread= 0; do_event_thread= 0;
struct event_thread_param p;
p.m= &m_mgmsrv2;
p.p= &m_print_mutex;
m_event_thread = NdbThread_Create(event_thread_run, m_event_thread = NdbThread_Create(event_thread_run,
(void**)&m_mgmsrv2, (void**)&p,
32768, 32768,
"CommandInterpreted_event_thread", "CommandInterpreted_event_thread",
NDB_THREAD_PRIO_LOW); NDB_THREAD_PRIO_LOW);
...@@ -607,6 +625,7 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect, ...@@ -607,6 +625,7 @@ CommandInterpreter::execute(const char *_line, int _try_reconnect,
int result= execute_impl(_line); int result= execute_impl(_line);
if (error) if (error)
*error= m_error; *error= m_error;
return result; return result;
} }
...@@ -920,6 +939,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun, ...@@ -920,6 +939,7 @@ CommandInterpreter::executeForAll(const char * cmd, ExecuteFunction fun,
ndbout_c("Trying to start all nodes of system."); ndbout_c("Trying to start all nodes of system.");
ndbout_c("Use ALL STATUS to see the system start-up phases."); ndbout_c("Use ALL STATUS to see the system start-up phases.");
} else { } else {
Guard g(m_print_mutex);
struct ndb_mgm_cluster_state *cl= ndb_mgm_get_status(m_mgmsrv); struct ndb_mgm_cluster_state *cl= ndb_mgm_get_status(m_mgmsrv);
if(cl == 0){ if(cl == 0){
ndbout_c("Unable get status from management server"); ndbout_c("Unable get status from management server");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment