Commit 1de7807d authored by unknown's avatar unknown

Support for ndb multi-node shutdown

parent 1b5ce338
...@@ -92,7 +92,7 @@ class StopRef ...@@ -92,7 +92,7 @@ class StopRef
friend class Ndbcntr; friend class Ndbcntr;
public: public:
STATIC_CONST( SignalLength = 2 ); STATIC_CONST( SignalLength = 3 );
enum ErrorCode { enum ErrorCode {
OK = 0, OK = 0,
...@@ -107,6 +107,7 @@ public: ...@@ -107,6 +107,7 @@ public:
public: public:
Uint32 senderData; Uint32 senderData;
Uint32 errorCode; Uint32 errorCode;
Uint32 masterNodeId;
}; };
inline inline
......
...@@ -2125,6 +2125,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ ...@@ -2125,6 +2125,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
else else
ref->errorCode = StopRef::NodeShutdownInProgress; ref->errorCode = StopRef::NodeShutdownInProgress;
ref->senderData = senderData; ref->senderData = senderData;
ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL) if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
...@@ -2136,6 +2137,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ ...@@ -2136,6 +2137,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
jam(); jam();
ref->errorCode = StopRef::UnsupportedNodeShutdown; ref->errorCode = StopRef::UnsupportedNodeShutdown;
ref->senderData = senderData; ref->senderData = senderData;
ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL) if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return; return;
...@@ -2146,6 +2148,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){ ...@@ -2146,6 +2148,7 @@ Ndbcntr::execSTOP_REQ(Signal* signal){
jam(); jam();
ref->errorCode = StopRef::MultiNodeShutdownNotMaster; ref->errorCode = StopRef::MultiNodeShutdownNotMaster;
ref->senderData = senderData; ref->senderData = senderData;
ref->masterNodeId = cmasterNodeId;
if (senderRef != RNIL) if (senderRef != RNIL)
sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); sendSignal(senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
return; return;
...@@ -2289,6 +2292,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){ ...@@ -2289,6 +2292,7 @@ Ndbcntr::StopRecord::checkNodeFail(Signal* signal){
ref->senderData = stopReq.senderData; ref->senderData = stopReq.senderData;
ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash; ref->errorCode = StopRef::NodeShutdownWouldCauseSystemCrash;
ref->masterNodeId = cntr.cmasterNodeId;
const BlockReference bref = stopReq.senderRef; const BlockReference bref = stopReq.senderRef;
if (bref != RNIL) if (bref != RNIL)
...@@ -2437,6 +2441,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){ ...@@ -2437,6 +2441,7 @@ void Ndbcntr::execABORT_ALL_REF(Signal* signal){
StopRef * const stopRef = (StopRef *)&signal->theData[0]; StopRef * const stopRef = (StopRef *)&signal->theData[0];
stopRef->senderData = c_stopRec.stopReq.senderData; stopRef->senderData = c_stopRec.stopReq.senderData;
stopRef->errorCode = StopRef::TransactionAbortFailed; stopRef->errorCode = StopRef::TransactionAbortFailed;
stopRef->masterNodeId = cmasterNodeId;
sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB); sendSignal(c_stopRec.stopReq.senderRef, GSN_STOP_REF, signal, StopRef::SignalLength, JBB);
} }
......
...@@ -1216,115 +1216,116 @@ Qmgr::check_startup(Signal* signal) ...@@ -1216,115 +1216,116 @@ Qmgr::check_startup(Signal* signal)
goto start_report; goto start_report;
} }
} }
const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
{ {
/** const bool all = c_start.m_starting_nodes.equal(c_definedNodes);
* Check for missing node group directly CheckNodeGroups* sd = (CheckNodeGroups*)&signal->theData[0];
*/
char buf[100];
NdbNodeBitmask check;
check.assign(c_definedNodes);
check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
check.bitOR(c_start.m_starting_nodes_w_log);
sd->blockRef = reference();
sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
sd->mask = check;
EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
CheckNodeGroups::SignalLength);
if (sd->output == CheckNodeGroups::Lose)
{ {
jam(); /**
goto missing_nodegroup; * Check for missing node group directly
*/
char buf[100];
NdbNodeBitmask check;
check.assign(c_definedNodes);
check.bitANDC(c_start.m_starting_nodes); // Not connected nodes
check.bitOR(c_start.m_starting_nodes_w_log);
sd->blockRef = reference();
sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
sd->mask = check;
EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
CheckNodeGroups::SignalLength);
if (sd->output == CheckNodeGroups::Lose)
{
jam();
goto missing_nodegroup;
}
} }
}
sd->blockRef = reference(); sd->blockRef = reference();
sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
sd->mask = c_start.m_starting_nodes; sd->mask = c_start.m_starting_nodes;
EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
CheckNodeGroups::SignalLength); CheckNodeGroups::SignalLength);
const Uint32 result = sd->output; const Uint32 result = sd->output;
sd->blockRef = reference(); sd->blockRef = reference();
sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck; sd->requestType = CheckNodeGroups::Direct | CheckNodeGroups::ArbitCheck;
sd->mask = c_start.m_starting_nodes_w_log; sd->mask = c_start.m_starting_nodes_w_log;
EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal, EXECUTE_DIRECT(DBDIH, GSN_CHECKNODEGROUPSREQ, signal,
CheckNodeGroups::SignalLength); CheckNodeGroups::SignalLength);
const Uint32 result_w_log = sd->output; const Uint32 result_w_log = sd->output;
if (tmp.equal(c_definedNodes)) if (tmp.equal(c_definedNodes))
{ {
/**
* All nodes (wrt no-wait nodes) has connected...
* this means that we will now start or die
*/
jam();
switch(result_w_log){
case CheckNodeGroups::Lose:
{
jam();
goto missing_nodegroup;
}
case CheckNodeGroups::Win:
signal->theData[1] = all ? 0x8001 : 0x8002;
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
goto start_report;
case CheckNodeGroups::Partitioning:
ndbrequire(result != CheckNodeGroups::Lose);
signal->theData[1] =
all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
goto start_report;
}
}
if (now < partial_timeout)
{
jam();
signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
report_mask.assign(wait);
retVal = 0;
goto start_report;
}
/** /**
* All nodes (wrt no-wait nodes) has connected... * Start partial has passed...check for partitioning...
* this means that we will now start or die */
*/
jam();
switch(result_w_log){ switch(result_w_log){
case CheckNodeGroups::Lose: case CheckNodeGroups::Lose:
{
jam(); jam();
goto missing_nodegroup; goto missing_nodegroup;
}
case CheckNodeGroups::Win:
signal->theData[1] = all ? 0x8001 : 0x8002;
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
goto start_report;
case CheckNodeGroups::Partitioning: case CheckNodeGroups::Partitioning:
ndbrequire(result != CheckNodeGroups::Lose); if (now < partitioned_timeout && result != CheckNodeGroups::Win)
{
signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 0;
goto start_report;
}
// Fall through...
case CheckNodeGroups::Win:
signal->theData[1] = signal->theData[1] =
all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003); all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
report_mask.assign(c_definedNodes); report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes); report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1; retVal = 1;
goto start_report; goto start_report;
} }
} }
if (now < partial_timeout)
{
jam();
signal->theData[1] = c_restartPartialTimeout == ~0 ? 2 : 3;
signal->theData[2] = Uint32((partial_timeout - now + 500) / 1000);
report_mask.assign(wait);
retVal = 0;
goto start_report;
}
/**
* Start partial has passed...check for partitioning...
*/
switch(result_w_log){
case CheckNodeGroups::Lose:
jam();
goto missing_nodegroup;
case CheckNodeGroups::Partitioning:
if (now < partitioned_timeout && result != CheckNodeGroups::Win)
{
signal->theData[1] = c_restartPartionedTimeout == ~0 ? 4 : 5;
signal->theData[2] = Uint32((partitioned_timeout - now + 500) / 1000);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 0;
goto start_report;
}
// Fall through...
case CheckNodeGroups::Win:
signal->theData[1] =
all ? 0x8001 : (result == CheckNodeGroups::Win ? 0x8002 : 0x8003);
report_mask.assign(c_definedNodes);
report_mask.bitANDC(c_start.m_starting_nodes);
retVal = 1;
goto start_report;
}
ndbrequire(false); ndbrequire(false);
start_report: start_report:
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#endif #endif
#include <mgmapi.h> #include <mgmapi.h>
#include <util/BaseString.hpp>
class MgmtSrvr; class MgmtSrvr;
...@@ -70,6 +71,9 @@ class CommandInterpreter { ...@@ -70,6 +71,9 @@ class CommandInterpreter {
*/ */
void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr); void analyseAfterFirstToken(int processId, char* allAfterFirstTokenCstr);
void executeCommand(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes);
/** /**
* Parse the block specification part of the LOG* commands, * Parse the block specification part of the LOG* commands,
* things after LOG*: [BLOCK = {ALL|<blockName>+}] * things after LOG*: [BLOCK = {ALL|<blockName>+}]
...@@ -104,10 +108,14 @@ class CommandInterpreter { ...@@ -104,10 +108,14 @@ class CommandInterpreter {
public: public:
void executeStop(int processId, const char* parameters, bool all); void executeStop(int processId, const char* parameters, bool all);
void executeStop(Vector<BaseString> &command_list, unsigned command_pos,
int *node_ids, int no_of_nodes);
void executeEnterSingleUser(char* parameters); void executeEnterSingleUser(char* parameters);
void executeExitSingleUser(char* parameters); void executeExitSingleUser(char* parameters);
void executeStart(int processId, const char* parameters, bool all); void executeStart(int processId, const char* parameters, bool all);
void executeRestart(int processId, const char* parameters, bool all); void executeRestart(int processId, const char* parameters, bool all);
void executeRestart(Vector<BaseString> &command_list, unsigned command_pos,
int *node_ids, int no_of_nodes);
void executeLogLevel(int processId, const char* parameters, bool all); void executeLogLevel(int processId, const char* parameters, bool all);
void executeError(int processId, const char* parameters, bool all); void executeError(int processId, const char* parameters, bool all);
void executeLog(int processId, const char* parameters, bool all); void executeLog(int processId, const char* parameters, bool all);
...@@ -643,9 +651,16 @@ CommandInterpreter::execute_impl(const char *_line) ...@@ -643,9 +651,16 @@ CommandInterpreter::execute_impl(const char *_line)
} }
} while (do_continue); } while (do_continue);
// if there is anything in the line proceed // if there is anything in the line proceed
Vector<BaseString> command_list;
{
BaseString tmp(line);
tmp.split(command_list);
for (unsigned i= 0; i < command_list.size();)
command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
}
char* firstToken = strtok(line, " "); char* firstToken = strtok(line, " ");
char* allAfterFirstToken = strtok(NULL, ""); char* allAfterFirstToken = strtok(NULL, "");
if (strcasecmp(firstToken, "HELP") == 0 || if (strcasecmp(firstToken, "HELP") == 0 ||
strcasecmp(firstToken, "?") == 0) { strcasecmp(firstToken, "?") == 0) {
executeHelp(allAfterFirstToken); executeHelp(allAfterFirstToken);
...@@ -723,22 +738,45 @@ CommandInterpreter::execute_impl(const char *_line) ...@@ -723,22 +738,45 @@ CommandInterpreter::execute_impl(const char *_line)
analyseAfterFirstToken(-1, allAfterFirstToken); analyseAfterFirstToken(-1, allAfterFirstToken);
} else { } else {
/** /**
* First token should be a digit, node ID * First tokens should be digits, node ID's
*/ */
int nodeId; int node_ids[MAX_NODES];
unsigned pos;
if (! convert(firstToken, nodeId)) { for (pos= 0; pos < command_list.size(); pos++)
{
int node_id;
if (convert(command_list[pos].c_str(), node_id))
{
if (node_id <= 0) {
ndbout << "Invalid node ID: " << command_list[pos].c_str()
<< "." << endl;
DBUG_RETURN(true);
}
node_ids[pos]= node_id;
continue;
}
break;
}
int no_of_nodes= pos;
if (no_of_nodes == 0)
{
/* No digit found */
invalid_command(_line); invalid_command(_line);
DBUG_RETURN(true); DBUG_RETURN(true);
} }
if (pos == command_list.size())
if (nodeId <= 0) { {
ndbout << "Invalid node ID: " << firstToken << "." << endl; /* No command found */
invalid_command(_line);
DBUG_RETURN(true); DBUG_RETURN(true);
} }
if (no_of_nodes == 1)
analyseAfterFirstToken(nodeId, allAfterFirstToken); {
analyseAfterFirstToken(node_ids[0], allAfterFirstToken);
DBUG_RETURN(true);
}
executeCommand(command_list, pos, node_ids, no_of_nodes);
DBUG_RETURN(true);
} }
DBUG_RETURN(true); DBUG_RETURN(true);
} }
...@@ -808,6 +846,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId, ...@@ -808,6 +846,27 @@ CommandInterpreter::analyseAfterFirstToken(int processId,
ndbout << endl; ndbout << endl;
} }
/**
 * Dispatch the command that follows a list of several node ids.
 *
 * The token at command_list[command_pos] is compared case-insensitively
 * against the two commands accepted after a multi node id list:
 *   STOP    - forwarded to executeStop()
 *   RESTART - forwarded to executeRestart()
 * Any other token is reported as an invalid command and nothing is executed.
 *
 * @param command_list  the tokens of the command line
 * @param command_pos   index of the command token; it is used to index
 *                      command_list without a range check in this function
 * @param node_ids      node ids, passed through to the handler unchanged
 * @param no_of_nodes   node count, passed through to the handler unchanged
 */
void
CommandInterpreter::executeCommand(Vector<BaseString> &command_list,
                                   unsigned command_pos,
                                   int *node_ids, int no_of_nodes)
{
  const char *const verb= command_list[command_pos].c_str();
  /* The handlers parse their own options, starting right after the verb */
  const unsigned first_option_pos= command_pos + 1;

  if (strcasecmp("STOP", verb) == 0)
    executeStop(command_list, first_option_pos, node_ids, no_of_nodes);
  else if (strcasecmp("RESTART", verb) == 0)
    executeRestart(command_list, first_option_pos, node_ids, no_of_nodes);
  else
    ndbout_c("Invalid command: '%s' after multi node id list. "
             "Expected STOP or RESTART.", verb);
}
/** /**
* Get next nodeid larger than the give node_id. node_id will be * Get next nodeid larger than the give node_id. node_id will be
* set to the next node_id in the list. node_id should be set * set to the next node_id in the list. node_id should be set
...@@ -1400,24 +1459,60 @@ CommandInterpreter::executeClusterLog(char* parameters) ...@@ -1400,24 +1459,60 @@ CommandInterpreter::executeClusterLog(char* parameters)
//***************************************************************************** //*****************************************************************************
void void
CommandInterpreter::executeStop(int processId, const char *, bool all) CommandInterpreter::executeStop(int processId, const char *parameters,
bool all)
{ {
int result = 0; Vector<BaseString> command_list;
if(all) { if (parameters)
result = ndb_mgm_stop(m_mgmsrv, 0, 0); {
} else { BaseString tmp(parameters);
result = ndb_mgm_stop(m_mgmsrv, 1, &processId); tmp.split(command_list);
for (unsigned i= 0; i < command_list.size();)
command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
} }
if (result < 0) { if (all)
ndbout << "Shutdown failed." << endl; executeStop(command_list, 0, 0, 0);
else
executeStop(command_list, 0, &processId, 1);
}
void
CommandInterpreter::executeStop(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes)
{
int abort= 0;
for (; command_pos < command_list.size(); command_pos++)
{
const char *item= command_list[command_pos].c_str();
if (strcasecmp(item, "-A") == 0)
{
abort= 1;
continue;
}
ndbout_c("Invalid option: %s. Expecting -A after STOP",
item);
return;
}
int result= ndb_mgm_stop2(m_mgmsrv, no_of_nodes, node_ids, abort);
if (result < 0)
{
ndbout_c("Shutdown failed.");
printError(); printError();
} else }
else
{
if (node_ids == 0)
ndbout_c("NDB Cluster has shutdown.");
else
{ {
if(all) ndbout << "Node";
ndbout << "NDB Cluster has shutdown." << endl; for (int i= 0; i < no_of_nodes; i++)
else ndbout << " " << node_ids[i];
ndbout << "Node " << processId << " has shutdown." << endl; ndbout_c(" has shutdown.");
} }
}
} }
void void
...@@ -1483,47 +1578,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters, ...@@ -1483,47 +1578,74 @@ CommandInterpreter::executeStart(int processId, const char* parameters,
void void
CommandInterpreter::executeRestart(int processId, const char* parameters, CommandInterpreter::executeRestart(int processId, const char* parameters,
bool all) bool all)
{
Vector<BaseString> command_list;
if (parameters)
{
BaseString tmp(parameters);
tmp.split(command_list);
for (unsigned i= 0; i < command_list.size();)
command_list[i].c_str()[0] ? i++ : (command_list.erase(i),0);
}
if (all)
executeRestart(command_list, 0, 0, 0);
else
executeRestart(command_list, 0, &processId, 1);
}
void
CommandInterpreter::executeRestart(Vector<BaseString> &command_list,
unsigned command_pos,
int *node_ids, int no_of_nodes)
{ {
int result; int result;
int nostart = 0; int nostart= 0;
int initialstart = 0; int initialstart= 0;
int abort = 0; int abort= 0;
if(parameters != 0 && strlen(parameters) != 0){ for (; command_pos < command_list.size(); command_pos++)
char * tmpString = my_strdup(parameters,MYF(MY_WME)); {
My_auto_ptr<char> ap1(tmpString); const char *item= command_list[command_pos].c_str();
char * tmpPtr = 0; if (strcasecmp(item, "-N") == 0)
char * item = strtok_r(tmpString, " ", &tmpPtr); {
while(item != NULL){ nostart= 1;
if(strcasecmp(item, "-N") == 0) continue;
nostart = 1; }
if(strcasecmp(item, "-I") == 0) if (strcasecmp(item, "-I") == 0)
initialstart = 1; {
if(strcasecmp(item, "-A") == 0) initialstart= 1;
abort = 1; continue;
item = strtok_r(NULL, " ", &tmpPtr);
} }
if (strcasecmp(item, "-A") == 0)
{
abort= 1;
continue;
}
ndbout_c("Invalid option: %s. Expecting -A,-N or -I after RESTART",
item);
return;
} }
if(all) { result= ndb_mgm_restart2(m_mgmsrv, no_of_nodes, node_ids,
result = ndb_mgm_restart2(m_mgmsrv, 0, NULL, initialstart, nostart, abort); initialstart, nostart, abort);
} else {
int v[1];
v[0] = processId;
result = ndb_mgm_restart2(m_mgmsrv, 1, v, initialstart, nostart, abort);
}
if (result <= 0) { if (result <= 0) {
ndbout.println("Restart failed.", result); ndbout_c("Restart failed.");
printError(); printError();
} else }
else
{
if (node_ids == 0)
ndbout_c("NDB Cluster is being restarted.");
else
{ {
if(all) ndbout << "Node";
ndbout << "NDB Cluster is being restarted." << endl; for (int i= 0; i < no_of_nodes; i++)
else ndbout << " " << node_ids[i];
ndbout_c("Node %d is being restarted.", processId); ndbout_c(" is being restarted");
} }
}
} }
void void
......
...@@ -294,6 +294,8 @@ static ErrorItem errorTable[] = ...@@ -294,6 +294,8 @@ static ErrorItem errorTable[] =
{MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" }, {MgmtSrvr::SYSTEM_SHUTDOWN_IN_PROGRESS, "System shutdown in progress" },
{MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH, {MgmtSrvr::NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH,
"Node shutdown would cause system crash" }, "Node shutdown would cause system crash" },
{MgmtSrvr::UNSUPPORTED_NODE_SHUTDOWN,
"Unsupported multi node shutdown. Abort option required." },
{MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." }, {MgmtSrvr::NODE_NOT_API_NODE, "The specified node is not an API node." },
{MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP, {MgmtSrvr::OPERATION_NOT_ALLOWED_START_STOP,
"Operation not allowed while nodes are starting or stopping."}, "Operation not allowed while nodes are starting or stopping."},
...@@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode) ...@@ -312,6 +314,9 @@ int MgmtSrvr::translateStopRef(Uint32 errCode)
case StopRef::NodeShutdownWouldCauseSystemCrash: case StopRef::NodeShutdownWouldCauseSystemCrash:
return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH; return NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH;
break; break;
case StopRef::UnsupportedNodeShutdown:
return UNSUPPORTED_NODE_SHUTDOWN;
break;
} }
return 4999; return 4999;
} }
...@@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server, ...@@ -386,8 +391,9 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server,
_ownReference(0), _ownReference(0),
theSignalIdleList(NULL), theSignalIdleList(NULL),
theWaitState(WAIT_SUBSCRIBE_CONF), theWaitState(WAIT_SUBSCRIBE_CONF),
m_local_mgm_handle(0),
m_event_listner(this), m_event_listner(this),
m_local_mgm_handle(0) m_master_node(0)
{ {
DBUG_ENTER("MgmtSrvr::MgmtSrvr"); DBUG_ENTER("MgmtSrvr::MgmtSrvr");
...@@ -677,23 +683,16 @@ MgmtSrvr::~MgmtSrvr() ...@@ -677,23 +683,16 @@ MgmtSrvr::~MgmtSrvr()
int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond) int MgmtSrvr::okToSendTo(NodeId nodeId, bool unCond)
{ {
if(nodeId == 0) if(nodeId == 0 || getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
return 0;
if (getNodeType(nodeId) != NDB_MGM_NODE_TYPE_NDB)
return WRONG_PROCESS_TYPE; return WRONG_PROCESS_TYPE;
// Check if we have contact with it // Check if we have contact with it
if(unCond){ if(unCond){
if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected) if(theFacade->theClusterMgr->getNodeInfo(nodeId).connected)
return 0; return 0;
return NO_CONTACT_WITH_PROCESS;
} }
if (theFacade->get_node_alive(nodeId) == 0) { else if (theFacade->get_node_alive(nodeId) == true)
return NO_CONTACT_WITH_PROCESS;
} else {
return 0; return 0;
} return NO_CONTACT_WITH_PROCESS;
} }
void report_unknown_signal(SimpleSignal *signal) void report_unknown_signal(SimpleSignal *signal)
...@@ -935,7 +934,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId, ...@@ -935,7 +934,7 @@ int MgmtSrvr::sendStopMgmd(NodeId nodeId,
* distributed communication up. * distributed communication up.
*/ */
int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, int MgmtSrvr::sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeBitmask &stoppedNodes, NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId, Uint32 singleUserNodeId,
bool abort, bool abort,
...@@ -945,6 +944,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -945,6 +944,12 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
bool initialStart) bool initialStart)
{ {
int error = 0; int error = 0;
DBUG_ENTER("MgmtSrvr::sendSTOP_REQ");
DBUG_PRINT("enter", ("no of nodes: %d singleUseNodeId: %d "
"abort: %d stop: %d restart: %d "
"nostart: %d initialStart: %d",
node_ids.size(), singleUserNodeId,
abort, stop, restart, nostart, initialStart));
stoppedNodes.clear(); stoppedNodes.clear();
...@@ -982,36 +987,46 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -982,36 +987,46 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
// send the signals // send the signals
NodeBitmask nodes; NodeBitmask nodes;
if (nodeId) NodeId nodeId;
int use_master_node= 0;
int do_send= 0;
NdbNodeBitmask nodes_to_stop;
{ {
if(nodeId==getOwnNodeId()) for (unsigned i= 0; i < node_ids.size(); i++)
{ nodes_to_stop.set(node_ids[i]);
if(restart) }
g_RestartServer= true; if (node_ids.size())
g_StopServer= true; {
return 0; do_send= 1;
} if (node_ids.size() == 1)
if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
{ {
int r; nodeId= node_ids[0];
if((r= okToSendTo(nodeId, true)) != 0) if (nodeId == getOwnNodeId())
return r; {
if (ss.sendSignal(nodeId, &ssig) != SEND_OK) if (restart)
return SEND_OR_RECEIVE_FAILED; g_RestartServer= true;
g_StopServer= true;
DBUG_RETURN(0);
}
else if (getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
{
error= sendStopMgmd(nodeId, abort, stop, restart,
nostart, initialStart);
if (error == 0)
stoppedNodes.set(nodeId);
DBUG_RETURN(error);
}
} }
else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM) else // multi node stop, send to master
{ {
error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart); use_master_node= 1;
if(error==0) nodes_to_stop.copyto(NdbNodeBitmask::Size, stopReq->nodes);
stoppedNodes.set(nodeId); StopReq::setStopNodes(stopReq->requestInfo, 1);
return error;
} }
else
return WRONG_PROCESS_TYPE;
nodes.set(nodeId);
} }
else else
{ {
nodeId= 0;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
{ {
if(okToSendTo(nodeId, true) == 0) if(okToSendTo(nodeId, true) == 0)
...@@ -1032,8 +1047,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -1032,8 +1047,30 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
} }
// now wait for the replies // now wait for the replies
while (!nodes.isclear()) while (!nodes.isclear() || do_send)
{ {
if (do_send)
{
int r;
assert(nodes.count() == 0);
if (use_master_node)
nodeId= m_master_node;
if ((r= okToSendTo(nodeId, true)) != 0)
{
bool next;
if (!use_master_node)
DBUG_RETURN(r);
m_master_node= nodeId= 0;
while((next= getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
(r= okToSendTo(nodeId, true)) != 0);
if (!next)
DBUG_RETURN(NO_CONTACT_WITH_DB_NODES);
}
if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
nodes.set(nodeId);
do_send= 0;
}
SimpleSignal *signal = ss.waitFor(); SimpleSignal *signal = ss.waitFor();
int gsn = signal->readSignalNumber(); int gsn = signal->readSignalNumber();
switch (gsn) { switch (gsn) {
...@@ -1045,6 +1082,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -1045,6 +1082,13 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
#endif #endif
assert(nodes.get(nodeId)); assert(nodes.get(nodeId));
nodes.clear(nodeId); nodes.clear(nodeId);
if (ref->errorCode == StopRef::MultiNodeShutdownNotMaster)
{
assert(use_master_node);
m_master_node= ref->masterNodeId;
do_send= 1;
continue;
}
error = translateStopRef(ref->errorCode); error = translateStopRef(ref->errorCode);
break; break;
} }
...@@ -1055,9 +1099,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -1055,9 +1099,16 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
ndbout_c("Node %d single user mode", nodeId); ndbout_c("Node %d single user mode", nodeId);
#endif #endif
assert(nodes.get(nodeId)); assert(nodes.get(nodeId));
assert(singleUserNodeId != 0); if (singleUserNodeId != 0)
{
stoppedNodes.set(nodeId);
}
else
{
assert(node_ids.size() > 1);
stoppedNodes.bitOR(nodes_to_stop);
}
nodes.clear(nodeId); nodes.clear(nodeId);
stoppedNodes.set(nodeId);
break; break;
} }
case GSN_NF_COMPLETEREP:{ case GSN_NF_COMPLETEREP:{
...@@ -1096,17 +1147,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId, ...@@ -1096,17 +1147,18 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("Unknown signal %d", gsn); ndbout_c("Unknown signal %d", gsn);
#endif #endif
return SEND_OR_RECEIVE_FAILED; DBUG_RETURN(SEND_OR_RECEIVE_FAILED);
} }
} }
return error; DBUG_RETURN(error);
} }
/* /*
* Stop one node * Stop one nodes
*/ */
int MgmtSrvr::stopNode(int nodeId, bool abort) int MgmtSrvr::stopNodes(const Vector<NodeId> &node_ids,
int *stopCount, bool abort)
{ {
if (!abort) if (!abort)
{ {
...@@ -1121,14 +1173,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) ...@@ -1121,14 +1173,17 @@ int MgmtSrvr::stopNode(int nodeId, bool abort)
} }
} }
NodeBitmask nodes; NodeBitmask nodes;
return sendSTOP_REQ(nodeId, int ret= sendSTOP_REQ(node_ids,
nodes, nodes,
0, 0,
abort, abort,
false, false,
false, false,
false, false,
false); false);
if (stopCount)
*stopCount= nodes.count();
return ret;
} }
/* /*
...@@ -1138,7 +1193,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort) ...@@ -1138,7 +1193,8 @@ int MgmtSrvr::stopNode(int nodeId, bool abort)
int MgmtSrvr::stop(int * stopCount, bool abort) int MgmtSrvr::stop(int * stopCount, bool abort)
{ {
NodeBitmask nodes; NodeBitmask nodes;
int ret = sendSTOP_REQ(0, Vector<NodeId> node_ids;
int ret = sendSTOP_REQ(node_ids,
nodes, nodes,
0, 0,
abort, abort,
...@@ -1169,7 +1225,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) ...@@ -1169,7 +1225,8 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
return OPERATION_NOT_ALLOWED_START_STOP; return OPERATION_NOT_ALLOWED_START_STOP;
} }
NodeBitmask nodes; NodeBitmask nodes;
int ret = sendSTOP_REQ(0, Vector<NodeId> node_ids;
int ret = sendSTOP_REQ(node_ids,
nodes, nodes,
singleUserNodeId, singleUserNodeId,
false, false,
...@@ -1186,18 +1243,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId) ...@@ -1186,18 +1243,22 @@ int MgmtSrvr::enterSingleUser(int * stopCount, Uint32 singleUserNodeId)
* Perform node restart * Perform node restart
*/ */
int MgmtSrvr::restartNode(int nodeId, bool nostart, bool initialStart, int MgmtSrvr::restartNodes(const Vector<NodeId> &node_ids,
bool abort) int * stopCount, bool nostart,
bool initialStart, bool abort)
{ {
NodeBitmask nodes; NodeBitmask nodes;
return sendSTOP_REQ(nodeId, int ret= sendSTOP_REQ(node_ids,
nodes, nodes,
0, 0,
abort, abort,
false, false,
true, true,
nostart, nostart,
initialStart); initialStart);
if (stopCount)
*stopCount = nodes.count();
return ret;
} }
/* /*
...@@ -1208,7 +1269,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart, ...@@ -1208,7 +1269,8 @@ int MgmtSrvr::restart(bool nostart, bool initialStart,
bool abort, int * stopCount ) bool abort, int * stopCount )
{ {
NodeBitmask nodes; NodeBitmask nodes;
int ret = sendSTOP_REQ(0, Vector<NodeId> node_ids;
int ret = sendSTOP_REQ(node_ids,
nodes, nodes,
0, 0,
abort, abort,
...@@ -2135,12 +2197,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) ...@@ -2135,12 +2197,16 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
SignalSender ss(theFacade); SignalSender ss(theFacade);
ss.lock(); // lock will be released on exit ss.lock(); // lock will be released on exit
bool next; NodeId nodeId = m_master_node;
NodeId nodeId = 0; if (okToSendTo(nodeId, false) != 0)
while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true && {
theFacade->get_node_alive(nodeId) == false); bool next;
nodeId = m_master_node = 0;
if(!next) return NO_CONTACT_WITH_DB_NODES; while((next = getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB)) == true &&
okToSendTo(nodeId, false) != 0);
if(!next)
return NO_CONTACT_WITH_DB_NODES;
}
SimpleSignal ssig; SimpleSignal ssig;
BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend()); BackupReq* req = CAST_PTR(BackupReq, ssig.getDataPtrSend());
...@@ -2208,7 +2274,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted) ...@@ -2208,7 +2274,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
const BackupRef * const ref = const BackupRef * const ref =
CAST_CONSTPTR(BackupRef, signal->getDataPtr()); CAST_CONSTPTR(BackupRef, signal->getDataPtr());
if(ref->errorCode == BackupRef::IAmNotMaster){ if(ref->errorCode == BackupRef::IAmNotMaster){
nodeId = refToNode(ref->masterRef); m_master_node = nodeId = refToNode(ref->masterRef);
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("I'm not master resending to %d", nodeId); ndbout_c("I'm not master resending to %d", nodeId);
#endif #endif
......
...@@ -176,6 +176,7 @@ public: ...@@ -176,6 +176,7 @@ public:
STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 ); STATIC_CONST( NODE_SHUTDOWN_WOULD_CAUSE_SYSTEM_CRASH = 5028 );
STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 ); STATIC_CONST( NO_CONTACT_WITH_DB_NODES = 5030 );
STATIC_CONST( UNSUPPORTED_NODE_SHUTDOWN = 5031 );
STATIC_CONST( NODE_NOT_API_NODE = 5062 ); STATIC_CONST( NODE_NOT_API_NODE = 5062 );
STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 ); STATIC_CONST( OPERATION_NOT_ALLOWED_START_STOP = 5063 );
...@@ -252,7 +253,7 @@ public: ...@@ -252,7 +253,7 @@ public:
* @param processId: Id of the DB process to stop * @param processId: Id of the DB process to stop
* @return 0 if succeeded, otherwise: as stated above, plus: * @return 0 if succeeded, otherwise: as stated above, plus:
*/ */
int stopNode(int nodeId, bool abort = false); int stopNodes(const Vector<NodeId> &node_ids, int *stopCount, bool abort);
/** /**
* Stop the system * Stop the system
...@@ -286,11 +287,12 @@ public: ...@@ -286,11 +287,12 @@ public:
int start(int processId); int start(int processId);
/** /**
* Restart a node * Restart nodes
* @param processId: Id of the DB process to start * @param processId: Id of the DB process to start
*/ */
int restartNode(int processId, bool nostart, bool initialStart, int restartNodes(const Vector<NodeId> &node_ids,
bool abort = false); int *stopCount, bool nostart,
bool initialStart, bool abort);
/** /**
* Restart the system * Restart the system
...@@ -494,7 +496,7 @@ private: ...@@ -494,7 +496,7 @@ private:
bool nostart, bool nostart,
bool initialStart); bool initialStart);
int sendSTOP_REQ(NodeId nodeId, int sendSTOP_REQ(const Vector<NodeId> &node_ids,
NodeBitmask &stoppedNodes, NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId, Uint32 singleUserNodeId,
bool abort, bool abort,
...@@ -653,6 +655,8 @@ private: ...@@ -653,6 +655,8 @@ private:
friend class Ndb_mgmd_event_service; friend class Ndb_mgmd_event_service;
Ndb_mgmd_event_service m_event_listner; Ndb_mgmd_event_service m_event_listner;
NodeId m_master_node;
/** /**
* Handles the thread wich upon a 'Node is started' event will * Handles the thread wich upon a 'Node is started' event will
* set the node's previous loglevel settings. * set the node's previous loglevel settings.
......
...@@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &, ...@@ -866,14 +866,11 @@ MgmApiSession::restart(Parser<MgmApiSession>::Context &,
} }
int restarted = 0; int restarted = 0;
int result = 0; int result= m_mgmsrv.restartNodes(nodes,
&restarted,
for(size_t i = 0; i < nodes.size(); i++) nostart != 0,
if((result = m_mgmsrv.restartNode(nodes[i], initialstart != 0,
nostart != 0, abort != 0);
initialstart != 0,
abort != 0)) == 0)
restarted++;
m_output->println("restart reply"); m_output->println("restart reply");
if(result != 0){ if(result != 0){
...@@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -998,7 +995,12 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
args.get("node", (const char **)&nodes_str); args.get("node", (const char **)&nodes_str);
if(nodes_str == NULL) if(nodes_str == NULL)
{
m_output->println("stop reply");
m_output->println("result: empty node list");
m_output->println("");
return; return;
}
args.get("abort", &abort); args.get("abort", &abort);
char *p, *last; char *p, *last;
...@@ -1010,7 +1012,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -1010,7 +1012,6 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
int stop_self= 0; int stop_self= 0;
size_t i; size_t i;
for(i=0; i < nodes.size(); i++) { for(i=0; i < nodes.size(); i++) {
if (nodes[i] == m_mgmsrv.getOwnNodeId()) { if (nodes[i] == m_mgmsrv.getOwnNodeId()) {
stop_self= 1; stop_self= 1;
...@@ -1020,23 +1021,25 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &, ...@@ -1020,23 +1021,25 @@ MgmApiSession::stop(Parser<MgmApiSession>::Context &,
m_output->println(""); m_output->println("");
return; return;
} }
nodes.erase(i);
break;
} }
} }
int stopped = 0, result = 0; int stopped= 0;
int result= 0;
for(i=0; i < nodes.size(); i++) if (nodes.size())
if (nodes[i] != m_mgmsrv.getOwnNodeId()) { result= m_mgmsrv.stopNodes(nodes, &stopped, abort != 0);
if((result = m_mgmsrv.stopNode(nodes[i], abort != 0)) == 0)
stopped++;
} else
stopped++;
m_output->println("stop reply"); m_output->println("stop reply");
if(result != 0) if(result != 0)
m_output->println("result: %s", get_error_text(result)); m_output->println("result: %s", get_error_text(result));
else else
{
m_output->println("result: Ok"); m_output->println("result: Ok");
if (stop_self)
stopped++;
}
m_output->println("stopped: %d", stopped); m_output->println("stopped: %d", stopped);
m_output->println(""); m_output->println("");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment