Commit 8a40180c authored by unknown's avatar unknown

ndb - bug#15695 bug#16447 bug#18612

  For various reasons, a partitioned cluster may have been created.
  This patch makes sure that when they connect
  1) it's detected
  2) shutdown is forced


ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
  New dump/error insert for simulating network failure
ndb/src/kernel/blocks/qmgr/Qmgr.hpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  1) Actively detect partitioned cluster(s)
  2) add some documentation
parent 241377c7
......@@ -133,6 +133,9 @@ Cmvmi::~Cmvmi()
{
}
#ifdef ERROR_INSERT
// Node ids recorded by dump code 9000. While error insert 9000 is active,
// OPEN_COMREQ handling skips connecting to the nodes set in this mask.
// Dump code 9001 re-opens communication to them and empties the mask.
NodeBitmask c_error_9000_nodes_mask;
#endif
void Cmvmi::execNDB_TAMPER(Signal* signal)
{
......@@ -390,21 +393,33 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal)
const Uint32 len = signal->getLength();
if(len == 2){
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = EventReport::CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
#ifdef ERROR_INSERT
if (! (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(tStartingNode)))
#endif
{
globalTransporterRegistry.do_connect(tStartingNode);
globalTransporterRegistry.setIOState(tStartingNode, HaltIO);
//-----------------------------------------------------
// Report that the connection to the node is opened
//-----------------------------------------------------
signal->theData[0] = EventReport::CommunicationOpened;
signal->theData[1] = tStartingNode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB);
//-----------------------------------------------------
}
} else {
for(unsigned int i = 1; i < MAX_NODES; i++ ) {
jam();
if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){
jam();
#ifdef ERROR_INSERT
if (ERROR_INSERTED(9000) && c_error_9000_nodes_mask.get(i))
continue;
#endif
globalTransporterRegistry.do_connect(i);
globalTransporterRegistry.setIOState(i, HaltIO);
......@@ -1010,7 +1025,8 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
DumpStateOrd * const & dumpState = (DumpStateOrd *)&signal->theData[0];
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpConnections){
Uint32 arg = dumpState->args[0];
if (arg == DumpStateOrd::CmvmiDumpConnections){
for(unsigned int i = 1; i < MAX_NODES; i++ ){
const char* nodeTypeStr = "";
switch(getNodeInfo(i).m_type){
......@@ -1043,13 +1059,13 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiDumpLongSignalMemory){
if (arg == DumpStateOrd::CmvmiDumpLongSignalMemory){
infoEvent("Cmvmi: g_sectionSegmentPool size: %d free: %d",
g_sectionSegmentPool.getSize(),
g_sectionSegmentPool.getNoOfFree());
}
if (dumpState->args[0] == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
if (arg == DumpStateOrd::CmvmiSetRestartOnErrorInsert)
{
if(signal->getLength() == 1)
{
......@@ -1069,7 +1085,7 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
}
}
if (dumpState->args[0] == DumpStateOrd::CmvmiTestLongSigWithDelay) {
if (arg == DumpStateOrd::CmvmiTestLongSigWithDelay) {
unsigned i;
Uint32 loopCount = dumpState->args[1];
const unsigned len0 = 11;
......@@ -1097,6 +1113,30 @@ Cmvmi::execDUMP_STATE_ORD(Signal* signal)
sendSignal(reference(), GSN_TESTSIG, signal, 8, JBB, ptr, 2);
}
#ifdef ERROR_INSERT
// Dump code 9000: arm error insert 9000 and record every additional signal
// word (theData[1] .. theData[length-1]) as a node id in
// c_error_9000_nodes_mask.
// NOTE(review): the node ids are not range-checked here before being set in
// the mask -- confirm that NodeBitmask::set() guards against bad values.
if (arg == 9000)
{
SET_ERROR_INSERT_VALUE(9000);
for (Uint32 i = 1; i<signal->getLength(); i++)
c_error_9000_nodes_mask.set(signal->theData[i]);
}
// Dump code 9001: undo dump code 9000. Clear the error insert, issue a
// two-word OPEN_COMREQ for every node recorded in the mask, then empty the
// mask.
if (arg == 9001)
{
// Must be cleared before the loop: the OPEN_COMREQ handler skips masked nodes
// for as long as error 9000 is inserted.
CLEAR_ERROR_INSERT_VALUE;
for (Uint32 i = 0; i<MAX_NODES; i++)
{
if (c_error_9000_nodes_mask.get(i))
{
// theData[1] carries the node id; the signal is executed synchronously.
signal->theData[0] = 0;
signal->theData[1] = i;
EXECUTE_DIRECT(CMVMI, GSN_OPEN_COMREQ, signal, 2);
}
}
c_error_9000_nodes_mask.clear();
}
#endif
#ifdef VM_TRACE
#if 0
{
......
......@@ -100,7 +100,12 @@ public:
};
struct StartRecord {
void reset(){ m_startKey++; m_startNode = 0;}
// Reset the start record: advance m_startKey, forget the node being started
// (m_startNode = 0), mark that no reply signal is awaited (m_gsn = RNIL) and
// clear the set of nodes being waited for.
void reset(){
m_startKey++;
m_startNode = 0;
m_gsn = RNIL;
m_nodes.clearWaitingFor();
}
Uint32 m_startKey;
Uint32 m_startNode;
Uint64 m_startTimeout;
......@@ -112,6 +117,14 @@ public:
NdbNodeBitmask c_definedNodes; // DB nodes in config
NdbNodeBitmask c_clusterNodes; // DB nodes in cluster
NodeBitmask c_connectedNodes; // All kinds of connected nodes
/**
* Nodes which we're checking for partitioned cluster
*
* i.e. nodes that connect to us when we already have an elected president
*/
NdbNodeBitmask c_cmregreq_nodes;
Uint32 c_maxDynamicId;
// Records
......@@ -251,8 +264,10 @@ private:
// Generated statement blocks
void startphase1(Signal* signal);
void electionWon();
void electionWon(Signal* signal);
void cmInfoconf010Lab(Signal* signal);
bool check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn);
void apiHbHandlingLab(Signal* signal);
void timerHandlingLab(Signal* signal);
void hbReceivedLab(Signal* signal);
......
......@@ -56,6 +56,33 @@
#define DEBUG_START3(signal, msg)
#endif
/**
* c_start.m_gsn = GSN_CM_REGREQ
* Possible for all nodes
* c_start.m_nodes contains all nodes in config
*
* c_start.m_gsn = GSN_CM_NODEINFOREQ;
* Set when receiving CM_REGCONF
* State possible for starting node only (not in cluster)
*
* c_start.m_nodes contains all nodes in the alive cluster
* that have not replied to GSN_CM_NODEINFOREQ
* passed by president in GSN_CM_REGCONF
*
* c_start.m_gsn = GSN_CM_ADD
* Possible for president only
* Set when receiving and accepting CM_REGREQ (to include node)
*
* c_start.m_nodes contains all nodes in alive cluster + starting node
* that has not replied to GSN_CM_ADD
* by sending GSN_CM_ACKADD
*
* c_start.m_gsn = GSN_CM_NODEINFOCONF
* Possible for non presidents only
* c_start.m_nodes contains a node that has been accepted by president
* but has not connected to us yet
*/
// Signal entries and statement blocks
/* 4 P R O G R A M */
/*******************************/
......@@ -259,18 +286,24 @@ void Qmgr::execCONNECT_REP(Signal* signal)
{
jamEntry();
const Uint32 nodeId = signal->theData[0];
if (ERROR_INSERTED(931))
{
jam();
ndbout_c("Discarding CONNECT_REP(%d)", nodeId);
infoEvent("Discarding CONNECT_REP(%d)", nodeId);
return;
}
c_connectedNodes.set(nodeId);
NodeRecPtr nodePtr;
nodePtr.i = getOwnNodeId();
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
switch(nodePtr.p->phase){
case ZSTARTING:
case ZRUNNING:
ndbrequire(!c_clusterNodes.get(nodeId));
case ZSTARTING:
jam();
if(!c_start.m_nodes.isWaitingFor(nodeId)){
jam();
return;
}
break;
case ZPREPARE_FAIL:
case ZFAIL_CLOSING:
......@@ -282,32 +315,64 @@ void Qmgr::execCONNECT_REP(Signal* signal)
case ZAPI_INACTIVE:
return;
}
if (getNodeInfo(nodeId).getType() != NodeInfo::DB)
{
jam();
return;
}
switch(c_start.m_gsn){
case GSN_CM_REGREQ:
jam();
sendCmRegReq(signal, nodeId);
/**
* We're waiting for CM_REGCONF c_start.m_nodes contains all configured
* nodes
*/
ndbrequire(nodePtr.p->phase == ZSTARTING);
ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
return;
case GSN_CM_NODEINFOREQ:
jam();
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
if (c_start.m_nodes.isWaitingFor(nodeId))
{
jam();
ndbrequire(getOwnNodeId() != cpresident);
ndbrequire(nodePtr.p->phase == ZSTARTING);
sendCmNodeInfoReq(signal, nodeId, nodePtr.p);
return;
}
return;
case GSN_CM_ADD:{
case GSN_CM_NODEINFOCONF:{
jam();
ndbrequire(getOwnNodeId() != cpresident);
c_start.m_nodes.clearWaitingFor(nodeId);
c_start.m_gsn = RNIL;
NodeRecPtr addNodePtr;
addNodePtr.i = nodeId;
ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
cmAddPrepare(signal, addNodePtr, nodePtr.p);
return;
ndbrequire(getOwnNodeId() != cpresident);
ndbrequire(nodePtr.p->phase == ZRUNNING);
if (c_start.m_nodes.isWaitingFor(nodeId))
{
jam();
c_start.m_nodes.clearWaitingFor(nodeId);
c_start.m_gsn = RNIL;
NodeRecPtr addNodePtr;
addNodePtr.i = nodeId;
ptrCheckGuard(addNodePtr, MAX_NDB_NODES, nodeRec);
cmAddPrepare(signal, addNodePtr, nodePtr.p);
return;
}
}
default:
return;
(void)1;
}
ndbrequire(!c_start.m_nodes.isWaitingFor(nodeId));
ndbrequire(!c_cmregreq_nodes.get(nodeId));
c_cmregreq_nodes.set(nodeId);
sendCmRegReq(signal, nodeId);
c_regReqReqSent--;
return;
}//Qmgr::execCONNECT_REP()
......@@ -601,22 +666,39 @@ void Qmgr::execCM_REGCONF(Signal* signal)
jamEntry();
const CmRegConf * const cmRegConf = (CmRegConf *)&signal->theData[0];
Uint32 presidentNodeId = cmRegConf->presidentNodeId;
if (check_cmregreq_reply(signal, presidentNodeId, GSN_CM_REGCONF))
{
jam();
return;
}
if (!ndbCompatible_ndb_ndb(NDB_VERSION, cmRegConf->presidentVersion)) {
jam();
char buf[128];
BaseString::snprintf(buf,sizeof(buf),"incompatible version own=0x%x other=0x%x, shutting down", NDB_VERSION, cmRegConf->presidentVersion);
BaseString::snprintf(buf,sizeof(buf),
"incompatible version own=0x%x other=0x%x, "
" shutting down",
NDB_VERSION, cmRegConf->presidentVersion);
systemErrorLab(signal, __LINE__, buf);
return;
}
myNodePtr.i = getOwnNodeId();
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
ndbrequire(c_start.m_gsn == GSN_CM_REGREQ);
// Compare with '==', do not assign: 'phase = ZSTARTING' overwrote the node's
// phase, and the check then only tested the value of ZSTARTING itself.
ndbrequire(myNodePtr.p->phase == ZSTARTING);
cpdistref = cmRegConf->presidentBlockRef;
cpresident = cmRegConf->presidentNodeId;
UintR TdynamicId = cmRegConf->dynamicId;
c_maxDynamicId = TdynamicId;
c_clusterNodes.assign(NdbNodeBitmask::Size, cmRegConf->allNdbNodes);
myNodePtr.p->ndynamicId = TdynamicId;
/*--------------------------------------------------------------*/
// Send this as an EVENT REPORT to inform about hearing about
// other NDB node proclaiming to be president.
......@@ -627,10 +709,6 @@ void Qmgr::execCM_REGCONF(Signal* signal)
signal->theData[3] = TdynamicId;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
myNodePtr.i = getOwnNodeId();
ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);
myNodePtr.p->ndynamicId = TdynamicId;
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
jam();
if (c_clusterNodes.get(nodePtr.i)){
......@@ -653,6 +731,134 @@ void Qmgr::execCM_REGCONF(Signal* signal)
return;
}//Qmgr::execCM_REGCONF()
/**
 * Inspect a CM_REGCONF / CM_REGREF received from another node and shut this
 * node down if the reply shows that the cluster is partitioned.
 *
 * The reply is first reduced to "which node does the sender consider
 * president" (RNIL when the reply does not say). What happens next depends
 * on c_start.m_gsn:
 *  - GSN_CM_REGREQ: ordinary registration; nothing is checked.
 *  - GSN_CM_NODEINFOREQ: a CM_REGCONF from a node we are still waiting on is
 *    fatal; a CM_REGREF from such a node is ignored. Replies from any other
 *    node go through the president comparison.
 *  - anything else (incl. GSN_CM_NODEINFOCONF): we must be ZRUNNING and the
 *    reply goes through the president comparison.
 *
 * President comparison: any CM_REGCONF, or a CM_REGREF naming a president
 * other than cpresident, is reported as a partitioned cluster.
 *
 * @param signal  the received signal; read as CmRegRef when gsn is
 *                GSN_CM_REGREF
 * @param nodeId  node the reply is attributed to
 * @param gsn     GSN_CM_REGCONF or GSN_CM_REGREF
 * @return false when the caller should continue its normal handling of the
 *         signal. The partitioned-cluster paths end in progError() and are
 *         not expected to return.
 */
bool
Qmgr::check_cmregreq_reply(Signal* signal, Uint32 nodeId, Uint32 gsn)
{
  NodeRecPtr myNodePtr;
  myNodePtr.i = getOwnNodeId();
  ptrCheckGuard(myNodePtr, MAX_NDB_NODES, nodeRec);

  // Only used to validate that nodeId indexes a node record.
  NodeRecPtr nodePtr;
  nodePtr.i = nodeId;
  ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec);

  /**
   * Try to decide if replying node
   *   knows who is president
   */
  Uint32 president_reply = RNIL;
  switch(gsn){
  case GSN_CM_REGREF:{
    jam();
    CmRegRef* ref = (CmRegRef*)signal->getDataPtr();
    switch(ref->errorCode){
    case CmRegRef::ZBUSY:
    case CmRegRef::ZBUSY_PRESIDENT:
    case CmRegRef::ZBUSY_TO_PRES:
      jam();
      /**
       * Only president replies this
       */
      ndbrequire(nodeId == ref->presidentCandidate);
      president_reply = nodeId;
      break;
    case CmRegRef::ZNOT_PRESIDENT:
      jam();
      president_reply = ref->presidentCandidate;
      break;
    case CmRegRef::ZNOT_IN_CFG:
    case CmRegRef::ZNOT_DEAD:
    case CmRegRef::ZELECTION:
      // None of these replies gives certain knowledge of the president
      jam();
      break;
    }
    break;
  }
  case GSN_CM_REGCONF:
    jam();
    president_reply = nodeId;
    break;
  }

  char buf[256];

  switch(c_start.m_gsn){
  case GSN_CM_REGREQ:
    jam();
    ndbrequire(c_start.m_nodes.isWaitingFor(nodeId));
    ndbrequire(c_cmregreq_nodes.isclear());
    ndbrequire(myNodePtr.p->phase == ZSTARTING);
    return false;
  case GSN_CM_NODEINFOREQ:
    jam();
    ndbrequire(myNodePtr.p->phase == ZSTARTING);
    if (c_start.m_nodes.isWaitingFor(nodeId))
    {
      jam();
      /**
       * We're waiting for CM_NODEINFOCONF from this node
       */
      if (gsn == GSN_CM_REGREF)
      {
        jam();
        return false;
      }
      jam();
      BaseString::snprintf(buf, sizeof(buf),
                           "Partitioned cluster! check StartPartialTimeout, "
                           " received CM_REGCONF from %d"
                           " while waiting for GSN_CM_NODEINFOCONF."
                           " president=%d",
                           nodeId, cpresident);
      goto die_direct;
    }
    goto check_reply;
  default:
  case GSN_CM_NODEINFOCONF:
    jam();
    ndbrequire(myNodePtr.p->phase == ZRUNNING);
    goto check_reply;
  }

check_reply:
  jam();
  // The probe sent to this node has been answered.
  c_cmregreq_nodes.clear(nodeId);

  if (gsn == GSN_CM_REGCONF)
  {
    jam();
    BaseString::snprintf(buf, sizeof(buf),
                         "Partitioned cluster! check StartPartialTimeout, "
                         " received CM_REGCONF"
                         " from %d I think president: %d",
                         nodeId, cpresident);
    goto die_direct;
  }

  if (president_reply != RNIL && president_reply != cpresident)
  {
    jam();
    BaseString::snprintf(buf, sizeof(buf),
                         "Partitioned cluster! check StartPartialTimeout, "
                         " received CM_REGREF from %d specifying president as"
                         " %d, president: %d",
                         nodeId, president_reply, cpresident);
    goto die_direct;
  }

  return false;

die_direct:
  // Never hand a runtime-built buffer to a printf-style function as the
  // format string; print it through "%s".
  ndbout_c("%s", buf);
  progError(__LINE__,
            ERR_ARBIT_SHUTDOWN,
            buf);
  ndbrequire(false);
  return true; // not reached; keeps the non-void function from falling off its end
}
void
Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
CmNodeInfoReq * const req = (CmNodeInfoReq*)signal->getDataPtrSend();
......@@ -685,13 +891,21 @@ Qmgr::sendCmNodeInfoReq(Signal* signal, Uint32 nodeId, const NodeRec * self){
void Qmgr::execCM_REGREF(Signal* signal)
{
jamEntry();
c_regReqReqRecv++;
// Ignore block reference in data[0]
UintR TaddNodeno = signal->theData[1];
UintR TrefuseReason = signal->theData[2];
Uint32 candidate = signal->theData[3];
DEBUG_START3(signal, TrefuseReason);
if (check_cmregreq_reply(signal, TaddNodeno, GSN_CM_REGREF))
{
jam();
return;
}
c_regReqReqRecv++;
// Ignore block reference in data[0]
if(candidate != cpresidentCandidate){
jam();
......@@ -779,7 +993,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
Uint64 now = NdbTick_CurrentMillisecond();
if((c_regReqReqRecv == cnoOfNodes) || now > c_stopElectionTime){
jam();
electionWon();
electionWon(signal);
sendSttorryLab(signal);
/**
......@@ -793,7 +1007,7 @@ void Qmgr::execCM_REGREF(Signal* signal)
}//Qmgr::execCM_REGREF()
void
Qmgr::electionWon(){
Qmgr::electionWon(Signal* signal){
NodeRecPtr myNodePtr;
cpresident = getOwnNodeId(); /* This node becomes president. */
myNodePtr.i = getOwnNodeId();
......@@ -812,6 +1026,12 @@ Qmgr::electionWon(){
cpresidentAlive = ZTRUE;
c_stopElectionTime = ~0;
c_start.reset();
signal->theData[0] = EventReport::CM_REGCONF;
signal->theData[1] = getOwnNodeId();
signal->theData[2] = cpresident;
signal->theData[3] = 1;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
}
/*
......@@ -946,7 +1166,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
ndbrequire(signal->header.theVerId_signalNumber == GSN_CM_ADD);
c_start.m_nodes.clearWaitingFor();
c_start.m_nodes.setWaitingFor(nodePtr.i);
c_start.m_gsn = GSN_CM_ADD;
c_start.m_gsn = GSN_CM_NODEINFOCONF;
#else
warningEvent("Enabling communication to CM_ADD node %u state=%d",
nodePtr.i,
......@@ -1847,7 +2067,8 @@ void Qmgr::execDISCONNECT_REP(Signal* signal)
const DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0];
const Uint32 nodeId = rep->nodeId;
c_connectedNodes.clear(nodeId);
c_cmregreq_nodes.clear(nodeId);
NodeRecPtr nodePtr;
nodePtr.i = getOwnNodeId();
ptrCheckGuard(nodePtr, MAX_NODES, nodeRec);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment