diff --git a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp index 3406176d7a8ae29a41132854131e878c6f4852ca..6c869435bfa692672b4a29b36ed57f7ef5d56c86 100644 --- a/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp +++ b/storage/ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp @@ -421,9 +421,10 @@ void Cmvmi::execCLOSE_COMREQ(Signal* signal) // Uint32 noOfNodes = closeCom->noOfNodes; jamEntry(); - for (unsigned i = 0; i < MAX_NODES; i++){ - if(NodeBitmask::get(closeCom->theNodes, i)){ - + for (unsigned i = 0; i < MAX_NODES; i++) + { + if(NodeBitmask::get(closeCom->theNodes, i)) + { jam(); //----------------------------------------------------- @@ -437,7 +438,9 @@ void Cmvmi::execCLOSE_COMREQ(Signal* signal) globalTransporterRegistry.do_disconnect(i); } } - if (failNo != 0) { + + if (failNo != 0) + { jam(); signal->theData[0] = userRef; signal->theData[1] = failNo; @@ -456,13 +459,21 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) jamEntry(); const Uint32 len = signal->getLength(); - if(len == 2){ - + if(len == 2) + { #ifdef ERROR_INSERT if (! ((ERROR_INSERTED(9000) || ERROR_INSERTED(9002)) && c_error_9000_nodes_mask.get(tStartingNode))) #endif { + if (globalData.theStartLevel != NodeState::SL_STARTED && + (getNodeInfo(tStartingNode).m_type != NodeInfo::DB && + getNodeInfo(tStartingNode).m_type != NodeInfo::MGM)) + { + jam(); + goto done; + } + globalTransporterRegistry.do_connect(tStartingNode); globalTransporterRegistry.setIOState(tStartingNode, HaltIO); @@ -475,9 +486,11 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) //----------------------------------------------------- } } else { - for(unsigned int i = 1; i < MAX_NODES; i++ ) { + for(unsigned int i = 1; i < MAX_NODES; i++ ) + { jam(); - if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2){ + if (i != getOwnNodeId() && getNodeInfo(i).m_type == tData2) + { jam(); #ifdef ERROR_INSERT @@ -496,6 +509,7 @@ void Cmvmi::execOPEN_COMREQ(Signal* signal) } } +done: if (userRef != 0) { jam(); signal->theData[0] = tStartingNode; @@ -536,24 +550,10 @@ void Cmvmi::execDISCONNECT_REP(Signal *signal) setNodeInfo(hostId).m_connectCount++; const NodeInfo::NodeType type = getNodeInfo(hostId).getType(); ndbrequire(type != NodeInfo::INVALID); - - if(type == NodeInfo::DB || globalData.theStartLevel == NodeState::SL_STARTED){ - jam(); - DisconnectRep * const rep = (DisconnectRep *)&signal->theData[0]; - rep->nodeId = hostId; - rep->err = errNo; - sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal, - DisconnectRep::SignalLength, JBA); - } else if((globalData.theStartLevel == NodeState::SL_CMVMI || - globalData.theStartLevel == NodeState::SL_STARTING) - && type == NodeInfo::MGM) { - /** - * Someone disconnected during cmvmi period - */ - jam(); - globalTransporterRegistry.do_connect(hostId); - } + sendSignal(QMGR_REF, GSN_DISCONNECT_REP, signal, + DisconnectRep::SignalLength, JBA); + cancelSubscription(hostId); signal->theData[0] = NDB_LE_Disconnected; @@ -587,6 +587,8 @@ void Cmvmi::execCONNECT_REP(Signal *signal){ */ if(type == NodeInfo::MGM){ jam(); + signal->theData[0] = hostId; + sendSignal(QMGR_REF, GSN_CONNECT_REP, signal, 1, JBA); } else { /** * Dont allow api nodes to connect @@ -802,6 +804,8 @@ Cmvmi::execSTART_ORD(Signal* signal) { } } } + + EXECUTE_DIRECT(QMGR, GSN_START_ORD, signal, 1); return ; } @@ -829,9 +833,6 @@ Cmvmi::execSTART_ORD(Signal* signal) { * * Do Restart */ - - globalScheduler.clear(); - globalTimeQueue.clear(); // Disconnect all nodes as part of the system restart. // We need to ensure that we are starting up diff --git a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp index 8d51b24ec6a5973cdd63299cb9eb5c19f54120e4..6a76ce5217a593e33c8745896fd1775a3ef71487 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp +++ b/storage/ndb/src/kernel/blocks/qmgr/Qmgr.hpp @@ -265,6 +265,8 @@ private: void execALLOC_NODEID_CONF(Signal *); void execALLOC_NODEID_REF(Signal *); void completeAllocNodeIdReq(Signal *); + + void execSTART_ORD(Signal*); // Arbitration signals void execARBIT_CFG(Signal* signal); @@ -281,6 +283,7 @@ private: void check_readnodes_reply(Signal* signal, Uint32 nodeId, Uint32 gsn); Uint32 check_startup(Signal* signal); + void api_failed(Signal* signal, Uint32 aFailedNode); void node_failed(Signal* signal, Uint16 aFailedNode); void checkStartInterface(Signal* signal); void failReport(Signal* signal, diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp index f9950072ab4a1a8b57622d3979d15a1d0e1efdaa..2f03bd566945b12ea294236b81e66b846d9b57ea 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrInit.cpp @@ -31,10 +31,6 @@ void Qmgr::initData() cnoCommitFailedNodes = 0; c_maxDynamicId = 0; c_clusterNodes.clear(); - - Uint32 hbDBAPI = 500; - setHbApiDelay(hbDBAPI); - c_connectedNodes.set(getOwnNodeId()); c_stopReq.senderRef = 0; /** @@ -43,6 +39,27 @@ void Qmgr::initData() ndbrequire((Uint32)NodeInfo::DB == 0); ndbrequire((Uint32)NodeInfo::API == 1); ndbrequire((Uint32)NodeInfo::MGM == 2); + + NodeRecPtr nodePtr; + nodePtr.i = getOwnNodeId(); + ptrAss(nodePtr, nodeRec); + nodePtr.p->blockRef = reference(); + + c_connectedNodes.set(getOwnNodeId()); + setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION; + + + /** + * Timeouts + */ + const ndb_mgm_configuration_iterator * p = + m_ctx.m_config.getOwnConfigIterator(); + ndbrequire(p != 0); + + Uint32 hbDBAPI = 1500; + ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); + + setHbApiDelay(hbDBAPI); }//Qmgr::initData() void Qmgr::initRecords() @@ -113,6 +130,7 @@ Qmgr::Qmgr(Block_context& ctx) addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF); addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF); addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP); + addRecSignal(GSN_START_ORD, &Qmgr::execSTART_ORD); initData(); }//Qmgr::Qmgr() diff --git a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp index 1fba4d62e17fcfa81e1e9bbd1221f34a0ce05c04..23e7829481ec95a9f41821334494260f229fb3a6 100644 --- a/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp +++ b/storage/ndb/src/kernel/blocks/qmgr/QmgrMain.cpp @@ -238,6 +238,38 @@ Qmgr::execREAD_CONFIG_REQ(Signal* signal) ReadConfigConf::SignalLength, JBB); } +void +Qmgr::execSTART_ORD(Signal* signal) +{ + /** + * Start timer handling + */ + signal->theData[0] = ZTIMER_HANDLING; + sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 1, JBB); + + NodeRecPtr nodePtr; + for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) + { + ptrAss(nodePtr, nodeRec); + nodePtr.p->ndynamicId = 0; + if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB) + { + nodePtr.p->phase = ZINIT; + c_definedNodes.set(nodePtr.i); + } else { + nodePtr.p->phase = ZAPI_INACTIVE; + } + + setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; + nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE; + nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE; + nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE; + nodePtr.p->failState = NORMAL; + nodePtr.p->rcv[0] = 0; + nodePtr.p->rcv[1] = 0; + }//for +} + /* 4.2 ADD NODE MODULE*/ /*##########################################################################*/ @@ -298,8 +330,6 @@ void Qmgr::startphase1(Signal* signal) nodePtr.i = getOwnNodeId(); ptrAss(nodePtr, nodeRec); nodePtr.p->phase = ZSTARTING; - nodePtr.p->blockRef = reference(); - c_connectedNodes.set(nodePtr.i); signal->theData[0] = reference(); sendSignal(DBDIH_REF, GSN_DIH_RESTARTREQ, signal, 1, JBB); @@ -371,11 +401,14 @@ void Qmgr::execCONNECT_REP(Signal* signal) case ZFAIL_CLOSING: jam(); return; - case ZINIT: - ndbrequire(false); case ZAPI_ACTIVE: case ZAPI_INACTIVE: return; + case ZINIT: + ndbrequire(getNodeInfo(nodeId).m_type == NodeInfo::MGM); + break; + default: + ndbrequire(false); } if (getNodeInfo(nodeId).getType() != NodeInfo::DB) @@ -1212,12 +1245,6 @@ void Qmgr::execCM_REGREF(Signal* signal) { jam(); electionWon(signal); - - /** - * Start timer handling - */ - signal->theData[0] = ZTIMER_HANDLING; - sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB); } return; @@ -1855,12 +1882,6 @@ Qmgr::joinedCluster(Signal* signal, NodeRecPtr nodePtr){ sendSttorryLab(signal); - /** - * Start timer handling - */ - signal->theData[0] = ZTIMER_HANDLING; - sendSignal(QMGR_REF, GSN_CONTINUEB, signal, 10, JBB); - sendCmAckAdd(signal, getOwnNodeId(), CmAdd::CommitNew); } @@ -2094,25 +2115,6 @@ void Qmgr::findNeighbours(Signal* signal) /*---------------------------------------------------------------------------*/ void Qmgr::initData(Signal* signal) { - NodeRecPtr nodePtr; - for (nodePtr.i = 1; nodePtr.i < MAX_NODES; nodePtr.i++) { - ptrAss(nodePtr, nodeRec); - nodePtr.p->ndynamicId = 0; - if(getNodeInfo(nodePtr.i).m_type == NodeInfo::DB){ - nodePtr.p->phase = ZINIT; - c_definedNodes.set(nodePtr.i); - } else { - nodePtr.p->phase = ZAPI_INACTIVE; - } - - setNodeInfo(nodePtr.i).m_heartbeat_cnt= 0; - nodePtr.p->sendPrepFailReqStatus = Q_NOT_ACTIVE; - nodePtr.p->sendCommitFailReqStatus = Q_NOT_ACTIVE; - nodePtr.p->sendPresToStatus = Q_NOT_ACTIVE; - nodePtr.p->failState = NORMAL; - nodePtr.p->rcv[0] = 0; - nodePtr.p->rcv[1] = 0; - }//for cfailureNr = 1; ccommitFailureNr = 1; cprepareFailureNr = 1; @@ -2146,13 +2148,11 @@ void Qmgr::initData(Signal* signal) ndbrequire(p != 0); Uint32 hbDBDB = 1500; - Uint32 hbDBAPI = 1500; Uint32 arbitTimeout = 1000; c_restartPartialTimeout = 30000; c_restartPartionedTimeout = 60000; c_restartFailureTimeout = ~0; ndb_mgm_get_int_parameter(p, CFG_DB_HEARTBEAT_INTERVAL, &hbDBDB); - ndb_mgm_get_int_parameter(p, CFG_DB_API_HEARTBEAT_INTERVAL, &hbDBAPI); ndb_mgm_get_int_parameter(p, CFG_DB_ARBIT_TIMEOUT, &arbitTimeout); ndb_mgm_get_int_parameter(p, CFG_DB_START_PARTIAL_TIMEOUT, &c_restartPartialTimeout); @@ -2177,7 +2177,6 @@ void Qmgr::initData(Signal* signal) } setHbDelay(hbDBDB); - setHbApiDelay(hbDBAPI); setArbitTimeout(arbitTimeout); arbitRec.state = ARBIT_NULL; // start state for all nodes @@ -2204,7 +2203,6 @@ void Qmgr::initData(Signal* signal) execARBIT_CFG(signal); } - setNodeInfo(getOwnNodeId()).m_version = NDB_VERSION; }//Qmgr::initData() @@ -2237,20 +2235,22 @@ void Qmgr::timerHandlingLab(Signal* signal) hb_check_timer.reset(); } } - + if (interface_check_timer.check(TcurrentTime)) { jam(); interface_check_timer.reset(); checkStartInterface(signal); } + if (hb_api_timer.check(TcurrentTime)) + { + jam(); + hb_api_timer.reset(); + apiHbHandlingLab(signal); + } + if (cactivateApiCheck != 0) { jam(); - if (hb_api_timer.check(TcurrentTime)) { - jam(); - hb_api_timer.reset(); - apiHbHandlingLab(signal); - }//if if (clatestTransactionCheck == 0) { //------------------------------------------------------------- // Initialise the Transaction check timer. @@ -2367,18 +2367,21 @@ void Qmgr::apiHbHandlingLab(Signal* signal) if(type == NodeInfo::INVALID) continue; - if (TnodePtr.p->phase == ZAPI_ACTIVE){ + if (c_connectedNodes.get(nodeId)) + { jam(); setNodeInfo(TnodePtr.i).m_heartbeat_cnt++; - if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2){ + if(getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 2) + { signal->theData[0] = NDB_LE_MissedHeartbeat; signal->theData[1] = nodeId; signal->theData[2] = getNodeInfo(TnodePtr.i).m_heartbeat_cnt - 1; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3, JBB); } - if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) { + if (getNodeInfo(TnodePtr.i).m_heartbeat_cnt > 4) + { jam(); /*------------------------------------------------------------------*/ /* THE API NODE HAS NOT SENT ANY HEARTBEAT FOR THREE SECONDS. @@ -2390,8 +2393,8 @@ void Qmgr::apiHbHandlingLab(Signal* signal) signal->theData[0] = NDB_LE_DeadDueToHeartbeat; signal->theData[1] = nodeId; sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); - - node_failed(signal, nodeId); + + api_failed(signal, nodeId); }//if }//if }//for @@ -2480,26 +2483,6 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo) sendSignal(DBTC_REF, GSN_API_FAILREQ, signal, 2, JBA); sendSignal(DBDICT_REF, GSN_API_FAILREQ, signal, 2, JBA); sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA); - - /**------------------------------------------------------------------------- - * THE OTHER NODE WAS AN API NODE. THE COMMUNICATION LINK IS ALREADY - * BROKEN AND THUS NO ACTION IS NEEDED TO BREAK THE CONNECTION. - * WE ONLY NEED TO SET PARAMETERS TO ENABLE A NEW CONNECTION IN A FEW - * SECONDS. - *-------------------------------------------------------------------------*/ - setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; - setNodeInfo(failedNodePtr.i).m_version = 0; - recompute_version_info(getNodeInfo(failedNodePtr.i).m_type); - - CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0]; - - closeCom->xxxBlockRef = reference(); - closeCom->failNo = 0; - closeCom->noOfNodes = 1; - NodeBitmask::clear(closeCom->theNodes); - NodeBitmask::set(closeCom->theNodes, failedNodePtr.i); - sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, - CloseComReqConf::SignalLength, JBA); }//Qmgr::sendApiFailReq() void Qmgr::execAPI_FAILREQ(Signal* signal) @@ -2512,20 +2495,7 @@ void Qmgr::execAPI_FAILREQ(Signal* signal) ndbrequire(getNodeInfo(failedNodePtr.i).getType() != NodeInfo::DB); - // ignore if api not active - if (failedNodePtr.p->phase != ZAPI_ACTIVE) - { - jam(); - // But send to SUMA anyway... - sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA); - return; - } - - signal->theData[0] = NDB_LE_Disconnected; - signal->theData[1] = failedNodePtr.i; - sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 2, JBB); - - node_failed(signal, failedNodePtr.i); + api_failed(signal, signal->theData[0]); } void Qmgr::execAPI_FAILCONF(Signal* signal) @@ -2649,6 +2619,13 @@ void Qmgr::execDISCONNECT_REP(Signal* signal) ndbrequire(false); } + if (getNodeInfo(nodeId).getType() != NodeInfo::DB) + { + jam(); + api_failed(signal, nodeId); + return; + } + switch(nodePtr.p->phase){ case ZRUNNING: jam(); @@ -2685,66 +2662,109 @@ void Qmgr::node_failed(Signal* signal, Uint16 aFailedNode) failedNodePtr.i = aFailedNode; ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); - if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB){ + ndbrequire(getNodeInfo(failedNodePtr.i).getType() == NodeInfo::DB); + + /**--------------------------------------------------------------------- + * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT + * FAILURE WAS DISCOVERED. + *---------------------------------------------------------------------*/ + switch(failedNodePtr.p->phase){ + case ZRUNNING: jam(); - /**--------------------------------------------------------------------- - * THE OTHER NODE IS AN NDB NODE, WE HANDLE IT AS IF A HEARTBEAT - * FAILURE WAS DISCOVERED. - *---------------------------------------------------------------------*/ - switch(failedNodePtr.p->phase){ - case ZRUNNING: - jam(); - failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE); - return; - case ZFAIL_CLOSING: - jam(); - return; - case ZSTARTING: - c_start.reset(); - // Fall-through - default: - jam(); - /*---------------------------------------------------------------------*/ - // The other node is still not in the cluster but disconnected. - // We must restart communication in three seconds. - /*---------------------------------------------------------------------*/ - failedNodePtr.p->failState = NORMAL; - failedNodePtr.p->phase = ZFAIL_CLOSING; - setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; - - CloseComReqConf * const closeCom = - (CloseComReqConf *)&signal->theData[0]; - - closeCom->xxxBlockRef = reference(); - closeCom->failNo = 0; - closeCom->noOfNodes = 1; - NodeBitmask::clear(closeCom->theNodes); - NodeBitmask::set(closeCom->theNodes, failedNodePtr.i); - sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, - CloseComReqConf::SignalLength, JBA); - }//if + failReportLab(signal, aFailedNode, FailRep::ZLINK_FAILURE); return; - } - - /** - * API code - */ - jam(); - if (failedNodePtr.p->phase != ZFAIL_CLOSING){ + case ZFAIL_CLOSING: + jam(); + return; + case ZSTARTING: + c_start.reset(); + // Fall-through + default: jam(); - //------------------------------------------------------------------------- - // The API was active and has now failed. We need to initiate API failure - // handling. If the API had already failed then we can ignore this - // discovery. - //------------------------------------------------------------------------- + /*---------------------------------------------------------------------*/ + // The other node is still not in the cluster but disconnected. + // We must restart communication in three seconds. + /*---------------------------------------------------------------------*/ + failedNodePtr.p->failState = NORMAL; failedNodePtr.p->phase = ZFAIL_CLOSING; - - sendApiFailReq(signal, aFailedNode); - arbitRec.code = ArbitCode::ApiFail; - handleArbitApiFail(signal, aFailedNode); + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; + + CloseComReqConf * const closeCom = + (CloseComReqConf *)&signal->theData[0]; + + closeCom->xxxBlockRef = reference(); + closeCom->failNo = 0; + closeCom->noOfNodes = 1; + NodeBitmask::clear(closeCom->theNodes); + NodeBitmask::set(closeCom->theNodes, failedNodePtr.i); + sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, + CloseComReqConf::SignalLength, JBA); }//if return; -}//Qmgr::node_failed() +} + +void +Qmgr::api_failed(Signal* signal, Uint32 nodeId) +{ + NodeRecPtr failedNodePtr; + /**------------------------------------------------------------------------ + * A COMMUNICATION LINK HAS BEEN DISCONNECTED. WE MUST TAKE SOME ACTION + * DUE TO THIS. + *-----------------------------------------------------------------------*/ + failedNodePtr.i = nodeId; + ptrCheckGuard(failedNodePtr, MAX_NODES, nodeRec); + + if (failedNodePtr.p->phase == ZFAIL_CLOSING) + { + /** + * Failure handling already in progress + */ + jam(); + return; + } + + if (failedNodePtr.p->phase == ZAPI_ACTIVE) + { + jam(); + sendApiFailReq(signal, nodeId); + arbitRec.code = ArbitCode::ApiFail; + handleArbitApiFail(signal, nodeId); + } + else + { + /** + * Always inform SUMA + */ + jam(); + signal->theData[0] = nodeId; + signal->theData[1] = QMGR_REF; + sendSignal(SUMA_REF, GSN_API_FAILREQ, signal, 2, JBA); + failedNodePtr.p->failState = NORMAL; + } + + failedNodePtr.p->phase = ZFAIL_CLOSING; + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0; + setNodeInfo(failedNodePtr.i).m_version = 0; + recompute_version_info(getNodeInfo(failedNodePtr.i).m_type); + + CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0]; + closeCom->xxxBlockRef = reference(); + closeCom->failNo = 0; + closeCom->noOfNodes = 1; + NodeBitmask::clear(closeCom->theNodes); + NodeBitmask::set(closeCom->theNodes, failedNodePtr.i); + sendSignal(CMVMI_REF, GSN_CLOSE_COMREQ, signal, + CloseComReqConf::SignalLength, JBA); + + if (getNodeInfo(failedNodePtr.i).getType() == NodeInfo::MGM) + { + /** + * Allow MGM do reconnect "directly" + */ + jam(); + setNodeInfo(failedNodePtr.i).m_heartbeat_cnt = 3; + } +} /**-------------------------------------------------------------------------- * AN API NODE IS REGISTERING. IF FOR THE FIRST TIME WE WILL ENABLE @@ -4963,43 +4983,39 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal) c_start.m_president_candidate_gci); infoEvent("ctoStatus = %d\n", ctoStatus); for(Uint32 i = 1; i<MAX_NDB_NODES; i++){ - if(getNodeInfo(i).getType() == NodeInfo::DB){ - NodeRecPtr nodePtr; - nodePtr.i = i; - ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); - char buf[100]; - switch(nodePtr.p->phase){ - case ZINIT: - sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase); - break; - case ZSTARTING: - sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase); - break; - case ZRUNNING: - sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase); - break; - case ZPREPARE_FAIL: - sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase); - break; - case ZFAIL_CLOSING: - sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase); - break; - case ZAPI_INACTIVE: - sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase); - break; - case ZAPI_ACTIVE: - sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase); - break; - default: - sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase); - break; - } - infoEvent(buf); + NodeRecPtr nodePtr; + nodePtr.i = i; + ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRec); + char buf[100]; + switch(nodePtr.p->phase){ + case ZINIT: + sprintf(buf, "Node %d: ZINIT(%d)", i, nodePtr.p->phase); + break; + case ZSTARTING: + sprintf(buf, "Node %d: ZSTARTING(%d)", i, nodePtr.p->phase); + break; + case ZRUNNING: + sprintf(buf, "Node %d: ZRUNNING(%d)", i, nodePtr.p->phase); + break; + case ZPREPARE_FAIL: + sprintf(buf, "Node %d: ZPREPARE_FAIL(%d)", i, nodePtr.p->phase); + break; + case ZFAIL_CLOSING: + sprintf(buf, "Node %d: ZFAIL_CLOSING(%d)", i, nodePtr.p->phase); + break; + case ZAPI_INACTIVE: + sprintf(buf, "Node %d: ZAPI_INACTIVE(%d)", i, nodePtr.p->phase); + break; + case ZAPI_ACTIVE: + sprintf(buf, "Node %d: ZAPI_ACTIVE(%d)", i, nodePtr.p->phase); + break; + default: + sprintf(buf, "Node %d: <UNKNOWN>(%d)", i, nodePtr.p->phase); + break; } + infoEvent(buf); } - default: - ; - }//switch + } #ifdef ERROR_INSERT if (signal->theData[0] == 935 && signal->getLength() == 2)