Commit ab07c477 authored by unknown's avatar unknown

ndb - bug#25554

  fix bug when the master fails during node restart (NR)
  (recommit against 5.0)


ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  recommit against 5.0
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  recommit against 5.0
ndb/test/ndbapi/testNodeRestart.cpp:
  recommit against 5.0
ndb/test/run-test/daily-basic-tests.txt:
  recommit against 5.0
parent 162a2e41
...@@ -3555,7 +3555,6 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI) ...@@ -3555,7 +3555,6 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
takeOverPtr.i = takeOverPtrI; takeOverPtr.i = takeOverPtrI;
ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord); ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
releaseTakeOver(takeOverPtrI);
if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) && if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
(takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) { (takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
jam(); jam();
...@@ -3569,6 +3568,7 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI) ...@@ -3569,6 +3568,7 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
}//if }//if
setAllowNodeStart(takeOverPtr.p->toStartingNode, true); setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
initTakeOver(takeOverPtr); initTakeOver(takeOverPtr);
releaseTakeOver(takeOverPtrI);
}//Dbdih::endTakeOver() }//Dbdih::endTakeOver()
void Dbdih::releaseTakeOver(Uint32 takeOverPtrI) void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
...@@ -4710,6 +4710,7 @@ void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI) ...@@ -4710,6 +4710,7 @@ void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI)
break; break;
} }
ndbrequire(ok); ndbrequire(ok);
endTakeOver(takeOverPtr.i);
}//if }//if
}//Dbdih::handleTakeOverNewMaster() }//Dbdih::handleTakeOverNewMaster()
......
...@@ -2847,6 +2847,17 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode, ...@@ -2847,6 +2847,17 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
systemErrorLab(signal, __LINE__); systemErrorLab(signal, __LINE__);
return; return;
}//if }//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
TnoFailedNodes = cnoFailedNodes; TnoFailedNodes = cnoFailedNodes;
failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause); failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause);
if (cpresident == getOwnNodeId()) { if (cpresident == getOwnNodeId()) {
...@@ -2933,6 +2944,16 @@ void Qmgr::execPREP_FAILREQ(Signal* signal) ...@@ -2933,6 +2944,16 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
return; return;
}//if }//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
guard0 = cnoPrepFailedNodes - 1; guard0 = cnoPrepFailedNodes - 1;
arrGuard(guard0, MAX_NDB_NODES); arrGuard(guard0, MAX_NDB_NODES);
for (Tindex = 0; Tindex <= guard0; Tindex++) { for (Tindex = 0; Tindex <= guard0; Tindex++) {
......
...@@ -995,6 +995,56 @@ int runBug25364(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -995,6 +995,56 @@ int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
/**
 * Regression test for bug#25554 (master failure while another node restarts).
 *
 * Per loop iteration:
 *  1. look up the current master and a node returned by
 *     getRandomNodeOtherNodeGroup(master, ...), and restart that node;
 *  2. enable restart-on-error-insert on the master and insert error 7141
 *     into it (presumably makes the master fail during the node restart,
 *     as described by the bug title -- TODO confirm against DbdihMain.cpp);
 *  3. once the restarting node is in the no-start state, enable
 *     restart-on-error-insert on it, insert error 932 (the CRASH_INSERTION
 *     used by Qmgr when a node failure is reported before SL_STARTED),
 *     and start it;
 *  4. wait for both the master and the restarting node to reach the
 *     no-start state, start both, and wait for the cluster to be started.
 *
 * @param ctx   test context; only the loop count is read from it
 * @param step  unused
 * @return NDBT_OK on success, or when the cluster has fewer than 4 data
 *         nodes (scenario is not run); NDBT_FAILED as soon as any
 *         restarter operation reports an error
 */
int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
  const int loops = ctx->getNumLoops();
  NdbRestarter restarter;

  // Scenario is not run on clusters with fewer than four data nodes.
  if (restarter.getNumDbNodes() < 4)
    return NDBT_OK;

  for (int i = 0; i<loops; i++)
  {
    const int master = restarter.getMasterNodeId();
    int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());

    // NOTE(review): assumes NdbRestarter signals a failed lookup with a
    // negative node id -- confirm against NdbRestarter.
    if (master < 0 || node1 < 0)
      return NDBT_FAILED;

    // Previously the result of this call was ignored; a failed restart
    // request is now reported directly instead of being discovered later.
    // NOTE(review): flags are presumably (initial, nostart, abort) -- confirm.
    if (restarter.restartOneDbNode(node1, false, true, true))
      return NDBT_FAILED;

    int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateOneNode(master, val2, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(master, 7141))
      return NDBT_FAILED;

    if (restarter.waitNodesNoStart(&node1, 1))
      return NDBT_FAILED;

    if (restarter.dumpStateOneNode(node1, val2, 2))
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(node1, 932))
      return NDBT_FAILED;

    if (restarter.startNodes(&node1, 1))
      return NDBT_FAILED;

    // Both nodes are expected to end up in the no-start state before the
    // cluster is brought back up.
    int nodes[] = { master, node1 };
    if (restarter.waitNodesNoStart(nodes, 2))
      return NDBT_FAILED;

    if (restarter.startNodes(nodes, 2))
      return NDBT_FAILED;

    if (restarter.waitClusterStarted())
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -1314,6 +1364,9 @@ TESTCASE("Bug24717", ""){ ...@@ -1314,6 +1364,9 @@ TESTCASE("Bug24717", ""){
TESTCASE("Bug25364", ""){ TESTCASE("Bug25364", ""){
INITIALIZER(runBug25364); INITIALIZER(runBug25364);
} }
// Registers test case "Bug25554" (empty description); its only step is the
// runBug25554 initializer.
TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -473,6 +473,10 @@ max-time: 1000 ...@@ -473,6 +473,10 @@ max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug25364 T1 args: -n Bug25364 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug25554 T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment