Commit ab07c477 authored by unknown's avatar unknown

ndb - bug#25554

  fix bug when master failure during nr
  (recommit against 5.0)


ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  recommit against 5.0
ndb/src/kernel/blocks/qmgr/QmgrMain.cpp:
  recommit against 5.0
ndb/test/ndbapi/testNodeRestart.cpp:
  recommit against 5.0
ndb/test/run-test/daily-basic-tests.txt:
  recommit against 5.0
parent 162a2e41
......@@ -3555,7 +3555,6 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
takeOverPtr.i = takeOverPtrI;
ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
releaseTakeOver(takeOverPtrI);
if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
(takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
jam();
......@@ -3569,6 +3568,7 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
}//if
setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
initTakeOver(takeOverPtr);
releaseTakeOver(takeOverPtrI);
}//Dbdih::endTakeOver()
void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
......@@ -4710,6 +4710,7 @@ void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI)
break;
}
ndbrequire(ok);
endTakeOver(takeOverPtr.i);
}//if
}//Dbdih::handleTakeOverNewMaster()
......
......@@ -2847,6 +2847,17 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
systemErrorLab(signal, __LINE__);
return;
}//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
TnoFailedNodes = cnoFailedNodes;
failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause);
if (cpresident == getOwnNodeId()) {
......@@ -2933,6 +2944,16 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
return;
}//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
guard0 = cnoPrepFailedNodes - 1;
arrGuard(guard0, MAX_NDB_NODES);
for (Tindex = 0; Tindex <= guard0; Tindex++) {
......
......@@ -995,6 +995,56 @@ int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 4)
return NDBT_OK;
for (int i = 0; i<loops; i++)
{
int master = restarter.getMasterNodeId();
int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
restarter.restartOneDbNode(node1, false, true, true);
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateOneNode(master, val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInNode(master, 7141))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&node1, 1))
return NDBT_FAILED;
if (restarter.dumpStateOneNode(node1, val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInNode(node1, 932))
return NDBT_FAILED;
if (restarter.startNodes(&node1, 1))
return NDBT_FAILED;
int nodes[] = { master, node1 };
if (restarter.waitNodesNoStart(nodes, 2))
return NDBT_FAILED;
if (restarter.startNodes(nodes, 2))
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
}
return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad",
......@@ -1314,6 +1364,9 @@ TESTCASE("Bug24717", ""){
TESTCASE("Bug25364", ""){
INITIALIZER(runBug25364);
}
TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -473,6 +473,10 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug25364 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug25554 T1
# OLD FLEX
max-time: 500
cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment