diff --git a/storage/ndb/src/kernel/blocks/ERROR_codes.txt b/storage/ndb/src/kernel/blocks/ERROR_codes.txt index b3405679978c1caa5dcf493c3b9c651cd1aea420..67eb89f850f9e071dbefe1d4287b4c03d74c11c8 100644 --- a/storage/ndb/src/kernel/blocks/ERROR_codes.txt +++ b/storage/ndb/src/kernel/blocks/ERROR_codes.txt @@ -1,5 +1,5 @@ Next QMGR 1 -Next NDBCNTR 1001 +Next NDBCNTR 1002 Next NDBFS 2000 Next DBACC 3002 Next DBTUP 4029 @@ -523,3 +523,4 @@ Dbtup: NDBCNTR: 1000: Crash insertion on SystemError::CopyFragRef +1001: Delay sending NODE_FAILREP (to own node), until error is cleared diff --git a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp index 76ff0dcc41ba88f4ed3f1270a08b335a997b8e78..a2f8351e308d5ce3a3e0bc6ed9df75046fb5e9ae 100644 --- a/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp @@ -4673,12 +4673,18 @@ void Dbdih::failedNodeLcpHandling(Signal* signal, NodeRecordPtr failedNodePtr) jam(); const Uint32 nodeId = failedNodePtr.i; - if (c_lcpState.m_participatingLQH.get(failedNodePtr.i)){ + if (isMaster() && c_lcpState.m_participatingLQH.get(failedNodePtr.i)) + { /*----------------------------------------------------*/ /* THE NODE WAS INVOLVED IN A LOCAL CHECKPOINT. WE */ /* MUST UPDATE THE ACTIVE STATUS TO INDICATE THAT */ /* THE NODE HAVE MISSED A LOCAL CHECKPOINT. 
*/ /*----------------------------------------------------*/ + + /** + * Bug#28717, Only master should do this, as this status is copied + * to other nodes + */ switch (failedNodePtr.p->activeStatus) { case Sysfile::NS_Active: jam(); diff --git a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp index 5810aea19325b832ea8c776720c01b4b9b7923d7..69673796fee84307720e9d3d2eb7bf9cb0575e71 100644 --- a/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp +++ b/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp @@ -1411,6 +1411,15 @@ void Ndbcntr::execNODE_FAILREP(Signal* signal) { jamEntry(); + /* Error insert 1001 (see ERROR_codes.txt): postpone the report by re-sending it + * to reference() with a delay of 100 until the error insert is cleared. */ + if (ERROR_INSERTED(1001)) + { + sendSignalWithDelay(reference(), GSN_NODE_FAILREP, signal, 100, + signal->getLength()); + return; + } + const NodeFailRep * nodeFail = (NodeFailRep *)&signal->theData[0]; NdbNodeBitmask allFailed; allFailed.assign(NdbNodeBitmask::Size, nodeFail->theNodes); diff --git a/storage/ndb/test/ndbapi/testNodeRestart.cpp b/storage/ndb/test/ndbapi/testNodeRestart.cpp index fb05681bab49eccfd7f8f81d9914e37084c00809..b11d1942303b940ebc0c66fc0bbf85375c9d2bf5 100644 --- a/storage/ndb/test/ndbapi/testNodeRestart.cpp +++ b/storage/ndb/test/ndbapi/testNodeRestart.cpp @@ -1535,6 +1535,102 @@ runBug28023(NDBT_Context* ctx, NDBT_Step* step) return NDBT_FAILED; } } + + return NDBT_OK; +} + + +/** + * Regression test for Bug#28717. + * + * Returns NDBT_OK without testing when the cluster has fewer than 4 DB nodes. + * Otherwise: restarts node0 (picked with getRandomNodeOtherNodeGroup relative to + * the master), asks the master for three local checkpoints and waits for each to + * start and complete, then waits for node0 to be in the no-start state. node0 is + * then started with error 5010 inserted while node1 (picked with + * getRandomNodeSameNodeGroup relative to node0) has error 1001 inserted for about + * 3 seconds. Finally node0 must reach no-start again, is started once more, and + * the whole cluster must come up. + * + * @return NDBT_OK on success (or skip), NDBT_FAILED as soon as a checked step fails. + */ +int +runBug28717(NDBT_Context* ctx, NDBT_Step* step) +{ + int result = NDBT_OK; + int loops = ctx->getNumLoops(); + int records = ctx->getNumRecords(); + Ndb* pNdb = GETNDB(step); + NdbRestarter res; + + if (res.getNumDbNodes() < 4) + { + return NDBT_OK; + } + + int master = res.getMasterNodeId(); + int node0 = res.getRandomNodeOtherNodeGroup(master, rand()); + int node1 = res.getRandomNodeSameNodeGroup(node0, rand()); + + ndbout_c("master: %d node0: %d node1: %d", master, node0, node1); + + if (res.restartOneDbNode(node0, false, true, true)) + { + return NDBT_FAILED; + } + + { + int filter[] = { 15, 
NDB_MGM_EVENT_CATEGORY_CHECKPOINT, 0 }; + NdbLogEventHandle handle = + ndb_mgm_create_logevent_handle(res.handle, filter); + if (handle == 0) /* no log event handle: the LCPs cannot be observed */ + return NDBT_FAILED; + + + int dump[] = { DumpStateOrd::DihStartLcpImmediately }; + struct ndb_logevent event; + + for (Uint32 i = 0; i<3; i++) + { + res.dumpStateOneNode(master, dump, 1); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointStarted); + while(ndb_logevent_get_next(handle, &event, 0) >= 0 && + event.type != NDB_LE_LocalCheckpointCompleted); + } + ndb_mgm_destroy_logevent_handle(&handle); /* done observing LCPs; free the handle */ + } + + if (res.waitNodesNoStart(&node0, 1)) + return NDBT_FAILED; + + int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 }; + + if (res.dumpStateOneNode(node0, val2, 2)) + return NDBT_FAILED; + + if (res.insertErrorInNode(node0, 5010)) + return NDBT_FAILED; + + if (res.insertErrorInNode(node1, 1001)) + return NDBT_FAILED; + + if (res.startNodes(&node0, 1)) + return NDBT_FAILED; + + NdbSleep_SecSleep(3); + + if (res.insertErrorInNode(node1, 0)) + return NDBT_FAILED; + + if (res.waitNodesNoStart(&node0, 1)) + return NDBT_FAILED; + + if (res.startNodes(&node0, 1)) + return NDBT_FAILED; + + if (res.waitClusterStarted()) + return NDBT_FAILED; return NDBT_OK; } @@ -1896,6 +1992,12 @@ TESTCASE("Bug27466", ""){ TESTCASE("Bug28023", ""){ INITIALIZER(runBug28023); } +TESTCASE("Bug25554", ""){ + INITIALIZER(runBug25554); +} +TESTCASE("Bug28717", ""){ + INITIALIZER(runBug28717); +} NDBT_TESTSUITE_END(testNodeRestart); int main(int argc, const char** argv){ diff --git a/storage/ndb/test/run-test/daily-basic-tests.txt b/storage/ndb/test/run-test/daily-basic-tests.txt index 203ba4145eca389b27c99a4d4c21796916734194..a376dbb4a914527abbf73c705a1ef9062ff7d1a3 100644 --- a/storage/ndb/test/run-test/daily-basic-tests.txt +++ b/storage/ndb/test/run-test/daily-basic-tests.txt @@ -555,6 +555,10 @@ max-time: 1500 cmd: testDict args: -n CreateAndDrop +max-time: 1000 +cmd: testNodeRestart +args: -n Bug28717 T1 + max-time: 1500 cmd: testDict args: -n CreateAndDropAtRandom -l 200 T1