Commit 431c9d03 authored by unknown's avatar unknown

bug#15587 - ndb

  Fix error in node failure (NF) handling during node restart (NR)


ndb/include/kernel/signaldata/DumpStateOrd.hpp:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/ERROR_codes.txt:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Run updateNodeInfo if failed node is not in list of storedReplicas
ndb/src/kernel/blocks/dblqh/Dblqh.hpp:
  Add dump for ERROR 5002 with specified table
ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  Add dump for ERROR 5002 with specified table
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug#15587
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug#15587
parent 45784557
...@@ -78,6 +78,8 @@ public: ...@@ -78,6 +78,8 @@ public:
LqhDumpAllScanRec = 2301, LqhDumpAllScanRec = 2301,
LqhDumpAllActiveScanRec = 2302, LqhDumpAllActiveScanRec = 2302,
LqhDumpLcpState = 2303, LqhDumpLcpState = 2303,
LqhErrorInsert5042 = 2315,
AccDumpOneScanRec = 2400, AccDumpOneScanRec = 2400,
AccDumpAllScanRec = 2401, AccDumpAllScanRec = 2401,
AccDumpAllActiveScanRec = 2402, AccDumpAllActiveScanRec = 2402,
......
...@@ -155,6 +155,9 @@ Insert node failure handling when receiving COMPLETEREQ. ...@@ -155,6 +155,9 @@ Insert node failure handling when receiving COMPLETEREQ.
5006: 5006:
Insert node failure handling when receiving ABORTREQ. Insert node failure handling when receiving ABORTREQ.
5042:
As 5002, but with specified table (see DumpStateOrd)
These error codes can be combined with error codes for testing time-out These error codes can be combined with error codes for testing time-out
handling in DBTC to ensure that node failures are also well handled in handling in DBTC to ensure that node failures are also well handled in
time-out handling. They can also be used to test multiple node failure time-out handling. They can also be used to test multiple node failure
......
...@@ -5187,15 +5187,16 @@ void Dbdih::removeNodeFromTable(Signal* signal, ...@@ -5187,15 +5187,16 @@ void Dbdih::removeNodeFromTable(Signal* signal,
/** /**
* For each of replica record * For each of replica record
*/ */
Uint32 replicaNo = 0; bool found = false;
ReplicaRecordPtr replicaPtr; ReplicaRecordPtr replicaPtr;
for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL; for(replicaPtr.i = fragPtr.p->storedReplicas; replicaPtr.i != RNIL;
replicaPtr.i = replicaPtr.p->nextReplica, replicaNo++) { replicaPtr.i = replicaPtr.p->nextReplica) {
jam(); jam();
ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord); ptrCheckGuard(replicaPtr, creplicaFileSize, replicaRecord);
if(replicaPtr.p->procNode == nodeId){ if(replicaPtr.p->procNode == nodeId){
jam(); jam();
found = true;
noOfRemovedReplicas++; noOfRemovedReplicas++;
removeNodeFromStored(nodeId, fragPtr, replicaPtr); removeNodeFromStored(nodeId, fragPtr, replicaPtr);
if(replicaPtr.p->lcpOngoingFlag){ if(replicaPtr.p->lcpOngoingFlag){
...@@ -5211,6 +5212,15 @@ void Dbdih::removeNodeFromTable(Signal* signal, ...@@ -5211,6 +5212,15 @@ void Dbdih::removeNodeFromTable(Signal* signal,
} }
} }
} }
if (!found)
{
jam();
/**
* Run updateNodeInfo to remove any dead nodes from list of activeNodes
* see bug#15587
*/
updateNodeInfo(fragPtr);
}
noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas; noOfRemainingLcpReplicas += fragPtr.p->noLcpReplicas;
} }
......
...@@ -2881,6 +2881,7 @@ private: ...@@ -2881,6 +2881,7 @@ private:
UintR ctransidHash[1024]; UintR ctransidHash[1024];
Uint32 c_diskless; Uint32 c_diskless;
Uint32 c_error_insert_table_id;
public: public:
/** /**
......
...@@ -3532,6 +3532,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal) ...@@ -3532,6 +3532,7 @@ void Dblqh::execLQHKEYREQ(Signal* signal)
jam(); jam();
regTcPtr->activeCreat = ZTRUE; regTcPtr->activeCreat = ZTRUE;
CRASH_INSERTION(5002); CRASH_INSERTION(5002);
CRASH_INSERTION2(5042, tabptr.i == c_error_insert_table_id);
} else { } else {
regTcPtr->activeCreat = ZFALSE; regTcPtr->activeCreat = ZFALSE;
}//if }//if
...@@ -18402,7 +18403,11 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal) ...@@ -18402,7 +18403,11 @@ Dblqh::execDUMP_STATE_ORD(Signal* signal)
return; return;
} }
if (dumpState->args[0] == DumpStateOrd::LqhErrorInsert5042 && signal->getLength() == 2)
{
c_error_insert_table_id = dumpState->args[1];
SET_ERROR_INSERT_VALUE(5042);
}
}//Dblqh::execDUMP_STATE_ORD() }//Dblqh::execDUMP_STATE_ORD()
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <NdbRestarter.hpp> #include <NdbRestarter.hpp>
#include <NdbRestarts.hpp> #include <NdbRestarts.hpp>
#include <Vector.hpp> #include <Vector.hpp>
#include <signaldata/DumpStateOrd.hpp>
int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){ int runLoadTable(NDBT_Context* ctx, NDBT_Step* step){
...@@ -409,6 +410,43 @@ int runLateCommit(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -409,6 +410,43 @@ int runLateCommit(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
/**
 * Regression test for bug#15587 (node failure during node restart).
 *
 * Sequence:
 *  1. Restart one data node with abort, leaving it in the no-start state.
 *  2. Send that node a DUMP_STATE_ORD carrying LqhErrorInsert5042 and the
 *     id of the table under test, which arms error insert 5042 in DBLQH
 *     for that table only.
 *  3. Start the node and wait until it reports started.
 *  4. Stop the test so that the other steps of the test case terminate.
 *
 * @param ctx   test context; supplies the table under test.
 * @param step  unused.
 * @return NDBT_OK once the node has started again, NDBT_FAILED as soon as
 *         any NdbRestarter call returns non-zero.
 */
int runBug15587(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;

  // args[0] selects the dump command, args[1] is the table id that the
  // 5042 error insert is restricted to.
  const Uint32 tableId = ctx->getTab()->getTableId();
  int dump[2] = { DumpStateOrd::LqhErrorInsert5042,
                  static_cast<int>(tableId) };

  // NOTE(review): getDbNodeId(1) picks the node at index 1 - presumably this
  // requires at least two data nodes; the result is not validated here.
  int nodeId = restarter.getDbNodeId(1);

  ndbout << "Restart node " << nodeId << endl;

  if (restarter.restartOneDbNode(nodeId,
                                 /** initial */ false,
                                 /** nostart */ true,
                                 /** abort   */ true))
    return NDBT_FAILED;

  // The error insert must be armed while the node is parked in no-start,
  // i.e. before it begins its restart.
  if (restarter.waitNodesNoStart(&nodeId, 1))
    return NDBT_FAILED;

  if (restarter.dumpStateOneNode(nodeId, dump, 2))
    return NDBT_FAILED;

  if (restarter.startNodes(&nodeId, 1))
    return NDBT_FAILED;

  if (restarter.waitNodesStarted(&nodeId, 1))
    return NDBT_FAILED;

  // Presumably this is what ends the sibling "UntilStopped" step.
  ctx->stopTest();
  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
"Test that one node at a time can be stopped and then restarted "\ "Test that one node at a time can be stopped and then restarted "\
...@@ -671,6 +709,13 @@ TESTCASE("LateCommit", ...@@ -671,6 +709,13 @@ TESTCASE("LateCommit",
STEP(runLateCommit); STEP(runLateCommit);
FINALIZER(runClearTable); FINALIZER(runClearTable);
} }
// Test case for bug#15587: node failure (NF) during node restart (NR).
// The table is loaded first, then an update-scan step and the restart
// step are registered, and the table is cleared afterwards.
// NOTE(review): the two STEPs presumably run concurrently; runBug15587
// calls ctx->stopTest() when it has finished.
TESTCASE("Bug15587",
"Test bug with NF during NR"){
INITIALIZER(runLoadTable);
STEP(runScanUpdateUntilStopped);
STEP(runBug15587);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -434,6 +434,10 @@ max-time: 500 ...@@ -434,6 +434,10 @@ max-time: 500
cmd: testScan cmd: testScan
args: -l 100 -n Scan-bug8262 T7 args: -l 100 -n Scan-bug8262 T7
max-time: 500
cmd: testNodeRestart
args: -n Bug15587 T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment