Commit 55faad67 authored by unknown

ndb - bug#18414

  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding


ndb/src/kernel/blocks/ERROR_codes.txt:
  New error code
ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  remove dumping of LCP info during NF
ndb/src/kernel/blocks/dbtc/DbtcMain.cpp:
  Fix timeout during ABORT when ZABORT_TIMEOUT_BREAK is outstanding
ndb/test/ndbapi/testNodeRestart.cpp:
  Add testcase for bug18414
ndb/test/ndbapi/testTimeout.cpp:
  Fix error code checking
ndb/test/run-test/daily-basic-tests.txt:
  Add testcase for bug18414
parent 5971b2b2
...@@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out. ...@@ -226,6 +226,8 @@ Delay execution of COMPLETECONF signal 2 seconds to generate time-out.
8045: (ABORTCONF only as part of take-over) 8045: (ABORTCONF only as part of take-over)
Delay execution of ABORTCONF signal 2 seconds to generate time-out. Delay execution of ABORTCONF signal 2 seconds to generate time-out.
8050: Send ZABORT_TIMEOUT_BREAK delayed
ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC ERROR CODES FOR TESTING TIME-OUT HANDLING IN DBTC
------------------------------------------------- -------------------------------------------------
......
...@@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId) ...@@ -5982,10 +5982,6 @@ void Dbdih::MASTER_LCPhandling(Signal* signal, Uint32 failedNodeId)
signal->theData[0] = 7012; signal->theData[0] = 7012;
execDUMP_STATE_ORD(signal); execDUMP_STATE_ORD(signal);
signal->theData[0] = 7015;
signal->theData[1] = 0;
execDUMP_STATE_ORD(signal);
c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__); c_lcpMasterTakeOverState.set(LMTOS_IDLE, __LINE__);
checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER); checkLocalNodefailComplete(signal, failedNodePtr.i, NF_LCP_TAKE_OVER);
......
...@@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6386,6 +6386,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
return; return;
} }
bool found = false;
OperationState tmp[16]; OperationState tmp[16];
Uint32 TloopCount = 0; Uint32 TloopCount = 0;
...@@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6393,7 +6394,31 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam(); jam();
if (tcConnectptr.i == RNIL) { if (tcConnectptr.i == RNIL) {
jam(); jam();
if (Tcheck == 0) {
#ifdef VM_TRACE
ndbout_c("found: %d Tcheck: %d apiConnectptr.p->counter: %d",
found, Tcheck, apiConnectptr.p->counter);
#endif
if (found || apiConnectptr.p->counter)
{
jam();
/**
* We sent at least one ABORT/ABORTED
* or ZABORT_TIMEOUT_BREAK is in job buffer
* wait for reception...
*/
return;
}
if (Tcheck == 1)
{
jam();
releaseAbortResources(signal);
return;
}
if (Tcheck == 0)
{
jam(); jam();
/*------------------------------------------------------------------ /*------------------------------------------------------------------
* All nodes had already reported ABORTED for all tcConnect records. * All nodes had already reported ABORTED for all tcConnect records.
...@@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6402,9 +6427,11 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
*------------------------------------------------------------------*/ *------------------------------------------------------------------*/
char buf[96]; buf[0] = 0; char buf[96]; buf[0] = 0;
char buf2[96]; char buf2[96];
BaseString::snprintf(buf, sizeof(buf), "TC %d: %d ops:", BaseString::snprintf(buf, sizeof(buf), "TC %d: %d counter: %d ops:",
__LINE__, apiConnectptr.i); __LINE__, apiConnectptr.i,
for(Uint32 i = 0; i<TloopCount; i++){ apiConnectptr.p->counter);
for(Uint32 i = 0; i<TloopCount; i++)
{
BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]); BaseString::snprintf(buf2, sizeof(buf2), "%s %d", buf, tmp[i]);
BaseString::snprintf(buf, sizeof(buf), buf2); BaseString::snprintf(buf, sizeof(buf), buf2);
} }
...@@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6412,7 +6439,9 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
ndbout_c(buf); ndbout_c(buf);
ndbrequire(false); ndbrequire(false);
releaseAbortResources(signal); releaseAbortResources(signal);
return;
} }
return; return;
}//if }//if
TloopCount++; TloopCount++;
...@@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6427,7 +6456,16 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK; signal->theData[0] = TcContinueB::ZABORT_TIMEOUT_BREAK;
signal->theData[1] = tcConnectptr.i; signal->theData[1] = tcConnectptr.i;
signal->theData[2] = apiConnectptr.i; signal->theData[2] = apiConnectptr.i;
if (ERROR_INSERTED(8050))
{
ndbout_c("sending ZABORT_TIMEOUT_BREAK delayed (%d %d)",
Tcheck, apiConnectptr.p->counter);
sendSignalWithDelay(cownref, GSN_CONTINUEB, signal, 2000, 3);
}
else
{
sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB); sendSignal(cownref, GSN_CONTINUEB, signal, 3, JBB);
}
return; return;
}//if }//if
ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord); ptrCheckGuard(tcConnectptr, ctcConnectFilesize, tcConnectRecord);
...@@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck) ...@@ -6450,7 +6488,7 @@ void Dbtc::sendAbortedAfterTimeout(Signal* signal, int Tcheck)
jam(); jam();
if (tcConnectptr.p->tcNodedata[Ti] != 0) { if (tcConnectptr.p->tcNodedata[Ti] != 0) {
TloopCount += 31; TloopCount += 31;
Tcheck = 1; found = true;
hostptr.i = tcConnectptr.p->tcNodedata[Ti]; hostptr.i = tcConnectptr.p->tcNodedata[Ti];
ptrCheckGuard(hostptr, chostFilesize, hostRecord); ptrCheckGuard(hostptr, chostFilesize, hostRecord);
if (hostptr.p->hostStatus == HS_ALIVE) { if (hostptr.p->hostStatus == HS_ALIVE) {
...@@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal) ...@@ -7007,8 +7045,6 @@ void Dbtc::execTAKE_OVERTCCONF(Signal* signal)
hostptr.i = tfailedNodeId; hostptr.i = tfailedNodeId;
ptrCheckGuard(hostptr, chostFilesize, hostRecord); ptrCheckGuard(hostptr, chostFilesize, hostRecord);
ndbout_c("received execTAKE_OVERTCCONF(%d) from %x (%x)",
tfailedNodeId, signal->getSendersBlockRef(), reference());
if (signal->getSendersBlockRef() != reference()) if (signal->getSendersBlockRef() != reference())
{ {
jam(); jam();
......
...@@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -581,6 +581,73 @@ runBug16772(NDBT_Context* ctx, NDBT_Step* step){
return ret ? NDBT_OK : NDBT_FAILED; return ret ? NDBT_OK : NDBT_FAILED;
} }
/**
 * Test step for bug#18414: roll back a transaction while a data node in the
 * same node group as the transaction coordinator is being taken down,
 * optionally with ZABORT_TIMEOUT_BREAK delayed in the coordinator.
 *
 * Each of the (at most) five iterations:
 *   1. starts a transaction, updates records and executes without commit,
 *   2. picks node1 (the node the transaction is connected to) and node2
 *      (a random node in the same node group as node1),
 *   3. on odd iterations inserts error 8050 in node1
 *      ("Send ZABORT_TIMEOUT_BREAK delayed" per ERROR_codes.txt),
 *   4. inserts error 5003 in node2 (NOTE(review): presumably makes node2
 *      fail during the abort - confirm against ERROR_codes.txt),
 *   5. rolls back, waits for node2 to reach the no-start state, resets the
 *      error inserts, restarts node2 and waits for the cluster to start,
 *   6. runs a scan update to check that the table is usable again.
 *
 * @param ctx   test context; supplies the table and is told to stop the
 *              test when fewer than two data nodes exist
 * @param step  test step; supplies the Ndb object via GETNDB
 * @return NDBT_OK on success (or when the test cannot be run),
 *         NDBT_FAILED as soon as any operation reports an error
 */
int
runBug18414(NDBT_Context* ctx, NDBT_Step* step){

  NdbRestarter restarter;
  if (restarter.getNumDbNodes() < 2)
  {
    // Needs a second data node to take down; nothing to test otherwise.
    ctx->stopTest();
    return NDBT_OK;
  }

  Ndb* pNdb = GETNDB(step);
  HugoOperations hugoOps(*ctx->getTab());
  HugoTransactions hugoTrans(*ctx->getTab());
  int loop = 0;
  do
  {
    if(hugoOps.startTransaction(pNdb) != 0)
      goto err;

    if(hugoOps.pkUpdateRecord(pNdb, 0, 128, rand()) != 0)
      goto err;

    if(hugoOps.execute_NoCommit(pNdb) != 0)
      goto err;

    int node1 = hugoOps.getTransaction()->getConnectedNodeId();
    int node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());

    if (node1 == -1 || node2 == -1)
    {
      // No usable node pair: stop looping, but do not leave the
      // transaction started above open (every other exit path closes it).
      hugoOps.closeTransaction(pNdb);
      break;
    }

    if (loop & 1)
    {
      // Every second round also delays ZABORT_TIMEOUT_BREAK in node1.
      if (restarter.insertErrorInNode(node1, 8050))
        goto err;
    }

    if (restarter.insertErrorInNode(node2, 5003))
      goto err;

    // The rollback result is intentionally not checked: the original code
    // stored it in a local that was never read.
    hugoOps.execute_Rollback(pNdb);

    if (restarter.waitNodesNoStart(&node2, 1) != 0)
      goto err;

    // Error code 0: NOTE(review) presumably clears the inserted errors.
    if (restarter.insertErrorInAllNodes(0))
      goto err;

    if (restarter.startNodes(&node2, 1) != 0)
      goto err;

    if (restarter.waitClusterStarted() != 0)
      goto err;

    if (hugoTrans.scanUpdateRecords(pNdb, 128) != 0)
      goto err;

    hugoOps.closeTransaction(pNdb);

  } while(++loop < 5);

  return NDBT_OK;

err:
  hugoOps.closeTransaction(pNdb);
  return NDBT_FAILED;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -870,6 +937,12 @@ TESTCASE("Bug16772", ...@@ -870,6 +937,12 @@ TESTCASE("Bug16772",
"Test bug with restarting before NF handling is complete"){ "Test bug with restarting before NF handling is complete"){
STEP(runBug16772); STEP(runBug16772);
} }
TESTCASE("Bug18414",
"Test bug with NF during NR"){
INITIALIZER(runLoadTable);
STEP(runBug18414);
FINALIZER(runClearTable);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -173,7 +173,10 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -173,7 +173,10 @@ int runTimeoutTrans(NDBT_Context* ctx, NDBT_Step* step){
NdbSleep_MilliSleep(sleep); NdbSleep_MilliSleep(sleep);
// Expect that transaction has timed-out // Expect that transaction has timed-out
CHECK(hugoOps.execute_Commit(pNdb) == 237); int ret = hugoOps.execute_Commit(pNdb);
CHECK(ret != 0);
NdbError err = pNdb->getNdbError(ret);
CHECK(err.classification == NdbError::TimeoutExpired);
} while(false); } while(false);
......
...@@ -458,6 +458,10 @@ max-time: 500 ...@@ -458,6 +458,10 @@ max-time: 500
cmd: testSystemRestart cmd: testSystemRestart
args: -n Bug18385 T1 args: -n Bug18385 T1
max-time: 500
cmd: testNodeRestart
args: -n Bug18414 T1
# OLD FLEX # OLD FLEX
max-time: 500 max-time: 500
cmd: flexBench cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment