Commit 793c9493 authored by unknown's avatar unknown

ndb - bug#31701 Node failure with repl. wo/ load, can lead to endless out of order buckets

  Correct check for buffer/no buffer


storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  new error code
storage/ndb/src/kernel/blocks/suma/Suma.cpp:
  correct check for buffer/no buffer
storage/ndb/test/ndbapi/test_event.cpp:
  test prg
storage/ndb/test/run-test/daily-basic-tests.txt:
  test prg
parent 4b0fe442
...@@ -11,7 +11,7 @@ Next CMVMI 9000 ...@@ -11,7 +11,7 @@ Next CMVMI 9000
Next BACKUP 10038 Next BACKUP 10038
Next DBUTIL 11002 Next DBUTIL 11002
Next DBTUX 12008 Next DBTUX 12008
Next SUMA 13001 Next SUMA 13034
TESTING NODE FAILURE, ARBITRATION TESTING NODE FAILURE, ARBITRATION
--------------------------------- ---------------------------------
......
...@@ -3650,6 +3650,8 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* signal) ...@@ -3650,6 +3650,8 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* signal)
if(m_gcp_complete_rep_count && !c_subscriber_nodes.isclear()) if(m_gcp_complete_rep_count && !c_subscriber_nodes.isclear())
{ {
CRASH_INSERTION(13033);
NodeReceiverGroup rg(API_CLUSTERMGR, c_subscriber_nodes); NodeReceiverGroup rg(API_CLUSTERMGR, c_subscriber_nodes);
sendSignal(rg, GSN_SUB_GCP_COMPLETE_REP, signal, sendSignal(rg, GSN_SUB_GCP_COMPLETE_REP, signal,
SubGcpCompleteRep::SignalLength, JBB); SubGcpCompleteRep::SignalLength, JBB);
...@@ -3670,7 +3672,7 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* signal) ...@@ -3670,7 +3672,7 @@ Suma::execSUB_GCP_COMPLETE_REP(Signal* signal)
if(m_active_buckets.get(i)) if(m_active_buckets.get(i))
continue; continue;
if(c_buckets[i].m_buffer_tail != RNIL) if (!c_subscriber_nodes.isclear())
{ {
//Uint32* dst; //Uint32* dst;
get_buffer_ptr(signal, i, gci, 0); get_buffer_ptr(signal, i, gci, 0);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <NdbAutoPtr.hpp> #include <NdbAutoPtr.hpp>
#include <NdbRestarter.hpp> #include <NdbRestarter.hpp>
#include <NdbRestarts.hpp> #include <NdbRestarts.hpp>
#include <signaldata/DumpStateOrd.hpp>
#define GETNDB(ps) ((NDBT_NdbApiStep*)ps)->getNdb() #define GETNDB(ps) ((NDBT_NdbApiStep*)ps)->getNdb()
...@@ -1758,6 +1759,85 @@ runInsertDeleteUntilStopped(NDBT_Context* ctx, NDBT_Step* step) ...@@ -1758,6 +1759,85 @@ runInsertDeleteUntilStopped(NDBT_Context* ctx, NDBT_Step* step)
return NDBT_OK; return NDBT_OK;
} }
int
runBug31701(NDBT_Context* ctx, NDBT_Step* step)
{
int result = NDBT_OK;
NdbRestarter restarter;
if (restarter.getNumDbNodes() < 2){
ctx->stopTest();
return NDBT_OK;
}
// This should really wait for applier to start...10s is likely enough
NdbSleep_SecSleep(10);
int nodeId = restarter.getDbNodeId(rand() % restarter.getNumDbNodes());
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateOneNode(nodeId, val2, 2))
return NDBT_FAILED;
restarter.insertErrorInNode(nodeId, 13033);
if (restarter.waitNodesNoStart(&nodeId, 1))
return NDBT_FAILED;
if (restarter.startNodes(&nodeId, 1))
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
int records = ctx->getNumRecords();
HugoTransactions hugoTrans(*ctx->getTab());
if(ctx->getPropertyWait("LastGCI", ~(Uint32)0))
{
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
hugoTrans.clearTable(GETNDB(step), 0);
if (hugoTrans.loadTable(GETNDB(step), 3*records, 1, true, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
if (hugoTrans.pkDelRecords(GETNDB(step), 3*records, 1, true, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
if (hugoTrans.loadTable(GETNDB(step), records, 1, true, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
if (hugoTrans.pkUpdateRecords(GETNDB(step), records, 1, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
if (hugoTrans.pkUpdateRecords(GETNDB(step), records, 1, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
if (hugoTrans.pkUpdateRecords(GETNDB(step), records, 1, 1) != 0){
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
ctx->setProperty("LastGCI", hugoTrans.m_latest_gci);
if(ctx->getPropertyWait("LastGCI", ~(Uint32)0))
{
g_err << "FAIL " << __LINE__ << endl;
return NDBT_FAILED;
}
ctx->stopTest();
return NDBT_OK;
}
NDBT_TESTSUITE(test_event); NDBT_TESTSUITE(test_event);
TESTCASE("BasicEventOperation", TESTCASE("BasicEventOperation",
"Verify that we can listen to Events" "Verify that we can listen to Events"
...@@ -1887,6 +1967,14 @@ TESTCASE("Bug27169", ""){ ...@@ -1887,6 +1967,14 @@ TESTCASE("Bug27169", ""){
STEP(runRestarterLoop); STEP(runRestarterLoop);
FINALIZER(runDropEvent); FINALIZER(runDropEvent);
} }
TESTCASE("Bug31701", ""){
INITIALIZER(runCreateEvent);
INITIALIZER(runCreateShadowTable);
STEP(runEventApplier);
STEP(runBug31701);
FINALIZER(runDropEvent);
FINALIZER(runDropShadowTable);
}
NDBT_TESTSUITE_END(test_event); NDBT_TESTSUITE_END(test_event);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -938,3 +938,7 @@ max-time: 600 ...@@ -938,3 +938,7 @@ max-time: 600
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug31525 T1 args: -n Bug31525 T1
max-time: 300
cmd: test_event
args: -n Bug31701 T1
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment