Commit cb3b2a36 authored by unknown

Merge perch.ndb.mysql.com:/home/jonas/src/50-work

into  perch.ndb.mysql.com:/home/jonas/src/51-telco-gca


storage/ndb/src/kernel/blocks/ERROR_codes.txt:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp:
  Auto merged
storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp:
  Auto merged
storage/ndb/test/src/NdbRestarter.cpp:
  Auto merged
storage/ndb/test/include/NdbRestarter.hpp:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  merge
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp:
  merge
storage/ndb/src/mgmsrv/InitConfigFileParser.cpp:
  merge
storage/ndb/test/ndbapi/testNodeRestart.cpp:
  merge
storage/ndb/test/run-test/daily-basic-tests.txt:
  merge
parents 5af0cbc7 85fdd106
...@@ -5,7 +5,7 @@ Next DBACC 3002 ...@@ -5,7 +5,7 @@ Next DBACC 3002
Next DBTUP 4024 Next DBTUP 4024
Next DBLQH 5045 Next DBLQH 5045
Next DBDICT 6007 Next DBDICT 6007
Next DBDIH 7178 Next DBDIH 7181
Next DBTC 8039 Next DBTC 8039
Next CMVMI 9000 Next CMVMI 9000
Next BACKUP 10038 Next BACKUP 10038
...@@ -73,6 +73,8 @@ Delay GCP_SAVEREQ by 10 secs ...@@ -73,6 +73,8 @@ Delay GCP_SAVEREQ by 10 secs
7177: Delay copying of sysfileData in execCOPY_GCIREQ 7177: Delay copying of sysfileData in execCOPY_GCIREQ
7180: Crash master during master-take-over in execMASTER_LCPCONF
ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING: ERROR CODES FOR TESTING NODE FAILURE, LOCAL CHECKPOINT HANDLING:
----------------------------------------------------------------- -----------------------------------------------------------------
......
...@@ -1381,6 +1381,7 @@ private: ...@@ -1381,6 +1381,7 @@ private:
Uint32 csystemnodes; Uint32 csystemnodes;
Uint32 currentgcp; Uint32 currentgcp;
Uint32 c_newest_restorable_gci; Uint32 c_newest_restorable_gci;
Uint32 c_set_initial_start_flag;
enum GcpMasterTakeOverState { enum GcpMasterTakeOverState {
GMTOS_IDLE = 0, GMTOS_IDLE = 0,
......
...@@ -61,6 +61,7 @@ void Dbdih::initData() ...@@ -61,6 +61,7 @@ void Dbdih::initData()
c_blockCommit = false; c_blockCommit = false;
c_blockCommitNo = 1; c_blockCommitNo = 1;
cntrlblockref = RNIL; cntrlblockref = RNIL;
c_set_initial_start_flag = FALSE;
}//Dbdih::initData() }//Dbdih::initData()
void Dbdih::initRecords() void Dbdih::initRecords()
......
...@@ -677,6 +677,12 @@ done: ...@@ -677,6 +677,12 @@ done:
Uint32 tmp= SYSFILE->m_restart_seq; Uint32 tmp= SYSFILE->m_restart_seq;
memcpy(sysfileData, cdata, sizeof(sysfileData)); memcpy(sysfileData, cdata, sizeof(sysfileData));
SYSFILE->m_restart_seq = tmp; SYSFILE->m_restart_seq = tmp;
if (c_set_initial_start_flag)
{
jam();
Sysfile::setInitialStartOngoing(SYSFILE->systemRestartBits);
}
} }
c_copyGCISlave.m_copyReason = reason; c_copyGCISlave.m_copyReason = reason;
...@@ -1290,6 +1296,11 @@ void Dbdih::execNDB_STTOR(Signal* signal) ...@@ -1290,6 +1296,11 @@ void Dbdih::execNDB_STTOR(Signal* signal)
// The permission is given by the master node in the alive set. // The permission is given by the master node in the alive set.
/*-----------------------------------------------------------------------*/ /*-----------------------------------------------------------------------*/
createMutexes(signal, 0); createMutexes(signal, 0);
if (cstarttype == NodeState::ST_INITIAL_NODE_RESTART)
{
jam();
c_set_initial_start_flag = TRUE; // In sysfile...
}
break; break;
case ZNDB_SPH3: case ZNDB_SPH3:
...@@ -4828,6 +4839,8 @@ void ...@@ -4828,6 +4839,8 @@ void
Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
jam(); jam();
Uint32 oldNode = c_lcpMasterTakeOverState.failedNodeId;
c_lcpMasterTakeOverState.minTableId = ~0; c_lcpMasterTakeOverState.minTableId = ~0;
c_lcpMasterTakeOverState.minFragId = ~0; c_lcpMasterTakeOverState.minFragId = ~0;
c_lcpMasterTakeOverState.failedNodeId = nodeId; c_lcpMasterTakeOverState.failedNodeId = nodeId;
...@@ -4846,7 +4859,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){ ...@@ -4846,7 +4859,20 @@ Dbdih::startLcpMasterTakeOver(Signal* signal, Uint32 nodeId){
/** /**
* Node failure during master take over... * Node failure during master take over...
*/ */
ndbout_c("Nodefail during master take over"); ndbout_c("Nodefail during master take over (old: %d)", oldNode);
}
NodeRecordPtr nodePtr;
nodePtr.i = oldNode;
if (oldNode > 0 && oldNode < MAX_NDB_NODES)
{
jam();
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
if (nodePtr.p->m_nodefailSteps.get(NF_LCP_TAKE_OVER))
{
jam();
checkLocalNodefailComplete(signal, oldNode, NF_LCP_TAKE_OVER);
}
} }
setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER); setLocalNodefailHandling(signal, nodeId, NF_LCP_TAKE_OVER);
...@@ -5862,6 +5888,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal) ...@@ -5862,6 +5888,14 @@ void Dbdih::execMASTER_LCPREQ(Signal* signal)
jamEntry(); jamEntry();
const BlockReference newMasterBlockref = req->masterRef; const BlockReference newMasterBlockref = req->masterRef;
if (newMasterBlockref != cmasterdihref)
{
jam();
ndbout_c("resending GSN_MASTER_LCPREQ");
sendSignalWithDelay(reference(), GSN_MASTER_LCPREQ, signal,
signal->getLength(), 50);
return;
}
Uint32 failedNodeId = req->failedNodeId; Uint32 failedNodeId = req->failedNodeId;
/** /**
...@@ -6158,6 +6192,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal) ...@@ -6158,6 +6192,8 @@ void Dbdih::execMASTER_LCPCONF(Signal* signal)
ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord); ptrCheckGuard(nodePtr, MAX_NDB_NODES, nodeRecord);
nodePtr.p->lcpStateAtTakeOver = lcpState; nodePtr.p->lcpStateAtTakeOver = lcpState;
CRASH_INSERTION(7180);
#ifdef VM_TRACE #ifdef VM_TRACE
ndbout_c("MASTER_LCPCONF"); ndbout_c("MASTER_LCPCONF");
printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0); printMASTER_LCP_CONF(stdout, &signal->theData[0], 0, 0);
...@@ -10716,6 +10752,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){ ...@@ -10716,6 +10752,17 @@ Dbdih::sendLCP_COMPLETE_REP(Signal* signal){
sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal, sendSignal(c_lcpState.m_masterLcpDihRef, GSN_LCP_COMPLETE_REP, signal,
LcpCompleteRep::SignalLength, JBB); LcpCompleteRep::SignalLength, JBB);
/**
* Say that an initial node restart does not need to be redone
* once node has been part of first LCP
*/
if (c_set_initial_start_flag &&
c_lcpState.m_participatingLQH.get(getOwnNodeId()))
{
jam();
c_set_initial_start_flag = FALSE;
}
} }
/*-------------------------------------------------------------------------- */ /*-------------------------------------------------------------------------- */
......
...@@ -11893,7 +11893,7 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) ...@@ -11893,7 +11893,7 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
return; return;
} }
if (getNodeState().getNodeRestartInProgress()) if (getNodeState().getNodeRestartInProgress() && cstartRecReq == ZFALSE)
{ {
GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0]; GCPSaveRef * const saveRef = (GCPSaveRef*)&signal->theData[0];
saveRef->dihPtr = dihPtr; saveRef->dihPtr = dihPtr;
...@@ -11940,7 +11940,6 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal) ...@@ -11940,7 +11940,6 @@ void Dblqh::execGCP_SAVEREQ(Signal* signal)
}//if }//if
ndbrequire(ccurrentGcprec == RNIL); ndbrequire(ccurrentGcprec == RNIL);
ccurrentGcprec = 0; ccurrentGcprec = 0;
gcpPtr.i = ccurrentGcprec; gcpPtr.i = ccurrentGcprec;
ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord); ptrCheckGuard(gcpPtr, cgcprecFileSize, gcpRecord);
......
...@@ -75,8 +75,8 @@ static BlockInfo ALL_BLOCKS[] = { ...@@ -75,8 +75,8 @@ static BlockInfo ALL_BLOCKS[] = {
{ DBTUP_REF, 1 , 4000, 4007 }, { DBTUP_REF, 1 , 4000, 4007 },
{ DBDICT_REF, 1 , 6000, 6003 }, { DBDICT_REF, 1 , 6000, 6003 },
{ NDBCNTR_REF, 0 , 1000, 1999 }, { NDBCNTR_REF, 0 , 1000, 1999 },
{ CMVMI_REF, 1 , 9000, 9999 }, // before QMGR
{ QMGR_REF, 1 , 1, 999 }, { QMGR_REF, 1 , 1, 999 },
{ CMVMI_REF, 1 , 9000, 9999 },
{ TRIX_REF, 1 , 0, 0 }, { TRIX_REF, 1 , 0, 0 },
{ BACKUP_REF, 1 , 10000, 10999 }, { BACKUP_REF, 1 , 10000, 10999 },
{ DBUTIL_REF, 1 , 11000, 11999 }, { DBUTIL_REF, 1 , 11000, 11999 },
......
...@@ -836,7 +836,7 @@ InitConfigFileParser::parse_mycnf() ...@@ -836,7 +836,7 @@ InitConfigFileParser::parse_mycnf()
opt.arg_type = REQUIRED_ARG; opt.arg_type = REQUIRED_ARG;
options.push_back(opt); options.push_back(opt);
opt.name = "api"; opt.name = "ndbapi";
opt.id = 256; opt.id = 256;
opt.value = (gptr*)malloc(sizeof(char*)); opt.value = (gptr*)malloc(sizeof(char*));
opt.var_type = GET_STR; opt.var_type = GET_STR;
...@@ -851,7 +851,6 @@ InitConfigFileParser::parse_mycnf() ...@@ -851,7 +851,6 @@ InitConfigFileParser::parse_mycnf()
mysqld = &options[idx+2]; mysqld = &options[idx+2];
api = &options[idx+3]; api = &options[idx+3];
} }
Context ctx(m_info, m_errstream); Context ctx(m_info, m_errstream);
const char *groups[]= { "cluster_config", 0 }; const char *groups[]= { "cluster_config", 0 };
......
...@@ -61,6 +61,8 @@ public: ...@@ -61,6 +61,8 @@ public:
int dumpStateAllNodes(const int * _args, int _num_args); int dumpStateAllNodes(const int * _args, int _num_args);
int getMasterNodeId(); int getMasterNodeId();
int getNextMasterNodeId(int nodeId);
int getNodeGroup(int nodeId);
int getRandomNodeSameNodeGroup(int nodeId, int randomNumber); int getRandomNodeSameNodeGroup(int nodeId, int randomNumber);
int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber); int getRandomNodeOtherNodeGroup(int nodeId, int randomNumber);
int getRandomNotMasterNodeId(int randomNumber); int getRandomNotMasterNodeId(int randomNumber);
......
...@@ -1178,6 +1178,85 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){ ...@@ -1178,6 +1178,85 @@ int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK; return NDBT_OK;
} }
/**
 * Regression test for Bug#26457.
 *
 * Each loop iteration arms error insert 7180 on the node returned by
 * getNextMasterNodeId() for the current master, then restarts the
 * current master and waits for the whole cluster to come back.
 *
 * The test is a no-op (NDBT_OK) when fewer than 4 data nodes exist.
 *
 * @param ctx   test context; only the loop count is read from it
 * @param step  unused
 * @return NDBT_OK on success, NDBT_FAILED as soon as a checked
 *         restarter call reports an error
 */
int
runBug26457(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;
  if (res.getNumDbNodes() < 4)
    return NDBT_OK;

  for (int remaining = ctx->getNumLoops(); remaining-- != 0; )
  {
    int master;
    int next;

    // The scenario needs master and its successor in different node
    // groups. While they share one, restart the successor and pick
    // again; these retries do not consume a loop iteration.
    for (;;)
    {
      master = res.getMasterNodeId();
      next = res.getNextMasterNodeId(master);
      ndbout_c("master: %d next: %d", master, next);

      if (res.getNodeGroup(master) != res.getNodeGroup(next))
        break;

      // Return value intentionally not checked here; a failure shows
      // up in waitClusterStarted() below.
      res.restartOneDbNode(next, false, false, true);
      if (res.waitClusterStarted() != 0)
        return NDBT_FAILED;
    }

    // NOTE(review): value 2 presumably selects "restart and start
    // again" on error insert - confirm against DumpStateOrd docs.
    int restartOnInsert[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 2 };
    if (res.dumpStateOneNode(next, restartOnInsert, 2) != 0)
      return NDBT_FAILED;

    if (res.insertErrorInNode(next, 7180) != 0)
      return NDBT_FAILED;

    // Take down the current master so that `next` has to take over.
    res.restartOneDbNode(master, false, false, true);
    if (res.waitClusterStarted() != 0)
      return NDBT_FAILED;
  }

  return NDBT_OK;
}
/**
 * Regression test for Bug#26481.
 *
 * Picks a random non-master data node, restarts it into the
 * "no start" state, arms error insert 7018 on it and starts it. The
 * node is then expected to end up in "no start" again, after which it
 * is started once more and the cluster must become fully started.
 *
 * @param ctx   unused
 * @param step  unused
 * @return NDBT_OK on success, NDBT_FAILED as soon as a checked
 *         restarter call reports an error
 */
int
runBug26481(NDBT_Context* ctx, NDBT_Step* step)
{
  NdbRestarter res;

  int node = res.getRandomNotMasterNodeId(rand());
  ndbout_c("node: %d", node);

  // NOTE(review): the three flags are presumably initial / nostart /
  // abort - confirm against NdbRestarter.hpp.
  if (res.restartOneDbNode(node, true, true, true))
    return NDBT_FAILED;

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
  if (res.dumpStateOneNode(node, val2, 2))
    return NDBT_FAILED;

  if (res.insertErrorInNode(node, 7018))
    return NDBT_FAILED;

  if (res.startNodes(&node, 1))
    return NDBT_FAILED;

  // Result deliberately ignored: the following waitNodesNoStart() is
  // the check that decides pass/fail for this phase.
  res.waitNodesStartPhase(&node, 1, 3);

  if (res.waitNodesNoStart(&node, 1))
    return NDBT_FAILED;

  // Second start attempt; a failure here is caught by
  // waitClusterStarted() below.
  res.startNodes(&node, 1);

  if (res.waitClusterStarted())
    return NDBT_FAILED;

  return NDBT_OK;
}
NDBT_TESTSUITE(testNodeRestart); NDBT_TESTSUITE(testNodeRestart);
TESTCASE("NoLoad", TESTCASE("NoLoad",
...@@ -1514,6 +1593,12 @@ TESTCASE("Bug25468", ""){ ...@@ -1514,6 +1593,12 @@ TESTCASE("Bug25468", ""){
TESTCASE("Bug25554", ""){ TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554); INITIALIZER(runBug25554);
} }
TESTCASE("Bug26457", ""){
INITIALIZER(runBug26457);
}
TESTCASE("Bug26481", ""){
INITIALIZER(runBug26481);
}
NDBT_TESTSUITE_END(testNodeRestart); NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){ int main(int argc, const char** argv){
......
...@@ -521,6 +521,10 @@ max-time: 1000 ...@@ -521,6 +521,10 @@ max-time: 1000
cmd: testNodeRestart cmd: testNodeRestart
args: -n Bug25554 T1 args: -n Bug25554 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug26457 T1
# #
# DICT TESTS # DICT TESTS
max-time: 1500 max-time: 1500
......
...@@ -127,6 +127,68 @@ NdbRestarter::getMasterNodeId(){ ...@@ -127,6 +127,68 @@ NdbRestarter::getMasterNodeId(){
return node; return node;
} }
/**
 * Look up the node group of a data node.
 *
 * @param nodeId  node id to search for in ndbNodes
 * @return the node_group recorded for the first ndbNodes entry whose
 *         node_id equals nodeId; -1 if not connected, if getStatus()
 *         fails, or if no entry matches
 */
int
NdbRestarter::getNodeGroup(int nodeId){
  if (!isConnected() || getStatus() != 0)
    return -1;

  size_t pos = 0;
  while (pos < ndbNodes.size())
  {
    if (ndbNodes[pos].node_id == nodeId)
      return ndbNodes[pos].node_group;
    pos++;
  }

  return -1;
}
/**
 * Find the node that follows nodeId in dynamic-id order.
 *
 * The result is the node_id of the entry whose dynamic_id is the
 * smallest one strictly greater than nodeId's dynamic_id.
 *
 * NOTE(review): when no entry has a greater dynamic_id, the search
 * value stays equal to nodeId's own dynamic_id, so the first entry
 * with that dynamic_id (normally nodeId itself) is returned. The
 * getMasterNodeId() fallback is only reached when the search value is
 * ~0. Confirm whether wrap-around to the master was intended.
 *
 * @param nodeId  node whose successor is wanted; must be in ndbNodes
 * @return successor node id as described above; -1 if not connected,
 *         if getStatus() fails, or (in builds without assert) if
 *         nodeId is not found
 */
int
NdbRestarter::getNextMasterNodeId(int nodeId){
  if (!isConnected() || getStatus() != 0)
    return -1;

  // Locate the entry describing nodeId.
  size_t pos = 0;
  while (pos < ndbNodes.size() && ndbNodes[pos].node_id != nodeId)
    pos++;

  assert(pos < ndbNodes.size());
  if (pos == ndbNodes.size())
    return -1;

  const int ownDynId = ndbNodes[pos].dynamic_id;

  // Pass 1: start from the largest dynamic id present (never smaller
  // than our own).
  int successorDynId = ownDynId;
  for (size_t n = 0; n < ndbNodes.size(); n++)
  {
    const int candidate = ndbNodes[n].dynamic_id;
    if (candidate > successorDynId)
      successorDynId = candidate;
  }

  // Pass 2: shrink towards the smallest dynamic id that is still
  // strictly greater than our own.
  for (size_t n = 0; n < ndbNodes.size(); n++)
  {
    const int candidate = ndbNodes[n].dynamic_id;
    if (ownDynId < candidate && candidate < successorDynId)
      successorDynId = candidate;
  }

  // Map the chosen dynamic id back to a node id.
  if (successorDynId != ~0)
  {
    for (size_t n = 0; n < ndbNodes.size(); n++)
    {
      if (ndbNodes[n].dynamic_id == successorDynId)
        return ndbNodes[n].node_id;
    }
  }

  return getMasterNodeId();
}
int int
NdbRestarter::getRandomNotMasterNodeId(int rand){ NdbRestarter::getRandomNotMasterNodeId(int rand){
int master = getMasterNodeId(); int master = getMasterNodeId();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment