Commit a41e4eda authored by unknown's avatar unknown

Merge perch.ndb.mysql.com:/home/jonas/src/51-work

into  perch.ndb.mysql.com:/home/jonas/src/mysql-5.1-new-ndb


storage/ndb/src/kernel/blocks/dbdict/Dbdict.cpp:
  Auto merged
storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp:
  Auto merged
storage/ndb/test/run-test/daily-basic-tests.txt:
  Auto merged
parents 71776740 79899496
......@@ -183,7 +183,7 @@ extern const GlobalSignalNumber NO_OF_SIGNAL_NAMES;
#define GSN_CNTR_START_REP 119
/* 120 not unused */
#define GSN_ROUTE_ORD 121
/* 122 unused */
#define GSN_NODE_VERSION_REP 122
/* 123 unused */
/* 124 unused */
#define GSN_CHECK_LCP_STOP 125
......
......@@ -90,4 +90,14 @@ operator<<(NdbOut& ndbout, const NodeInfo & info){
return ndbout;
}
/**
 * Summary of node versions: one (minimum, maximum) pair of version
 * numbers for each of three node types.
 */
struct NodeVersionInfo
{
// NOTE(review): 6 presumably equals 3 entries * 2 Uint32 words each,
// i.e. the size of this struct in 32-bit words - confirm before relying on it.
STATIC_CONST( DataLength = 6 );
struct
{
// Lowest version number recorded for this node type.
Uint32 m_min_version;
// Highest version number recorded for this node type.
Uint32 m_max_version;
} m_type [3]; // Indexed as NodeInfo::Type
};
#endif
......@@ -80,12 +80,13 @@ class ApiRegConf {
friend class ClusterMgr;
public:
STATIC_CONST( SignalLength = 3 + NodeState::DataLength );
STATIC_CONST( SignalLength = 4 + NodeState::DataLength );
private:
Uint32 qmgrRef;
Uint32 version; // Version of NDB node
Uint32 apiHeartbeatFrequency;
Uint32 minDbVersion;
NodeState nodeState;
};
......
......@@ -68,6 +68,7 @@ public:
// 100-105 TUP and ACC
// 200-240 UTIL
// 300-305 TRIX
QmgrErr935 = 935,
NdbfsDumpFileStat = 400,
NdbfsDumpAllFiles = 401,
NdbfsDumpOpenFiles = 402,
......
......@@ -72,5 +72,7 @@ char ndb_version_string_buf[NDB_VERSION_STRING_BUF_SZ];
#define NDBD_QMGR_SINGLEUSER_VERSION_5 MAKE_VERSION(5,0,25)
#define NDBD_NODE_VERSION_REP MAKE_VERSION(6,1,1)
#endif
......@@ -637,5 +637,6 @@ const GsnName SignalNames [] = {
,{ GSN_DICT_COMMIT_REQ, "DICT_COMMIT_REQ"}
,{ GSN_ROUTE_ORD, "ROUTE_ORD" }
,{ GSN_NODE_VERSION_REP, "NODE_VERSION_REP" }
};
const unsigned short NO_OF_SIGNAL_NAMES = sizeof(SignalNames)/sizeof(GsnName);
......@@ -23,6 +23,8 @@ Crash president when he starts to run in ArbitState 1-9.
934 : Crash president in ALLOC_NODE_ID_REQ
935 : Crash master on node failure (delayed)
and skip sending GSN_COMMIT_FAILREQ to specified node
ERROR CODES FOR TESTING NODE FAILURE, GLOBAL CHECKPOINT HANDLING:
-----------------------------------------------------------------
......
......@@ -10050,9 +10050,20 @@ void Dbdict::execSUB_START_REQ(Signal* signal)
}
OpSubEventPtr subbPtr;
Uint32 errCode = 0;
DictLockPtr loopPtr;
if (c_dictLockQueue.first(loopPtr) &&
loopPtr.p->lt->lockType == DictLockReq::NodeRestartLock)
{
jam();
errCode = 1405;
goto busy;
}
if (!c_opSubEvent.seize(subbPtr)) {
errCode = SubStartRef::Busy;
busy:
jam();
SubStartRef * ref = (SubStartRef *)signal->getDataPtrSend();
{ // fix
......@@ -10151,6 +10162,7 @@ void Dbdict::execSUB_START_REF(Signal* signal)
SubStartRef* ref = (SubStartRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_REF,
signal, SubStartRef::SignalLength2, JBB);
c_opSubEvent.release(subbPtr);
......@@ -10213,6 +10225,7 @@ void Dbdict::execSUB_START_CONF(Signal* signal)
#ifdef EVENT_PH3_DEBUG
ndbout_c("DBDICT(Coordinator) got GSN_SUB_START_CONF = (%d)", subbPtr.i);
#endif
subbPtr.p->m_sub_start_conf = *conf;
subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef));
completeSubStartReq(signal,subbPtr.i,0);
}
......@@ -10252,6 +10265,9 @@ void Dbdict::completeSubStartReq(Signal* signal,
#ifdef EVENT_DEBUG
ndbout_c("SUB_START_CONF");
#endif
SubStartConf* conf = (SubStartConf*)signal->getDataPtrSend();
* conf = subbPtr.p->m_sub_start_conf;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_START_CONF,
signal, SubStartConf::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
......@@ -10373,6 +10389,7 @@ void Dbdict::execSUB_STOP_REF(Signal* signal)
SubStopRef* ref = (SubStopRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_REF,
signal, SubStopRef::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
......@@ -10425,6 +10442,7 @@ void Dbdict::execSUB_STOP_CONF(Signal* signal)
* Coordinator
*/
ndbrequire(refToBlock(senderRef) == DBDICT);
subbPtr.p->m_sub_stop_conf = *conf;
subbPtr.p->m_reqTracker.reportConf(c_counterMgr, refToNode(senderRef));
completeSubStopReq(signal,subbPtr.i,0);
}
......@@ -10465,6 +10483,8 @@ void Dbdict::completeSubStopReq(Signal* signal,
#ifdef EVENT_DEBUG
ndbout_c("SUB_STOP_CONF");
#endif
SubStopConf* conf = (SubStopConf*)signal->getDataPtrSend();
* conf = subbPtr.p->m_sub_stop_conf;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_STOP_CONF,
signal, SubStopConf::SignalLength, JBB);
c_opSubEvent.release(subbPtr);
......@@ -10713,6 +10733,7 @@ Dbdict::execSUB_REMOVE_REF(Signal* signal)
SubRemoveRef* ref = (SubRemoveRef*) signal->getDataPtrSend();
ref->senderRef = reference();
ref->senderData = subbPtr.p->m_senderData;
ref->errorCode = err;
sendSignal(subbPtr.p->m_senderRef, GSN_SUB_REMOVE_REF,
signal, SubRemoveRef::SignalLength, JBB);
}
......
......@@ -52,6 +52,7 @@
#include <signaldata/DropTrig.hpp>
#include <signaldata/AlterTrig.hpp>
#include <signaldata/DictLock.hpp>
#include <signaldata/SumaImpl.hpp>
#include "SchemaFile.hpp"
#include <blocks/mutexes.hpp>
#include <SafeCounter.hpp>
......@@ -1632,6 +1633,10 @@ private:
Uint32 m_senderRef;
Uint32 m_senderData;
Uint32 m_errorCode;
union {
SubStartConf m_sub_start_conf;
SubStopConf m_sub_stop_conf;
};
RequestTracker m_reqTracker;
};
typedef Ptr<OpSubEvent> OpSubEventPtr;
......
......@@ -637,6 +637,7 @@ private:
void execTCGETOPSIZECONF(Signal *);
void execTC_CLOPSIZECONF(Signal *);
int handle_invalid_lcp_no(const class LcpFragRep*, ReplicaRecordPtr);
void execLCP_FRAG_REP(Signal *);
void execLCP_COMPLETE_REP(Signal *);
void execSTART_LCP_REQ(Signal *);
......
......@@ -3741,7 +3741,6 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
takeOverPtr.i = takeOverPtrI;
ptrCheckGuard(takeOverPtr, MAX_NDB_NODES, takeOverRecord);
releaseTakeOver(takeOverPtrI);
if ((takeOverPtr.p->toMasterStatus != TakeOverRecord::IDLE) &&
(takeOverPtr.p->toMasterStatus != TakeOverRecord::TO_WAIT_START_TAKE_OVER)) {
jam();
......@@ -3755,6 +3754,7 @@ void Dbdih::endTakeOver(Uint32 takeOverPtrI)
}//if
setAllowNodeStart(takeOverPtr.p->toStartingNode, true);
initTakeOver(takeOverPtr);
releaseTakeOver(takeOverPtrI);
}//Dbdih::endTakeOver()
void Dbdih::releaseTakeOver(Uint32 takeOverPtrI)
......@@ -4046,6 +4046,11 @@ void Dbdih::execNODE_FAILREP(Signal* signal)
Uint32 newMasterId = nodeFail->masterNodeId;
const Uint32 noOfFailedNodes = nodeFail->noOfNodes;
if (ERROR_INSERTED(7179))
{
CLEAR_ERROR_INSERT_VALUE;
}
/*-------------------------------------------------------------------------*/
// The first step is to convert from a bit mask to an array of failed nodes.
/*-------------------------------------------------------------------------*/
......@@ -4909,6 +4914,7 @@ void Dbdih::handleTakeOverNewMaster(Signal* signal, Uint32 takeOverPtrI)
break;
}
ndbrequire(ok);
endTakeOver(takeOverPtr.i);
}//if
}//Dbdih::handleTakeOverNewMaster()
......@@ -10256,12 +10262,42 @@ void Dbdih::execLCP_FRAG_REP(Signal* signal)
Uint32 fragId = lcpReport->fragId;
jamEntry();
if (ERROR_INSERTED(7178) && nodeId != getOwnNodeId())
{
jam();
Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
if (owng == nodeg)
{
jam();
ndbout_c("throwing away LCP_FRAG_REP from (and killing) %d", nodeId);
SET_ERROR_INSERT_VALUE(7179);
signal->theData[0] = 9999;
sendSignal(numberToRef(CMVMI, nodeId),
GSN_NDB_TAMPER, signal, 1, JBA);
return;
}
}
if (ERROR_INSERTED(7179) && nodeId != getOwnNodeId())
{
jam();
Uint32 owng =Sysfile::getNodeGroup(getOwnNodeId(), SYSFILE->nodeGroups);
Uint32 nodeg = Sysfile::getNodeGroup(nodeId, SYSFILE->nodeGroups);
if (owng == nodeg)
{
jam();
ndbout_c("throwing away LCP_FRAG_REP from %d", nodeId);
return;
}
}
CRASH_INSERTION2(7025, isMaster());
CRASH_INSERTION2(7016, !isMaster());
bool fromTimeQueue = (signal->senderBlockRef() == reference());
TabRecordPtr tabPtr;
tabPtr.i = tableId;
ptrCheckGuard(tabPtr, ctabFileSize, tabRecord);
......@@ -10463,6 +10499,37 @@ void Dbdih::findReplica(ReplicaRecordPtr& replicaPtr,
ndbrequire(false);
}//Dbdih::findReplica()
int
Dbdih::handle_invalid_lcp_no(const LcpFragRep* rep,
ReplicaRecordPtr replicaPtr)
{
ndbrequire(!isMaster());
Uint32 lcpNo = rep->lcpNo;
Uint32 lcpId = rep->lcpId;
Uint32 replicaLcpNo = replicaPtr.p->nextLcp;
Uint32 prevReplicaLcpNo = prevLcpNo(replicaLcpNo);
warningEvent("Detected previous node failure of %d during lcp",
rep->nodeId);
replicaPtr.p->nextLcp = lcpNo;
replicaPtr.p->lcpId[lcpNo] = 0;
replicaPtr.p->lcpStatus[lcpNo] = ZINVALID;
for (Uint32 i = lcpNo; i != lcpNo; i = nextLcpNo(i))
{
jam();
if (replicaPtr.p->lcpStatus[i] == ZVALID &&
replicaPtr.p->lcpId[i] >= lcpId)
{
ndbout_c("i: %d lcpId: %d", i, replicaPtr.p->lcpId[i]);
ndbrequire(false);
}
}
return 0;
}
/**
* Return true if all fragment replicas of the table have been checkpointed
* to disk (in all LQHs)
......@@ -10491,9 +10558,12 @@ Dbdih::reportLcpCompletion(const LcpFragRep* lcpReport)
ndbrequire(replicaPtr.p->lcpOngoingFlag == true);
if(lcpNo != replicaPtr.p->nextLcp){
ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
lcpNo, replicaPtr.p->nextLcp);
ndbrequire(false);
if (handle_invalid_lcp_no(lcpReport, replicaPtr))
{
ndbout_c("lcpNo = %d replicaPtr.p->nextLcp = %d",
lcpNo, replicaPtr.p->nextLcp);
ndbrequire(false);
}
}
ndbrequire(lcpNo == replicaPtr.p->nextLcp);
ndbrequire(lcpNo < MAX_LCP_STORED);
......
......@@ -446,6 +446,15 @@ private:
StopReq c_stopReq;
bool check_multi_node_shutdown(Signal* signal);
#ifdef ERROR_INSERT
Uint32 c_error_insert_extra;
#endif
void recompute_version_info(Uint32 type);
void recompute_version_info(Uint32 type, Uint32 version);
void execNODE_VERSION_REP(Signal* signal);
void sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr);
};
#endif
......@@ -37,6 +37,13 @@ void Qmgr::initData()
setHbApiDelay(hbDBAPI);
c_connectedNodes.set(getOwnNodeId());
c_stopReq.senderRef = 0;
/**
* Check sanity for NodeVersion
*/
ndbrequire((Uint32)NodeInfo::DB == 0);
ndbrequire((Uint32)NodeInfo::API == 1);
ndbrequire((Uint32)NodeInfo::MGM == 2);
}//Qmgr::initData()
void Qmgr::initRecords()
......@@ -107,6 +114,7 @@ Qmgr::Qmgr(Block_context& ctx)
addRecSignal(GSN_DIH_RESTARTREF, &Qmgr::execDIH_RESTARTREF);
addRecSignal(GSN_DIH_RESTARTCONF, &Qmgr::execDIH_RESTARTCONF);
addRecSignal(GSN_NODE_VERSION_REP, &Qmgr::execNODE_VERSION_REP);
initData();
}//Qmgr::Qmgr()
......
......@@ -260,6 +260,9 @@ void Qmgr::execSTTOR(Signal* signal)
case 1:
initData(signal);
startphase1(signal);
recompute_version_info(NodeInfo::DB);
recompute_version_info(NodeInfo::API);
recompute_version_info(NodeInfo::MGM);
return;
case 7:
cactivateApiCheck = 1;
......@@ -765,6 +768,7 @@ void Qmgr::execCM_REGREQ(Signal* signal)
*/
UintR TdynId = ++c_maxDynamicId;
setNodeInfo(addNodePtr.i).m_version = startingVersion;
recompute_version_info(NodeInfo::DB, startingVersion);
addNodePtr.p->ndynamicId = TdynId;
/**
......@@ -1503,7 +1507,8 @@ void Qmgr::execCM_NODEINFOCONF(Signal* signal)
replyNodePtr.p->ndynamicId = dynamicId;
replyNodePtr.p->blockRef = signal->getSendersBlockRef();
setNodeInfo(replyNodePtr.i).m_version = version;
recompute_version_info(NodeInfo::DB, version);
if(!c_start.m_nodes.done()){
jam();
return;
......@@ -1602,6 +1607,7 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
}
sendCmAckAdd(signal, nodePtr.i, CmAdd::Prepare);
sendApiVersionRep(signal, nodePtr);
/* President has prepared us */
CmNodeInfoConf * conf = (CmNodeInfoConf*)signal->getDataPtrSend();
......@@ -1613,6 +1619,29 @@ Qmgr::cmAddPrepare(Signal* signal, NodeRecPtr nodePtr, const NodeRec * self){
DEBUG_START(GSN_CM_NODEINFOCONF, refToNode(nodePtr.p->blockRef), "");
}
/**
 * Tell the QMGR block on nodePtr's node which versions the non-DB nodes
 * known to this node are running.
 *
 * One NODE_VERSION_REP (node id, version) is sent per node whose type is
 * not NodeInfo::DB and whose recorded version is non-zero.  Nothing is
 * sent when the receiving node's own version is below
 * NDBD_NODE_VERSION_REP.
 */
void
Qmgr::sendApiVersionRep(Signal* signal, NodeRecPtr nodePtr)
{
  if (getNodeInfo(nodePtr.i).m_version < NDBD_NODE_VERSION_REP)
    return;

  jam();
  const Uint32 receiver = calcQmgrBlockRef(nodePtr.i);
  Uint32 nodeId = 1;
  while (nodeId < MAX_NODES)
  {
    jam();
    const Uint32 nodeVersion = getNodeInfo(nodeId).m_version;
    const Uint32 nodeType = getNodeInfo(nodeId).m_type;
    const bool report = (nodeType != NodeInfo::DB) && (nodeVersion != 0);
    if (report)
    {
      jam();
      signal->theData[0] = nodeId;
      signal->theData[1] = nodeVersion;
      sendSignal(receiver, GSN_NODE_VERSION_REP, signal, 2, JBB);
    }
    nodeId++;
  }
}
void
Qmgr::sendCmAckAdd(Signal * signal, Uint32 nodeId, CmAdd::RequestType type){
......@@ -2401,7 +2430,9 @@ void Qmgr::sendApiFailReq(Signal* signal, Uint16 failedNodeNo)
* SECONDS.
*-------------------------------------------------------------------------*/
setNodeInfo(failedNodePtr.i).m_heartbeat_cnt= 0;
setNodeInfo(failedNodePtr.i).m_version = 0;
recompute_version_info(getNodeInfo(failedNodePtr.i).m_type);
CloseComReqConf * const closeCom = (CloseComReqConf *)&signal->theData[0];
closeCom->xxxBlockRef = reference();
......@@ -2707,7 +2738,6 @@ void Qmgr::execAPI_REGREQ(Signal* signal)
}
setNodeInfo(apiNodePtr.i).m_version = version;
setNodeInfo(apiNodePtr.i).m_heartbeat_cnt= 0;
ApiRegConf * const apiRegConf = (ApiRegConf *)&signal->theData[0];
......@@ -2728,8 +2758,9 @@ void Qmgr::execAPI_REGREQ(Signal* signal)
apiRegConf->nodeState.dynamicId = -dynamicId;
}
}
NodeVersionInfo info = getNodeVersionInfo();
apiRegConf->minDbVersion = info.m_type[NodeInfo::DB].m_min_version;
apiRegConf->nodeState.m_connected_nodes.assign(c_connectedNodes);
sendSignal(ref, GSN_API_REGCONF, signal, ApiRegConf::SignalLength, JBB);
if (apiNodePtr.p->phase == ZAPI_INACTIVE &&
......@@ -2748,6 +2779,33 @@ void Qmgr::execAPI_REGREQ(Signal* signal)
signal->theData[0] = apiNodePtr.i;
sendSignal(CMVMI_REF, GSN_ENABLE_COMORD, signal, 1, JBA);
recompute_version_info(type, version);
if (info.m_type[NodeInfo::DB].m_min_version >= NDBD_NODE_VERSION_REP)
{
jam();
NodeReceiverGroup rg(QMGR, c_clusterNodes);
rg.m_nodes.clear(getOwnNodeId());
signal->theData[0] = apiNodePtr.i;
signal->theData[1] = version;
sendSignal(rg, GSN_NODE_VERSION_REP, signal, 2, JBB);
}
else
{
Uint32 i = 0;
while((i = c_clusterNodes.find(i + 1)) != NdbNodeBitmask::NotFound)
{
jam();
if (i == getOwnNodeId())
continue;
if (getNodeInfo(i).m_version >= NDBD_NODE_VERSION_REP)
{
jam();
sendSignal(calcQmgrBlockRef(i), GSN_NODE_VERSION_REP, signal, 2,JBB);
}
}
}
signal->theData[0] = apiNodePtr.i;
EXECUTE_DIRECT(NDBCNTR, GSN_API_START_REP, signal, 1);
}
......@@ -2783,6 +2841,76 @@ Qmgr::execAPI_VERSION_REQ(Signal * signal) {
ApiVersionConf::SignalLength, JBB);
}
/**
 * Handle NODE_VERSION_REP: theData[0] is a node id, theData[1] the version
 * reported for it.  The version is stored in the node info table and folded
 * into the per-type min/max summary.  Reports carrying a node id of
 * MAX_NODES or above are silently ignored.
 */
void
Qmgr::execNODE_VERSION_REP(Signal* signal)
{
  jamEntry();
  const Uint32 reportedNode = signal->theData[0];
  const Uint32 reportedVersion = signal->theData[1];

  if (!(reportedNode < MAX_NODES))
    return;

  jam();
  const Uint32 nodeType = getNodeInfo(reportedNode).m_type;
  setNodeInfo(reportedNode).m_version = reportedVersion;
  recompute_version_info(nodeType, reportedVersion);
}
/**
 * Incrementally widen the min/max version summary of one node type so that
 * it covers 'version'.
 *
 * Types other than NodeInfo::DB, NodeInfo::API and NodeInfo::MGM are
 * ignored.  A stored minimum of 0 is treated as "not set yet" and is
 * always overwritten.
 */
void
Qmgr::recompute_version_info(Uint32 type, Uint32 version)
{
  NodeVersionInfo& summary = setNodeVersionInfo();

  const bool knownType = (type == NodeInfo::DB ||
                          type == NodeInfo::API ||
                          type == NodeInfo::MGM);
  if (!knownType)
    return;

  Uint32 & lowest = summary.m_type[type].m_min_version;
  Uint32 & highest = summary.m_type[type].m_max_version;

  if (lowest == 0 || version < lowest)
    lowest = version;

  if (version > highest)
    highest = version;
}
/**
 * Rebuild the min/max version summary of one node type from scratch by
 * scanning the node info table.
 *
 * Nodes with a recorded version of 0 are skipped.  For NodeInfo::DB the
 * scan stops at MAX_NDB_NODES, otherwise at MAX_NODES.  When no node of
 * the type has a version, both minimum and maximum end up as 0.  Types
 * other than DB, API and MGM are ignored.
 */
void
Qmgr::recompute_version_info(Uint32 type)
{
  if (type != NodeInfo::DB &&
      type != NodeInfo::API &&
      type != NodeInfo::MGM)
  {
    return;
  }

  const Uint32 unset = ~(Uint32)0;
  Uint32 lowest = unset;
  Uint32 highest = 0;
  const Uint32 limit = (type == NodeInfo::DB) ? MAX_NDB_NODES : MAX_NODES;

  for (Uint32 nodeId = 1; nodeId < limit; nodeId++)
  {
    if (!(getNodeInfo(nodeId).m_type == type))
      continue;

    const Uint32 nodeVersion = getNodeInfo(nodeId).m_version;
    if (nodeVersion == 0)
      continue;

    if (nodeVersion < lowest)
      lowest = nodeVersion;
    if (nodeVersion > highest)
      highest = nodeVersion;
  }

  NodeVersionInfo& summary = setNodeVersionInfo();
  summary.m_type[type].m_max_version = highest;
  if (lowest == unset)
    summary.m_type[type].m_min_version = 0;
  else
    summary.m_type[type].m_min_version = lowest;
}
#if 0
bool
......@@ -2922,6 +3050,17 @@ void Qmgr::failReportLab(Signal* signal, Uint16 aFailedNode,
systemErrorLab(signal, __LINE__);
return;
}//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
TnoFailedNodes = cnoFailedNodes;
failReport(signal, failedNodePtr.i, (UintR)ZTRUE, aFailCause);
if (cpresident == getOwnNodeId()) {
......@@ -3008,6 +3147,16 @@ void Qmgr::execPREP_FAILREQ(Signal* signal)
return;
}//if
if (getNodeState().startLevel < NodeState::SL_STARTED)
{
jam();
CRASH_INSERTION(932);
char buf[100];
BaseString::snprintf(buf, 100, "Node failure during restart");
progError(__LINE__, NDBD_EXIT_SR_OTHERNODEFAILED, buf);
ndbrequire(false);
}
guard0 = cnoPrepFailedNodes - 1;
arrGuard(guard0, MAX_NDB_NODES);
for (Tindex = 0; Tindex <= guard0; Tindex++) {
......@@ -3185,6 +3334,18 @@ Qmgr::sendCommitFailReq(Signal* signal)
for (nodePtr.i = 1; nodePtr.i < MAX_NDB_NODES; nodePtr.i++) {
jam();
ptrAss(nodePtr, nodeRec);
#ifdef ERROR_INSERT
if (ERROR_INSERTED(935) && nodePtr.i == c_error_insert_extra)
{
ndbout_c("skipping node %d", c_error_insert_extra);
CLEAR_ERROR_INSERT_VALUE;
signal->theData[0] = 9999;
sendSignalWithDelay(CMVMI_REF, GSN_NDB_TAMPER, signal, 1000, 1);
continue;
}
#endif
if (nodePtr.p->phase == ZRUNNING) {
jam();
nodePtr.p->sendCommitFailReqStatus = Q_ACTIVE;
......@@ -3255,6 +3416,33 @@ void Qmgr::execPREP_FAILREF(Signal* signal)
return;
}//Qmgr::execPREP_FAILREF()
/**
 * Remove from dst[0..dstcnt) every node id that also occurs in
 * src[0..srccnt), compacting the survivors to the front of dst in their
 * original order.
 *
 * The earlier implementation compared each entry against dst[j] instead of
 * src[j]: src was never consulted, entries were dropped based on dst's own
 * contents, and dst could be read beyond dstcnt when srccnt > dstcnt.
 *
 * @param dstcnt  number of valid entries in dst
 * @param dst     array to filter in place
 * @param srccnt  number of valid entries in src
 * @param src     node ids to remove from dst
 * @return        number of entries left in dst
 */
static
Uint32
clear_nodes(Uint32 dstcnt, Uint16 dst[], Uint32 srccnt, const Uint16 src[])
{
  if (srccnt == 0)
    return dstcnt;

  Uint32 pos = 0;
  for (Uint32 i = 0; i<dstcnt; i++)
  {
    const Uint16 node = dst[i];
    bool remove = false;
    for (Uint32 j = 0; j<srccnt; j++)
    {
      if (node == src[j])
      {
        remove = true;
        break;
      }
    }
    if (!remove)
    {
      // pos <= i always holds, so this never overwrites an unread entry
      dst[pos++] = node;
    }
  }
  return pos;
}
/*---------------------------------------------------------------------------*/
/* THE PRESIDENT IS NOW COMMITTING THE PREVIOUSLY PREPARED NODE FAILURE. */
/*---------------------------------------------------------------------------*/
......@@ -3342,19 +3530,18 @@ void Qmgr::execCOMMIT_FAILREQ(Signal* signal)
NodeFailRep::SignalLength, JBB);
}//if
}//for
if (cpresident != getOwnNodeId()) {
jam();
cnoFailedNodes = cnoCommitFailedNodes - cnoFailedNodes;
if (cnoFailedNodes > 0) {
jam();
guard0 = cnoFailedNodes - 1;
arrGuard(guard0 + cnoCommitFailedNodes, MAX_NDB_NODES);
for (Tj = 0; Tj <= guard0; Tj++) {
jam();
cfailedNodes[Tj] = cfailedNodes[Tj + cnoCommitFailedNodes];
}//for
}//if
}//if
/**
* Remove committed nodes from failed/prepared
*/
cnoFailedNodes = clear_nodes(cnoFailedNodes,
cfailedNodes,
cnoCommitFailedNodes,
ccommitFailedNodes);
cnoPrepFailedNodes = clear_nodes(cnoPrepFailedNodes,
cprepFailedNodes,
cnoCommitFailedNodes,
ccommitFailedNodes);
cnoCommitFailedNodes = 0;
}//if
/**-----------------------------------------------------------------------
......@@ -4733,6 +4920,14 @@ Qmgr::execDUMP_STATE_ORD(Signal* signal)
default:
;
}//switch
#ifdef ERROR_INSERT
if (signal->theData[0] == 935 && signal->getLength() == 2)
{
SET_ERROR_INSERT_VALUE(935);
c_error_insert_extra = signal->theData[1];
}
#endif
}//Qmgr::execDUMP_STATE_ORD()
void Qmgr::execSET_VAR_REQ(Signal* signal)
......
......@@ -230,7 +230,6 @@ Suma::execREAD_CONFIG_REQ(Signal* signal)
c_startup.m_wait_handover= false;
c_failedApiNodes.clear();
c_startup.m_restart_server_node_id = 0; // Server for my NR
ReadConfigConf * conf = (ReadConfigConf*)signal->getDataPtrSend();
conf->senderRef = reference();
......@@ -261,6 +260,14 @@ Suma::execSTTOR(Signal* signal) {
if(startphase == 5)
{
if (ERROR_INSERTED(13029)) /* Hold startphase 5 */
{
sendSignalWithDelay(SUMA_REF, GSN_STTOR, signal,
30, signal->getLength());
DBUG_VOID_RETURN;
}
c_startup.m_restart_server_node_id = 0;
getNodeGroupMembers(signal);
if (typeOfStart == NodeState::ST_NODE_RESTART ||
typeOfStart == NodeState::ST_INITIAL_NODE_RESTART)
......@@ -373,6 +380,8 @@ Suma::execSUMA_START_ME_REF(Signal* signal)
infoEvent("Suma: node %d refused %d",
c_startup.m_restart_server_node_id, ref->errorCode);
c_startup.m_restart_server_node_id++;
send_start_me_req(signal);
}
......@@ -887,6 +896,22 @@ Suma::execDUMP_STATE_ORD(Signal* signal){
ptr->m_buffer_head.m_page_id);
}
}
if (tCase == 8006)
{
SET_ERROR_INSERT_VALUE(13029);
}
if (tCase == 8007)
{
c_startup.m_restart_server_node_id = MAX_NDB_NODES + 1;
SET_ERROR_INSERT_VALUE(13029);
}
if (tCase == 8008)
{
CLEAR_ERROR_INSERT_VALUE;
}
}
/*************************************************************
......@@ -1092,14 +1117,14 @@ Suma::execSUB_CREATE_REQ(Signal* signal)
}
} else {
if (c_startup.m_restart_server_node_id &&
refToNode(subRef) != c_startup.m_restart_server_node_id)
subRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
sendSubStartRef(signal, 1405);
sendSubCreateRef(signal, 1415);
DBUG_VOID_RETURN;
}
// Check that id/key is unique
......@@ -2232,14 +2257,17 @@ Suma::execSUB_START_REQ(Signal* signal){
key.m_subscriptionKey = req->subscriptionKey;
if (c_startup.m_restart_server_node_id &&
refToNode(senderRef) != c_startup.m_restart_server_node_id)
senderRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
sendSubStartRef(signal, 1405);
Uint32 err = c_startup.m_restart_server_node_id != RNIL ? 1405 :
SubStartRef::NF_FakeErrorREF;
sendSubStartRef(signal, err);
DBUG_VOID_RETURN;
}
......@@ -2454,25 +2482,28 @@ Suma::execSUB_STOP_REQ(Signal* signal){
DBUG_VOID_RETURN;
}
if(!c_subscriptions.find(subPtr, key)){
jam();
DBUG_PRINT("error", ("not found"));
sendSubStopRef(signal, 1407);
DBUG_VOID_RETURN;
}
if (c_startup.m_restart_server_node_id &&
refToNode(senderRef) != c_startup.m_restart_server_node_id)
senderRef != calcSumaBlockRef(c_startup.m_restart_server_node_id))
{
/**
* only allow "restart_server" Suma's to come through
* for restart purposes
*/
jam();
sendSubStopRef(signal, 1405);
Uint32 err = c_startup.m_restart_server_node_id != RNIL ? 1405 :
SubStopRef::NF_FakeErrorREF;
sendSubStopRef(signal, err);
DBUG_VOID_RETURN;
}
if(!c_subscriptions.find(subPtr, key)){
jam();
DBUG_PRINT("error", ("not found"));
sendSubStopRef(signal, 1407);
DBUG_VOID_RETURN;
}
if (subPtr.p->m_state == Subscription::LOCKED) {
jam();
DBUG_PRINT("error", ("locked"));
......
......@@ -122,6 +122,8 @@ Suma::Suma(Block_context& ctx) :
addRecSignal(GSN_SUB_GCP_COMPLETE_REP,
&Suma::execSUB_GCP_COMPLETE_REP);
c_startup.m_restart_server_node_id = RNIL; // Server for my NR
}
Suma::~Suma()
......
......@@ -36,6 +36,7 @@ enum restartStates {initial_state,
struct GlobalData {
Uint32 m_restart_seq; //
NodeVersionInfo m_versionInfo;
NodeInfo m_nodeInfo[MAX_NODES];
Signal VMSignals[1]; // Owned by FastScheduler::
......
......@@ -403,6 +403,9 @@ protected:
const NodeInfo & getNodeInfo(NodeId nodeId) const;
NodeInfo & setNodeInfo(NodeId);
const NodeVersionInfo& getNodeVersionInfo() const;
NodeVersionInfo& setNodeVersionInfo();
/**********************
* Xfrm stuff
*/
......@@ -709,6 +712,18 @@ SimulatedBlock::getNodeInfo(NodeId nodeId) const {
return globalData.m_nodeInfo[nodeId];
}
/**
 * Read-only access to the node version summary stored in globalData.
 */
inline
const NodeVersionInfo &
SimulatedBlock::getNodeVersionInfo() const {
  const NodeVersionInfo & summary = globalData.m_versionInfo;
  return summary;
}
/**
 * Writable access to the node version summary stored in globalData.
 */
inline
NodeVersionInfo &
SimulatedBlock::setNodeVersionInfo() {
  NodeVersionInfo & summary = globalData.m_versionInfo;
  return summary;
}
inline
void
SimulatedBlock::EXECUTE_DIRECT(Uint32 block,
......
......@@ -932,6 +932,81 @@ int runBug20185(NDBT_Context* ctx, NDBT_Step* step){
return NDBT_OK;
}
/**
 * Bug#24717 scenario: repeatedly restart a random non-master data node into
 * the "no start" state, send it dump code 9002 together with this API
 * node's id, start it again and run committed reads while it comes up.
 *
 * Only a failing dump command fails the test; the results of the restart
 * steps and of the reads are deliberately not checked here.
 */
int runBug24717(NDBT_Context* ctx, NDBT_Step* step){
  int remaining = ctx->getNumLoops();
  const int records = ctx->getNumRecords();
  (void)records; // fetched but not used by this scenario
  NdbRestarter restarter;
  Ndb* pNdb = GETNDB(step);
  HugoTransactions hugoTrans(*ctx->getTab());

  const Uint32 apiNode = refToNode(pNdb->getReference());
  int dump[] = { 9002, (int)apiNode };

  while (remaining != 0)
  {
    int victim = restarter.getRandomNotMasterNodeId(rand());
    restarter.restartOneDbNode(victim, false, true, true);
    restarter.waitNodesNoStart(&victim, 1);

    const int dumpFailed = restarter.dumpStateOneNode(victim, dump, 2);
    if (dumpFailed)
    {
      return NDBT_FAILED;
    }

    restarter.startNodes(&victim, 1);

    Uint32 batch = 0;
    while (batch < 100)
    {
      hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead);
      batch++;
    }

    restarter.waitClusterStarted();
    remaining--;
  }

  return NDBT_OK;
}
/**
 * Bug#25364 scenario (skipped with NDBT_OK when fewer than 4 data nodes):
 * arm dump code 935 on the master with a victim from another node group,
 * enable restart-on-error-insert on the master, then restart a node from
 * the victim's node group.  The master and that node are expected to reach
 * "no start"; both are then started again and must come up.
 */
int runBug25364(NDBT_Context* ctx, NDBT_Step* step){
  NdbRestarter restarter;
  Ndb* pNdb = GETNDB(step);
  (void)pNdb; // fetched but not used by this scenario
  int remaining = ctx->getNumLoops();

  if (restarter.getNumDbNodes() < 4)
  {
    return NDBT_OK;
  }

  int restartOnError[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

  while (remaining != 0)
  {
    const int master = restarter.getMasterNodeId();
    const int victim = restarter.getRandomNodeOtherNodeGroup(master, rand());
    const int second = restarter.getRandomNodeSameNodeGroup(victim, rand());

    int skipCommitFail[] = { 935, victim };
    if (restarter.dumpStateOneNode(master, skipCommitFail, 2) != 0)
      return NDBT_FAILED;

    if (restarter.dumpStateOneNode(master, restartOnError, 2) != 0)
      return NDBT_FAILED;

    if (restarter.restartOneDbNode(second, false, true, true) != 0)
      return NDBT_FAILED;

    int downNodes[2] = { master, second };
    if (restarter.waitNodesNoStart(downNodes, 2) != 0)
      return NDBT_FAILED;

    restarter.startNodes(downNodes, 2);

    if (restarter.waitNodesStarted(downNodes, 2) != 0)
      return NDBT_FAILED;

    remaining--;
  }

  return NDBT_OK;
}
int
runBug21271(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
......@@ -996,40 +1071,111 @@ runBug24543(NDBT_Context* ctx, NDBT_Step* step){
}
return NDBT_OK;
}
int runBug24717(NDBT_Context* ctx, NDBT_Step* step){
int runBug25468(NDBT_Context* ctx, NDBT_Step* step){
int result = NDBT_OK;
int loops = ctx->getNumLoops();
int records = ctx->getNumRecords();
NdbRestarter restarter;
Ndb* pNdb = GETNDB(step);
HugoTransactions hugoTrans(*ctx->getTab());
for (int i = 0; i<loops; i++)
{
int master = restarter.getMasterNodeId();
int node1, node2;
switch(i % 5){
case 0:
node1 = master;
node2 = restarter.getRandomNodeSameNodeGroup(master, rand());
break;
case 1:
node1 = restarter.getRandomNodeSameNodeGroup(master, rand());
node2 = master;
break;
case 2:
case 3:
case 4:
node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
if (node1 == -1)
node1 = master;
node2 = restarter.getRandomNodeSameNodeGroup(node1, rand());
break;
}
int dump[] = { 9000, 0 } ;
Uint32 ownNode = refToNode(pNdb->getReference());
dump[1] = ownNode;
ndbout_c("node1: %d node2: %d master: %d", node1, node2, master);
for (; loops; loops --)
{
int nodeId = restarter.getRandomNotMasterNodeId(rand());
restarter.restartOneDbNode(nodeId, false, true, true);
restarter.waitNodesNoStart(&nodeId, 1);
if (restarter.dumpStateOneNode(nodeId, dump, 2))
int val2[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };
if (restarter.dumpStateOneNode(node2, val2, 2))
return NDBT_FAILED;
if (restarter.insertErrorInNode(node1, 7178))
return NDBT_FAILED;
int val1 = 7099;
if (restarter.dumpStateOneNode(master, &val1, 1))
return NDBT_FAILED;
if (restarter.waitNodesNoStart(&node2, 1))
return NDBT_FAILED;
if (restarter.startAll())
return NDBT_FAILED;
if (restarter.waitClusterStarted())
return NDBT_FAILED;
restarter.startNodes(&nodeId, 1);
for (Uint32 i = 0; i < 100; i++)
{
hugoTrans.pkReadRecords(pNdb, 100, 1, NdbOperation::LM_CommittedRead);
}
int reset[2] = { 9001, 0 };
restarter.dumpStateOneNode(nodeId, reset, 2);
restarter.waitClusterStarted();
}
return NDBT_OK;
}
/**
 * Bug#25554 scenario (skipped with NDBT_OK when fewer than 4 data nodes):
 * restart a node from another node group than the master, insert error
 * 7141 in the master and error 932 in the restarting node, then start the
 * node.  Both the master and the node are expected to reach "no start";
 * they are then started again and the cluster must come up.
 */
int runBug25554(NDBT_Context* ctx, NDBT_Step* step){
  const int loops = ctx->getNumLoops();
  const int records = ctx->getNumRecords();
  (void)records; // fetched but not used by this scenario
  NdbRestarter restarter;

  if (restarter.getNumDbNodes() < 4)
  {
    return NDBT_OK;
  }

  int iteration = 0;
  while (iteration < loops)
  {
    const int master = restarter.getMasterNodeId();
    int node1 = restarter.getRandomNodeOtherNodeGroup(master, rand());
    restarter.restartOneDbNode(node1, false, true, true);

    int restartOnError[] = { DumpStateOrd::CmvmiSetRestartOnErrorInsert, 1 };

    if (restarter.dumpStateOneNode(master, restartOnError, 2) != 0)
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(master, 7141) != 0)
      return NDBT_FAILED;

    if (restarter.waitNodesNoStart(&node1, 1) != 0)
      return NDBT_FAILED;

    if (restarter.dumpStateOneNode(node1, restartOnError, 2) != 0)
      return NDBT_FAILED;

    if (restarter.insertErrorInNode(node1, 932) != 0)
      return NDBT_FAILED;

    if (restarter.startNodes(&node1, 1) != 0)
      return NDBT_FAILED;

    int downNodes[] = { master, node1 };
    if (restarter.waitNodesNoStart(downNodes, 2) != 0)
      return NDBT_FAILED;

    if (restarter.startNodes(downNodes, 2) != 0)
      return NDBT_FAILED;

    if (restarter.waitClusterStarted() != 0)
      return NDBT_FAILED;

    iteration++;
  }

  return NDBT_OK;
}
......@@ -1360,6 +1506,15 @@ TESTCASE("Bug21271",
TESTCASE("Bug24717", ""){
INITIALIZER(runBug24717);
}
TESTCASE("Bug25364", ""){
INITIALIZER(runBug25364);
}
TESTCASE("Bug25468", ""){
INITIALIZER(runBug25468);
}
TESTCASE("Bug25554", ""){
INITIALIZER(runBug25554);
}
NDBT_TESTSUITE_END(testNodeRestart);
int main(int argc, const char** argv){
......
......@@ -513,6 +513,14 @@ max-time: 1000
cmd: testNodeRestart
args: -n Bug24717 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug25364 T1
max-time: 1000
cmd: testNodeRestart
args: -n Bug25554 T1
#
# DICT TESTS
max-time: 1500
......@@ -764,6 +772,10 @@ max-time: 1500
cmd: testSystemRestart
args: -n Bug24664
max-time: 1000
cmd: testNodeRestart
args: -n Bug25468 T1
# OLD FLEX
max-time: 500
cmd: flexBench
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment