Commit d9faad9c authored by unknown

bug#9924 - ndb backup abort handling

  Redo abort handling according to the description in Backup.txt
bug#9960 - ndb backup
      increase the wait-completed timeout to 48 hours


ndb/include/kernel/signaldata/BackupImpl.hpp:
  Add nodeid to reply to be able to fake reply during NF
ndb/include/kernel/signaldata/BackupSignalData.hpp:
  new error codes
ndb/src/common/debugger/signaldata/BackupImpl.cpp:
  fix printout
ndb/src/kernel/blocks/backup/Backup.cpp:
  bug#9924 - ndb backup abort handling
    Redo abort handling according to the description in Backup.txt
ndb/src/kernel/blocks/backup/Backup.hpp:
  bug#9924 - ndb backup abort handling
    Redo abort handling according to the description in Backup.txt
ndb/src/kernel/blocks/backup/Backup.txt:
  bug#9924 - ndb backup abort handling
    Redo abort handling according to the description in Backup.txt
ndb/src/kernel/blocks/backup/BackupInit.cpp:
  bug#9924 - ndb backup abort handling
    Redo abort handling according to the description in Backup.txt
ndb/src/kernel/blocks/cmvmi/Cmvmi.cpp:
  Init own version
ndb/src/mgmapi/mgmapi.cpp:
  bug#9960 - ndb backup
    increase the wait-completed timeout to 48 hours
ndb/src/mgmsrv/MgmtSrvr.cpp:
  Handle node failures actively
    (mainly for backup...)
ndb/src/mgmsrv/MgmtSrvr.hpp:
  Handle node failures actively
    (mainly for backup...)
ndb/src/mgmsrv/MgmtSrvrGeneralSignalHandling.cpp:
  Handle node failures actively
    (mainly for backup...)
ndb/src/ndbapi/ndberror.c:
  new error codes
ndb/test/ndbapi/testBackup.cpp:
  fix return codes
ndb/test/run-test/daily-basic-tests.txt:
  Add failure test cases to autotest
ndb/test/src/NdbBackup.cpp:
  fix error codes
  introduce checking of backup resources after each test
parent 135c7b5a
......@@ -75,7 +75,7 @@ class DefineBackupRef {
friend bool printDEFINE_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( SignalLength = 3 );
STATIC_CONST( SignalLength = 4 );
enum ErrorCode {
Undefined = 1340,
......@@ -92,6 +92,7 @@ private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 errorCode;
Uint32 nodeId;
};
class DefineBackupConf {
......@@ -158,7 +159,7 @@ class StartBackupRef {
friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( SignalLength = 4 );
STATIC_CONST( SignalLength = 5 );
enum ErrorCode {
FailedToAllocateTriggerRecord = 1
......@@ -168,6 +169,7 @@ private:
Uint32 backupPtr;
Uint32 signalNo;
Uint32 errorCode;
Uint32 nodeId;
};
class StartBackupConf {
......@@ -232,9 +234,8 @@ public:
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 tableId;
Uint32 fragmentNo;
Uint32 errorCode;
Uint32 nodeId;
};
class BackupFragmentConf {
......@@ -296,12 +297,13 @@ class StopBackupRef {
friend bool printSTOP_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( SignalLength = 3 );
STATIC_CONST( SignalLength = 4 );
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 errorCode;
Uint32 nodeId;
};
class StopBackupConf {
......
......@@ -240,6 +240,9 @@ public:
FileOrScanError = 1325, // slave -> coordinator
BackupFailureDueToNodeFail = 1326, // slave -> slave
OkToClean = 1327 // master -> slave
,AbortScan = 1328
,IncompatibleVersions = 1329
};
private:
Uint32 requestType;
......
......@@ -90,10 +90,8 @@ printBACKUP_FRAGMENT_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
bool
printBACKUP_FRAGMENT_REF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
BackupFragmentRef* sig = (BackupFragmentRef*)data;
fprintf(out, " backupPtr: %d backupId: %d\n",
sig->backupPtr, sig->backupId);
fprintf(out, " tableId: %d fragmentNo: %d errorCode: %d\n",
sig->tableId, sig->fragmentNo, sig->errorCode);
fprintf(out, " backupPtr: %d backupId: %d nodeId: %d errorCode: %d\n",
sig->backupPtr, sig->backupId, sig->nodeId, sig->errorCode);
return true;
}
......
......@@ -67,31 +67,6 @@ static const Uint32 BACKUP_SEQUENCE = 0x1F000000;
//#define DEBUG_ABORT
//---------------------------------------------------------
// Ignore this since a completed abort could have preceded
// this message.
//---------------------------------------------------------
#define slaveAbortCheck() \
if ((ptr.p->backupId != backupId) || \
(ptr.p->slaveState.getState() == ABORTING)) { \
jam(); \
return; \
}
#define masterAbortCheck() \
if ((ptr.p->backupId != backupId) || \
(ptr.p->masterData.state.getState() == ABORTING)) { \
jam(); \
return; \
}
#define defineSlaveAbortCheck() \
if (ptr.p->slaveState.getState() == ABORTING) { \
jam(); \
closeFiles(signal, ptr); \
return; \
}
static Uint32 g_TypeOfStart = NodeState::ST_ILLEGAL_TYPE;
void
......@@ -221,12 +196,7 @@ Backup::execCONTINUEB(Signal* signal)
jam();
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, Tdata1);
if (ptr.p->slaveState.getState() == ABORTING) {
jam();
closeFiles(signal, ptr);
return;
}//if
BackupFilePtr filePtr;
ptr.p->files.getPtr(filePtr, ptr.p->ctlFilePtr);
FsBuffer & buf = filePtr.p->operation.dataBuffer;
......@@ -324,13 +294,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal)
for(c_backups.first(ptr); ptr.i != RNIL; c_backups.next(ptr)){
infoEvent("BackupRecord %d: BackupId: %d MasterRef: %x ClientRef: %x",
ptr.i, ptr.p->backupId, ptr.p->masterRef, ptr.p->clientRef);
if(ptr.p->masterRef == reference()){
infoEvent(" MasterState: %d State: %d",
ptr.p->masterData.state.getState(),
ptr.p->slaveState.getState());
} else {
infoEvent(" State: %d", ptr.p->slaveState.getState());
}
infoEvent(" State: %d", ptr.p->slaveState.getState());
BackupFilePtr filePtr;
for(ptr.p->files.first(filePtr); filePtr.i != RNIL;
ptr.p->files.next(filePtr)){
......@@ -338,7 +302,7 @@ Backup::execDUMP_STATE_ORD(Signal* signal)
infoEvent(" file %d: type: %d open: %d running: %d done: %d scan: %d",
filePtr.i, filePtr.p->fileType, filePtr.p->fileOpened,
filePtr.p->fileRunning,
filePtr.p->fileDone, filePtr.p->scanRunning);
filePtr.p->fileClosing, filePtr.p->scanRunning);
}
}
}
......@@ -356,6 +320,17 @@ Backup::execDUMP_STATE_ORD(Signal* signal)
infoEvent("PagePool: %d",
c_pagePool.getSize());
if(signal->getLength() == 2 && signal->theData[1] == 2424)
{
ndbrequire(c_tablePool.getSize() == c_tablePool.getNoOfFree());
ndbrequire(c_attributePool.getSize() == c_attributePool.getNoOfFree());
ndbrequire(c_backupPool.getSize() == c_backupPool.getNoOfFree());
ndbrequire(c_backupFilePool.getSize() == c_backupFilePool.getNoOfFree());
ndbrequire(c_pagePool.getSize() == c_pagePool.getNoOfFree());
ndbrequire(c_fragmentPool.getSize() == c_fragmentPool.getNoOfFree());
ndbrequire(c_triggerPool.getSize() == c_triggerPool.getNoOfFree());
}
}
}
......@@ -511,27 +486,6 @@ const char* triggerNameFormat[] = {
"NDB$BACKUP_%d_%d_DELETE"
};
const Backup::State
Backup::validMasterTransitions[] = {
INITIAL, DEFINING,
DEFINING, DEFINED,
DEFINED, STARTED,
STARTED, SCANNING,
SCANNING, STOPPING,
STOPPING, INITIAL,
DEFINING, ABORTING,
DEFINED, ABORTING,
STARTED, ABORTING,
SCANNING, ABORTING,
STOPPING, ABORTING,
ABORTING, ABORTING,
DEFINING, INITIAL,
ABORTING, INITIAL,
INITIAL, INITIAL
};
const Backup::State
Backup::validSlaveTransitions[] = {
INITIAL, DEFINING,
......@@ -561,10 +515,6 @@ const Uint32
Backup::validSlaveTransitionsCount =
sizeof(Backup::validSlaveTransitions) / sizeof(Backup::State);
const Uint32
Backup::validMasterTransitionsCount =
sizeof(Backup::validMasterTransitions) / sizeof(Backup::State);
void
Backup::CompoundState::setState(State newState){
bool found = false;
......@@ -578,7 +528,8 @@ Backup::CompoundState::setState(State newState){
break;
}
}
ndbrequire(found);
//ndbrequire(found);
if (newState == INITIAL)
abortState = INITIAL;
......@@ -647,8 +598,7 @@ Backup::execNODE_FAILREP(Signal* signal)
Uint32 theFailedNodes[NodeBitmask::Size];
for (Uint32 i = 0; i < NodeBitmask::Size; i++)
theFailedNodes[i] = rep->theNodes[i];
// NodeId old_master_node_id = getMasterNodeId();
c_masterNodeId = new_master_node_id;
NodePtr nodePtr;
......@@ -686,15 +636,24 @@ Backup::execNODE_FAILREP(Signal* signal)
}
bool
Backup::verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask)
Backup::verifyNodesAlive(BackupRecordPtr ptr,
const NdbNodeBitmask& aNodeBitMask)
{
Uint32 version = getNodeInfo(getOwnNodeId()).m_version;
for (Uint32 i = 0; i < MAX_NDB_NODES; i++) {
jam();
if(aNodeBitMask.get(i)) {
if(!c_aliveNodes.get(i)){
jam();
ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
return false;
}//if
if(getNodeInfo(i).m_version != version)
{
jam();
ptr.p->setErrorCode(AbortBackupOrd::IncompatibleVersions);
return false;
}
}//if
}//for
return true;
......@@ -709,6 +668,10 @@ Backup::checkNodeFail(Signal* signal,
ndbrequire( ptr.p->nodes.get(newCoord)); /* just to make sure newCoord
* is part of the backup
*/
NdbNodeBitmask mask;
mask.assign(2, theFailedNodes);
/* Update ptr.p->nodes to be up to date with current alive nodes
*/
NodePtr nodePtr;
......@@ -730,26 +693,42 @@ Backup::checkNodeFail(Signal* signal,
return; // failed node is not part of backup process, safe to continue
}
bool doMasterTakeover = false;
if(NodeBitmask::get(theFailedNodes, refToNode(ptr.p->masterRef))){
jam();
doMasterTakeover = true;
};
if (newCoord == getOwnNodeId()){
jam();
if (doMasterTakeover) {
/**
* I'm new master
*/
CRASH_INSERTION((10002));
#ifdef DEBUG_ABORT
ndbout_c("**** Master Takeover: Node failed: Master id = %u",
refToNode(ptr.p->masterRef));
#endif
masterTakeOver(signal, ptr);
if(mask.get(refToNode(ptr.p->masterRef)))
{
/**
* Master died...abort
*/
ptr.p->masterRef = reference();
ptr.p->nodes.clear();
ptr.p->nodes.set(getOwnNodeId());
ptr.p->setErrorCode(AbortBackupOrd::BackupFailureDueToNodeFail);
switch(ptr.p->m_gsn){
case GSN_DEFINE_BACKUP_REQ:
case GSN_START_BACKUP_REQ:
case GSN_BACKUP_FRAGMENT_REQ:
case GSN_STOP_BACKUP_REQ:
// I'm currently processing...reply to self and abort...
ptr.p->masterData.gsn = ptr.p->m_gsn;
ptr.p->masterData.sendCounter = ptr.p->nodes;
return;
}//if
case GSN_DEFINE_BACKUP_REF:
case GSN_DEFINE_BACKUP_CONF:
case GSN_START_BACKUP_REF:
case GSN_START_BACKUP_CONF:
case GSN_BACKUP_FRAGMENT_REF:
case GSN_BACKUP_FRAGMENT_CONF:
case GSN_STOP_BACKUP_REF:
case GSN_STOP_BACKUP_CONF:
ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
masterAbort(signal, ptr);
return;
case GSN_ABORT_BACKUP_ORD:
// Already aborting
return;
}
}
else if (newCoord == getOwnNodeId())
{
/**
* I'm master for this backup
*/
......@@ -759,61 +738,81 @@ Backup::checkNodeFail(Signal* signal,
ndbout_c("**** Master: Node failed: Master id = %u",
refToNode(ptr.p->masterRef));
#endif
masterAbort(signal, ptr, false);
return;
}//if
/**
* If there's a new master, (it's not me)
* but remember who it is
*/
ptr.p->masterRef = calcBackupBlockRef(newCoord);
Uint32 gsn, len, pos;
ptr.p->nodes.bitANDC(mask);
switch(ptr.p->masterData.gsn){
case GSN_DEFINE_BACKUP_REQ:
{
DefineBackupRef * ref = (DefineBackupRef*)signal->getDataPtr();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
gsn= GSN_DEFINE_BACKUP_REF;
len= DefineBackupRef::SignalLength;
pos= &ref->nodeId - signal->getDataPtr();
break;
}
case GSN_START_BACKUP_REQ:
{
StartBackupRef * ref = (StartBackupRef*)signal->getDataPtr();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
ref->signalNo = ptr.p->masterData.startBackup.signalNo;
gsn= GSN_START_BACKUP_REF;
len= StartBackupRef::SignalLength;
pos= &ref->nodeId - signal->getDataPtr();
break;
}
case GSN_BACKUP_FRAGMENT_REQ:
{
BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
gsn= GSN_BACKUP_FRAGMENT_REF;
len= BackupFragmentRef::SignalLength;
pos= &ref->nodeId - signal->getDataPtr();
break;
}
case GSN_STOP_BACKUP_REQ:
{
StopBackupRef * ref = (StopBackupRef*)signal->getDataPtr();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
gsn= GSN_STOP_BACKUP_REF;
len= StopBackupRef::SignalLength;
pos= &ref->nodeId - signal->getDataPtr();
break;
}
case GSN_CREATE_TRIG_REQ:
case GSN_ALTER_TRIG_REQ:
case GSN_WAIT_GCP_REQ:
case GSN_UTIL_SEQUENCE_REQ:
case GSN_UTIL_LOCK_REQ:
case GSN_DROP_TRIG_REQ:
return;
}
for(Uint32 i = 0; (i = mask.find(i+1)) != NdbNodeBitmask::NotFound; )
{
signal->theData[pos] = i;
sendSignal(reference(), gsn, signal, len, JBB);
#ifdef DEBUG_ABORT
ndbout_c("**** Slave: Node failed: Master id = %u",
refToNode(ptr.p->masterRef));
ndbout_c("sending %d to self from %d", gsn, i);
#endif
}
return;
}//if
/**
* I abort myself as slave if not master
*/
CRASH_INSERTION((10021));
// slaveAbort(signal, ptr);
}
/**
 * Make this block the master of the given backup record and abort the backup.
 *
 * Sets ptr.p->masterRef to this block's own reference, overwrites
 * masterData.gsn, forces the master state machine into a state derived from
 * the current local slave state, and finally calls masterAbort().
 *
 * Slave state -> forced master state:
 *   INITIAL                                         -> INITIAL
 *   ABORTING, DEFINING, DEFINED, STARTED, SCANNING  -> STARTED
 *   STOPPING, CLEANING                              -> STOPPING
 *
 * @param signal  signal object handed on to masterAbort()
 * @param ptr     backup record being taken over
 */
void
Backup::masterTakeOver(Signal* signal, BackupRecordPtr ptr)
{
ptr.p->masterRef = reference();
// NOTE(review): MAX_GSN + 1 looks like a deliberately out-of-range
// "no request outstanding" marker — confirm against users of masterData.gsn.
ptr.p->masterData.gsn = MAX_GSN + 1;
switch(ptr.p->slaveState.getState()){
case INITIAL:
jam();
ptr.p->masterData.state.forceState(INITIAL);
break;
// The cases below intentionally fall through (no break) until the
// forceState(STARTED) call that ends the group.
case ABORTING:
jam();
case DEFINING:
jam();
case DEFINED:
jam();
case STARTED:
jam();
case SCANNING:
jam();
ptr.p->masterData.state.forceState(STARTED);
break;
// STOPPING falls through to CLEANING; both end up as STOPPING.
case STOPPING:
jam();
case CLEANING:
jam();
ptr.p->masterData.state.forceState(STOPPING);
break;
default:
// Any other slave state is unexpected here.
ndbrequire(false);
}
// The third argument is masterAbort()'s controlledAbort flag.
masterAbort(signal, ptr, false);
}
void
Backup::execINCL_NODEREQ(Signal* signal)
{
......@@ -895,8 +894,8 @@ Backup::execBACKUP_REQ(Signal* signal)
ndbrequire(ptr.p->pages.empty());
ndbrequire(ptr.p->tables.isEmpty());
ptr.p->masterData.state.forceState(INITIAL);
ptr.p->masterData.state.setState(DEFINING);
ptr.p->m_gsn = 0;
ptr.p->errorCode = 0;
ptr.p->clientRef = senderRef;
ptr.p->clientData = senderData;
ptr.p->masterRef = reference();
......@@ -905,6 +904,7 @@ Backup::execBACKUP_REQ(Signal* signal)
ptr.p->backupKey[0] = 0;
ptr.p->backupKey[1] = 0;
ptr.p->backupDataLen = 0;
ptr.p->masterData.errorCode = 0;
ptr.p->masterData.dropTrig.tableId = RNIL;
ptr.p->masterData.alterTrig.tableId = RNIL;
......@@ -928,7 +928,6 @@ Backup::execUTIL_SEQUENCE_REF(Signal* signal)
ndbrequire(ptr.i == RNIL);
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
ptr.p->masterData.gsn = 0;
sendBackupRef(signal, ptr, BackupRef::SequenceFailure);
}//execUTIL_SEQUENCE_REF()
......@@ -938,8 +937,7 @@ Backup::sendBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errorCode)
{
jam();
sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData, errorCode);
// ptr.p->masterData.state.setState(INITIAL);
cleanupSlaveResources(ptr);
cleanup(signal, ptr);
}
void
......@@ -968,7 +966,8 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal)
UtilSequenceConf * conf = (UtilSequenceConf*)signal->getDataPtr();
if(conf->requestType == UtilSequenceReq::Create) {
if(conf->requestType == UtilSequenceReq::Create)
{
jam();
sendSTTORRY(signal); // At startup in NDB
return;
......@@ -979,18 +978,20 @@ Backup::execUTIL_SEQUENCE_CONF(Signal* signal)
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ);
ptr.p->masterData.gsn = 0;
if (ptr.p->masterData.state.getState() == ABORTING) {
if (ptr.p->checkError())
{
jam();
sendBackupRef(signal, ptr, ptr.p->errorCode);
return;
}//if
if (ERROR_INSERTED(10023)) {
ptr.p->masterData.state.setState(ABORTING);
if (ERROR_INSERTED(10023))
{
sendBackupRef(signal, ptr, 323);
return;
}//if
ndbrequire(ptr.p->masterData.state.getState() == DEFINING);
{
Uint64 backupId;
......@@ -1018,7 +1019,6 @@ Backup::defineBackupMutex_locked(Signal* signal, Uint32 ptrI, Uint32 retVal){
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
ptr.p->masterData.gsn = 0;
ptr.p->masterData.gsn = GSN_UTIL_LOCK_REQ;
Mutex mutex(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
......@@ -1040,14 +1040,13 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal)
c_backupPool.getPtr(ptr);
ndbrequire(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ);
ptr.p->masterData.gsn = 0;
if (ERROR_INSERTED(10031)) {
ptr.p->masterData.state.setState(ABORTING);
ptr.p->setErrorCode(331);
}//if
if (ptr.p->masterData.state.getState() == ABORTING) {
if (ptr.p->checkError())
{
jam();
/**
......@@ -1062,13 +1061,11 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal)
Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
jam();
mutex2.unlock(); // ignore response
sendBackupRef(signal, ptr, ptr.p->errorCode);
return;
}//if
ndbrequire(ptr.p->masterData.state.getState() == DEFINING);
sendDefineBackupReq(signal, ptr);
}
......@@ -1078,33 +1075,6 @@ Backup::dictCommitTableMutex_locked(Signal* signal, Uint32 ptrI,Uint32 retVal)
*
*****************************************************************************/
/**
 * Send signal `gsn` to the BACKUP block on every node that is both marked
 * alive in c_nodes and a member of ptr.p->nodes, and register each such node
 * in masterData.sendCounter as "waiting for".
 *
 * Before sending, masterData.gsn is set to `gsn` and the send counter's
 * waiting-for set is cleared.
 *
 * @param ptr            backup record whose node set and send counter are used
 * @param gsn            signal number to send; also stored in masterData.gsn
 * @param signal         signal object carrying the already-filled payload
 * @param signalLength   payload length passed to sendSignal / EXECUTE_DIRECT
 * @param executeDirect  when true, no sendSignal() is issued to this block's
 *                       own reference; EXECUTE_DIRECT is called once after
 *                       the loop instead
 */
void
Backup::sendSignalAllWait(BackupRecordPtr ptr, Uint32 gsn, Signal *signal,
Uint32 signalLength, bool executeDirect)
{
jam();
ptr.p->masterData.gsn = gsn;
ptr.p->masterData.sendCounter.clearWaitingFor();
NodePtr node;
for(c_nodes.first(node); node.i != RNIL; c_nodes.next(node)){
jam();
const Uint32 nodeId = node.p->nodeId;
// Only nodes that are alive AND participate in this backup are addressed.
if(node.p->alive && ptr.p->nodes.get(nodeId)){
jam();
ptr.p->masterData.sendCounter.setWaitingFor(nodeId);
const BlockReference ref = numberToRef(BACKUP, nodeId);
// With executeDirect the local block is skipped here and handled below.
if (!executeDirect || ref != reference()) {
sendSignal(ref, gsn, signal, signalLength, JBB);
}//if
}//if
}//for
// NOTE(review): this runs whenever executeDirect is set, even if the own
// node was not registered in the send counter above — confirm callers
// guarantee the own node is always part of ptr.p->nodes.
if (executeDirect) {
EXECUTE_DIRECT(BACKUP, gsn, signal, signalLength);
}
}
bool
Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId)
{
......@@ -1114,10 +1084,6 @@ Backup::haveAllSignals(BackupRecordPtr ptr, Uint32 gsn, Uint32 nodeId)
ndbrequire(ptr.p->masterData.sendCounter.isWaitingFor(nodeId));
ptr.p->masterData.sendCounter.clearWaitingFor(nodeId);
if (ptr.p->masterData.sendCounter.done())
ptr.p->masterData.gsn = 0;
return ptr.p->masterData.sendCounter.done();
}
......@@ -1138,11 +1104,12 @@ Backup::sendDefineBackupReq(Signal *signal, BackupRecordPtr ptr)
req->nodes = ptr.p->nodes;
req->backupDataLen = ptr.p->backupDataLen;
ptr.p->masterData.errorCode = 0;
ptr.p->okToCleanMaster = false; // master must wait with cleaning to last
sendSignalAllWait(ptr, GSN_DEFINE_BACKUP_REQ, signal,
DefineBackupReq::SignalLength,
true /* do execute direct on oneself */);
ptr.p->masterData.gsn = GSN_DEFINE_BACKUP_REQ;
ptr.p->masterData.sendCounter = ptr.p->nodes;
NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
sendSignal(rg, GSN_DEFINE_BACKUP_REQ, signal,
DefineBackupReq::SignalLength, JBB);
/**
* Now send backup data
*/
......@@ -1167,17 +1134,15 @@ Backup::execDEFINE_BACKUP_REF(Signal* signal)
jamEntry();
DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
const Uint32 backupId = ref->backupId;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
//const Uint32 backupId = ref->backupId;
const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
ptr.p->masterData.errorCode = ref->errorCode;
ptr.p->setErrorCode(ref->errorCode);
defineBackupReply(signal, ptr, nodeId);
}
......@@ -1188,17 +1153,16 @@ Backup::execDEFINE_BACKUP_CONF(Signal* signal)
DefineBackupConf* conf = (DefineBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
const Uint32 backupId = conf->backupId;
//const Uint32 backupId = conf->backupId;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
if (ERROR_INSERTED(10024)) {
ptr.p->masterData.errorCode = 324;
}//if
if (ERROR_INSERTED(10024))
{
ptr.p->setErrorCode(324);
}
defineBackupReply(signal, ptr, nodeId);
}
......@@ -1210,6 +1174,7 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
jam();
return;
}
/**
* Unlock mutexes
*/
......@@ -1223,16 +1188,10 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
jam();
mutex2.unlock(); // ignore response
if(ptr.p->errorCode) {
jam();
ptr.p->masterData.errorCode = ptr.p->errorCode;
}
if(ptr.p->masterData.errorCode){
if(ptr.p->checkError())
{
jam();
ptr.p->setErrorCode(ptr.p->masterData.errorCode);
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
masterSendAbortBackup(signal, ptr);
masterAbort(signal, ptr);
return;
}
......@@ -1252,7 +1211,6 @@ Backup::defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+3);
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 3+NdbNodeBitmask::Size, JBB);
ptr.p->masterData.state.setState(DEFINED);
/**
* Prepare Trig
*/
......@@ -1286,7 +1244,6 @@ Backup::sendCreateTrig(Signal* signal,
{
CreateTrigReq * req =(CreateTrigReq *)signal->getDataPtrSend();
ptr.p->errorCode = 0;
ptr.p->masterData.gsn = GSN_CREATE_TRIG_REQ;
ptr.p->masterData.sendCounter = 3;
ptr.p->masterData.createTrig.tableId = tabPtr.p->tableId;
......@@ -1395,17 +1352,14 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr)
return;
}//if
ptr.p->masterData.gsn = 0;
if (ERROR_INSERTED(10025))
{
ptr.p->errorCode = 325;
}
if(ptr.p->checkError()) {
jam();
masterAbort(signal, ptr, true);
return;
}//if
if (ERROR_INSERTED(10025)) {
ptr.p->errorCode = 325;
masterAbort(signal, ptr, true);
masterAbort(signal, ptr);
return;
}//if
......@@ -1425,10 +1379,7 @@ Backup::createTrigReply(Signal* signal, BackupRecordPtr ptr)
/**
* Finished with all tables, send StartBackupReq
*/
ptr.p->masterData.state.setState(STARTED);
ptr.p->tables.first(tabPtr);
ptr.p->errorCode = 0;
ptr.p->masterData.startBackup.signalNo = 0;
ptr.p->masterData.startBackup.noOfSignals =
(ptr.p->tables.noOfElements() + StartBackupReq::MaxTableTriggers - 1) /
......@@ -1467,9 +1418,12 @@ Backup::sendStartBackup(Signal* signal, BackupRecordPtr ptr, TablePtr tabPtr)
}//for
req->noOfTableTriggers = i;
sendSignalAllWait(ptr, GSN_START_BACKUP_REQ, signal,
StartBackupReq::HeaderLength +
(i * StartBackupReq::TableTriggerLength));
ptr.p->masterData.gsn = GSN_START_BACKUP_REQ;
ptr.p->masterData.sendCounter = ptr.p->nodes;
NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
sendSignal(rg, GSN_START_BACKUP_REQ, signal,
StartBackupReq::HeaderLength +
(i * StartBackupReq::TableTriggerLength), JBB);
}
void
......@@ -1479,15 +1433,13 @@ Backup::execSTART_BACKUP_REF(Signal* signal)
StartBackupRef* ref = (StartBackupRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
const Uint32 backupId = ref->backupId;
//const Uint32 backupId = ref->backupId;
const Uint32 signalNo = ref->signalNo;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
ptr.p->setErrorCode(ref->errorCode);
startBackupReply(signal, ptr, nodeId, signalNo);
}
......@@ -1499,15 +1451,13 @@ Backup::execSTART_BACKUP_CONF(Signal* signal)
StartBackupConf* conf = (StartBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
const Uint32 backupId = conf->backupId;
//const Uint32 backupId = conf->backupId;
const Uint32 signalNo = conf->signalNo;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
startBackupReply(signal, ptr, nodeId, signalNo);
}
......@@ -1524,17 +1474,16 @@ Backup::startBackupReply(Signal* signal, BackupRecordPtr ptr,
return;
}
if (ERROR_INSERTED(10026))
{
ptr.p->errorCode = 326;
}
if(ptr.p->checkError()){
jam();
masterAbort(signal, ptr, true);
masterAbort(signal, ptr);
return;
}
if (ERROR_INSERTED(10026)) {
ptr.p->errorCode = 326;
masterAbort(signal, ptr, true);
return;
}//if
TablePtr tabPtr;
c_tablePool.getPtr(tabPtr, ptr.p->masterData.startBackup.tablePtr);
......@@ -1566,7 +1515,6 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr)
{
AlterTrigReq * req =(AlterTrigReq *)signal->getDataPtrSend();
ptr.p->errorCode = 0;
ptr.p->masterData.gsn = GSN_ALTER_TRIG_REQ;
ptr.p->masterData.sendCounter = 0;
......@@ -1608,6 +1556,7 @@ Backup::sendAlterTrig(Signal* signal, BackupRecordPtr ptr)
return;
}//if
ptr.p->masterData.alterTrig.tableId = RNIL;
/**
* Finished with all tables
*/
......@@ -1669,11 +1618,9 @@ Backup::alterTrigReply(Signal* signal, BackupRecordPtr ptr)
return;
}//if
ptr.p->masterData.gsn = 0;
if(ptr.p->checkError()){
jam();
masterAbort(signal, ptr, true);
masterAbort(signal, ptr);
return;
}//if
......@@ -1719,11 +1666,10 @@ Backup::execWAIT_GCP_CONF(Signal* signal){
ndbrequire(ptr.p->masterRef == reference());
ndbrequire(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ);
ptr.p->masterData.gsn = 0;
if(ptr.p->checkError()) {
jam();
masterAbort(signal, ptr, true);
masterAbort(signal, ptr);
return;
}//if
......@@ -1731,13 +1677,13 @@ Backup::execWAIT_GCP_CONF(Signal* signal){
jam();
CRASH_INSERTION((10008));
ptr.p->startGCP = gcp;
ptr.p->masterData.state.setState(SCANNING);
ptr.p->masterData.sendCounter= 0;
ptr.p->masterData.gsn = GSN_BACKUP_FRAGMENT_REQ;
nextFragment(signal, ptr);
} else {
jam();
CRASH_INSERTION((10009));
ptr.p->stopGCP = gcp;
ptr.p->masterData.state.setState(STOPPING);
sendDropTrig(signal, ptr); // regular dropping of triggers
}//if
}
......@@ -1787,6 +1733,7 @@ Backup::nextFragment(Signal* signal, BackupRecordPtr ptr)
req->fragmentNo = i;
req->count = 0;
ptr.p->masterData.sendCounter++;
const BlockReference ref = numberToRef(BACKUP, nodeId);
sendSignal(ref, GSN_BACKUP_FRAGMENT_REQ, signal,
BackupFragmentReq::SignalLength, JBB);
......@@ -1824,7 +1771,7 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal)
BackupFragmentConf * conf = (BackupFragmentConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
const Uint32 backupId = conf->backupId;
//const Uint32 backupId = conf->backupId;
const Uint32 tableId = conf->tableId;
const Uint32 fragmentNo = conf->fragmentNo;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
......@@ -1834,10 +1781,9 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
ptr.p->noOfBytes += noOfBytes;
ptr.p->noOfRecords += noOfRecords;
ptr.p->masterData.sendCounter--;
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
......@@ -1852,17 +1798,24 @@ Backup::execBACKUP_FRAGMENT_CONF(Signal* signal)
fragPtr.p->scanned = 1;
fragPtr.p->scanning = 0;
if(ptr.p->checkError()) {
jam();
masterAbort(signal, ptr, true);
return;
}//if
if (ERROR_INSERTED(10028)) {
if (ERROR_INSERTED(10028))
{
ptr.p->errorCode = 328;
masterAbort(signal, ptr, true);
return;
}//if
nextFragment(signal, ptr);
}
if(ptr.p->checkError())
{
if(ptr.p->masterData.sendCounter.done())
{
jam();
masterAbort(signal, ptr);
return;
}//if
}
else
{
nextFragment(signal, ptr);
}
}
void
......@@ -1874,15 +1827,52 @@ Backup::execBACKUP_FRAGMENT_REF(Signal* signal)
BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
const Uint32 backupId = ref->backupId;
//const Uint32 backupId = ref->backupId;
const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
TablePtr tabPtr;
ptr.p->tables.first(tabPtr);
for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
jam();
FragmentPtr fragPtr;
Array<Fragment> & frags = tabPtr.p->fragments;
const Uint32 fragCount = frags.getSize();
for(Uint32 i = 0; i<fragCount; i++) {
jam();
tabPtr.p->fragments.getPtr(fragPtr, i);
if(fragPtr.p->scanning != 0 && nodeId == fragPtr.p->node)
{
jam();
ndbrequire(fragPtr.p->scanned == 0);
fragPtr.p->scanned = 1;
fragPtr.p->scanning = 0;
goto done;
}
}
}
ndbrequire(false);
done:
ptr.p->masterData.sendCounter--;
ptr.p->setErrorCode(ref->errorCode);
masterAbort(signal, ptr, true);
if(ptr.p->masterData.sendCounter.done())
{
jam();
masterAbort(signal, ptr);
return;
}//if
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i;
ord->requestType = AbortBackupOrd::LogBufferFull;
ord->senderData= ptr.i;
execABORT_BACKUP_ORD(signal);
}
/*****************************************************************************
......@@ -1910,15 +1900,7 @@ Backup::sendDropTrig(Signal* signal, BackupRecordPtr ptr)
jam();
ptr.p->masterData.dropTrig.tableId = RNIL;
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
if(ptr.p->masterData.state.getState() == STOPPING) {
jam();
sendStopBackup(signal, ptr);
return;
}//if
ndbrequire(ptr.p->masterData.state.getState() == ABORTING);
masterSendAbortBackup(signal, ptr);
sendStopBackup(signal, ptr);
}//if
}
......@@ -2010,7 +1992,6 @@ Backup::dropTrigReply(Signal* signal, BackupRecordPtr ptr)
return;
}//if
ptr.p->masterData.gsn = 0;
sendDropTrig(signal, ptr); // recursive next
}
......@@ -2023,14 +2004,23 @@ void
Backup::execSTOP_BACKUP_REF(Signal* signal)
{
jamEntry();
ndbrequire(0);
StopBackupRef* ref = (StopBackupRef*)signal->getDataPtr();
const Uint32 ptrI = ref->backupPtr;
//const Uint32 backupId = ref->backupId;
const Uint32 nodeId = ref->nodeId;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
ptr.p->setErrorCode(ref->errorCode);
stopBackupReply(signal, ptr, nodeId);
}
void
Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr)
{
jam();
ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
StopBackupReq* stop = (StopBackupReq*)signal->getDataPtrSend();
stop->backupPtr = ptr.i;
......@@ -2038,8 +2028,11 @@ Backup::sendStopBackup(Signal* signal, BackupRecordPtr ptr)
stop->startGCP = ptr.p->startGCP;
stop->stopGCP = ptr.p->stopGCP;
sendSignalAllWait(ptr, GSN_STOP_BACKUP_REQ, signal,
StopBackupReq::SignalLength);
ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
ptr.p->masterData.sendCounter = ptr.p->nodes;
NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
sendSignal(rg, GSN_STOP_BACKUP_REQ, signal,
StopBackupReq::SignalLength, JBB);
}
void
......@@ -2049,14 +2042,12 @@ Backup::execSTOP_BACKUP_CONF(Signal* signal)
StopBackupConf* conf = (StopBackupConf*)signal->getDataPtr();
const Uint32 ptrI = conf->backupPtr;
const Uint32 backupId = conf->backupId;
//const Uint32 backupId = conf->backupId;
const Uint32 nodeId = refToNode(signal->senderBlockRef());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
masterAbortCheck(); // macro will do return if ABORTING
ptr.p->noOfLogBytes += conf->noOfLogBytes;
ptr.p->noOfLogRecords += conf->noOfLogRecords;
......@@ -2073,35 +2064,39 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
return;
}
// ptr.p->masterData.state.setState(INITIAL);
// send backup complete first to slaves so that they know
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupComplete);
BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend();
rep->backupId = ptr.p->backupId;
rep->senderData = ptr.p->clientData;
rep->startGCP = ptr.p->startGCP;
rep->stopGCP = ptr.p->stopGCP;
rep->noOfBytes = ptr.p->noOfBytes;
rep->noOfRecords = ptr.p->noOfRecords;
rep->noOfLogBytes = ptr.p->noOfLogBytes;
rep->noOfLogRecords = ptr.p->noOfLogRecords;
rep->nodes = ptr.p->nodes;
sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal,
BackupCompleteRep::SignalLength, JBB);
signal->theData[0] = EventReport::BackupCompleted;
signal->theData[1] = ptr.p->clientRef;
signal->theData[2] = ptr.p->backupId;
signal->theData[3] = ptr.p->startGCP;
signal->theData[4] = ptr.p->stopGCP;
signal->theData[5] = ptr.p->noOfBytes;
signal->theData[6] = ptr.p->noOfRecords;
signal->theData[7] = ptr.p->noOfLogBytes;
signal->theData[8] = ptr.p->noOfLogRecords;
ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9);
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB);
if(!ptr.p->checkError())
{
BackupCompleteRep * rep = (BackupCompleteRep*)signal->getDataPtrSend();
rep->backupId = ptr.p->backupId;
rep->senderData = ptr.p->clientData;
rep->startGCP = ptr.p->startGCP;
rep->stopGCP = ptr.p->stopGCP;
rep->noOfBytes = ptr.p->noOfBytes;
rep->noOfRecords = ptr.p->noOfRecords;
rep->noOfLogBytes = ptr.p->noOfLogBytes;
rep->noOfLogRecords = ptr.p->noOfLogRecords;
rep->nodes = ptr.p->nodes;
sendSignal(ptr.p->clientRef, GSN_BACKUP_COMPLETE_REP, signal,
BackupCompleteRep::SignalLength, JBB);
signal->theData[0] = EventReport::BackupCompleted;
signal->theData[1] = ptr.p->clientRef;
signal->theData[2] = ptr.p->backupId;
signal->theData[3] = ptr.p->startGCP;
signal->theData[4] = ptr.p->stopGCP;
signal->theData[5] = ptr.p->noOfBytes;
signal->theData[6] = ptr.p->noOfRecords;
signal->theData[7] = ptr.p->noOfLogBytes;
signal->theData[8] = ptr.p->noOfLogRecords;
ptr.p->nodes.copyto(NdbNodeBitmask::Size, signal->theData+9);
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 9+NdbNodeBitmask::Size, JBB);
}
else
{
masterAbort(signal, ptr);
}
}
/*****************************************************************************
......@@ -2110,199 +2105,96 @@ Backup::stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId)
*
*****************************************************************************/
/**
 * Abort a backup from the master (coordinator) side.
 *
 * NOTE(review): two signatures follow each other below and
 * `controlledAbort` is declared only by the first one, so this span looks
 * like two revisions of the function merged line by line. Confirm which
 * statements are live before relying on the notes in this body.
 *
 * @param signal  signal object reused as send buffer for all replies
 * @param ptr     backup record being aborted
 */
void
Backup::masterAbort(Signal* signal, BackupRecordPtr ptr, bool controlledAbort)
Backup::masterAbort(Signal* signal, BackupRecordPtr ptr)
{
if(ptr.p->masterData.state.getState() == ABORTING) {
jam();
#ifdef DEBUG_ABORT
ndbout_c("---- Master already aborting");
ndbout_c("************ masterAbort");
#endif
// Return early when the master already has an error code recorded
if(ptr.p->masterData.errorCode != 0)
{
jam();
return;
}
jam();
#ifdef DEBUG_ABORT
ndbout_c("************ masterAbort");
#endif
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
// presumably checkError() reports whether an error code is already set;
// if none is, BackupFailureDueToNodeFail is stored as the error code
if (!ptr.p->checkError())
ptr.p->errorCode = AbortBackupOrd::BackupFailureDueToNodeFail;
// Remember the state we came from, then switch the master to ABORTING
const State s = ptr.p->masterData.state.getState();
ptr.p->masterData.state.setState(ABORTING);
// Only these previous states are accepted
ndbrequire(s == INITIAL ||
s == STARTED ||
s == DEFINING ||
s == DEFINED ||
s == SCANNING ||
s == STOPPING ||
s == ABORTING);
if(ptr.p->masterData.gsn == GSN_UTIL_SEQUENCE_REQ) {
jam();
DEBUG_OUT("masterAbort: gsn = GSN_UTIL_SEQUENCE_REQ");
//-------------------------------------------------------
// We are waiting for UTIL_SEQUENCE response. We rely on
// this to arrive and check for ABORTING in response.
// No slaves are involved at this point and ABORT simply
// results in BACKUP_REF to client
//-------------------------------------------------------
/**
* Waiting for Sequence Id
* @see execUTIL_SEQUENCE_CONF
*/
return;
}//if
// Report the abort, with the recorded error code as reason, to the client
BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend();
rep->backupId = ptr.p->backupId;
rep->senderData = ptr.p->clientData;
rep->reason = ptr.p->errorCode;
sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal,
BackupAbortRep::SignalLength, JBB);
if(ptr.p->masterData.gsn == GSN_UTIL_LOCK_REQ) {
jam();
DEBUG_OUT("masterAbort: gsn = GSN_UTIL_LOCK_REQ");
//-------------------------------------------------------
// We are waiting for UTIL_LOCK response (mutex). We rely on
// this to arrive and check for ABORTING in response.
// No slaves are involved at this point and ABORT simply
// results in BACKUP_REF to client
//-------------------------------------------------------
/**
* Waiting for lock
* @see execUTIL_LOCK_CONF
*/
return;
}//if
/**
* Unlock mutexes only at master
*/
jam();
Mutex mutex1(signal, c_mutexMgr, ptr.p->masterData.m_dictCommitTableMutex);
jam();
mutex1.unlock(); // ignore response
jam();
Mutex mutex2(signal, c_mutexMgr, ptr.p->masterData.m_defineBackupMutex);
jam();
mutex2.unlock(); // ignore response
if (!controlledAbort) {
jam();
if (s == DEFINING) {
jam();
//-------------------------------------------------------
// If we are in the defining phase all work is done by
// slaves. No triggers have been allocated thus slaves
// may free all "Master" resources, let them know...
//-------------------------------------------------------
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::OkToClean);
return;
}//if
if (s == DEFINED) {
jam();
//-------------------------------------------------------
// DEFINED is the state when triggers are created. We rely
// on that DICT will report create trigger failure in case
// of node failure. Thus no special action is needed here.
// We will check for errorCode != 0 when receiving
// replies on create trigger.
//-------------------------------------------------------
return;
}//if
if(ptr.p->masterData.gsn == GSN_WAIT_GCP_REQ) {
jam();
DEBUG_OUT("masterAbort: gsn = GSN_WAIT_GCP_REQ");
//-------------------------------------------------------
// We are waiting for WAIT_GCP response. We rely on
// this to arrive and check for ABORTING in response.
//-------------------------------------------------------
// Emit a BackupAborted event report (client ref, backup id, error code)
signal->theData[0] = EventReport::BackupAborted;
signal->theData[1] = ptr.p->clientRef;
signal->theData[2] = ptr.p->backupId;
signal->theData[3] = ptr.p->errorCode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
/**
* Waiting for GCP
* @see execWAIT_GCP_CONF
*/
return;
}//if
if(ptr.p->masterData.gsn == GSN_ALTER_TRIG_REQ) {
jam();
DEBUG_OUT("masterAbort: gsn = GSN_ALTER_TRIG_REQ");
//-------------------------------------------------------
// We are waiting for ALTER_TRIG response. We rely on
// this to arrive and check for ABORTING in response.
//-------------------------------------------------------
// Copy the (required non-zero) error code into the master data
ndbrequire(ptr.p->errorCode);
ptr.p->masterData.errorCode = ptr.p->errorCode;
/**
* All triggers haven't been created yet
*/
return;
}//if
// Prepare an ABORT_BACKUP_ORD addressed to the BACKUP block on all
// nodes of this backup; requestType is filled in further down
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i;
ord->senderData= ptr.i;
NodeReceiverGroup rg(BACKUP, ptr.p->nodes);
if(ptr.p->masterData.gsn == GSN_DROP_TRIG_REQ) {
jam();
DEBUG_OUT("masterAbort: gsn = GSN_DROP_TRIG_REQ");
//-------------------------------------------------------
// We are waiting for DROP_TRIG response. We rely on
// this to arrive and will continue dropping triggers
// until completed.
//-------------------------------------------------------
/**
* I'm currently dropping the trigger
*/
return;
}//if
}//if
//-------------------------------------------------------
// If we are waiting for START_BACKUP responses we can
// safely start dropping triggers (state == STARTED).
// We will ignore any START_BACKUP responses after this.
//-------------------------------------------------------
DEBUG_OUT("masterAbort: sendDropTrig");
sendDropTrig(signal, ptr); // dropping due to error
// Dispatch on the request the master is currently waiting for
switch(ptr.p->masterData.gsn){
case GSN_DEFINE_BACKUP_REQ:
ord->requestType = AbortBackupOrd::BackupFailure;
sendSignal(rg, GSN_ABORT_BACKUP_ORD, signal,
AbortBackupOrd::SignalLength, JBB);
return;
case GSN_CREATE_TRIG_REQ:
case GSN_START_BACKUP_REQ:
case GSN_ALTER_TRIG_REQ:
case GSN_WAIT_GCP_REQ:
case GSN_BACKUP_FRAGMENT_REQ:
jam();
// stopGCP is set to startGCP + 1 before the triggers are dropped
ptr.p->stopGCP= ptr.p->startGCP + 1;
sendDropTrig(signal, ptr); // dropping due to error
return;
case GSN_UTIL_SEQUENCE_REQ:
case GSN_UTIL_LOCK_REQ:
case GSN_DROP_TRIG_REQ:
// These requests are not expected to be outstanding at this point
ndbrequire(false);
return;
case GSN_STOP_BACKUP_REQ:
return;
}
}
/**
 * Master-side helper that forwards an abort to the participating nodes.
 *
 * NOTE(review): both `masterSendAbortBackup` and `abort_scan` signatures
 * appear below and the brace structure does not balance, so this span
 * looks like two revisions merged line by line. In the `abort_scan` lines
 * an ABORT_BACKUP_ORD with request type AbortScan is built and sent to
 * every node that owns a fragment flagged as scanning — confirm against
 * the real file before relying on this.
 */
void
Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr)
Backup::abort_scan(Signal * signal, BackupRecordPtr ptr)
{
// Not yet aborting: broadcast BackupFailure and enter ABORTING
if (ptr.p->masterData.state.getState() != ABORTING) {
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
ptr.p->masterData.state.setState(ABORTING);
}
const State s = ptr.p->masterData.state.getAbortState();
/**
* First inform the client
*/
if(s == DEFINING) {
jam();
#ifdef DEBUG_ABORT
ndbout_c("** Abort: sending BACKUP_REF to mgmtsrvr");
#endif
sendBackupRef(ptr.p->clientRef, signal, ptr.p->clientData,
ptr.p->errorCode);
// Fill in the ABORT_BACKUP_ORD that the fragment loop below sends out
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i;
ord->senderData= ptr.i;
ord->requestType = AbortBackupOrd::AbortScan;
} else {
// Walk all tables of this backup
TablePtr tabPtr;
ptr.p->tables.first(tabPtr);
for(; tabPtr.i != RNIL; ptr.p->tables.next(tabPtr)) {
jam();
#ifdef DEBUG_ABORT
ndbout_c("** Abort: sending BACKUP_ABORT_REP to mgmtsrvr");
#endif
BackupAbortRep* rep = (BackupAbortRep*)signal->getDataPtrSend();
rep->backupId = ptr.p->backupId;
rep->senderData = ptr.p->clientData;
rep->reason = ptr.p->errorCode;
sendSignal(ptr.p->clientRef, GSN_BACKUP_ABORT_REP, signal,
BackupAbortRep::SignalLength, JBB);
// Emit a BackupAborted event report (client ref, backup id, error code)
signal->theData[0] = EventReport::BackupAborted;
signal->theData[1] = ptr.p->clientRef;
signal->theData[2] = ptr.p->backupId;
signal->theData[3] = ptr.p->errorCode;
sendSignal(CMVMI_REF, GSN_EVENT_REP, signal, 4, JBB);
}//if
// ptr.p->masterData.state.setState(INITIAL);
sendAbortBackupOrd(signal, ptr, AbortBackupOrd::BackupFailure);
// For each fragment of the table: if it is marked as scanning and its
// node is part of this backup, send ABORT_BACKUP_ORD to that node's
// BACKUP block
FragmentPtr fragPtr;
Array<Fragment> & frags = tabPtr.p->fragments;
const Uint32 fragCount = frags.getSize();
for(Uint32 i = 0; i<fragCount; i++) {
jam();
tabPtr.p->fragments.getPtr(fragPtr, i);
const Uint32 nodeId = fragPtr.p->node;
if(fragPtr.p->scanning != 0 && ptr.p->nodes.get(nodeId)) {
jam();
const BlockReference ref = numberToRef(BACKUP, nodeId);
sendSignal(ref, GSN_ABORT_BACKUP_ORD, signal,
AbortBackupOrd::SignalLength, JBB);
}
}
}
}
/*****************************************************************************
......@@ -2313,26 +2205,17 @@ Backup::masterSendAbortBackup(Signal* signal, BackupRecordPtr ptr)
/**
 * Reject a DEFINE_BACKUP request: record the error on the backup record,
 * send DEFINE_BACKUP_REF (backup id, record index, error code, own node
 * id) to the master and close the backup files.
 *
 * NOTE(review): setErrorCode(errCode) appears twice below, once guarded by
 * `errCode != 0` and once unguarded, together with an early return on
 * ABORTING. This looks like two revisions merged line by line — confirm
 * which statements are live.
 *
 * @param signal   signal object reused as send buffer
 * @param ptr      backup record the request belongs to
 * @param errCode  error code to record on the backup record
 */
void
Backup::defineBackupRef(Signal* signal, BackupRecordPtr ptr, Uint32 errCode)
{
// Already aborting on the slave side: nothing is sent
if (ptr.p->slaveState.getState() == ABORTING) {
jam();
return;
}
ptr.p->slaveState.setState(ABORTING);
if (errCode != 0) {
jam();
ptr.p->setErrorCode(errCode);
}//if
ptr.p->m_gsn = GSN_DEFINE_BACKUP_REF;
ptr.p->setErrorCode(errCode);
// A REF must never be sent without an error code on the record
ndbrequire(ptr.p->errorCode != 0);
DefineBackupRef* ref = (DefineBackupRef*)signal->getDataPtrSend();
ref->backupId = ptr.p->backupId;
ref->backupPtr = ptr.i;
ref->errorCode = ptr.p->errorCode;
ref->nodeId = getOwnNodeId();
sendSignal(ptr.p->masterRef, GSN_DEFINE_BACKUP_REF, signal,
DefineBackupRef::SignalLength, JBB);
closeFiles(signal, ptr);
}
void
......@@ -2366,6 +2249,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
CRASH_INSERTION((10014));
ptr.p->m_gsn = GSN_DEFINE_BACKUP_REQ;
ptr.p->slaveState.forceState(INITIAL);
ptr.p->slaveState.setState(DEFINING);
ptr.p->errorCode = 0;
......@@ -2432,7 +2316,7 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
files[i].p->tableId = RNIL;
files[i].p->backupPtr = ptr.i;
files[i].p->filePointer = RNIL;
files[i].p->fileDone = 0;
files[i].p->fileClosing = 0;
files[i].p->fileOpened = 0;
files[i].p->fileRunning = 0;
files[i].p->scanRunning = 0;
......@@ -2468,17 +2352,14 @@ Backup::execDEFINE_BACKUP_REQ(Signal* signal)
ptr.p->logFilePtr = files[1].i;
ptr.p->dataFilePtr = files[2].i;
if (!verifyNodesAlive(ptr.p->nodes)) {
if (!verifyNodesAlive(ptr, ptr.p->nodes)) {
jam();
defineBackupRef(signal, ptr, DefineBackupRef::Undefined);
// sendBackupRef(signal, ptr,
// ptr.p->errorCode?ptr.p->errorCode:BackupRef::Undefined);
return;
}//if
if (ERROR_INSERTED(10027)) {
jam();
defineBackupRef(signal, ptr, 327);
// sendBackupRef(signal, ptr, 327);
return;
}//if
......@@ -2546,8 +2427,6 @@ Backup::execLIST_TABLES_CONF(Signal* signal)
return;
}//if
defineSlaveAbortCheck();
/**
* All tables fetched
*/
......@@ -2679,8 +2558,6 @@ Backup::openFilesReply(Signal* signal,
}//if
}//for
defineSlaveAbortCheck();
/**
* Did open succeed for all files
*/
......@@ -2810,8 +2687,6 @@ Backup::execGET_TABINFOREF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
defineSlaveAbortCheck();
defineBackupRef(signal, ptr, ref->errorCode);
}
......@@ -2833,8 +2708,6 @@ Backup::execGET_TABINFO_CONF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
defineSlaveAbortCheck();
SegmentedSectionPtr dictTabInfoPtr;
signal->getSection(dictTabInfoPtr, GetTabInfoConf::DICT_TAB_INFO);
ndbrequire(dictTabInfoPtr.sz == len);
......@@ -3047,8 +2920,6 @@ Backup::execDI_FCOUNTCONF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
defineSlaveAbortCheck();
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
......@@ -3127,8 +2998,6 @@ Backup::execDIGETPRIMCONF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, senderData);
defineSlaveAbortCheck();
TablePtr tabPtr;
ndbrequire(findTable(ptr, tabPtr, tableId));
......@@ -3143,9 +3012,7 @@ Backup::execDIGETPRIMCONF(Signal* signal)
void
Backup::getFragmentInfoDone(Signal* signal, BackupRecordPtr ptr)
{
// Slave must now hold on to master data until
// AbortBackupOrd::OkToClean signal
ptr.p->okToCleanMaster = false;
ptr.p->m_gsn = GSN_DEFINE_BACKUP_CONF;
ptr.p->slaveState.setState(DEFINED);
DefineBackupConf * conf = (DefineBackupConf*)signal->getDataPtr();
conf->backupPtr = ptr.i;
......@@ -3169,16 +3036,15 @@ Backup::execSTART_BACKUP_REQ(Signal* signal)
StartBackupReq* req = (StartBackupReq*)signal->getDataPtr();
const Uint32 ptrI = req->backupPtr;
const Uint32 backupId = req->backupId;
//const Uint32 backupId = req->backupId;
const Uint32 signalNo = req->signalNo;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
slaveAbortCheck(); // macro will do return if ABORTING
ptr.p->slaveState.setState(STARTED);
ptr.p->m_gsn = GSN_START_BACKUP_REQ;
for(Uint32 i = 0; i<req->noOfTableTriggers; i++) {
jam();
TablePtr tabPtr;
......@@ -3191,11 +3057,13 @@ Backup::execSTART_BACKUP_REQ(Signal* signal)
TriggerPtr trigPtr;
if(!ptr.p->triggers.seizeId(trigPtr, triggerId)) {
jam();
ptr.p->m_gsn = GSN_START_BACKUP_REF;
StartBackupRef* ref = (StartBackupRef*)signal->getDataPtrSend();
ref->backupPtr = ptr.i;
ref->backupId = ptr.p->backupId;
ref->signalNo = signalNo;
ref->errorCode = StartBackupRef::FailedToAllocateTriggerRecord;
ref->nodeId = getOwnNodeId();
sendSignal(ptr.p->masterRef, GSN_START_BACKUP_REF, signal,
StartBackupRef::SignalLength, JBB);
return;
......@@ -3233,6 +3101,7 @@ Backup::execSTART_BACKUP_REQ(Signal* signal)
}//if
}//for
ptr.p->m_gsn = GSN_START_BACKUP_CONF;
StartBackupConf* conf = (StartBackupConf*)signal->getDataPtrSend();
conf->backupPtr = ptr.i;
conf->backupId = ptr.p->backupId;
......@@ -3255,7 +3124,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal)
CRASH_INSERTION((10016));
const Uint32 ptrI = req->backupPtr;
const Uint32 backupId = req->backupId;
//const Uint32 backupId = req->backupId;
const Uint32 tableId = req->tableId;
const Uint32 fragNo = req->fragmentNo;
const Uint32 count = req->count;
......@@ -3266,10 +3135,9 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, ptrI);
slaveAbortCheck(); // macro will do return if ABORTING
ptr.p->slaveState.setState(SCANNING);
ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REQ;
/**
* Get file
*/
......@@ -3280,7 +3148,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal)
ndbrequire(filePtr.p->fileOpened == 1);
ndbrequire(filePtr.p->fileRunning == 1);
ndbrequire(filePtr.p->scanRunning == 0);
ndbrequire(filePtr.p->fileDone == 0);
ndbrequire(filePtr.p->fileClosing == 0);
/**
* Get table
......@@ -3350,7 +3218,7 @@ Backup::execBACKUP_FRAGMENT_REQ(Signal* signal)
req->transId1 = 0;
req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
req->clientOpPtr= filePtr.i;
req->batch_size_rows= 16;
req->batch_size_rows= parallelism;
req->batch_size_bytes= 0;
sendSignal(DBLQH_REF, GSN_SCAN_FRAGREQ, signal,
ScanFragReq::SignalLength, JBB);
......@@ -3572,6 +3440,13 @@ Backup::OperationRecord::newScan()
return false;
}
/**
 * Reset the per-scan counters (opLen, opNoConf, opNoDone) of this
 * operation record to zero.
 *
 * @return always true
 */
bool
Backup::OperationRecord::closeScan()
{
  opLen = 0;
  opNoConf = 0;
  opNoDone = 0;
  return true;
}
bool
Backup::OperationRecord::scanConf(Uint32 noOfOps, Uint32 total_len)
{
......@@ -3600,11 +3475,9 @@ Backup::execSCAN_FRAGREF(Signal* signal)
c_backupFilePool.getPtr(filePtr, filePtrI);
filePtr.p->errorCode = ref->errorCode;
filePtr.p->scanRunning = 0;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
abortFile(signal, ptr, filePtr);
backupFragmentRef(signal, filePtr);
}
void
......@@ -3639,9 +3512,11 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
{
jam();
if(filePtr.p->errorCode != 0){
if(filePtr.p->errorCode != 0)
{
jam();
abortFileHook(signal, filePtr, true); // Scan completed
filePtr.p->scanRunning = 0;
backupFragmentRef(signal, filePtr); // Scan completed
return;
}//if
......@@ -3669,20 +3544,51 @@ Backup::fragmentCompleted(Signal* signal, BackupFilePtr filePtr)
sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_CONF, signal,
BackupFragmentConf::SignalLength, JBB);
ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_CONF;
ptr.p->slaveState.setState(STARTED);
return;
}
/**
 * Tell the backup master that scanning a fragment failed by sending
 * BACKUP_FRAGMENT_REF for the backup that owns the given file.
 *
 * The callers visible in this file (execSCAN_FRAGREF, fragmentCompleted)
 * record the failure in filePtr.p->errorCode, not on the backup record,
 * so the file's error code is reported when it is set. The backup-wide
 * error code is used only as a fallback, which keeps the previous
 * behaviour when no file error is recorded.
 *
 * @param signal   signal object reused as send buffer
 * @param filePtr  file whose fragment scan failed
 */
void
Backup::backupFragmentRef(Signal * signal, BackupFilePtr filePtr)
{
  BackupRecordPtr ptr;
  c_backupPool.getPtr(ptr, filePtr.p->backupPtr);

  ptr.p->m_gsn = GSN_BACKUP_FRAGMENT_REF;

  // Prefer the error recorded on the file; a REF carrying error code 0
  // would not tell the master what went wrong.
  const Uint32 errCode = (filePtr.p->errorCode != 0)
    ? filePtr.p->errorCode
    : ptr.p->errorCode;

  BackupFragmentRef * ref = (BackupFragmentRef*)signal->getDataPtrSend();
  ref->backupId = ptr.p->backupId;
  ref->backupPtr = ptr.i;
  ref->nodeId = getOwnNodeId();
  ref->errorCode = errCode;
  sendSignal(ptr.p->masterRef, GSN_BACKUP_FRAGMENT_REF, signal,
             BackupFragmentRef::SignalLength, JBB);
}
void
Backup::checkScan(Signal* signal, BackupFilePtr filePtr)
{
if(filePtr.p->errorCode != 0){
OperationRecord & op = filePtr.p->operation;
if(filePtr.p->errorCode != 0)
{
jam();
abortFileHook(signal, filePtr, false); // Scan not completed
/**
* Close scan
*/
op.closeScan();
ScanFragNextReq * req = (ScanFragNextReq *)signal->getDataPtrSend();
req->senderData = filePtr.i;
req->closeFlag = 1;
req->transId1 = 0;
req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
ScanFragNextReq::SignalLength, JBB);
return;
}//if
OperationRecord & op = filePtr.p->operation;
if(op.newScan()) {
jam();
......@@ -3693,8 +3599,28 @@ Backup::checkScan(Signal* signal, BackupFilePtr filePtr)
req->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
req->batch_size_rows= 16;
req->batch_size_bytes= 0;
sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
ScanFragNextReq::SignalLength, JBB);
if(ERROR_INSERTED(10032))
sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
100, ScanFragNextReq::SignalLength);
else if(ERROR_INSERTED(10033))
{
SET_ERROR_INSERT_VALUE(10032);
sendSignalWithDelay(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
10000, ScanFragNextReq::SignalLength);
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i;
ord->requestType = AbortBackupOrd::FileOrScanError;
ord->senderData= ptr.i;
sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
AbortBackupOrd::SignalLength, JBB);
}
else
sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
ScanFragNextReq::SignalLength, JBB);
return;
}//if
......@@ -3718,11 +3644,8 @@ Backup::execFSAPPENDREF(Signal* signal)
filePtr.p->fileRunning = 0;
filePtr.p->errorCode = errCode;
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
abortFile(signal, ptr, filePtr);
checkFile(signal, filePtr);
}
void
......@@ -3738,12 +3661,6 @@ Backup::execFSAPPENDCONF(Signal* signal)
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, filePtrI);
if (ERROR_INSERTED(10029)) {
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
abortFile(signal, ptr, filePtr);
}//if
OperationRecord & op = filePtr.p->operation;
......@@ -3761,30 +3678,25 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
#endif
OperationRecord & op = filePtr.p->operation;
Uint32 * tmp, sz; bool eof;
if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) {
if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof))
{
jam();
if(filePtr.p->errorCode == 0) {
jam();
FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
req->userPointer = filePtr.i;
req->userReference = reference();
req->varIndex = 0;
req->offset = tmp - c_startOfPages;
req->size = sz;
sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
FsAppendReq::SignalLength, JBA);
return;
} else {
jam();
if (filePtr.p->scanRunning == 1)
eof = false;
}//if
}//if
jam();
FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
req->userPointer = filePtr.i;
req->userReference = reference();
req->varIndex = 0;
req->offset = tmp - c_startOfPages;
req->size = sz;
sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal,
FsAppendReq::SignalLength, JBA);
return;
}
if(!eof) {
jam();
......@@ -3794,9 +3706,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
return;
}//if
ndbrequire(filePtr.p->fileDone == 1);
if(sz > 0 && filePtr.p->errorCode == 0) {
if(sz > 0) {
jam();
FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
......@@ -3812,6 +3722,7 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
}//if
filePtr.p->fileRunning = 0;
filePtr.p->fileClosing = 1;
FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
......@@ -3819,64 +3730,11 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i);
ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
}
/**
 * Slave-side reaction to a file or scan error.
 *
 * First call (slave not yet ABORTING): switch to ABORTING, record
 * FileOrScanError on the backup record and notify the master; nothing
 * else is done in that call.
 * Later calls (already ABORTING): flag every file of the backup with
 * error code 1 and close the files.
 *
 * @param signal   signal object reused as send buffer
 * @param ptr      backup record being aborted
 * @param filePtr  used as the iteration cursor over ptr.p->files; the
 *                 value passed in is overwritten
 */
void
Backup::abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr)
{
  jam();

  const bool alreadyAborting = (ptr.p->slaveState.getState() == ABORTING);
  if (!alreadyAborting) {
    /**
     * Inform master of failure
     */
    jam();
    ptr.p->slaveState.setState(ABORTING);
    ptr.p->setErrorCode(AbortBackupOrd::FileOrScanError);
    sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::FileOrScanError);
    return;
  }

  // Mark all files as failed before closing them
  ptr.p->files.first(filePtr);
  while (filePtr.i != RNIL) {
    jam();
    filePtr.p->errorCode = 1;
    ptr.p->files.next(filePtr);
  }

  closeFiles(signal, ptr);
}
/**
 * Abort hook for a file with a scan attached.
 *
 * If the scan has completed, clear scanRunning and continue with
 * abortFile() for the owning backup. Otherwise send SCAN_NEXTREQ with
 * closeFlag = 1 to DBLQH and return.
 *
 * @param signal        signal object reused as send buffer
 * @param filePtr       file whose scan is being aborted
 * @param scanComplete  true when the scan has already finished
 */
void
Backup::abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanComplete)
{
  jam();

  if (scanComplete) {
    filePtr.p->scanRunning = 0;

    BackupRecordPtr backupPtr;
    c_backupPool.getPtr(backupPtr, filePtr.p->backupPtr);

    // abortFile() receives a cursor whose index is RNIL
    filePtr.i = RNIL;
    abortFile(signal, backupPtr, filePtr);
    return;
  }

  jam();

  // Scan still running: request that it is closed
  ScanFragNextReq * closeReq = (ScanFragNextReq *)signal->getDataPtrSend();
  closeReq->senderData = filePtr.i;
  closeReq->closeFlag = 1;
  closeReq->transId1 = 0;
  closeReq->transId2 = (BACKUP << 20) + (getOwnNodeId() << 8);
  sendSignal(DBLQH_REF, GSN_SCAN_NEXTREQ, signal,
             ScanFragNextReq::SignalLength, JBB);
}
/****************************************************************************
*
......@@ -3953,27 +3811,30 @@ Backup::execTRIG_ATTRINFO(Signal* signal) {
}//if
BackupFormat::LogFile::LogEntry * logEntry = trigPtr.p->logEntry;
if(logEntry == 0) {
if(logEntry == 0)
{
jam();
Uint32 * dst;
FsBuffer & buf = trigPtr.p->operation->dataBuffer;
ndbrequire(trigPtr.p->maxRecordSize <= buf.getMaxWrite());
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
if(!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize)) {
if(ERROR_INSERTED(10030) ||
!buf.getWritePtr(&dst, trigPtr.p->maxRecordSize))
{
jam();
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull;
sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull);
return;
}//if
if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) {
jam();
trigPtr.p->errorCode = AbortBackupOrd::LogBufferFull;
sendAbortBackupOrdSlave(signal, ptr, AbortBackupOrd::LogBufferFull);
AbortBackupOrd *ord = (AbortBackupOrd*)signal->getDataPtrSend();
ord->backupId = ptr.p->backupId;
ord->backupPtr = ptr.i;
ord->requestType = AbortBackupOrd::LogBufferFull;
ord->senderData= ptr.i;
sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
AbortBackupOrd::SignalLength, JBB);
return;
}//if
logEntry = (BackupFormat::LogFile::LogEntry *)dst;
trigPtr.p->logEntry = logEntry;
logEntry->Length = 0;
......@@ -4015,9 +3876,10 @@ Backup::execFIRE_TRIG_ORD(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, trigPtr.p->backupPtr);
if(gci != ptr.p->currGCP) {
if(gci != ptr.p->currGCP)
{
jam();
trigPtr.p->logEntry->TriggerEvent = htonl(trigPtr.p->event | 0x10000);
trigPtr.p->logEntry->Data[len] = htonl(gci);
len ++;
......@@ -4035,20 +3897,6 @@ Backup::execFIRE_TRIG_ORD(Signal* signal)
trigPtr.p->operation->noOfRecords += 1;
}
/**
 * Send ABORT_BACKUP_ORD with the given request type to the master of
 * this backup (ptr.p->masterRef).
 *
 * @param signal       signal object reused as send buffer
 * @param ptr          backup record the order refers to
 * @param requestType  value stored in AbortBackupOrd::requestType
 */
void
Backup::sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
                                Uint32 requestType)
{
  jam();

  AbortBackupOrd * const abortOrd = (AbortBackupOrd*)signal->getDataPtrSend();
  abortOrd->requestType = requestType;
  abortOrd->backupId = ptr.p->backupId;
  // Both backupPtr and senderData carry the record index
  abortOrd->backupPtr = ptr.i;
  abortOrd->senderData = ptr.i;

  sendSignal(ptr.p->masterRef, GSN_ABORT_BACKUP_ORD, signal,
             AbortBackupOrd::SignalLength, JBB);
}
void
Backup::sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr,
Uint32 requestType)
......@@ -4085,7 +3933,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal)
CRASH_INSERTION((10020));
const Uint32 ptrI = req->backupPtr;
const Uint32 backupId = req->backupId;
//const Uint32 backupId = req->backupId;
const Uint32 startGCP = req->startGCP;
const Uint32 stopGCP = req->stopGCP;
......@@ -4101,7 +3949,7 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal)
c_backupPool.getPtr(ptr, ptrI);
ptr.p->slaveState.setState(STOPPING);
slaveAbortCheck(); // macro will do return if ABORTING
ptr.p->m_gsn = GSN_STOP_BACKUP_REQ;
/**
* Insert footers
......@@ -4140,12 +3988,6 @@ Backup::execSTOP_BACKUP_REQ(Signal* signal)
void
Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
{
if (ptr.p->closingFiles) {
jam();
return;
}
ptr.p->closingFiles = true;
/**
* Close all files
*/
......@@ -4161,12 +4003,12 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
jam();
openCount++;
if(filePtr.p->fileDone == 1){
if(filePtr.p->fileClosing == 1){
jam();
continue;
}//if
filePtr.p->fileDone = 1;
filePtr.p->fileClosing = 1;
if(filePtr.p->fileRunning == 1){
jam();
......@@ -4183,7 +4025,7 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
ndbout_c("***** FSCLOSEREQ filePtr.i = %u", filePtr.i);
ndbout_c("***** b FSCLOSEREQ filePtr.i = %u", filePtr.i);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, sig,
FsCloseReq::SignalLength, JBA);
......@@ -4210,11 +4052,6 @@ Backup::execFSCLOSEREF(Signal* signal)
BackupRecordPtr ptr;
c_backupPool.getPtr(ptr, filePtr.p->backupPtr);
/**
* This should only happen during abort of backup
*/
ndbrequire(ptr.p->slaveState.getState() == ABORTING);
filePtr.p->fileOpened = 1;
FsConf * conf = (FsConf*)signal->getDataPtr();
conf->userPointer = filePtrI;
......@@ -4237,7 +4074,7 @@ Backup::execFSCLOSECONF(Signal* signal)
ndbout_c("***** FSCLOSECONF filePtrI = %u", filePtrI);
#endif
ndbrequire(filePtr.p->fileDone == 1);
ndbrequire(filePtr.p->fileClosing == 1);
ndbrequire(filePtr.p->fileOpened == 1);
ndbrequire(filePtr.p->fileRunning == 0);
ndbrequire(filePtr.p->scanRunning == 0);
......@@ -4265,25 +4102,20 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
{
jam();
if(ptr.p->slaveState.getState() == STOPPING) {
jam();
BackupFilePtr filePtr;
ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
StopBackupConf::SignalLength, JBB);
ptr.p->slaveState.setState(CLEANING);
return;
}//if
jam();
BackupFilePtr filePtr;
ptr.p->files.getPtr(filePtr, ptr.p->logFilePtr);
ndbrequire(ptr.p->slaveState.getState() == ABORTING);
removeBackup(signal, ptr);
StopBackupConf* conf = (StopBackupConf*)signal->getDataPtrSend();
conf->backupId = ptr.p->backupId;
conf->backupPtr = ptr.i;
conf->noOfLogBytes = filePtr.p->operation.noOfBytes;
conf->noOfLogRecords = filePtr.p->operation.noOfRecords;
sendSignal(ptr.p->masterRef, GSN_STOP_BACKUP_CONF, signal,
StopBackupConf::SignalLength, JBB);
ptr.p->m_gsn = GSN_STOP_BACKUP_CONF;
ptr.p->slaveState.setState(CLEANING);
}
/*****************************************************************************
......@@ -4291,57 +4123,6 @@ Backup::closeFilesDone(Signal* signal, BackupRecordPtr ptr)
* Slave functionality: Abort backup
*
*****************************************************************************/
/**
 * Send FSREMOVEREQ to NDBFS for this backup.
 *
 * The file number is built as version 2, suffix S_CTL, sequence =
 * backup id and node id = own node; `directory` and `ownDirectory` are
 * both set to 1. The reply is routed back to this block with the backup
 * record index as user pointer.
 *
 * @param signal  signal object reused as send buffer
 * @param ptr     backup record whose files are to be removed
 */
void
Backup::removeBackup(Signal* signal, BackupRecordPtr ptr)
{
  jam();

  FsRemoveReq * const removeReq = (FsRemoveReq *)signal->getDataPtrSend();

  // Where the FSREMOVECONF/REF should go and how to find the record again
  removeReq->userPointer = ptr.i;
  removeReq->userReference = reference();

  removeReq->ownDirectory = 1;
  removeReq->directory = 1;

  // Identify the backup on disk
  FsOpenReq::setVersion(removeReq->fileNumber, 2);
  FsOpenReq::setSuffix(removeReq->fileNumber, FsOpenReq::S_CTL);
  FsOpenReq::v2_setSequence(removeReq->fileNumber, ptr.p->backupId);
  FsOpenReq::v2_setNodeId(removeReq->fileNumber, getOwnNodeId());

  sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
             FsRemoveReq::SignalLength, JBA);
}
/**
 * Handler for FSREMOVEREF.
 *
 * No recovery is attempted in this version of the handler: it
 * unconditionally fails the ndbrequire check.
 */
void
Backup::execFSREMOVEREF(Signal* signal)
{
jamEntry();
ndbrequire(0); // always fails
}
/**
 * Handler for FSREMOVECONF.
 *
 * Looks up the backup record from the user pointer of the confirmation
 * and requires the slave state to be ABORTING. If this node is the master
 * of the backup and the master's abort state is DEFINING, BACKUP_REF is
 * sent via sendBackupRef() and nothing else is done; in every other case
 * the slave resources are cleaned up.
 */
void
Backup::execFSREMOVECONF(Signal* signal){
  jamEntry();

  const FsConf * const fsConf = (FsConf*)signal->getDataPtr();
  const Uint32 backupPtrI = fsConf->userPointer;

  /**
   * Get backup record
   */
  BackupRecordPtr ptr;
  c_backupPool.getPtr(ptr, backupPtrI);

  ndbrequire(ptr.p->slaveState.getState() == ABORTING);

  if (ptr.p->masterRef == reference()) {
    if (ptr.p->masterData.state.getAbortState() != DEFINING) {
      jam();
    } else {
      jam();
      sendBackupRef(signal, ptr, ptr.p->errorCode);
      return;
    }
  }

  cleanupSlaveResources(ptr);
}
/*****************************************************************************
*
* Slave functionality: Abort backup
......@@ -4394,8 +4175,7 @@ Backup::execABORT_BACKUP_ORD(Signal* signal)
if (c_backupPool.findId(senderData)) {
jam();
c_backupPool.getPtr(ptr, senderData);
} else { // TODO might be abort sent to not master,
// or master aborting too early
} else {
jam();
#ifdef DEBUG_ABORT
ndbout_c("Backup: abort request type=%u on id=%u,%u not found",
......@@ -4405,15 +4185,15 @@ Backup::execABORT_BACKUP_ORD(Signal* signal)
}
}//if
ptr.p->m_gsn = GSN_ABORT_BACKUP_ORD;
const bool isCoordinator = (ptr.p->masterRef == reference());
bool ok = false;
switch(requestType){
/**
* Requests sent to master
*/
case AbortBackupOrd::ClientAbort:
jam();
// fall through
......@@ -4422,113 +4202,61 @@ Backup::execABORT_BACKUP_ORD(Signal* signal)
// fall through
case AbortBackupOrd::FileOrScanError:
jam();
if(ptr.p->masterData.state.getState() == ABORTING) {
#ifdef DEBUG_ABORT
ndbout_c("---- Already aborting");
#endif
jam();
return;
}
ndbrequire(isCoordinator);
ptr.p->setErrorCode(requestType);
ndbrequire(isCoordinator); // Sent from slave to coordinator
masterAbort(signal, ptr, false);
if(ptr.p->masterData.gsn == GSN_BACKUP_FRAGMENT_REQ)
{
/**
* Only scans are actively aborted
*/
abort_scan(signal, ptr);
}
return;
/**
* Info sent to slave
* Requests sent to slave
*/
case AbortBackupOrd::OkToClean:
case AbortBackupOrd::AbortScan:
jam();
cleanupMasterResources(ptr);
ptr.p->setErrorCode(requestType);
return;
/**
* Requests sent to slave
*/
case AbortBackupOrd::BackupComplete:
jam();
if (ptr.p->slaveState.getState() == CLEANING) { // TODO what if state is
// not CLEANING?
jam();
cleanupSlaveResources(ptr);
}//if
cleanup(signal, ptr);
return;
break;
case AbortBackupOrd::BackupFailureDueToNodeFail:
jam();
ok = true;
if (ptr.p->errorCode != 0)
ptr.p->setErrorCode(requestType);
break;
case AbortBackupOrd::BackupFailure:
jam();
ok = true;
break;
case AbortBackupOrd::BackupFailureDueToNodeFail:
case AbortBackupOrd::OkToClean:
case AbortBackupOrd::IncompatibleVersions:
#ifndef VM_TRACE
default:
#endif
ptr.p->setErrorCode(requestType);
ok= true;
}
ndbrequire(ok);
/**
* Slave abort
*/
slaveAbort(signal, ptr);
}
/**
 * Slave-side abort: remember the current slave state, switch to ABORTING
 * and act according to the state the slave was in.
 *
 * NOTE(review): several statements below do not fit this function's brace
 * structure: the block that saves and overwrites masterRef, resets
 * ptr.p->nodes and calls sendDropTrig, and the dangling `else` that sets
 * masterData.gsn. They look like lines of another revision merged into
 * this span — confirm which statements are live.
 *
 * @param signal  signal object reused as send buffer
 * @param ptr     backup record being aborted
 */
void
Backup::slaveAbort(Signal* signal, BackupRecordPtr ptr)
{
if(ptr.p->slaveState.getState() == ABORTING) {
#ifdef DEBUG_ABORT
ndbout_c("---- Slave already aborting");
#endif
jam();
return;
// Keep the previous master reference, then make this node the master of
// a node set that contains only itself
Uint32 ref= ptr.p->masterRef;
ptr.p->masterRef = reference();
ptr.p->nodes.clear();
ptr.p->nodes.set(getOwnNodeId());
// This node was the master before: drop triggers, with stopGCP set to
// startGCP + 1
if(ref == reference())
{
ptr.p->stopGCP= ptr.p->startGCP + 1;
sendDropTrig(signal, ptr);
}
#ifdef DEBUG_ABORT
ndbout_c("************* slaveAbort");
#endif
// Dispatch on the state held before the switch to ABORTING
State slaveState = ptr.p->slaveState.getState();
ptr.p->slaveState.setState(ABORTING);
switch(slaveState) {
case DEFINING:
jam();
return;
//------------------------------------------
// Will watch for the abort at various places
// in the defining phase.
//------------------------------------------
case ABORTING:
jam();
//Fall through
case DEFINED:
jam();
//Fall through
case STOPPING:
jam();
else
{
// Only this node is marked as waited for, with STOP_BACKUP_REQ as the
// outstanding request
ptr.p->masterData.gsn = GSN_STOP_BACKUP_REQ;
ptr.p->masterData.sendCounter.clearWaitingFor();
ptr.p->masterData.sendCounter.setWaitingFor(getOwnNodeId());
closeFiles(signal, ptr);
return;
case STARTED:
jam();
//Fall through
case SCANNING:
jam();
// abortFile() is called with a file cursor whose index is RNIL
BackupFilePtr filePtr;
filePtr.i = RNIL;
abortFile(signal, ptr, filePtr);
return;
case CLEANING:
jam();
cleanupSlaveResources(ptr);
return;
case INITIAL:
jam();
// Abort in INITIAL state is not expected
ndbrequire(false);
return;
}
}
void
Backup::dumpUsedResources()
{
......@@ -4576,12 +4304,8 @@ Backup::dumpUsedResources()
}
void
Backup::cleanupMasterResources(BackupRecordPtr ptr)
Backup::cleanup(Signal* signal, BackupRecordPtr ptr)
{
#ifdef DEBUG_ABORT
ndbout_c("******** Cleanup Master Resources *********");
ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
#endif
TablePtr tabPtr;
for(ptr.p->tables.first(tabPtr); tabPtr.i != RNIL;ptr.p->tables.next(tabPtr))
......@@ -4601,20 +4325,6 @@ Backup::cleanupMasterResources(BackupRecordPtr ptr)
tabPtr.p->triggerIds[j] = ILLEGAL_TRIGGER_ID;
}//for
}//for
ptr.p->tables.release();
ptr.p->triggers.release();
ptr.p->okToCleanMaster = true;
cleanupFinalResources(ptr);
}
void
Backup::cleanupSlaveResources(BackupRecordPtr ptr)
{
#ifdef DEBUG_ABORT
ndbout_c("******** Clean Up Slave Resources*********");
ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
#endif
BackupFilePtr filePtr;
for(ptr.p->files.first(filePtr);
......@@ -4626,35 +4336,65 @@ Backup::cleanupSlaveResources(BackupRecordPtr ptr)
ndbrequire(filePtr.p->scanRunning == 0);
filePtr.p->pages.release();
}//for
ptr.p->files.release();
ptr.p->tables.release();
ptr.p->triggers.release();
cleanupFinalResources(ptr);
ptr.p->tables.release();
ptr.p->triggers.release();
ptr.p->pages.release();
ptr.p->backupId = ~0;
if(ptr.p->checkError())
removeBackup(signal, ptr);
else
c_backups.release(ptr);
}
void
Backup::cleanupFinalResources(BackupRecordPtr ptr)
Backup::removeBackup(Signal* signal, BackupRecordPtr ptr)
{
#ifdef DEBUG_ABORT
ndbout_c("******** Clean Up Final Resources*********");
ndbout_c("backupId = %u, errorCode = %u", ptr.p->backupId, ptr.p->errorCode);
#endif
jam();
FsRemoveReq * req = (FsRemoveReq *)signal->getDataPtrSend();
req->userReference = reference();
req->userPointer = ptr.i;
req->directory = 1;
req->ownDirectory = 1;
FsOpenReq::setVersion(req->fileNumber, 2);
FsOpenReq::setSuffix(req->fileNumber, FsOpenReq::S_CTL);
FsOpenReq::v2_setSequence(req->fileNumber, ptr.p->backupId);
FsOpenReq::v2_setNodeId(req->fileNumber, getOwnNodeId());
sendSignal(NDBFS_REF, GSN_FSREMOVEREQ, signal,
FsRemoveReq::SignalLength, JBA);
}
// if (!ptr.p->tables.empty() || !ptr.p->files.empty()) {
if (!ptr.p->okToCleanMaster || !ptr.p->files.empty()) {
jam();
#ifdef DEBUG_ABORT
ndbout_c("******** Waiting to do final cleanup");
#endif
return;
}
ptr.p->pages.release();
ptr.p->masterData.state.setState(INITIAL);
ptr.p->slaveState.setState(INITIAL);
ptr.p->backupId = 0;
void
Backup::execFSREMOVEREF(Signal* signal)
{
jamEntry();
/**
 * A refused remove is handled exactly like a confirmed one:
 * read the user pointer through the ref layout, store it through
 * the conf layout, and hand the signal to the FSREMOVECONF handler.
 */
const Uint32 backupPtrI = ((FsRef*)signal->getDataPtr())->userPointer;
((FsConf*)signal->getDataPtr())->userPointer = backupPtrI;
execFSREMOVECONF(signal);
}
ptr.p->closingFiles = false;
ptr.p->okToCleanMaster = true;
void
Backup::execFSREMOVECONF(Signal* signal){
jamEntry();
/**
 * The user pointer of the reply is used as the index of the
 * backup record; look the record up and release it from c_backups.
 */
const Uint32 backupPtrI = ((FsConf*)signal->getDataPtr())->userPointer;
BackupRecordPtr backupPtr;
c_backupPool.getPtr(backupPtr, backupPtrI);
c_backups.release(backupPtr);
// ndbrequire(false);
}
......@@ -232,6 +232,7 @@ public:
*/
bool newScan();
bool scanConf(Uint32 noOfOps, Uint32 opLen);
bool closeScan();
/**
* Per record
......@@ -330,7 +331,7 @@ public:
Uint8 fileOpened;
Uint8 fileRunning;
Uint8 fileDone;
Uint8 fileClosing;
Uint8 scanRunning;
};
typedef Ptr<BackupFile> BackupFilePtr;
......@@ -403,13 +404,11 @@ public:
ArrayPool<TriggerRecord> & trp)
: slaveState(b, validSlaveTransitions, validSlaveTransitionsCount,1)
, tables(tp), triggers(trp), files(bp), pages(pp)
, masterData(b, validMasterTransitions, validMasterTransitionsCount)
, backup(b)
{
closingFiles = false;
okToCleanMaster = true;
}
, masterData(b), backup(b)
{
}
Uint32 m_gsn;
CompoundState slaveState;
Uint32 clientRef;
......@@ -420,9 +419,6 @@ public:
Uint32 errorCode;
NdbNodeBitmask nodes;
bool okToCleanMaster;
bool closingFiles;
Uint64 noOfBytes;
Uint64 noOfRecords;
Uint64 noOfLogBytes;
......@@ -444,15 +440,13 @@ public:
SimpleProperties props;// Used for (un)packing backup request
struct MasterData {
MasterData(Backup & b, const State valid[], Uint32 count)
: state(b, valid, count, 0)
{
}
MasterData(Backup & b)
{
}
MutexHandle2<BACKUP_DEFINE_MUTEX> m_defineBackupMutex;
MutexHandle2<DICT_COMMIT_TABLE_MUTEX> m_dictCommitTableMutex;
Uint32 gsn;
CompoundState state;
SignalCounter sendCounter;
Uint32 errorCode;
struct {
......@@ -557,7 +551,8 @@ public:
void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0);
void backupFragmentRef(Signal * signal, BackupFilePtr filePtr);
void nextFragment(Signal*, BackupRecordPtr);
void sendCreateTrig(Signal*, BackupRecordPtr ptr, TablePtr tabPtr);
......@@ -578,14 +573,14 @@ public:
void sendAbortBackupOrd(Signal* signal, BackupRecordPtr ptr, Uint32 errCode);
void sendAbortBackupOrdSlave(Signal* signal, BackupRecordPtr ptr,
Uint32 errCode);
void masterAbort(Signal*, BackupRecordPtr ptr, bool controlledAbort);
void masterAbort(Signal*, BackupRecordPtr ptr);
void masterSendAbortBackup(Signal*, BackupRecordPtr ptr);
void slaveAbort(Signal*, BackupRecordPtr ptr);
void abortFile(Signal* signal, BackupRecordPtr ptr, BackupFilePtr filePtr);
void abortFileHook(Signal* signal, BackupFilePtr filePtr, bool scanDone);
bool verifyNodesAlive(const NdbNodeBitmask& aNodeBitMask);
bool verifyNodesAlive(BackupRecordPtr, const NdbNodeBitmask& aNodeBitMask);
bool checkAbort(BackupRecordPtr ptr);
void checkNodeFail(Signal* signal,
BackupRecordPtr ptr,
......@@ -603,9 +598,8 @@ public:
void sendBackupRef(BlockReference ref, Signal *signal,
Uint32 senderData, Uint32 errorCode);
void dumpUsedResources();
void cleanupMasterResources(BackupRecordPtr ptr);
void cleanupSlaveResources(BackupRecordPtr ptr);
void cleanupFinalResources(BackupRecordPtr ptr);
void cleanup(Signal*, BackupRecordPtr ptr);
void abort_scan(Signal*, BackupRecordPtr ptr);
void removeBackup(Signal*, BackupRecordPtr ptr);
void sendSTTORRY(Signal*);
......
......@@ -341,3 +341,28 @@ start backup
(ERROR_INSERTED(10022))) {
if (ERROR_INSERTED(10029)) {
if(trigPtr.p->operation->noOfBytes > 123 && ERROR_INSERTED(10030)) {
----- XXX ---
DEFINE_BACKUP_REF ->
ABORT_BACKUP_ORD(no reply) when all DEFINE_BACKUP replies have arrived
START_BACKUP_REF
ABORT_BACKUP_ORD(no reply) when all START_BACKUP_ replies have arrived
BACKUP_FRAGMENT_REF
ABORT_BACKUP_ORD(reply) directly to all nodes running BACKUP_FRAGMENT
When all nodes have replied BACKUP_FRAGMENT
ABORT_BACKUP_ORD(no reply)
STOP_BACKUP_REF
ABORT_BACKUP_ORD(no reply) when all STOP_BACKUP_ replies have arrived
NF_COMPLETE_REP
slave dies
master sends OUTSTANDING_REF to self
slave does nothing
master dies
slave elects self as master and sets only itself as participant
......@@ -175,7 +175,7 @@ Backup::Backup(const Configuration & conf) :
addRecSignal(GSN_START_BACKUP_CONF, &Backup::execSTART_BACKUP_CONF);
addRecSignal(GSN_BACKUP_FRAGMENT_REQ, &Backup::execBACKUP_FRAGMENT_REQ);
//addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF);
addRecSignal(GSN_BACKUP_FRAGMENT_REF, &Backup::execBACKUP_FRAGMENT_REF);
addRecSignal(GSN_BACKUP_FRAGMENT_CONF, &Backup::execBACKUP_FRAGMENT_CONF);
addRecSignal(GSN_STOP_BACKUP_REQ, &Backup::execSTOP_BACKUP_REQ);
......
......@@ -126,6 +126,7 @@ Cmvmi::Cmvmi(const Configuration & conf) :
}
setNodeInfo(getOwnNodeId()).m_connected = true;
setNodeInfo(getOwnNodeId()).m_version = ndbGetOwnVersion();
}
Cmvmi::~Cmvmi()
......
......@@ -1565,9 +1565,9 @@ ndb_mgm_start_backup(NdbMgmHandle handle, int wait_completed,
{ // start backup can take some time, set timeout high
Uint64 old_timeout= handle->read_timeout;
if (wait_completed == 2)
handle->read_timeout= 30*60*1000; // 30 minutes
handle->read_timeout= 48*60*60*1000; // 48 hours
else if (wait_completed == 1)
handle->read_timeout= 5*60*1000; // 5 minutes
handle->read_timeout= 10*60*1000; // 10 minutes
reply = ndb_mgm_call(handle, start_backup_reply, "start backup", &args);
handle->read_timeout= old_timeout;
}
......
......@@ -791,7 +791,7 @@ MgmtSrvr::restartNode(int processId, bool nostart,
result = sendSignal(processId, NO_WAIT, signal, true);
}
if (result == -1) {
if (result == -1 && theWaitState != WAIT_NODEFAILURE) {
m_stopRec.inUse = false;
return SEND_OR_RECEIVE_FAILED;
}
......@@ -1920,6 +1920,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal)
#ifdef VM_TRACE
ndbout_c("I'm not master resending to %d", aNodeId);
#endif
theWaitNode= aNodeId;
NdbApiSignal aSignal(_ownReference);
BackupReq* req = CAST_PTR(BackupReq, aSignal.getDataPtrSend());
aSignal.set(TestOrd::TraceAPI, BACKUP, GSN_BACKUP_REQ,
......@@ -1947,6 +1948,7 @@ MgmtSrvr::handleReceivedSignal(NdbApiSignal* signal)
event.Event = BackupEvent::BackupAborted;
event.Aborted.Reason = rep->reason;
event.Aborted.BackupId = rep->backupId;
event.Aborted.ErrorCode = rep->reason;
backupCallback(event);
}
break;
......@@ -2076,6 +2078,13 @@ MgmtSrvr::handleStatus(NodeId nodeId, bool alive, bool nfComplete)
handleStopReply(nodeId, 0);
DBUG_VOID_RETURN;
}
if(theWaitNode == nodeId &&
theWaitState != NO_WAIT && theWaitState != WAIT_STOP)
{
theWaitState = WAIT_NODEFAILURE;
NdbCondition_Signal(theMgmtWaitForResponseCondPtr);
}
}
eventReport(_ownNodeId, theData);
......@@ -2427,7 +2436,7 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
int result;
if (waitCompleted == 2) {
result = sendRecSignal(nodeId, WAIT_BACKUP_COMPLETED,
signal, true, 30*60*1000 /*30 secs*/);
signal, true, 48*60*60*1000 /* 48 hours */);
}
else if (waitCompleted == 1) {
result = sendRecSignal(nodeId, WAIT_BACKUP_STARTED,
......@@ -2456,22 +2465,6 @@ MgmtSrvr::startBackup(Uint32& backupId, int waitCompleted)
return -1;
break;
}
} else {
switch(m_lastBackupEvent.Event){
case BackupEvent::BackupCompleted:
backupId = m_lastBackupEvent.Completed.BackupId;
break;
case BackupEvent::BackupStarted:
backupId = m_lastBackupEvent.Started.BackupId;
break;
case BackupEvent::BackupFailedToStart:
return m_lastBackupEvent.FailedToStart.ErrorCode;
case BackupEvent::BackupAborted:
return m_lastBackupEvent.Aborted.ErrorCode;
default:
return -1;
break;
}
}
return 0;
......
......@@ -611,7 +611,8 @@ private:
WAIT_STOP,
WAIT_BACKUP_STARTED,
WAIT_BACKUP_COMPLETED,
WAIT_VERSION
WAIT_VERSION,
WAIT_NODEFAILURE
};
/**
......@@ -695,6 +696,7 @@ private:
NdbApiSignal* theSignalIdleList;
// List of unused signals
Uint32 theWaitNode;
WaitSignalType theWaitState;
// State denoting the set of signals we are willing to receive.
......
......@@ -108,6 +108,7 @@ MgmtSrvr::sendRecSignal(Uint16 aNodeId,
return -1;
}
theWaitState = aWaitState;
theWaitNode = aNodeId;
return receiveOptimisedResponse(waitTime);
}
......@@ -119,11 +120,12 @@ MgmtSrvr::receiveOptimisedResponse(int waitTime)
theFacade->checkForceSend(_blockNumber);
NDB_TICKS maxTime = NdbTick_CurrentMillisecond() + waitTime;
while (theWaitState != NO_WAIT && waitTime > 0) {
while (theWaitState != NO_WAIT && theWaitState != WAIT_NODEFAILURE
&& waitTime > 0) {
NdbCondition_WaitTimeout(theMgmtWaitForResponseCondPtr,
theFacade->theMutexPtr,
waitTime);
if(theWaitState == NO_WAIT)
if(theWaitState == NO_WAIT || theWaitState == WAIT_NODEFAILURE)
break;
waitTime = (maxTime - NdbTick_CurrentMillisecond());
}//while
......
......@@ -345,7 +345,7 @@ ErrorBundle ErrorCodes[] = {
{ 1325, IE, "File or scan error" },
{ 1326, IE, "Backup abortet due to node failure" },
{ 1327, IE, "1327" },
{ 1340, IE, "Backup undefined error" },
{ 1342, AE, "Backup failed to allocate buffers (check configuration)" },
{ 1343, AE, "Backup failed to setup fs buffers (check configuration)" },
......@@ -355,7 +355,8 @@ ErrorBundle ErrorCodes[] = {
{ 1347, AE, "Backup failed to allocate table memory (check configuration)" },
{ 1348, AE, "Backup failed to allocate file record (check configuration)" },
{ 1349, AE, "Backup failed to allocate attribute record (check configuration)" },
{ 1329, AE, "Backup during software upgrade not supported" },
/**
* Still uncategorized
*/
......
......@@ -74,20 +74,20 @@ int runAbort(NDBT_Context* ctx, NDBT_Step* step){
if (testMaster) {
if (testSlave) {
if (backup.NFMasterAsSlave(restarter) == -1){
if (backup.NFMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
} else {
if (backup.NFMaster(restarter) == -1){
if (backup.NFMaster(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
} else {
if (backup.NFSlave(restarter) == -1){
if (backup.NFSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
return NDBT_OK;
}
......@@ -108,16 +108,16 @@ int runFail(NDBT_Context* ctx, NDBT_Step* step){
if (testMaster) {
if (testSlave) {
if (backup.FailMasterAsSlave(restarter) == -1){
if (backup.FailMasterAsSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
} else {
if (backup.FailMaster(restarter) == -1){
if (backup.FailMaster(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
} else {
if (backup.FailSlave(restarter) == -1){
if (backup.FailSlave(restarter) != NDBT_OK){
return NDBT_FAILED;
}
}
......
......@@ -2,6 +2,30 @@ max-time: 3600
cmd: atrt-mysql-test-run
args: --force
max-time: 600
cmd: atrt-testBackup
args: -n NFMaster T1
max-time: 600
cmd: atrt-testBackup
args: -n NFMasterAsSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n NFSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n FailMaster T1
max-time: 600
cmd: atrt-testBackup
args: -n FailMasterAsSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n FailSlave T1
max-time: 600
cmd: atrt-testBackup
args: -n BackupOne T1 T6 T3 I3
......
......@@ -245,6 +245,10 @@ NdbBackup::NFSlave(NdbRestarter& _restarter){
int
NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz, bool onMaster){
{
int nNodes = _restarter.getNumDbNodes();
if(nNodes == 1)
return NDBT_OK;
int nodeId = _restarter.getMasterNodeId();
CHECK(_restarter.restartOneDbNode(nodeId, false, true, true) == 0,
......@@ -255,15 +259,11 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
CHECK(_restarter.startNodes(&nodeId, 1) == 0,
"failed to start node");
NdbSleep_SecSleep(10);
}
CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed");
int nNodes = _restarter.getNumDbNodes();
myRandom48Init(NdbTick_CurrentMillisecond());
for(int i = 0; i<sz; i++){
......@@ -296,6 +296,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
"failed to set error insert");
g_info << "error inserted" << endl;
NdbSleep_SecSleep(1);
g_info << "starting backup" << endl;
int r = start(backupId);
......@@ -304,6 +305,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error << endl
<< "Master = " << masterNodeId << "Node = " << nodeId << endl;
return NDBT_FAILED;
}
CHECK(_restarter.waitNodesNoStart(&nodeId, 1) == 0,
......@@ -316,8 +318,6 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
return NDBT_FAILED;
}
NdbSleep_SecSleep(1);
g_info << "starting new backup" << endl;
CHECK(start(backupId) == 0,
"failed to start backup");
......@@ -331,8 +331,14 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
"waitClusterStarted failed");
g_info << "node started" << endl;
int val2[] = { 24, 2424 };
CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
"failed to check backup resources RestartOnErrorInsert");
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert");
NdbSleep_SecSleep(1);
}
return NDBT_OK;
......@@ -340,15 +346,8 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
int
FailS_codes[] = {
10023,
10024,
10025,
10026,
10027,
10028,
10029,
10030,
10031
10033
};
int
......@@ -359,9 +358,8 @@ FailM_codes[] = {
10026,
10027,
10028,
10029,
10030,
10031
10031,
10033
};
int
......@@ -426,13 +424,21 @@ NdbBackup::Fail(NdbRestarter& _restarter, int *Fail_codes, const int sz, bool on
if (r == 0) {
g_err << "Backup should have failed on error_insertion " << error << endl
<< "Master = " << masterNodeId << "Node = " << nodeId << endl;
return NDBT_FAILED;
}
CHECK(_restarter.waitClusterStarted() == 0,
"waitClusterStarted failed");
CHECK(_restarter.insertErrorInNode(nodeId, 10099) == 0,
"failed to set error insert");
NdbSleep_SecSleep(5);
int val2[] = { 24, 2424 };
CHECK(_restarter.dumpStateAllNodes(val2, 2) == 0,
"failed to check backup resources RestartOnErrorInsert");
}
return NDBT_OK;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment