Commit 63bb77af authored by unknown's avatar unknown

WL#2868 Fix backup trigger handling

BACKUP previous set up triggers using DICT.
This lead to all kind of trouble.
An smaller alternative to using SUMA for backup
  is to just make BACKUP handle triggers to TUP directly.

This way all triggers will be totally local,
  and error handling is much simpler.

--- old impl.

Start: Master recives GSN_DEFINE_BACKUP_CONF from all participants
Master sends CREATE_TRIG_REQ for all tables to local DICT (dict master)
Master sends START_BACKUP_REQ to all paricipants with trigger ids from DICT
Master sends ALTER_TRIG_REQ (online) to local DICT
Master waits for GCP
Master starts distributed scan
When scan has finished
Master waits for GCP
Master sends DROP_TRIGGER to local DICT
Master sends STOP_BACKUP_REQ to all participants

--- new impl.

Start: Master recives GSN_DEFINE_BACKUP_CONF from all participants
Master sends START_BACKUP_REQ to all paricipants
  Participand sends CREATE_TRIG_REQ for all tables to local TUP
Master waits for GCP
Master starts distributed scan
When scan has finished
Master waits for GCP
Master sends STOP_BACKUP_REQ to all participants
  Participant sends DROP_TRIGGER to local TUP

Changes:
All trigger handling is _local_
 This implies, that abort (e.g due to node failure) can be _local_


fix testBackup test so that it will run successfully with the (now correct)
backup trigger code.


storage/ndb/include/kernel/signaldata/BackupImpl.hpp:
  rework START_BACKUP signals as we no longer need tableId and triggerIds.
storage/ndb/src/common/debugger/signaldata/BackupImpl.cpp:
  START_BACKUP_REQ no longer has tableIds and trigger ids
storage/ndb/src/kernel/blocks/backup/Backup.cpp:
  Use TUP triggers directly.
  
  removes ALTER trigger
  simplifies DROP triggers
  
  changes to node failure handling
  
  changes in signal order
  
  use SlaveData to track slave status.
storage/ndb/src/kernel/blocks/backup/Backup.hpp:
  - remove ALTER_TRIG (now unused)
  - add signalNo to BackupRecord
  - add SlaveData
  - remove dead items from MasterData
  - update prototype of startBackupReply
storage/ndb/src/kernel/blocks/backup/Backup.txt:
  Update signals for new backup code.
storage/ndb/src/kernel/blocks/backup/BackupInit.cpp:
  remove ALTER_TRIG REF and CONF as we no longer use them.
storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp:
  Add comment about meaning of triggerId
  Add sender BlockNumber parameter to dropTrigger.
storage/ndb/src/kernel/blocks/dbtup/DbtupTrigger.cpp:
  for SUBSCRIPTION triggers, make it so that the trigger ids are private to each block.
storage/ndb/test/ndbapi/testBackup.cpp:
  Don't do initial restart, just a restart. This is to avoid cache issues with
  schema versions
storage/ndb/test/src/NdbBackup.cpp:
  Update error insertions.
parent ec46e626
......@@ -139,21 +139,11 @@ class StartBackupReq {
friend bool printSTART_BACKUP_REQ(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( MaxTableTriggers = 4 );
STATIC_CONST( HeaderLength = 5 );
STATIC_CONST( TableTriggerLength = 4);
STATIC_CONST( SignalLength = 2 );
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 signalNo;
Uint32 noOfSignals;
Uint32 noOfTableTriggers;
struct TableTriggers {
Uint32 tableId;
Uint32 triggerIds[3];
} tableTriggers[MaxTableTriggers];
};
class StartBackupRef {
......@@ -169,7 +159,7 @@ class StartBackupRef {
friend bool printSTART_BACKUP_REF(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( SignalLength = 5 );
STATIC_CONST( SignalLength = 4 );
enum ErrorCode {
FailedToAllocateTriggerRecord = 1
......@@ -177,7 +167,6 @@ public:
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 signalNo;
Uint32 errorCode;
Uint32 nodeId;
};
......@@ -195,12 +184,11 @@ class StartBackupConf {
friend bool printSTART_BACKUP_CONF(FILE *, const Uint32 *, Uint32, Uint16);
public:
STATIC_CONST( SignalLength = 3 );
STATIC_CONST( SignalLength = 2 );
private:
Uint32 backupId;
Uint32 backupPtr;
Uint32 signalNo;
};
class BackupFragmentReq {
......
......@@ -48,16 +48,8 @@ printDEFINE_BACKUP_CONF(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
bool
printSTART_BACKUP_REQ(FILE * out, const Uint32 * data, Uint32 l, Uint16 bno){
StartBackupReq* sig = (StartBackupReq*)data;
fprintf(out, " backupPtr: %d backupId: %d signalNo: %d of %d\n",
sig->backupPtr, sig->backupId,
sig->signalNo + 1, sig->noOfSignals);
for(Uint32 i = 0; i<sig->noOfTableTriggers; i++)
fprintf(out,
" Table: %d Triggers = [ insert: %d update: %d delete: %d ]\n",
sig->tableTriggers[i].tableId,
sig->tableTriggers[i].triggerIds[TriggerEvent::TE_INSERT],
sig->tableTriggers[i].triggerIds[TriggerEvent::TE_UPDATE],
sig->tableTriggers[i].triggerIds[TriggerEvent::TE_DELETE]);
fprintf(out, " backupPtr: %d backupId: %d\n",
sig->backupPtr, sig->backupId);
return true;
}
......
......@@ -96,8 +96,6 @@ protected:
void execGET_TABINFO_CONF(Signal* signal);
void execCREATE_TRIG_REF(Signal* signal);
void execCREATE_TRIG_CONF(Signal* signal);
void execALTER_TRIG_REF(Signal* signal);
void execALTER_TRIG_CONF(Signal* signal);
void execDROP_TRIG_REF(Signal* signal);
void execDROP_TRIG_CONF(Signal* signal);
......@@ -426,6 +424,7 @@ public:
Uint32 clientRef;
Uint32 clientData;
Uint32 flags;
Uint32 signalNo;
Uint32 backupId;
Uint32 backupKey[2];
Uint32 masterRef;
......@@ -451,7 +450,18 @@ public:
Uint32 backupDataLen; // Used for (un)packing backup request
Array<Page32> pages; // Used for (un)packing backup request
SimpleProperties props;// Used for (un)packing backup request
struct SlaveData {
SignalCounter trigSendCounter;
Uint32 gsn;
struct {
Uint32 tableId;
} createTrig;
struct {
Uint32 tableId;
} dropTrig;
} slaveData;
struct MasterData {
MasterData(Backup & b)
{
......@@ -462,15 +472,6 @@ public:
Uint32 gsn;
SignalCounter sendCounter;
Uint32 errorCode;
struct {
Uint32 tableId;
} createTrig;
struct {
Uint32 tableId;
} dropTrig;
struct {
Uint32 tableId;
} alterTrig;
union {
struct {
Uint32 startBackup;
......@@ -563,7 +564,7 @@ public:
void defineBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
void createTrigReply(Signal* signal, BackupRecordPtr ptr);
void alterTrigReply(Signal* signal, BackupRecordPtr ptr);
void startBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32, Uint32);
void startBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32);
void stopBackupReply(Signal* signal, BackupRecordPtr ptr, Uint32 nodeId);
void defineBackupRef(Signal*, BackupRecordPtr, Uint32 errCode = 0);
......
......@@ -25,15 +25,12 @@ BACKUP_REQ
<-------------------------------
BACKUP_CONF
<----------------
CREATE_TRIG
--------------> (If master crashes here -> rouge triggers/memory leak)
<--------------
START_BACKUP
------------------------------>
CREATE_TRIG
-------------->
<--------------
<------------------------------
ALTER_TRIG
-------------->
<--------------
WAIT_GCP
-------------->
<--------------
......@@ -46,11 +43,11 @@ BACKUP_CONF
WAIT_GCP
-------------->
<--------------
DROP_TRIG
-------------->
<--------------
STOP_BACKUP
------------------------------>
DROP_TRIG
-------------->
<--------------
<------------------------------
BACKUP_COMPLETE_REP
<----------------
......
......@@ -62,9 +62,6 @@ Backup::Backup(const Configuration & conf) :
addRecSignal(GSN_CREATE_TRIG_REF, &Backup::execCREATE_TRIG_REF);
addRecSignal(GSN_CREATE_TRIG_CONF, &Backup::execCREATE_TRIG_CONF);
addRecSignal(GSN_ALTER_TRIG_REF, &Backup::execALTER_TRIG_REF);
addRecSignal(GSN_ALTER_TRIG_CONF, &Backup::execALTER_TRIG_CONF);
addRecSignal(GSN_DROP_TRIG_REF, &Backup::execDROP_TRIG_REF);
addRecSignal(GSN_DROP_TRIG_CONF, &Backup::execDROP_TRIG_CONF);
......
......@@ -777,6 +777,10 @@ struct TupTriggerData {
/**
* Trigger id, used by DICT/TRIX to identify the trigger
*
* trigger Ids are unique per block for SUBSCRIPTION triggers.
* This is so that BACKUP can use TUP triggers directly and delete them
* properly.
*/
Uint32 triggerId;
......@@ -2012,7 +2016,9 @@ private:
bool createTrigger(Tablerec* table, const CreateTrigReq* req);
Uint32 dropTrigger(Tablerec* table, const DropTrigReq* req);
Uint32 dropTrigger(Tablerec* table,
const DropTrigReq* req,
BlockNumber sender);
void
checkImmediateTriggersAfterInsert(KeyReqStruct *req_struct,
......
......@@ -186,7 +186,7 @@ Dbtup::execDROP_TRIG_REQ(Signal* signal)
ptrCheckGuard(tabPtr, cnoOfTablerec, tablerec);
// Drop trigger
Uint32 r = dropTrigger(tabPtr.p, req);
Uint32 r = dropTrigger(tabPtr.p, req, refToBlock(senderRef));
if (r == 0){
// Send conf
DropTrigConf* const conf = (DropTrigConf*)signal->getDataPtrSend();
......@@ -318,7 +318,7 @@ Dbtup::primaryKey(Tablerec* const regTabPtr, Uint32 attrId)
/* */
/* ---------------------------------------------------------------- */
Uint32
Dbtup::dropTrigger(Tablerec* table, const DropTrigReq* req)
Dbtup::dropTrigger(Tablerec* table, const DropTrigReq* req, BlockNumber sender)
{
if (ERROR_INSERTED(4004)) {
CLEAR_ERROR_INSERT_VALUE;
......@@ -330,7 +330,7 @@ Dbtup::dropTrigger(Tablerec* table, const DropTrigReq* req)
TriggerActionTime::Value ttime = req->getTriggerActionTime();
TriggerEvent::Value tevent = req->getTriggerEvent();
// ndbout_c("Drop TupTrigger %u = %u %u %u %u", triggerId, table, ttype, ttime, tevent);
// ndbout_c("Drop TupTrigger %u = %u %u %u %u by %u", triggerId, table, ttype, ttime, tevent, sender);
ArrayList<TupTriggerData>* tlist = findTriggerList(table, ttype, ttime, tevent);
ndbrequire(tlist != NULL);
......@@ -339,6 +339,19 @@ Dbtup::dropTrigger(Tablerec* table, const DropTrigReq* req)
for (tlist->first(ptr); !ptr.isNull(); tlist->next(ptr)) {
ljam();
if (ptr.p->triggerId == triggerId) {
if(ttype==TriggerType::SUBSCRIPTION && sender != ptr.p->m_receiverBlock)
{
/**
* You can only drop your own triggers for subscription triggers.
* Trigger IDs are private for each block.
*
* SUMA encodes information in the triggerId
*
* Backup doesn't really care about the Ids though.
*/
ljam();
continue;
}
ljam();
tlist->release(ptr.i);
return 0;
......
......@@ -193,7 +193,7 @@ runDDL(NDBT_Context* ctx, NDBT_Step* step){
}
int runRestartInitial(NDBT_Context* ctx, NDBT_Step* step){
int runDropTablesRestart(NDBT_Context* ctx, NDBT_Step* step){
NdbRestarter restarter;
Ndb* pNdb = GETNDB(step);
......@@ -201,7 +201,7 @@ int runRestartInitial(NDBT_Context* ctx, NDBT_Step* step){
const NdbDictionary::Table *tab = ctx->getTab();
pNdb->getDictionary()->dropTable(tab->getName());
if (restarter.restartAll(true) != 0)
if (restarter.restartAll(false) != 0)
return NDBT_FAILED;
if (restarter.waitClusterStarted() != 0)
......@@ -406,6 +406,7 @@ int runRestoreBankAndVerify(NDBT_Context* ctx, NDBT_Step* step){
// TEMPORARY FIX
// To erase all tables from cache(s)
// To be removed, maybe replaced by ndb.invalidate();
runDropTable(ctx,step);
{
Bank bank(ctx->m_cluster_connection);
......@@ -416,8 +417,8 @@ int runRestoreBankAndVerify(NDBT_Context* ctx, NDBT_Step* step){
}
// END TEMPORARY FIX
ndbout << "Performing initial restart" << endl;
if (restarter.restartAll(true) != 0)
ndbout << "Performing restart" << endl;
if (restarter.restartAll(false) != 0)
return NDBT_FAILED;
if (restarter.waitClusterStarted() != 0)
......@@ -465,12 +466,12 @@ TESTCASE("BackupOne",
"Test that backup and restore works on one table \n"
"1. Load table\n"
"2. Backup\n"
"3. Restart -i\n"
"3. Drop tables and restart \n"
"4. Restore\n"
"5. Verify count and content of table\n"){
INITIALIZER(runLoadTable);
INITIALIZER(runBackupOne);
INITIALIZER(runRestartInitial);
INITIALIZER(runDropTablesRestart);
INITIALIZER(runRestoreOne);
VERIFIER(runVerifyOne);
FINALIZER(runClearTable);
......
......@@ -199,7 +199,6 @@ int
NFDuringBackupM_codes[] = {
10003,
10004,
10005,
10007,
10008,
10009,
......@@ -349,6 +348,7 @@ NdbBackup::NF(NdbRestarter& _restarter, int *NFDuringBackup_codes, const int sz,
int
FailS_codes[] = {
10025,
10027,
10033
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment