Commit d2a491b5 authored by unknown

BUG#17928 testBackup fails in error handling testcases

Reproduced failure of NFMaster on my laptop, this is the fix with some added
debug information to help people in the future when they trigger asserts in
AsyncFile (ndbfs helper threads).


storage/ndb/src/kernel/blocks/backup/Backup.cpp:
  In Backup::checkFile(Signal*, BackupFilePtr), only send FSCLOSEREQ if file
  is not already being closed.
  
  Add debug printouts if DEBUG_ABORT is defined to help in finding the problem.
  
  Only set filePtr.p->fileClosing when we are actually closing the file, not when
  we're anticipating a close.
  
  In Backup::closeFiles(Signal*,BackupRecordPtr), when we're closing a file,
  make sure we've queued everything to be written out before sending FSCLOSEREQ.
  
  This solves two problems:
  - in testBackup (NFMaster) on my machine (but not in autotest since the end of
  March for whatever reason), we were hitting an assert in the buffer for files
  saying we hadn't written everything out of the buffer before closing. (for the
  interested, it was 10 bytes of data)
  - once I'd fixed the above (by the checkFile before close) I'd then get really
  nonsensical trace dumps in NFMaster for ERROR_INSERT 10003. It turns out that
  any asserts in AsyncFile (the thread that NDBFS runs to do its bidding) don't
  cause anything to be written out anywhere and you're left scratching your head
  as to what on earth happened (apart from getting "caught signal 6, aborted").
  What was really happening was we were then calling FSCLOSEREQ one too many times,
  hitting the assert on trying to close an fd of -1 in AsyncFile.
storage/ndb/src/kernel/blocks/ndbfs/AsyncFile.cpp:
  It turns out that any asserts in AsyncFile (the thread that NDBFS runs to do 
  its bidding) don't cause anything to be written out anywhere and you're left 
  scratching your head as to what on earth happened (apart from getting
  "caught signal 6, aborted"). What was really happening was we were then
  calling FSCLOSEREQ one too many times, hitting the assert on trying to close
  an fd of -1 in AsyncFile.
  
  Added DEBUG printouts for every assert in AsyncFile
parent def8e0ce
......@@ -3819,19 +3819,37 @@ Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
FsAppendReq::SignalLength, JBA);
return;
}//if
filePtr.p->fileRunning = 0;
filePtr.p->fileClosing = 1;
FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
req->userPointer = filePtr.i;
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
Uint32 running= filePtr.p->fileRunning;
Uint32 closing= filePtr.p->fileClosing;
#endif
if(!filePtr.p->fileClosing)
{
filePtr.p->fileRunning = 0;
filePtr.p->fileClosing = 1;
FsCloseReq * req = (FsCloseReq *)signal->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
req->userPointer = filePtr.i;
req->userReference = reference();
req->fileFlag = 0;
#ifdef DEBUG_ABORT
ndbout_c("***** a FSCLOSEREQ filePtr.i = %u run=%d cl=%d", filePtr.i,
running, closing);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
}
else
{
#ifdef DEBUG_ABORT
ndbout_c("***** a FSCLOSEREQ filePtr.i = %u", filePtr.i);
ndbout_c("***** a NOT SENDING FSCLOSEREQ filePtr.i = %u run=%d cl=%d",
filePtr.i,
running, closing);
#endif
sendSignal(NDBFS_REF, GSN_FSCLOSEREQ, signal, FsCloseReq::SignalLength, JBA);
}
}
......@@ -4082,9 +4100,7 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
jam();
continue;
}//if
filePtr.p->fileClosing = 1;
if(filePtr.p->fileRunning == 1){
jam();
#ifdef DEBUG_ABORT
......@@ -4093,7 +4109,10 @@ Backup::closeFiles(Signal* sig, BackupRecordPtr ptr)
filePtr.p->operation.dataBuffer.eof();
} else {
jam();
filePtr.p->fileClosing = 1;
filePtr.p->operation.dataBuffer.eof();
checkFile(sig, filePtr); // make sure we write everything before closing
FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend();
req->filePointer = filePtr.p->filePointer;
req->userPointer = filePtr.i;
......@@ -4555,7 +4574,6 @@ Backup::execLCP_PREPARE_REQ(Signal* signal)
jam();
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
filePtr.p->fileClosing = 1;
filePtr.p->operation.dataBuffer.eof();
}
......@@ -4647,7 +4665,6 @@ Backup::execEND_LCPREQ(Signal* signal)
BackupFilePtr filePtr;
c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
filePtr.p->fileClosing = 1;
filePtr.p->operation.dataBuffer.eof();
return;
}
......
......@@ -228,6 +228,7 @@ AsyncFile::run()
endReq();
return;
default:
DEBUG(ndbout_c("Invalid Request"));
abort();
break;
}//switch
......@@ -676,6 +677,7 @@ AsyncFile::extendfile(Request* request) {
return 0;
#else
request = request;
DEBUG(ndbout_c("no pwrite"));
abort();
return -1;
#endif
......@@ -792,6 +794,7 @@ AsyncFile::writeBuffer(const char * buf, size_t size, off_t offset,
bytes_written = return_value;
if(bytes_written == 0){
DEBUG(ndbout_c("no bytes written"));
abort();
}
......@@ -830,8 +833,10 @@ AsyncFile::closeReq(Request * request)
#else
if (-1 == ::close(theFd)) {
#ifndef DBUG_OFF
if (theFd == -1)
if (theFd == -1) {
DEBUG(ndbout_c("close on fd = -1"));
abort();
}
#endif
request->error = errno;
}
......@@ -899,6 +904,7 @@ AsyncFile::appendReq(Request * request){
return;
}
if(n == 0){
DEBUG(ndbout_c("append with n=0"));
abort();
}
size -= n;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment