Bug #12992 Cluster StopOnError = Y restarts ndbd indefinitely

parent c83eedc8
...@@ -2493,6 +2493,14 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){ ...@@ -2493,6 +2493,14 @@ void Ndbcntr::Missra::sendNextSTTOR(Signal* signal){
const Uint32 start = currentBlockIndex; const Uint32 start = currentBlockIndex;
if (currentStartPhase == ZSTART_PHASE_6)
{
// Ndbd has passed the critical startphases.
// Change error handler from "startup" state
// to normal state.
ErrorReporter::setErrorHandlerShutdownType();
}
for(; currentBlockIndex < ALL_BLOCKS_SZ; currentBlockIndex++){ for(; currentBlockIndex < ALL_BLOCKS_SZ; currentBlockIndex++){
jam(); jam();
if(ALL_BLOCKS[currentBlockIndex].NextSP == currentStartPhase){ if(ALL_BLOCKS[currentBlockIndex].NextSP == currentStartPhase){
......
...@@ -152,6 +152,14 @@ ErrorReporter::formatMessage(ErrorCategory type, ...@@ -152,6 +152,14 @@ ErrorReporter::formatMessage(ErrorCategory type,
return; return;
} }
// Shutdown type that handleAssert() and handleThreadAssert() pass to
// NdbShutdown(), and that handleError() substitutes when it is asked for
// NST_ErrorHandler. Starts out as NST_ErrorHandler.
NdbShutdownType ErrorReporter::s_errorHandlerShutdownType = NST_ErrorHandler;

/**
 * Select the shutdown type used by the error handlers from now on.
 *
 * @param nst  shutdown type to store; the declaration defaults it to
 *             NST_ErrorHandler, so a call without arguments restores the
 *             initial behaviour.
 */
void ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst)
{
  ErrorReporter::s_errorHandlerShutdownType = nst;
}
void void
ErrorReporter::handleAssert(const char* message, const char* file, int line) ErrorReporter::handleAssert(const char* message, const char* file, int line)
{ {
...@@ -170,7 +178,7 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line) ...@@ -170,7 +178,7 @@ ErrorReporter::handleAssert(const char* message, const char* file, int line)
WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage, WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage,
theEmulatedJamIndex, theEmulatedJam); theEmulatedJamIndex, theEmulatedJam);
NdbShutdown(NST_ErrorHandler); NdbShutdown(s_errorHandlerShutdownType);
} }
void void
...@@ -182,7 +190,7 @@ ErrorReporter::handleThreadAssert(const char* message, ...@@ -182,7 +190,7 @@ ErrorReporter::handleThreadAssert(const char* message,
BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s", BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s",
file, line, message); file, line, message);
NdbShutdown(NST_ErrorHandler); NdbShutdown(s_errorHandlerShutdownType);
}//ErrorReporter::handleThreadAssert() }//ErrorReporter::handleThreadAssert()
...@@ -201,6 +209,8 @@ ErrorReporter::handleError(ErrorCategory type, int messageID, ...@@ -201,6 +209,8 @@ ErrorReporter::handleError(ErrorCategory type, int messageID,
if(messageID == ERR_ERROR_INSERT){ if(messageID == ERR_ERROR_INSERT){
NdbShutdown(NST_ErrorInsert); NdbShutdown(NST_ErrorInsert);
} else { } else {
if (nst == NST_ErrorHandler)
nst = s_errorHandlerShutdownType;
NdbShutdown(nst); NdbShutdown(nst);
} }
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
class ErrorReporter class ErrorReporter
{ {
public: public:
static void setErrorHandlerShutdownType(NdbShutdownType nst = NST_ErrorHandler);
static void handleAssert(const char* message, static void handleAssert(const char* message,
const char* file, const char* file,
int line); int line);
...@@ -57,6 +58,7 @@ public: ...@@ -57,6 +58,7 @@ public:
static const char* formatTimeStampString(); static const char* formatTimeStampString();
private: private:
static enum NdbShutdownType s_errorHandlerShutdownType;
}; };
#endif #endif
...@@ -45,8 +45,14 @@ extern NdbMutex * theShutdownMutex; ...@@ -45,8 +45,14 @@ extern NdbMutex * theShutdownMutex;
void catchsigs(bool ignore); // for process signal handling void catchsigs(bool ignore); // for process signal handling
// Number of consecutive failed startups after which the parent process
// stops restarting the child (checked in the restart loop in main()).
#define MAX_FAILED_STARTUPS 3

// Flag set by child through SIGUSR1 to signal a failed startup.
// It is written by handler_sigusr1() and read/cleared by the restart loop,
// i.e. it is shared between a signal handler and normal code, so it is
// declared volatile sig_atomic_t (0 = no failure reported, non-zero = failure).
static volatile sig_atomic_t failed_startup_flag = 0;

// Counter for consecutive failed startups.
// Incremented in handler_sigusr1() and reset/inspected by the restart loop;
// volatile so that every access really goes to memory.
static volatile Uint32 failed_startups = 0;
extern "C" void handler_shutdown(int signum); // for process signal handling extern "C" void handler_shutdown(int signum); // for process signal handling
extern "C" void handler_error(int signum); // for process signal handling extern "C" void handler_error(int signum); // for process signal handling
extern "C" void handler_sigusr1(int signum); // child signalling failed restart
// Shows system information // Shows system information
void systemInfo(const Configuration & conf, void systemInfo(const Configuration & conf,
...@@ -92,6 +98,8 @@ int main(int argc, char** argv) ...@@ -92,6 +98,8 @@ int main(int argc, char** argv)
} }
#ifndef NDB_WIN32 #ifndef NDB_WIN32
signal(SIGUSR1, handler_sigusr1);
for(pid_t child = fork(); child != 0; child = fork()){ for(pid_t child = fork(); child != 0; child = fork()){
/** /**
* Parent * Parent
...@@ -137,6 +145,20 @@ int main(int argc, char** argv) ...@@ -137,6 +145,20 @@ int main(int argc, char** argv)
*/ */
exit(0); exit(0);
} }
if (!failed_startup_flag)
{
// Reset the counter for consecutive failed startups
failed_startups = 0;
}
else if (failed_startups >= MAX_FAILED_STARTUPS && !theConfig->stopOnError())
{
/**
* Too many consecutive failed startups && !stopOnError(): give up instead of restarting again
*/
g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups);
exit(0);
}
failed_startup_flag = false;
g_eventLogger.info("Ndb has terminated (pid %d) restarting", child); g_eventLogger.info("Ndb has terminated (pid %d) restarting", child);
theConfig->fetch_configuration(); theConfig->fetch_configuration();
} }
...@@ -170,6 +192,9 @@ int main(int argc, char** argv) ...@@ -170,6 +192,9 @@ int main(int argc, char** argv)
/** /**
* Do startup * Do startup
*/ */
ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup);
switch(globalData.theRestartFlag){ switch(globalData.theRestartFlag){
case initial_state: case initial_state:
globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI); globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
...@@ -359,3 +384,15 @@ handler_error(int signum){ ...@@ -359,3 +384,15 @@ handler_error(int signum){
BaseString::snprintf(errorData, 40, "Signal %d received", signum); BaseString::snprintf(errorData, 40, "Signal %d received", signum);
ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__); ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__);
} }
/**
 * SIGUSR1 handler, installed in main() before the fork loop.
 *
 * The child sends SIGUSR1 to its parent from NdbShutdown() when the shutdown
 * type is NST_ErrorHandlerStartup. The handler records that failure for the
 * restart loop: the counter is bumped only if no failure is currently
 * flagged, so repeated signals before the flag is cleared count once.
 *
 * NOTE(review): this logs through g_eventLogger from inside a signal
 * handler; presumably that is not async-signal-safe - confirm.
 *
 * @param signum  number of the received signal (only used for logging)
 */
extern "C"
void
handler_sigusr1(int signum)
{
  const bool first_report = !failed_startup_flag;
  if (first_report)
  {
    failed_startup_flag = true;
    failed_startups++;
  }
  g_eventLogger.info("Received signal %d. Ndbd failed startup (%u).",
                     signum, failed_startups);
}
...@@ -154,6 +154,9 @@ NdbShutdown(NdbShutdownType type, ...@@ -154,6 +154,9 @@ NdbShutdown(NdbShutdownType type,
case NST_ErrorHandlerSignal: case NST_ErrorHandlerSignal:
g_eventLogger.info("Error handler signal %s system", shutting); g_eventLogger.info("Error handler signal %s system", shutting);
break; break;
case NST_ErrorHandlerStartup:
g_eventLogger.info("Error handler startup %s system", shutting);
break;
case NST_Restart: case NST_Restart:
g_eventLogger.info("Restarting system"); g_eventLogger.info("Restarting system");
break; break;
...@@ -229,6 +232,9 @@ NdbShutdown(NdbShutdownType type, ...@@ -229,6 +232,9 @@ NdbShutdown(NdbShutdownType type,
} }
if(type != NST_Normal && type != NST_Restart){ if(type != NST_Normal && type != NST_Restart){
// Signal parent that an error occurred during startup
if (type == NST_ErrorHandlerStartup)
kill(getppid(), SIGUSR1);
g_eventLogger.info("Error handler shutdown completed - %s", exitAbort); g_eventLogger.info("Error handler shutdown completed - %s", exitAbort);
#if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) ) #if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
signal(6, SIG_DFL); signal(6, SIG_DFL);
......
...@@ -83,7 +83,8 @@ enum NdbShutdownType { ...@@ -83,7 +83,8 @@ enum NdbShutdownType {
NST_ErrorHandler, NST_ErrorHandler,
NST_ErrorHandlerSignal, NST_ErrorHandlerSignal,
NST_Restart, NST_Restart,
NST_ErrorInsert NST_ErrorInsert,
NST_ErrorHandlerStartup
}; };
enum NdbRestartType { enum NdbRestartType {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment