Commit 0caca225 authored by unknown's avatar unknown

BUG#12124 ndb_mgm -e "# stop" does not allow stopping ndb_mgmd processes on other systems

WL#2703 restart for ndb_mgmd

Solving two problems with one change.

Allows the stopping and restarting of mgm nodes other than the one the mgmclient
is connected to.


ndb/include/mgmapi/mgmapi.h:
  Add the internal ndb_mgm_end_session command.
  
  This will unreserve the nodeid we have allocated synchronously.
  
  Otherwise we can't do a restart of a node really quickly as the nodeids are cleaned
  up after the connection to mgmd is closed.
ndb/include/mgmcommon/ConfigRetriever.hpp:
  Allow configuring whether end_session is called on object destruction.
  
  We need to set this to false for ndbd as we fork()
ndb/src/common/mgmcommon/ConfigRetriever.cpp:
  When destroying ConfigRetriever, call ndb_mgm_end_session - i.e. deallocate the nodeid
ndb/src/common/util/SocketServer.cpp:
  When destroying a SocketServer, close the server socket.
ndb/src/kernel/main.cpp:
  don't purge allocated resources when cleaning up in parent process (nodeid)
ndb/src/kernel/vm/Configuration.cpp:
  have option end_session to closeConfiguration
ndb/src/kernel/vm/Configuration.hpp:
  have option end_session to closeConfiguration
ndb/src/mgmapi/mgmapi.cpp:
  Implement ndb_mgm_end_session
ndb/src/mgmclient/CommandInterpreter.cpp:
  Correct output of STOP as we can now stop mgmd as well as ndbd
ndb/src/mgmsrv/MgmtSrvr.cpp:
  Add code into start for connecting to our own mgmd.
  
  Create sendStopMgmd() which does the same job as sendSTOP_REQ, but for ndb_mgmd
  
  Allow stopping of other ndb_mgmd processes by creating a connection to them
  and issuing the stop command
  
  When stopping all nodes, stop other ndb_mgmd processes as well.
  
  Remove set_connect_string. Replace with connect_to_self. This is a much better
  way of doing things.
ndb/src/mgmsrv/MgmtSrvr.hpp:
  add connect_to_self and remove set_connect_string.
ndb/src/mgmsrv/Services.cpp:
  Add endSession.
  
  - delete Allocated_resources for this connection
  - create new Allocated_resources for this connection
  
  conceivably you could keep the socket open across node restarts (and even
  possibly get a different node id). But I wouldn't try it and expect happiness.
ndb/src/mgmsrv/Services.hpp:
  Add endSession
ndb/src/mgmsrv/main.cpp:
  allow mgmd to be restarted.
  
  - add g_RestartServer flag
  - move connecting to our own mgmd into MgmtSrvr (where it belongs)
  - output correct Shutdown/Restart message on shutdown/restart
parent 57a2516e
......@@ -993,6 +993,22 @@ extern "C" {
int ndb_mgm_alloc_nodeid(NdbMgmHandle handle,
unsigned version, int nodetype);
/**
* End Session
*
* Asks the mgm server to free all resources associated with this
* connection (for example the node id allocated through it) and waits
* for the server's reply.
*
* This differs from just disconnecting as the clean up is done
* synchronously, so that a quickly restarting server that needs the same
* node id can get it when it restarts.
*
* @param handle NDB management handle
* @return 0 on success
*
* @note NOTE(review): earlier wording stated that this call also closes
*       the connection. Callers in this change still call
*       ndb_mgm_disconnect() afterwards, so do not rely on the connection
*       being closed here - confirm against the implementation.
* @note you still have to destroy the NdbMgmHandle.
*/
int ndb_mgm_end_session(NdbMgmHandle handle);
/**
* Get the node id of the mgm server we're connected to
......
......@@ -78,6 +78,7 @@ public:
const char *get_connectstring(char *buf, int buf_sz) const;
NdbMgmHandle get_mgmHandle() { return m_handle; };
NdbMgmHandle* get_mgmHandlePtr() { return &m_handle; };
/**
 * Select whether the destructor calls ndb_mgm_end_session() on the mgm
 * handle before disconnecting. The constructor sets this to true; pass
 * false to leave the server-side session untouched on destruction.
 */
void end_session(bool end) { m_end_session= end; };
Uint32 get_configuration_nodeid() const;
private:
......@@ -92,6 +93,8 @@ private:
void setError(ErrorType, const char * errorMsg);
Uint32 _ownNodeId;
bool m_end_session;
/*
Uint32 m_mgmd_port;
const char *m_mgmd_host;
......
......@@ -52,6 +52,7 @@ ConfigRetriever::ConfigRetriever(const char * _connect_string,
m_version = version;
m_node_type = node_type;
_ownNodeId= 0;
m_end_session= true;
m_handle= ndb_mgm_create_handle();
......@@ -73,6 +74,8 @@ ConfigRetriever::~ConfigRetriever()
{
DBUG_ENTER("ConfigRetriever::~ConfigRetriever");
if (m_handle) {
if(m_end_session)
ndb_mgm_end_session(m_handle);
ndb_mgm_disconnect(m_handle);
ndb_mgm_destroy_handle(&m_handle);
}
......
......@@ -42,6 +42,8 @@ SocketServer::~SocketServer() {
delete m_sessions[i].m_session;
}
for(i = 0; i<m_services.size(); i++){
if(m_services[i].m_socket)
NDB_CLOSE_SOCKET(m_services[i].m_socket);
delete m_services[i].m_service;
}
}
......
......@@ -307,8 +307,11 @@ int main(int argc, char** argv)
/**
* We no longer need the mgm connection in this process
* (as we are the angel, not ndb)
*
* We don't want to purge any allocated resources (nodeid), so
* we set that option to false
*/
theConfig->closeConfiguration();
theConfig->closeConfiguration(false);
int status = 0, error_exit = 0, signum = 0;
while(waitpid(child, &status, 0) != child);
......
......@@ -172,7 +172,8 @@ Configuration::~Configuration(){
}
void
Configuration::closeConfiguration(){
Configuration::closeConfiguration(bool end_session){
m_config_retriever->end_session(end_session);
if (m_config_retriever) {
delete m_config_retriever;
}
......
......@@ -35,7 +35,7 @@ public:
void fetch_configuration();
void setupConfiguration();
void closeConfiguration();
void closeConfiguration(bool end_session= true);
bool lockPagesInMainMemory() const;
......
......@@ -2321,4 +2321,23 @@ int ndb_mgm_report_event(NdbMgmHandle handle, Uint32 *data, Uint32 length)
DBUG_RETURN(0);
}
/**
 * Send the "end session" command on an already connected handle and wait
 * for one line of reply from the management server.
 *
 * @param handle  NDB management handle (must be valid and connected; the
 *                CHECK_* macros bail out with 0 otherwise)
 * @return 0
 *
 * NOTE(review): the reply line is read but its content is not inspected,
 * so a failed or timed out read is not reported to the caller.
 */
extern "C"
int ndb_mgm_end_session(NdbMgmHandle handle)
{
  DBUG_ENTER("ndb_mgm_end_session");
  CHECK_HANDLE(handle, 0);
  CHECK_CONNECTED(handle, 0);

  /* The request is the command line followed by an empty line. */
  SocketOutputStream request(handle->socket);
  request.println("end session");
  request.println("");

  /* Read one line of reply, using the handle's read timeout. */
  char reply[32];
  SocketInputStream response(handle->socket, handle->read_timeout);
  response.gets(reply, sizeof(reply));

  DBUG_RETURN(0);
}
template class Vector<const ParserRow<ParserDummy>*>;
......@@ -1000,26 +1000,21 @@ CommandInterpreter::executeShutdown(char* parameters)
int result = 0;
result = ndb_mgm_stop(m_mgmsrv, 0, 0);
if (result < 0) {
ndbout << "Shutdown off NDB Cluster storage node(s) failed." << endl;
ndbout << "Shutdown of NDB Cluster node(s) failed." << endl;
printError();
return result;
}
ndbout << result << " NDB Cluster storage node(s) have shutdown." << endl;
ndbout << result << " NDB Cluster node(s) have shutdown." << endl;
int mgm_id= 0;
for(int i=0; i < state->no_of_nodes; i++) {
if(state->node_states[i].node_type == NDB_MGM_NODE_TYPE_MGM &&
state->node_states[i].version != 0){
if (mgm_id == 0)
mgm_id= state->node_states[i].node_id;
else {
ndbout << "Unable to locate management server, "
<< "shutdown manually with <id> STOP"
<< endl;
return 1;
}
}
mgm_id= ndb_mgm_get_mgmd_nodeid(m_mgmsrv);
if (mgm_id == 0)
{
ndbout << "Unable to locate management server, "
<< "shutdown manually with <id> STOP"
<< endl;
return 1;
}
result = ndb_mgm_stop(m_mgmsrv, 1, &mgm_id);
......
......@@ -60,6 +60,9 @@
#include <SignalSender.hpp>
extern bool g_StopServer;
extern bool g_RestartServer;
//#define MGM_SRV_DEBUG
#ifdef MGM_SRV_DEBUG
#define DEBUG(x) do ndbout << x << endl; while(0)
......@@ -373,7 +376,8 @@ MgmtSrvr::MgmtSrvr(SocketServer *socket_server,
_ownReference(0),
theSignalIdleList(NULL),
theWaitState(WAIT_SUBSCRIBE_CONF),
m_event_listner(this)
m_event_listner(this),
m_local_mgm_handle(0)
{
DBUG_ENTER("MgmtSrvr::MgmtSrvr");
......@@ -541,6 +545,8 @@ MgmtSrvr::check_start()
bool
MgmtSrvr::start(BaseString &error_string)
{
int mgm_connect_result;
DBUG_ENTER("MgmtSrvr::start");
if (_props == NULL) {
if (!check_start()) {
......@@ -578,6 +584,13 @@ MgmtSrvr::start(BaseString &error_string)
DBUG_RETURN(false);
}
if((mgm_connect_result= connect_to_self()) < 0)
{
ndbout_c("Unable to connect to our own ndb_mgmd (Error %d)",
mgm_connect_result);
ndbout_c("This is probably a bug.");
}
TransporterRegistry *reg = theFacade->get_registry();
for(unsigned int i=0;i<reg->m_transporter_interface.size();i++) {
BaseString msg;
......@@ -835,9 +848,81 @@ MgmtSrvr::sendVersionReq(int v_nodeId, Uint32 &version, const char **address)
return 0;
}
/*
 * Stop or restart another management server (ndb_mgmd).
 *
 * Looks up the host name and management port of node `nodeId` in the
 * current configuration, opens a temporary mgmapi connection to that
 * server and issues either a stop or a restart command for it.
 *
 * nodeId        node id of the ndb_mgmd to stop/restart
 * abort         passed on to ndb_mgm_restart2 (restart only)
 * stop          not used by this function
 * restart       true: restart the node, false: stop it
 * nostart       passed on to ndb_mgm_restart2 (restart only)
 * initialStart  passed on to ndb_mgm_restart2 (restart only)
 *
 * Returns 0 when the command was issued successfully, otherwise
 * SEND_OR_RECEIVE_FAILED. The temporary handle is destroyed on every
 * path once it has been created.
 */
int MgmtSrvr::sendStopMgmd(NodeId nodeId,
                           bool abort,
                           bool stop,
                           bool restart,
                           bool nostart,
                           bool initialStart)
{
  BaseString connect_string;

  {
    Guard g(m_configMutex);

    const char* hostname= 0;
    Uint32 port= 0;

    ndb_mgm_configuration_iterator
      iter(* _config->m_configValues, CFG_SECTION_NODE);

    if(iter.first())                       return SEND_OR_RECEIVE_FAILED;
    if(iter.find(CFG_NODE_ID, nodeId))     return SEND_OR_RECEIVE_FAILED;
    if(iter.get(CFG_NODE_HOST, &hostname)) return SEND_OR_RECEIVE_FAILED;
    if(iter.get(CFG_MGM_PORT, &port))      return SEND_OR_RECEIVE_FAILED;

    if( strlen(hostname) == 0 )
      return SEND_OR_RECEIVE_FAILED;

    /*
      hostname points into the configuration data, so copy it into the
      connect string while the configuration mutex is still held.
    */
    connect_string.assfmt("%s:%u",hostname,port);
  }

  DBUG_PRINT("info",("connect string: %s",connect_string.c_str()));

  NdbMgmHandle h= ndb_mgm_create_handle();
  if ( !h )
  {
    /* Nothing could be sent, so do not report the node as stopped. */
    return SEND_OR_RECEIVE_FAILED;
  }

  int result= 0;

  if ( connect_string.length() == 0 )
  {
    result= SEND_OR_RECEIVE_FAILED;
  }
  else
  {
    ndb_mgm_set_connectstring(h,connect_string.c_str());

    if(ndb_mgm_connect(h,1,0,0))
    {
      DBUG_PRINT("info",("failed ndb_mgm_connect"));
      result= SEND_OR_RECEIVE_FAILED;
    }
    else
    {
      /* The mgmapi calls take a list of int node ids. */
      int nodes[1];
      nodes[0]= (int)nodeId;

      const int rc= restart
        ? ndb_mgm_restart2(h, 1, nodes, initialStart, nostart, abort)
        : ndb_mgm_stop(h, 1, nodes);

      if(rc < 0)
        result= SEND_OR_RECEIVE_FAILED;
    }
  }

  /* Single exit point: the handle is released on success and on failure. */
  ndb_mgm_destroy_handle(&h);
  return result;
}
/*
* Common method for handling all STOP_REQ signalling that
* is used by Stopping, Restarting and Single user commands
*
* In the event that we need to stop a mgmd, we create a mgm
* client connection to that mgmd and stop it that way.
* This allows us to stop mgm servers when there isn't any real
* distributed communication up.
*/
int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
......@@ -849,6 +934,8 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
bool nostart,
bool initialStart)
{
int error = 0;
stoppedNodes.clear();
SignalSender ss(theFacade);
......@@ -887,18 +974,34 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
NodeBitmask nodes;
if (nodeId)
{
if(nodeId==getOwnNodeId())
{
int r;
if((r = okToSendTo(nodeId, true)) != 0)
return r;
if(restart)
g_RestartServer= true;
g_StopServer= true;
return 0;
}
if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_NDB)
{
int r;
if((r= okToSendTo(nodeId, true)) != 0)
return r;
if (ss.sendSignal(nodeId, &ssig) != SEND_OK)
return SEND_OR_RECEIVE_FAILED;
}
else if(getNodeType(nodeId) == NDB_MGM_NODE_TYPE_MGM)
{
error= sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart);
if(error==0)
stoppedNodes.set(nodeId);
return error;
}
else
return WRONG_PROCESS_TYPE;
nodes.set(nodeId);
}
else
{
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_NDB))
{
if(okToSendTo(nodeId, true) == 0)
......@@ -908,9 +1011,17 @@ int MgmtSrvr::sendSTOP_REQ(NodeId nodeId,
nodes.set(nodeId);
}
}
nodeId= 0;
while(getNextNodeId(&nodeId, NDB_MGM_NODE_TYPE_MGM))
{
if(nodeId==getOwnNodeId())
continue;
if(sendStopMgmd(nodeId, abort, stop, restart, nostart, initialStart)==0)
stoppedNodes.set(nodeId);
}
}
// now wait for the replies
int error = 0;
while (!nodes.isclear())
{
SimpleSignal *signal = ss.waitFor();
......@@ -2446,9 +2557,23 @@ void MgmtSrvr::transporter_connect(NDB_SOCKET_TYPE sockfd)
}
}
int MgmtSrvr::set_connect_string(const char *str)
/*
 * Create a mgmapi handle and connect it to this management server's own
 * port ("localhost:<getPort()>").
 *
 * Returns 0 on success. If ndb_mgm_connect() returns a negative value the
 * handle is destroyed again and that value is returned.
 */
int MgmtSrvr::connect_to_self(void)
{
// NOTE(review): the next line appears to be the body of the removed
// set_connect_string() as shown by the diff view, not part of this function.
return ndb_mgm_set_connectstring(m_config_retriever->get_mgmHandle(),str);
int r= 0;
m_local_mgm_handle= ndb_mgm_create_handle();
// m_local_mgm_connect_string is a 20 byte member buffer; snprintf is given
// its size, so an over-long string would be truncated rather than overflow.
snprintf(m_local_mgm_connect_string,sizeof(m_local_mgm_connect_string),
"localhost:%u",getPort());
ndb_mgm_set_connectstring(m_local_mgm_handle, m_local_mgm_connect_string);
if((r= ndb_mgm_connect(m_local_mgm_handle, 0, 0, 0)) < 0)
{
// Connection failed: release the handle here, as it is not handed on.
ndb_mgm_destroy_handle(&m_local_mgm_handle);
return r;
}
// TransporterRegistry now owns this NdbMgmHandle and will destroy it.
theFacade->get_registry()->set_mgm_handle(m_local_mgm_handle);
return 0;
}
......
......@@ -471,7 +471,7 @@ public:
int getConnectionDbParameter(int node1, int node2, int param,
int *value, BaseString& msg);
int set_connect_string(const char *str);
int connect_to_self(void);
void transporter_connect(NDB_SOCKET_TYPE sockfd);
......@@ -487,6 +487,13 @@ private:
int send(SignalSender &ss, SimpleSignal &ssig, Uint32 node, Uint32 node_type);
int sendStopMgmd(NodeId nodeId,
bool abort,
bool stop,
bool restart,
bool nostart,
bool initialStart);
int sendSTOP_REQ(NodeId nodeId,
NodeBitmask &stoppedNodes,
Uint32 singleUserNodeId,
......@@ -629,6 +636,8 @@ private:
// signal arrives.
// We wait in receiveOptimisedResponse and signal in handleReceivedSignal.
NdbMgmHandle m_local_mgm_handle;
char m_local_mgm_connect_string[20];
class TransporterFacade * theFacade;
int sendVersionReq( int processId, Uint32 &version, const char **address);
......
......@@ -203,6 +203,8 @@ ParserRow<MgmApiSession> commands[] = {
MGM_CMD("bye", &MgmApiSession::bye, ""),
MGM_CMD("end session", &MgmApiSession::endSession, ""),
MGM_CMD("set loglevel", &MgmApiSession::setLogLevel, ""),
MGM_ARG("node", Int, Mandatory, "Node"),
MGM_ARG("category", Int, Mandatory, "Event category"),
......@@ -719,10 +721,21 @@ MgmApiSession::dumpState(Parser<MgmApiSession>::Context &,
void
MgmApiSession::bye(Parser<MgmApiSession>::Context &,
Properties const &) {
Properties const &) {
m_stop = true;
}
/**
 * Handler for the "end session" command.
 *
 * Discards the resources currently allocated to this session, gives the
 * session a fresh, empty Allocated_resources object and acknowledges the
 * command with "end session reply".
 */
void
MgmApiSession::endSession(Parser<MgmApiSession>::Context &,
                          Properties const &) {
  // delete on a null pointer is a no-op, so no explicit guard is needed.
  delete m_allocated_resources;
  m_allocated_resources= new MgmtSrvr::Allocated_resources(m_mgmsrv);

  m_output->println("end session reply");
}
void
MgmApiSession::setClusterLogLevel(Parser<MgmApiSession>::Context &,
Properties const &args) {
......
......@@ -79,6 +79,7 @@ public:
void start(Parser_t::Context &ctx, const class Properties &args);
void startAll(Parser_t::Context &ctx, const class Properties &args);
void bye(Parser_t::Context &ctx, const class Properties &args);
void endSession(Parser_t::Context &ctx, const class Properties &args);
void setLogLevel(Parser_t::Context &ctx, const class Properties &args);
void setClusterLogLevel(Parser_t::Context &ctx,
const class Properties &args);
......
......@@ -132,6 +132,7 @@ static MgmGlobals *glob= 0;
* Global variables
*/
bool g_StopServer;
bool g_RestartServer;
extern EventLogger g_eventLogger;
extern int global_mgmt_server_check;
......@@ -191,7 +192,19 @@ static void usage()
*/
int main(int argc, char** argv)
{
int mgm_connect_result;
NDB_INIT(argv[0]);
int ho_error;
#ifndef DBUG_OFF
opt_debug= "d:t:O,/tmp/ndb_mgmd.trace";
#endif
if ((ho_error=handle_options(&argc, &argv, my_long_options,
ndb_std_get_one_option)))
exit(ho_error);
start:
glob= new MgmGlobals;
/**
......@@ -208,14 +221,6 @@ int main(int argc, char** argv)
const char *load_default_groups[]= { "mysql_cluster","ndb_mgmd",0 };
load_defaults("my",load_default_groups,&argc,&argv);
int ho_error;
#ifndef DBUG_OFF
opt_debug= "d:t:O,/tmp/ndb_mgmd.trace";
#endif
if ((ho_error=handle_options(&argc, &argv, my_long_options,
ndb_std_get_one_option)))
exit(ho_error);
if (opt_interactive ||
opt_non_interactive ||
g_print_full_config) {
......@@ -293,34 +298,12 @@ int main(int argc, char** argv)
goto error_end;
}
/* Construct a fake connectstring to connect back to ourselves */
char connect_str[20];
if(!opt_connect_str) {
snprintf(connect_str,20,"localhost:%u",glob->mgmObject->getPort());
opt_connect_str= connect_str;
}
glob->mgmObject->set_connect_string(opt_connect_str);
if(!glob->mgmObject->check_start()){
ndbout_c("Unable to check start management server.");
ndbout_c("Probably caused by illegal initial configuration file.");
goto error_end;
}
/*
* Connect back to ourselves so we can use mgmapi to fetch
* config info
*/
int mgm_connect_result;
mgm_connect_result = glob->mgmObject->get_config_retriever()->
do_connect(0,0,0);
if(mgm_connect_result<0) {
ndbout_c("Unable to connect to our own ndb_mgmd (Error %d)",
mgm_connect_result);
ndbout_c("This is probably a bug.");
}
if (opt_daemon) {
// Become a daemon
char *lockfile= NdbConfig_PidFileName(glob->localNodeId);
......@@ -361,6 +344,7 @@ int main(int argc, char** argv)
g_eventLogger.info(msg);
g_StopServer = false;
g_RestartServer= false;
glob->socketServer->startServer();
#if ! defined NDB_OSE && ! defined NDB_SOFTOSE
......@@ -378,14 +362,19 @@ int main(int argc, char** argv)
while(g_StopServer != true)
NdbSleep_MilliSleep(500);
}
g_eventLogger.info("Shutting down server...");
if(g_RestartServer)
g_eventLogger.info("Restarting server...");
else
g_eventLogger.info("Shutting down server...");
glob->socketServer->stopServer();
glob->mgmObject->get_config_retriever()->disconnect();
// We disconnect from the ConfigRetreiver mgmd when we delete glob below
glob->socketServer->stopSessions(true);
g_eventLogger.info("Shutdown complete");
the_end:
delete glob;
if(g_RestartServer)
goto start;
ndb_end(opt_endinfo ? MY_CHECK_ERROR | MY_GIVE_INFO : 0);
return 0;
error_end:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment