Commit b9a3c365 authored by Xavier Thompson

node boot: Format even with unreachable master

Use the format return code to distinguish between failure to format and
failure to report to master at the end. Then make boot run format even
if the master is unreachable, and:
* if format fails to report, wait until master is reachable, and retry
* if format fails for another reason, retry after a fixed 15s delay
* once format succeeds fully, bang
parent 1c5f8833
...@@ -40,7 +40,7 @@ from netaddr import valid_ipv4, valid_ipv6 ...@@ -40,7 +40,7 @@ from netaddr import valid_ipv4, valid_ipv6
from slapos.cli.command import check_root_user from slapos.cli.command import check_root_user
from slapos.cli.entry import SlapOSApp from slapos.cli.entry import SlapOSApp
from slapos.cli.config import ConfigCommand from slapos.cli.config import ConfigCommand
from slapos.format import isGlobalScopeAddress from slapos.format import isGlobalScopeAddress, FormatReturn
from slapos.util import string_to_boolean from slapos.util import string_to_boolean
import argparse import argparse
import logging import logging
...@@ -65,10 +65,7 @@ def _runBang(app): ...@@ -65,10 +65,7 @@ def _runBang(app):
Launch slapos node format. Launch slapos node format.
""" """
logger.info("[BOOT] Invoking slapos node bang...") logger.info("[BOOT] Invoking slapos node bang...")
result = app.run(['node', 'bang', '-m', 'Reboot']) return app.run(['node', 'bang', '-m', 'Reboot'])
if result == 1:
return 0
return 1
def _runFormat(app): def _runFormat(app):
...@@ -76,10 +73,7 @@ def _runFormat(app): ...@@ -76,10 +73,7 @@ def _runFormat(app):
Launch slapos node format. Launch slapos node format.
""" """
logger.info("[BOOT] Invoking slapos node format...") logger.info("[BOOT] Invoking slapos node format...")
result = app.run(['node', 'format', '--now', '--verbose']) return app.run(['node', 'format', '--now', '--verbose'])
if result == 1:
return 0
return 1
def _ping(hostname): def _ping(hostname):
...@@ -139,6 +133,16 @@ def _ping_hostname(hostname): ...@@ -139,6 +133,16 @@ def _ping_hostname(hostname):
is_ready = _ping6(hostname) is_ready = _ping6(hostname)
def _ping_master(master_hostname):
    """
    Check that the node can ping the master.

    The check used depends on the form of ``master_hostname``: an IPv4
    literal, an IPv6 literal, or anything else, which is treated as a
    hostname.
    NOTE(review): the underlying helpers presumably block until the master
    answers -- confirm against their definitions.
    """
    if valid_ipv4(master_hostname):
        check = _test_ping
    elif valid_ipv6(master_hostname):
        check = _test_ping6
    else:
        # Neither an IPv4 nor an IPv6 literal: treat it as a hostname.
        check = _ping_hostname
    check(master_hostname)
def _waitIpv6Ready(ipv6_interface): def _waitIpv6Ready(ipv6_interface):
""" """
test if ipv6 is ready on ipv6_interface test if ipv6 is ready on ipv6_interface
...@@ -154,6 +158,7 @@ def _waitIpv6Ready(ipv6_interface): ...@@ -154,6 +158,7 @@ def _waitIpv6Ready(ipv6_interface):
"try again in 5 seconds...", ipv6_interface) "try again in 5 seconds...", ipv6_interface)
sleep(5) sleep(5)
class BootCommand(ConfigCommand): class BootCommand(ConfigCommand):
""" """
Test network and invoke simple format and bang (Use on Linux startup) Test network and invoke simple format and bang (Use on Linux startup)
...@@ -196,23 +201,29 @@ class BootCommand(ConfigCommand): ...@@ -196,23 +201,29 @@ class BootCommand(ConfigCommand):
if ipv6_interface is not None: if ipv6_interface is not None:
_waitIpv6Ready(ipv6_interface) _waitIpv6Ready(ipv6_interface)
# Check that node can ping master
if valid_ipv4(master_hostname):
_test_ping(master_hostname)
elif valid_ipv6(master_hostname):
_test_ping6(master_hostname)
else:
# hostname
_ping_hostname(master_hostname)
app = SlapOSApp() app = SlapOSApp()
while True:
# Make sure slapos node format returns ok # Make sure slapos node format returns ok
while not _runFormat(app): result = _runFormat(app)
if result == FormatReturn.FAILURE:
logger.error("[BOOT] Fail to format, try again in 15 seconds...") logger.error("[BOOT] Fail to format, try again in 15 seconds...")
sleep(15) sleep(15)
continue
if result == FormatReturn.OFFLINE_SUCCESS:
logger.error(
"[BOOT] Fail to post format information"
", try again when connection to master is up..."
)
sleep(15)
_ping_master(master_hostname)
continue
break
# Make sure slapos node bang returns ok # Make sure slapos node bang returns ok
while not _runBang(app): while _runBang(app):
logger.error("[BOOT] Fail to bang, try again in 15 seconds...") logger.error("[BOOT] Fail to bang, try again in 15 seconds...")
sleep(15) sleep(15)
......
...@@ -125,4 +125,4 @@ class FormatCommand(ConfigCommand): ...@@ -125,4 +125,4 @@ class FormatCommand(ConfigCommand):
tracing_monkeypatch(conf) tracing_monkeypatch(conf)
do_format(conf=conf) return do_format(conf=conf)
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
from six.moves import configparser from six.moves import configparser
import distro import distro
import enum
import errno import errno
import fcntl import fcntl
import grp import grp
...@@ -68,6 +69,12 @@ from slapos import version ...@@ -68,6 +69,12 @@ from slapos import version
from slapos import manager as slapmanager from slapos import manager as slapmanager
class FormatReturn(enum.IntEnum):
    """
    Result codes returned by ``do_format``.

    Being an ``IntEnum``, members compare equal to their integer values,
    so the result can also be used as a plain integer return code.
    """
    # The computer was prepared and its information was sent to the master.
    SUCCESS = 0
    # An exception was raised while preparing the computer.
    FAILURE = 1
    # The computer was prepared, but sending its information to the master
    # raised an exception.
    OFFLINE_SUCCESS = 2
logger = logging.getLogger("slapos.format") logger = logging.getLogger("slapos.format")
...@@ -1578,6 +1585,7 @@ def random_delay(conf): ...@@ -1578,6 +1585,7 @@ def random_delay(conf):
def do_format(conf): def do_format(conf):
try:
random_delay(conf) random_delay(conf)
if conf.input_definition_file: if conf.input_definition_file:
...@@ -1609,8 +1617,18 @@ def do_format(conf): ...@@ -1609,8 +1617,18 @@ def do_format(conf):
path_to_json=conf.computer_json, path_to_json=conf.computer_json,
logger=conf.logger) logger=conf.logger)
conf.logger.info('Posting information to %r' % conf.master_url) conf.logger.info('Posting information to %r' % conf.master_url)
try:
computer.send(conf) computer.send(conf)
return FormatReturn.SUCCESS
except Exception:
conf.logger.exception('failed to transfer information to %r' % conf.master_url)
return FormatReturn.OFFLINE_SUCCESS
finally:
conf.logger.info('slapos successfully prepared the computer.') conf.logger.info('slapos successfully prepared the computer.')
except Exception:
conf.logger.exception('slapos failed to prepare the computer.')
return FormatReturn.FAILURE
class FormatConfig(object): class FormatConfig(object):
......
...@@ -436,8 +436,9 @@ class TestCliBoot(CliMixin): ...@@ -436,8 +436,9 @@ class TestCliBoot(CliMixin):
# run slapos node boot # run slapos node boot
app = slapos.cli.entry.SlapOSApp() app = slapos.cli.entry.SlapOSApp()
fake = mock.Mock(return_value=mock.Mock(**{'run.return_value': 0}))
with patch('slapos.cli.boot.check_root_user', return_value=True) as check_root_user,\ with patch('slapos.cli.boot.check_root_user', return_value=True) as check_root_user,\
patch('slapos.cli.boot.SlapOSApp') as SlapOSApp,\ patch('slapos.cli.boot.SlapOSApp', new=fake) as SlapOSApp,\
patch('slapos.cli.boot.ConfigCommand.config_path', return_value=slapos_conf.name), \ patch('slapos.cli.boot.ConfigCommand.config_path', return_value=slapos_conf.name), \
patch( patch(
'slapos.cli.boot.netifaces.ifaddresses', 'slapos.cli.boot.netifaces.ifaddresses',
...@@ -477,8 +478,8 @@ class TestCliBoot(CliMixin): ...@@ -477,8 +478,8 @@ class TestCliBoot(CliMixin):
patch('slapos.cli.format.check_root_user', return_value=True),\ patch('slapos.cli.format.check_root_user', return_value=True),\
patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\ patch('slapos.cli.format.logging.FileHandler', return_value=logging.NullHandler()),\
patch('slapos.cli.bang.check_root_user', return_value=True),\ patch('slapos.cli.bang.check_root_user', return_value=True),\
patch('slapos.cli.format.do_format', side_effect=[Exception, Exception, None]) as do_format,\ patch('slapos.cli.format.do_format', side_effect=[Exception, Exception, 0]) as do_format,\
patch('slapos.cli.bang.do_bang', side_effect=[Exception, Exception, None]) as do_bang: patch('slapos.cli.bang.do_bang', side_effect=[Exception, Exception, 0]) as do_bang:
app.run(('node', 'boot')) app.run(('node', 'boot'))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment