Commit 2f070644 authored by Xavier Thompson

software/theia: Fix resiliency

See merge request nexedi/slapos!1096
parents 4c3725cf ef2a540a
Pipeline #18645 failed with stage
...@@ -15,11 +15,11 @@ ...@@ -15,11 +15,11 @@
[instance-theia] [instance-theia]
_update_hash_filename_ = instance-theia.cfg.jinja.in _update_hash_filename_ = instance-theia.cfg.jinja.in
md5sum = 8e4f43e603a5dd57752758c987465d41 md5sum = 002ca13ec4923d9efa7a99f58ea7917f
[instance] [instance]
_update_hash_filename_ = instance.cfg.in _update_hash_filename_ = instance.cfg.in
md5sum = a7d78b4002266c69ece05a476df82791 md5sum = 4d8d3a351f17c45048fd3ffaee978875
[instance-import] [instance-import]
_update_hash_filename_ = instance-import.cfg.jinja.in _update_hash_filename_ = instance-import.cfg.jinja.in
...@@ -31,19 +31,19 @@ md5sum = 190a736471f0e0cffcb2838968e01d84 ...@@ -31,19 +31,19 @@ md5sum = 190a736471f0e0cffcb2838968e01d84
[instance-resilient] [instance-resilient]
_update_hash_filename_ = instance-resilient.cfg.jinja _update_hash_filename_ = instance-resilient.cfg.jinja
md5sum = d78a9f885bdebf6720197209e0c21aa0 md5sum = 6f6b88d2802cd5eba6e3d2ebf435813a
[theia-common] [theia-common]
_update_hash_filename_ = theia_common.py _update_hash_filename_ = theia_common.py
md5sum = e57396473b4b6a17d26a747f0030293c md5sum = 6a25c6a7f1beb27232a3c9acd8a76500
[theia-export] [theia-export]
_update_hash_filename_ = theia_export.py _update_hash_filename_ = theia_export.py
md5sum = b5f5ac1924b27d3f2be2e5ea291c119e md5sum = e2f6c483cce09f87ab1e63ae8be0daf4
[theia-import] [theia-import]
_update_hash_filename_ = theia_import.py _update_hash_filename_ = theia_import.py
md5sum = 9e8c17a4b2d802695caf0c2c052f0d11 md5sum = 1a668d6203d42b4d46d56e24c7606cb2
[yarn.lock] [yarn.lock]
_update_hash_filename_ = yarn.lock _update_hash_filename_ = yarn.lock
......
{% import 'parts' as parts %} {% import 'parts' as parts -%}
{% import 'replicated' as replicated with context %} {% import 'replicated' as replicated with context -%}
{% set number_of_instances = slapparameter_dict.get('resilient-clone-number', 1)|int %} {% set clones_amount = slapparameter_dict.get('resilient-clone-number', 1)|int + 1 -%}
[buildout] [buildout]
eggs-directory = {{ eggs_directory }} eggs-directory = {{ eggs_directory }}
...@@ -11,59 +11,38 @@ extends = ...@@ -11,59 +11,38 @@ extends =
{{ monitor_template }} {{ monitor_template }}
parts += parts +=
# Generate the parts to request theia-export, pull-backup and theia-import publish
# See stack/resilient/template-parts.cfg.in and stack/resilient/template-replicated.cfg.in {#- Generate the parts to request the main theia, the clones and the PBS. #}
# See below for the generation of the sections corresponding to the parts generated here {#- See ../../stack/resilient/template-parts.cfg.in #}
{{ parts.replicate("theia", number_of_instances + 1) }} {{ parts.replicate("theia", clones_amount) }}
# Also publish some connection parameters
publish-connection-parameter
[ArgLeader]
[ArgBackup] {#- Prepare monitoring information to transmit to and request from the main theia, the clones and the PBS #}
{%- set monitor_cors_domains = slapparameter_dict.pop('monitor-cors-domains', 'monitor.app.officejs.com') %}
{%- set monitor_username = slapparameter_dict.get('monitor-username', '${monitor-instance-parameter:username}') %}
{%- set monitor_password = slapparameter_dict.get('monitor-password', '${monitor-htpasswd:passwd}') %}
{%- set monitor_return = ['monitor-base-url', 'monitor-setup-url'] %}
{%- set monitor_parameter = {'monitor-cors-domains': monitor_cors_domains, 'monitor-username' : monitor_username, 'monitor-password': monitor_password} %}
{%- set monitor_dict = {'parameter': monitor_parameter, 'return': monitor_return, 'set-monitor-url': True} %}
# Generate sections to request theia-export, pull-backup and theia-import
# See stack/resilient/template-replicated.cfg.in
# In particular:
#
# [request-theia]
# <= ArgLeader
# software-type = export
# ...
#
# [request-theia-pseudo-replicating-1]
# <= ArgBackup
# software-type = import
# ...
#
# [request-pbs-theia-1]
# software-type = pull-backup
# ...
#
{{ replicated.replicate("theia", number_of_instances + 1,
"export", "import",
"ArgLeader", "ArgBackup",
slapparameter_dict=slapparameter_dict) }}
# Extend the list of return parameters for the export request {# Generate the sections to request the main theia, the clones and the PBS. #}
# The monitor parameters are only there to assert they are {#- See ../../stack/resilient/template-replicated.cfg.in #}
# actually published by the export instance {{ replicated.replicate("theia", clones_amount, "export", "import", slapparameter_dict=slapparameter_dict, monitor_parameter_dict=monitor_dict) }}
# Ask for the connection parameters of the main theia
[request-theia] [request-theia]
return += url username password backend-url monitor-base-url monitor-setup-url return += url username password backend-url
# Extend the list of return parameters for the import request
# with the monitor parameters to assert they are actually published
[request-theia-pseudo-replicating-1]
return += monitor-base-url monitor-setup-url
# Publish some parameters from the export instance # Publish connection parameters of the main theia and resiliency parameters
[publish-connection-parameter] [publish]
recipe = slapos.cookbook:publish recipe = slapos.cookbook:publish
url = ${request-theia:connection-url} url = ${request-theia:connection-url}
username = ${request-theia:connection-username} username = ${request-theia:connection-username}
password = ${request-theia:connection-password} password = ${request-theia:connection-password}
backend-url = ${request-theia:connection-backend-url} backend-url = ${request-theia:connection-backend-url}
monitor-base-url = ${request-theia:connection-monitor-base-url}
# Publish resiliency parameters fetched by the resilient stack monitor-setup-url = ${request-theia:connection-monitor-setup-url}
[publish-connection-parameter]
<= publish-connection-information <= publish-connection-information
...@@ -63,6 +63,25 @@ bash-completions = $${:home}/.local/share/bash-completion/completions/ ...@@ -63,6 +63,25 @@ bash-completions = $${:home}/.local/share/bash-completion/completions/
fish-completions = $${:home}/.config/fish/completions/ fish-completions = $${:home}/.config/fish/completions/
# Monitor
# -------
[monitor-instance-parameter]
monitor-httpd-port = {{ parameter_dict['monitor-httpd-port'] }}
{%- for k in ('monitor-cors-domains', 'monitor-username', 'monitor-password') %}
{%- set v = parameter_dict.get(k) %}
{%- if v %}
{{ k[8:] }} = {{ v }}
{%- endif %}
{%- endfor %}
{%- for k in ('monitor-url-list', ) %}
{%- set v = parameter_dict.get(k) %}
{%- if v %}
{{ k }} = {{ v }}
{%- endif %}
{%- endfor %}
# Promises # Promises
# -------- # --------
......
...@@ -51,7 +51,8 @@ default-parameters = ...@@ -51,7 +51,8 @@ default-parameters =
"additional-frontend-name":"Theia Additional Frontend", "additional-frontend-name":"Theia Additional Frontend",
"additional-frontend-sr": "$${:frontend-sr}", "additional-frontend-sr": "$${:frontend-sr}",
"additional-frontend-sr-type": "RootSoftwareInstance", "additional-frontend-sr-type": "RootSoftwareInstance",
"additional-frontend-guid": null "additional-frontend-guid": null,
"monitor-httpd-port": 8386
} }
frontend-sr = http://git.erp5.org/gitweb/slapos.git/blob_plain/HEAD:/software/apache-frontend/software.cfg frontend-sr = http://git.erp5.org/gitweb/slapos.git/blob_plain/HEAD:/software/apache-frontend/software.cfg
......
...@@ -123,14 +123,10 @@ initialization = ...@@ -123,14 +123,10 @@ initialization =
standalone.start() standalone.start()
try: try:
partition_count = 20 partition_count = 20
if len(glob.glob(os.path.join(standalone.instance_directory, '*'))) < partition_count: print("Standalone SlapOS: Formatting {partition_count} partitions".format(
print("Standalone SlapOS: Formatting {partition_count} partitions".format( partition_count=partition_count))
partition_count=partition_count)) standalone.format(partition_count, args.ipv4, args.ipv6)
standalone.format(
partition_count,
args.ipv4,
args.ipv6,
)
print("Standalone SlapOS for computer `{}` started".format(args.computer_id)) print("Standalone SlapOS for computer `{}` started".format(args.computer_id))
# Run instance at least once, to start the supervisor managing instances. # Run instance at least once, to start the supervisor managing instances.
try: try:
......
...@@ -121,6 +121,10 @@ class TestTheiaResilienceERP5(ERP5Mixin, test_resiliency.TestTheiaResilience): ...@@ -121,6 +121,10 @@ class TestTheiaResilienceERP5(ERP5Mixin, test_resiliency.TestTheiaResilience):
backup_max_tries = 480 backup_max_tries = 480
backup_wait_interval = 60 backup_wait_interval = 60
def test_twice(self):
# do nothing
pass
def _prepareExport(self): def _prepareExport(self):
super(TestTheiaResilienceERP5, self)._prepareExport() super(TestTheiaResilienceERP5, self)._prepareExport()
......
...@@ -225,6 +225,8 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC ...@@ -225,6 +225,8 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
script_relpath = os.path.join( script_relpath = os.path.join(
'srv', 'runner', 'instance', 'slappart0', 'srv', 'runner', 'instance', 'slappart0',
'srv', '.backup_identity_script') 'srv', '.backup_identity_script')
signature_relpath = os.path.join(
'srv', 'backup', 'theia', 'backup.signature')
def assertPromiseFailure(self, *msg): def assertPromiseFailure(self, *msg):
# Force promises to recompute regardless of periodicity # Force promises to recompute regardless of periodicity
...@@ -291,6 +293,10 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC ...@@ -291,6 +293,10 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
self.customSignatureScript(content=None) self.customSignatureScript(content=None)
self.customRestoreScript(content=None) self.customRestoreScript(content=None)
self.cleanupExitfiles() self.cleanupExitfiles()
try:
os.remove(self._getPartitionPath('import', self.signature_relpath))
except OSError:
pass
def test_export_promise(self): def test_export_promise(self):
self.writeFile(self.getExportExitfile(), '1') self.writeFile(self.getExportExitfile(), '1')
...@@ -303,17 +309,14 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC ...@@ -303,17 +309,14 @@ class TestTheiaExportAndImportFailures(ExportAndImportMixin, ResilientTheiaTestC
def test_custom_hash_script(self): def test_custom_hash_script(self):
errmsg = 'Bye bye' errmsg = 'Bye bye'
self.customSignatureScript(content='>&2 echo "%s"\nexit 1' % errmsg) self.customSignatureScript(content='>&2 echo "%s"\nexit 1' % errmsg)
backup_script = self._getPartitionPath( custom_script = self._getPartitionPath('export', self.script_relpath)
'export', 'srv', 'backup', 'theia', self.script_relpath) self.assertExportFailure('Compute partitions backup signatures\n ... ERROR !',
self.assertExportFailure('Compute backup signature\n ... ERROR !', 'Custom signature script %s failed' % os.path.abspath(custom_script),
'Custom signature script %s failed' % os.path.abspath(backup_script),
'and stderr:\n%s' % errmsg) 'and stderr:\n%s' % errmsg)
def test_signature_mismatch(self): def test_signature_mismatch(self):
signature_file = self._getPartitionPath('import', 'srv', 'backup', 'theia', 'backup.signature') signature_file = self._getPartitionPath('import', self.signature_relpath)
moved_file = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.moved') self.writeFile(signature_file, 'Bogus Hash\n', mode='a')
self.writeFile(moved_file, 'Bogus Hash\n', mode='a')
os.rename(moved_file, signature_file)
self.assertImportFailure('ERROR the backup signatures do not match') self.assertImportFailure('ERROR the backup signatures do not match')
def test_restore_script_error(self): def test_restore_script_error(self):
...@@ -363,12 +366,15 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT ...@@ -363,12 +366,15 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT
self.writeFile(os.path.join(dummy_root, 'exclude', 'excluded'), self.writeFile(os.path.join(dummy_root, 'exclude', 'excluded'),
'This file should be excluded from resilient backup') 'This file should be excluded from resilient backup')
# Check that ~/srv/exporter.exclude and ~/srv/runner-import-restore # Check that ~/srv/exporter.exclude and ~/srv/runner-import-restore exist
# As well as ~/srv/.backup_identity_script # As well as ~/srv/.backup_identity_script
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'exporter.exclude'))) self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'exporter.exclude')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'runner-import-restore'))) self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', 'runner-import-restore')))
self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', '.backup_identity_script'))) self.assertTrue(os.path.exists(os.path.join(dummy_root, 'srv', '.backup_identity_script')))
# Remember content of ~/etc in the import theia
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
def _doSync(self): def _doSync(self):
self._doExport() self._doExport()
self._doTransfer() self._doTransfer()
...@@ -384,14 +390,20 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT ...@@ -384,14 +390,20 @@ class TestTheiaExportAndImport(ResilienceMixin, ExportAndImportMixin, ResilientT
self.assertIn(adapted_test_url, proxy_content) self.assertIn(adapted_test_url, proxy_content)
self.assertNotIn(self._test_software_url, proxy_content) self.assertNotIn(self._test_software_url, proxy_content)
# Check that ~/etc still contains everything it did before
etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self.assertTrue(set(self.etc_listdir).issubset(etc_listdir))
# Check that ~/srv/project was exported # Check that ~/srv/project was exported
self.assertTrue(os.path.exists(adapted_test_url)) self.assertTrue(os.path.exists(adapted_test_url))
# Check that the dummy instance is not yet started # Check that the dummy instance is not yet started
self.checkLog(os.path.join(dummy_root, 'log.log'), self.initial_log, newline=None) self.checkLog(os.path.join(dummy_root, 'log.log'), self.initial_log, newline=None)
# Check that ~/srv/.backup_identity_script was called # Check that ~/srv/.backup_identity_script was detected and called
signature = self._getPartitionPath('import', 'srv', 'backup', 'backup.signature.proof') signature = self._getPartitionPath(
'import', 'srv', 'backup', 'theia', 'slappart0.backup.signature.custom')
self.assertTrue(os.path.exists(signature))
with open(signature) as f: with open(signature) as f:
self.assertIn('Custom script', f.read()) self.assertIn('Custom script', f.read())
...@@ -477,6 +489,14 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase ...@@ -477,6 +489,14 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
_test_software_url = dummy_software_url _test_software_url = dummy_software_url
def test_twice(self):
# Run two synchronisations on the same instances
# to make sure everything still works the second time
# Check ~/etc in import theia again
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self._doSync()
self._checkSync()
def _prepareExport(self): def _prepareExport(self):
# Deploy test instance # Deploy test instance
self._deployEmbeddedSoftware(self._test_software_url, 'test_instance', self.test_instance_max_retries) self._deployEmbeddedSoftware(self._test_software_url, 'test_instance', self.test_instance_max_retries)
...@@ -485,6 +505,9 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase ...@@ -485,6 +505,9 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
self.export_id = self._getPartitionId('export') self.export_id = self._getPartitionId('export')
self.import_id = self._getPartitionId('import') self.import_id = self._getPartitionId('import')
# Remember content of ~/etc in the import theia
self.etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
def _doSync(self): def _doSync(self):
start = time.time() start = time.time()
...@@ -499,6 +522,11 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase ...@@ -499,6 +522,11 @@ class TestTheiaResilience(ResilienceMixin, TakeoverMixin, ResilientTheiaTestCase
# Wait for takeover to be ready # Wait for takeover to be ready
self._waitTakeoverReady(takeover_url, start, self.backup_max_tries, self.backup_wait_interval) self._waitTakeoverReady(takeover_url, start, self.backup_max_tries, self.backup_wait_interval)
def _checkSync(self):
# Check that ~/etc still contains everything it did before
etc_listdir = os.listdir(self._getPartitionPath('import', 'etc'))
self.assertTrue(set(self.etc_listdir).issubset(etc_listdir))
def _doTakeover(self): def _doTakeover(self):
# Takeover # Takeover
takeover_url, takeover_password = self._getTakeoverUrlAndPassword() takeover_url, takeover_password = self._getTakeoverUrlAndPassword()
......
...@@ -4,6 +4,7 @@ import glob ...@@ -4,6 +4,7 @@ import glob
import hashlib import hashlib
import os import os
import re import re
import shutil
import subprocess as sp import subprocess as sp
import sqlite3 import sqlite3
...@@ -21,13 +22,19 @@ EXCLUDE_FLAGS = ['--exclude={}'.format(x) for x in sorted(EXCLUDE_PATTERNS)] ...@@ -21,13 +22,19 @@ EXCLUDE_FLAGS = ['--exclude={}'.format(x) for x in sorted(EXCLUDE_PATTERNS)]
def makedirs(path): def makedirs(path):
try: try:
os.makedirs(path if os.path.isdir(path) else os.path.dirname(path)) os.makedirs(path)
except OSError as e: except OSError as e:
if e.errno != errno.EEXIST: if e.errno != errno.EEXIST:
raise raise
def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'): def copyfile(src, dst):
dst = os.path.abspath(dst)
makedirs(os.path.dirname(dst))
shutil.copy2(src, dst)
def copytree(rsyncbin, src, dst, exclude=(), extrargs=(), verbosity='-v'):
# Ensure there is a trailing slash in the source directory # Ensure there is a trailing slash in the source directory
# to avoid creating an additional directory level at the destination # to avoid creating an additional directory level at the destination
src = os.path.join(src, '') src = os.path.join(src, '')
...@@ -60,21 +67,20 @@ def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'): ...@@ -60,21 +67,20 @@ def copytree(rsyncbin, src, dst, exclude=[], extrargs=[], verbosity='-v'):
def copydb(sqlite3bin, src_db, dst_db): def copydb(sqlite3bin, src_db, dst_db):
makedirs(dst_db) makedirs(os.path.dirname(dst_db))
sp.check_output((sqlite3bin, src_db, '.backup ' + dst_db)) sp.check_output((sqlite3bin, src_db, '.backup ' + dst_db))
def remove(path): def remove(path):
try: try:
os.remove(path) os.remove(path)
except OSError: except OSError as e:
if os.path.exists(path): if e.errno != errno.ENOENT:
raise raise
def parse_installed(partition): def parse_installed(partition):
paths = [] paths = []
custom_script = os.path.join(partition, 'srv', '.backup_identity_script')
for cfg in glob.glob(os.path.join(partition, '.installed*.cfg')): for cfg in glob.glob(os.path.join(partition, '.installed*.cfg')):
try: try:
with open(cfg) as f: with open(cfg) as f:
...@@ -86,7 +92,7 @@ def parse_installed(partition): ...@@ -86,7 +92,7 @@ def parse_installed(partition):
for section in six.itervalues(installed_cfg): for section in six.itervalues(installed_cfg):
for p in section.get('__buildout_installed__', '').splitlines(): for p in section.get('__buildout_installed__', '').splitlines():
p = p.strip() p = p.strip()
if p and p != custom_script: if p:
paths.append(p) paths.append(p)
return paths return paths
...@@ -101,31 +107,44 @@ def sha256sum(file_path, chunk_size=1024 * 1024): ...@@ -101,31 +107,44 @@ def sha256sum(file_path, chunk_size=1024 * 1024):
return sha256.hexdigest() return sha256.hexdigest()
def hashwalk(backup_dir, mirror_partitions): def fast_hashwalk(root_dir):
scripts = {} for dirpath, dirnames, filenames in os.walk(root_dir):
for p in mirror_partitions: for f in filenames:
script_path = os.path.join(p, 'srv', '.backup_identity_script') filepath = os.path.join(dirpath, f)
if os.path.exists(script_path): if os.path.isfile(filepath):
scripts[os.path.abspath(p)] = script_path displaypath = os.path.relpath(filepath, start=root_dir)
for dirpath, dirnames, filenames in os.walk(backup_dir): yield '%s %s' % (sha256sum(filepath), displaypath)
filenames.sort()
def exclude_hashwalk(root_dir, instance_dir):
root_dir = os.path.abspath(root_dir)
instance_dir = os.path.abspath(instance_dir)
for dirpath, dirnames, filenames in os.walk(root_dir):
for f in filenames: for f in filenames:
filepath = os.path.join(dirpath, f) filepath = os.path.join(dirpath, f)
if os.path.isfile(filepath): if os.path.isfile(filepath):
displaypath = os.path.relpath(filepath, start=backup_dir) displaypath = os.path.relpath(filepath, start=root_dir)
yield '%s %s' % (sha256sum(filepath), displaypath) yield '%s %s' % (sha256sum(filepath), displaypath)
remaining_dirnames = [] if dirpath == instance_dir:
for subdir in dirnames: remaining_dirs = []
subdirpath = os.path.abspath(os.path.join(dirpath, subdir)) for d in dirnames:
custom_hashscript = scripts.get(subdirpath) if not d.startswith('slappart'):
if custom_hashscript: remaining_dirs.append(d)
print('Using custom signature script %s' % custom_hashscript) dirnames[:] = remaining_dirs
for s in hashcustom(subdirpath, backup_dir, custom_hashscript):
yield s
else: def hashwalk(root_dir, instance_dir=None):
remaining_dirnames.append(subdir) if instance_dir and not os.path.relpath(
remaining_dirnames.sort() instance_dir, start=root_dir).startswith(os.pardir):
dirnames[:] = remaining_dirnames return exclude_hashwalk(root_dir, instance_dir)
return fast_hashwalk(root_dir)
def hashscript(partition):
script = os.path.join(partition, 'srv', '.backup_identity_script')
if os.path.exists(script):
return script
return None
@contextlib.contextmanager @contextlib.contextmanager
...@@ -138,10 +157,11 @@ def cwd(path): ...@@ -138,10 +157,11 @@ def cwd(path):
os.chdir(old_path) os.chdir(old_path)
def hashcustom(mirrordir, backup_dir, custom_hashscript): def hashcustom(partition, script):
workingdir = os.path.join(mirrordir, os.pardir, os.pardir, os.pardir) workingdir = os.path.join(partition, os.pardir, os.pardir, os.pardir)
with cwd(os.path.abspath(workingdir)): with cwd(os.path.abspath(workingdir)):
for dirpath, _, filenames in os.walk(mirrordir): for dirpath, dirnames, filenames in os.walk(partition):
dirnames.sort()
filepaths = [] filepaths = []
for f in filenames: for f in filenames:
path = os.path.join(dirpath, f) path = os.path.join(dirpath, f)
...@@ -150,16 +170,16 @@ def hashcustom(mirrordir, backup_dir, custom_hashscript): ...@@ -150,16 +170,16 @@ def hashcustom(mirrordir, backup_dir, custom_hashscript):
if not filepaths: if not filepaths:
continue continue
hashprocess = sp.Popen( hashprocess = sp.Popen(
custom_hashscript, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE) script, stdin=sp.PIPE, stdout=sp.PIPE, stderr=sp.PIPE)
out, err = hashprocess.communicate(str2bytes('\0'.join(filepaths))) out, err = hashprocess.communicate(str2bytes('\0'.join(filepaths)))
if hashprocess.returncode != 0: if hashprocess.returncode != 0:
template = "Custom signature script %s failed on inputs:\n%s" template = "Custom signature script %s failed on inputs:\n%s"
msg = template % (custom_hashscript, '\n'.join(filepaths)) msg = template % (script, '\n'.join(filepaths))
msg += "\nwith stdout:\n%s" % bytes2str(out) msg += "\nwith stdout:\n%s" % bytes2str(out)
msg += "\nand stderr:\n%s" % bytes2str(err) msg += "\nand stderr:\n%s" % bytes2str(err)
raise Exception(msg) raise Exception(msg)
signatures = bytes2str(out).strip('\n').split('\n') signatures = bytes2str(out).strip('\n').split('\n')
signatures.sort() signatures.sort()
displaypath = os.path.relpath(dirpath, start=backup_dir) displaypath = os.path.relpath(dirpath, start=partition)
for s in signatures: for s in signatures:
yield '%s %s/ (custom)' % (s, displaypath) yield '%s %s' % (s, displaypath)
...@@ -9,8 +9,8 @@ import traceback ...@@ -9,8 +9,8 @@ import traceback
import six import six
from six.moves import configparser from six.moves import configparser
sys.path.append(os.path.dirname(__file__)) sys.path.insert(0, os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove from theia_common import *
os.environ['LC_ALL'] = 'C' os.environ['LC_ALL'] = 'C'
...@@ -55,45 +55,74 @@ class TheiaExport(object): ...@@ -55,45 +55,74 @@ class TheiaExport(object):
self.copytree_partitions_args = {} self.copytree_partitions_args = {}
self.logs = [] self.logs = []
def mirrorpath(self, src): def mirror_path(self, src):
return os.path.abspath(os.path.join( return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(src, start=self.root_dir))) self.backup_dir, os.path.relpath(src, start=self.root_dir)))
def backuptree(self, src, exclude=[], extrargs=[], verbosity='-v'): def backup_tree(self, src):
dst = self.mirrorpath(src) return copytree(self.rsync_bin, src, self.mirror_path(src))
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def backupdb(self): def backup_file(self, src):
copydb(self.sqlite3_bin, self.proxy_db, self.mirrorpath(self.proxy_db)) return copyfile(src, self.mirror_path(src))
def backuppartition(self, partition): def backup_db(self):
copydb(self.sqlite3_bin, self.proxy_db, self.mirror_path(self.proxy_db))
def backup_partition(self, partition):
installed = parse_installed(partition) installed = parse_installed(partition)
rules = os.path.join(partition, 'srv', 'exporter.exclude') rules = os.path.join(partition, 'srv', 'exporter.exclude')
extrargs = ('--filter=.-/ ' + rules,) if os.path.exists(rules) else () extrargs = ('--filter=.-/ ' + rules,) if os.path.exists(rules) else ()
self.backuptree(partition, exclude=installed, extrargs=extrargs) dst = self.mirror_path(partition)
self.copytree_partitions_args[partition] = (installed, extrargs) copytree(self.rsync_bin, partition, dst, installed, extrargs)
self.copytree_partitions_args[partition] = (dst, installed, extrargs)
def sign(self, signaturefile): def sign(self, signaturefile, signatures):
remove(signaturefile) remove(signaturefile)
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir)) pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir))
tmpfile = os.path.join(pardir, 'backup.signature.tmp') tmpfile = os.path.join(pardir, os.path.basename(signaturefile) + '.tmp')
mirror_partitions = [self.mirrorpath(p) for p in self.partition_dirs]
with open(tmpfile, 'w') as f: with open(tmpfile, 'w') as f:
for s in hashwalk(self.backup_dir, mirror_partitions): for s in signatures:
f.write(s + '\n') f.write(s + '\n')
os.rename(tmpfile, signaturefile) os.rename(tmpfile, signaturefile)
def checkpartition(self, partition, pattern='/srv/backup/'): def sign_root(self):
installed, extrargs = self.copytree_partitions_args[partition] signaturefile = os.path.join(self.backup_dir, 'backup.signature')
output = self.backuptree( signatures = hashwalk(self.backup_dir, self.mirror_path(self.instance_dir))
self.sign(signaturefile, signatures)
def sign_partition(self, partition):
dst = self.mirror_path(partition)
filename = os.path.basename(partition) + '.backup.signature'
signaturefile = os.path.join(self.backup_dir, filename)
script = hashscript(partition)
if script:
signaturefile += '.custom'
self.sign(signaturefile, hashcustom(dst, script))
else:
self.sign(signaturefile, hashwalk(dst))
def remove_signatures(self):
pattern = os.path.join(self.backup_dir, '*backup.signature*')
signature_files = glob.glob(pattern)
for f in signature_files:
try:
os.remove(f)
except OSError:
pass
def check_partition(self, partition, pattern='/srv/backup/'):
dst, installed, extrargs = self.copytree_partitions_args[partition]
output = copytree(
self.rsync_bin,
partition, partition,
dst,
exclude=installed, exclude=installed,
extrargs=extrargs + ('--dry-run', '--update'), extrargs=extrargs + ('--dry-run', '--update'),
verbosity='--out-format=%n', verbosity='--out-format=%n',
) )
return [path for path in output.splitlines() if pattern in path] return [path for path in output.splitlines() if pattern in path]
def loginfo(self, msg): def log(self, msg):
print(msg) print(msg)
self.logs.append(msg) self.logs.append(msg)
...@@ -118,40 +147,46 @@ class TheiaExport(object): ...@@ -118,40 +147,46 @@ class TheiaExport(object):
def export(self): def export(self):
export_start_date = int(time.time()) export_start_date = int(time.time())
etc_dir = os.path.join(self.root_dir, 'etc') timestamp = os.path.join(self.root_dir, 'etc', '.resilient_timestamp')
with open(os.path.join(etc_dir, '.resilient_timestamp'), 'w') as f: with open(timestamp, 'w') as f:
f.write(str(export_start_date)) f.write(str(export_start_date))
self.loginfo('Backup directory ' + etc_dir) self.remove_signatures()
self.backuptree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*'))
self.log('Backup resilient timestamp ' + timestamp)
self.backup_file(timestamp)
for d in self.dirs: for d in self.dirs:
self.loginfo('Backup directory ' + d) self.log('Backup directory ' + d)
self.backuptree(d) self.backup_tree(d)
self.loginfo('Backup slapproxy database') self.log('Backup slapproxy database')
self.backupdb() self.backup_db()
self.loginfo('Backup partitions') self.log('Backup partitions')
for p in self.partition_dirs: for p in self.partition_dirs:
self.backuppartition(p) self.backup_partition(p)
self.loginfo('Compute backup signature') self.log('Compute root backup signature')
self.sign(os.path.join(self.backup_dir, 'backup.signature')) self.sign_root()
self.log('Compute partitions backup signatures')
for p in self.partition_dirs:
self.sign_partition(p)
time.sleep(10) time.sleep(10)
self.loginfo('Check partitions') self.log('Check partitions')
modified = list(itertools.chain.from_iterable( modified = list(itertools.chain.from_iterable(
self.checkpartition(p) for p in self.partition_dirs)) self.check_partition(p) for p in self.partition_dirs))
if modified: if modified:
msg = 'Some files have been modified since the backup started' msg = 'Some files have been modified since the backup started'
self.loginfo(msg + ':') self.log(msg + ':')
self.loginfo('\n'.join(modified)) self.log('\n'.join(modified))
self.loginfo("Let's wait %d minutes and try again" % BACKUP_WAIT) self.log("Let's wait %d minutes and try again" % BACKUP_WAIT)
time.sleep(BACKUP_WAIT * 60) time.sleep(BACKUP_WAIT * 60)
raise Exception(msg) raise Exception(msg)
self.loginfo('Done') self.log('Done')
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -10,7 +10,7 @@ import six ...@@ -10,7 +10,7 @@ import six
from six.moves import configparser from six.moves import configparser
sys.path.append(os.path.dirname(__file__)) sys.path.append(os.path.dirname(__file__))
from theia_common import copytree, copydb, hashwalk, parse_installed, remove from theia_common import *
os.environ['LC_ALL'] = 'C' os.environ['LC_ALL'] = 'C'
...@@ -57,28 +57,32 @@ class TheiaImport(object): ...@@ -57,28 +57,32 @@ class TheiaImport(object):
configp.read(cfg) configp.read(cfg)
self.proxy_db = configp.get('slapproxy', 'database_uri') self.proxy_db = configp.get('slapproxy', 'database_uri')
self.instance_dir = configp.get('slapos', 'instance_root') self.instance_dir = configp.get('slapos', 'instance_root')
mirror_dir = self.mirrorpath(self.instance_dir) mirror_dir = self.mirror_path(self.instance_dir)
partitions = glob.glob(os.path.join(mirror_dir, 'slappart*')) partitions = glob.glob(os.path.join(mirror_dir, 'slappart*'))
self.mirror_partition_dirs = [p for p in partitions if os.path.isdir(p)] self.mirror_partition_dirs = [p for p in partitions if os.path.isdir(p)]
self.logs = [] self.logs = []
def mirrorpath(self, dst): def mirror_path(self, dst):
return os.path.abspath(os.path.join( return os.path.abspath(os.path.join(
self.backup_dir, os.path.relpath(dst, start=self.root_dir))) self.backup_dir, os.path.relpath(dst, start=self.root_dir)))
def dstpath(self, src): def dst_path(self, src):
return os.path.abspath(os.path.join( return os.path.abspath(os.path.join(
self.root_dir, os.path.relpath(src, start=self.backup_dir))) self.root_dir, os.path.relpath(src, start=self.backup_dir)))
def restoretree(self, dst, exclude=[], extrargs=[], verbosity='-v'): def restore_tree(self, dst, exclude=(), extrargs=(), verbosity='-v'):
src = self.mirrorpath(dst) src = self.mirror_path(dst)
return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity) return copytree(self.rsync_bin, src, dst, exclude, extrargs, verbosity)
def restoredb(self): def restore_file(self, dst):
copydb(self.sqlite3_bin, self.mirrorpath(self.proxy_db), self.proxy_db) src = self.mirror_path(dst)
return copyfile(src, dst)
def restorepartition(self, mirror_partition): def restore_db(self):
p = self.dstpath(mirror_partition) copydb(self.sqlite3_bin, self.mirror_path(self.proxy_db), self.proxy_db)
def restore_partition(self, mirror_partition):
p = self.dst_path(mirror_partition)
installed = parse_installed(p) if os.path.exists(p) else [] installed = parse_installed(p) if os.path.exists(p) else []
copytree(self.rsync_bin, mirror_partition, p, exclude=installed) copytree(self.rsync_bin, mirror_partition, p, exclude=installed)
...@@ -86,38 +90,67 @@ class TheiaImport(object): ...@@ -86,38 +90,67 @@ class TheiaImport(object):
supervisor_command = (self.supervisorctl_bin, '-c', self.supervisord_conf) supervisor_command = (self.supervisorctl_bin, '-c', self.supervisord_conf)
command = supervisor_command + args command = supervisor_command + args
print(' '.join(command)) print(' '.join(command))
sp.check_call(command) print(sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True))
def slapos(self, *args): def slapos(self, *args):
command = (self.slapos_bin,) + args + ('--cfg', self.slapos_cfg) command = (self.slapos_bin,) + args + ('--cfg', self.slapos_cfg)
print(' '.join(command)) print(' '.join(command))
sp.check_call(command) print(sp.check_output(command, stderr=sp.STDOUT, universal_newlines=True))
def sign(self, signaturefile, root_dir):
with open(signaturefile, 'r') as f:
for line in f:
try:
_, relpath = line.strip().split(None, 1)
except ValueError:
yield 'Could not parse: %s' % line
continue
filepath = os.path.join(root_dir, relpath)
try:
signature = sha256sum(filepath)
except IOError:
yield 'Could not read: %s' % filepath
continue
yield '%s %s' % (signature, relpath)
def sign_custom(self, root_dir):
partition = self.dst_path(root_dir)
script = hashscript(partition)
if not script:
msg = 'ERROR: missing custom signature script for partition ' + partition
raise Exception(msg)
return hashcustom(root_dir, script)
def verify(self, signaturefile): def find_signature_file(self, partition):
pardir = os.path.abspath(os.path.join(self.backup_dir, os.pardir)) filename = os.path.basename(partition) + '.backup.signature'
moved = os.path.join(pardir, 'backup.signature.moved') signaturefile = os.path.join(self.backup_dir, filename)
proof = os.path.join(pardir, 'backup.signature.proof')
if os.path.exists(signaturefile): if os.path.exists(signaturefile):
os.rename(signaturefile, moved) return signaturefile, False
if not os.path.exists(moved): signaturefile += '.custom'
msg = 'ERROR the backup signature file is missing' if os.path.exists(signaturefile):
print(msg) return signaturefile, True
raise Exception(msg) raise Exception('ERROR: missing signature file for partition ' + partition)
def verify(self, signaturefile, root_dir, custom=False):
proof = signaturefile + '.proof'
if custom:
signatures = self.sign_custom(root_dir)
else:
signatures = self.sign(signaturefile, root_dir)
with open(proof, 'w') as f: with open(proof, 'w') as f:
for s in hashwalk(self.backup_dir, self.mirror_partition_dirs): for s in signatures:
f.write(s + '\n') f.write(s + '\n')
diffcommand = ('diff', moved, proof) diffcommand = ('diff', signaturefile, proof)
print(' '.join(diffcommand))
try: try:
sp.check_output( sp.check_output(
diffcommand, stderr=sp.STDOUT, universal_newlines=True) diffcommand, stderr=sp.STDOUT, universal_newlines=True)
except sp.CalledProcessError as e: except sp.CalledProcessError as e:
template = 'ERROR the backup signatures do not match\n\n%s' template = 'ERROR the backup signatures do not match\n\n%s\n%s'
msg = template % e.output msg = template % (' '.join(diffcommand), e.output)
print(msg) print(msg)
raise Exception(msg) raise Exception(msg)
def loginfo(self, msg): def log(self, msg):
print(msg) print(msg)
self.logs.append(msg) self.logs.append(msg)
...@@ -126,9 +159,11 @@ class TheiaImport(object): ...@@ -126,9 +159,11 @@ class TheiaImport(object):
exitcode = 0 exitcode = 0
try: try:
self.restore() self.restore()
except Exception: except Exception as e:
exitcode = 1 exitcode = 1
exc = traceback.format_exc() exc = traceback.format_exc()
if isinstance(e, sp.CalledProcessError) and e.output:
exc = "%s\n\n%s" % (exc, e.output)
with open(self.error_file, 'w') as f: with open(self.error_file, 'w') as f:
f.write('\n ... OK\n\n'.join(self.logs)) f.write('\n ... OK\n\n'.join(self.logs))
f.write('\n ... ERROR !\n\n') f.write('\n ... ERROR !\n\n')
...@@ -140,44 +175,54 @@ class TheiaImport(object): ...@@ -140,44 +175,54 @@ class TheiaImport(object):
sys.exit(exitcode) sys.exit(exitcode)
def restore(self): def restore(self):
self.loginfo('Verify backup signature') self.log('Verify main backup signature')
self.verify(os.path.join(self.backup_dir, 'backup.signature')) signaturefile = os.path.join(self.backup_dir, 'backup.signature')
self.verify(signaturefile, self.backup_dir)
self.loginfo('Stop slapproxy') custom_partition_signatures = []
for m in self.mirror_partition_dirs:
signaturefile, custom = self.find_signature_file(m)
if custom:
custom_partition_signatures.append((signaturefile, m))
else:
self.log('Verify backup signature for ' + m)
self.verify(signaturefile, m)
self.log('Stop slapproxy')
self.supervisorctl('stop', 'slapos-proxy') self.supervisorctl('stop', 'slapos-proxy')
self.loginfo('Restore partitions') self.log('Restore partitions')
for m in self.mirror_partition_dirs: for m in self.mirror_partition_dirs:
self.restorepartition(m) self.restore_partition(m)
for d in self.dirs: for d in self.dirs:
self.loginfo('Restore directory ' + d) self.log('Restore directory ' + d)
self.restoretree(d) self.restore_tree(d)
self.loginfo('Restore slapproxy database') self.log('Restore slapproxy database')
self.restoredb() self.restore_db()
etc_dir = os.path.join(self.root_dir, 'etc') timestamp = os.path.join(self.root_dir, 'etc', '.resilient_timestamp')
self.loginfo('Restore directory ' + etc_dir) self.log('Restore resilient timestamp ' + timestamp)
self.restoretree(etc_dir, extrargs=('--filter=- */', '--filter=-! .*')) self.restore_file(timestamp)
custom_script = os.path.join(self.root_dir, 'srv', 'runner-import-restore') custom_script = os.path.join(self.root_dir, 'srv', 'runner-import-restore')
if os.path.exists(custom_script): if os.path.exists(custom_script):
self.loginfo('Run custom restore script %s' % custom_script) self.log('Run custom restore script %s' % custom_script)
sp.check_call(custom_script) print(sp.check_output(custom_script))
self.loginfo('Start slapproxy again') self.log('Start slapproxy again')
self.supervisorctl('start', 'slapos-proxy') self.supervisorctl('start', 'slapos-proxy')
self.loginfo('Reformat partitions') self.log('Reformat partitions')
self.slapos('node', 'format', '--now') self.slapos('node', 'format', '--now')
self.loginfo('Remove old supervisord configuration files') self.log('Remove old supervisord configuration files')
conf_dir = os.path.join(self.instance_dir, 'etc', 'supervisor.conf.d') conf_dir = os.path.join(self.instance_dir, 'etc', 'supervisor.conf.d')
for f in glob.glob(os.path.join(conf_dir, '*')): for f in glob.glob(os.path.join(conf_dir, '*')):
os.remove(f) os.remove(f)
self.loginfo('Build Software Releases') self.log('Build Software Releases')
for i in range(3): for i in range(3):
try: try:
self.slapos('node', 'software', '--all', '--logfile', self.sr_log) self.slapos('node', 'software', '--all', '--logfile', self.sr_log)
...@@ -187,18 +232,18 @@ class TheiaImport(object): ...@@ -187,18 +232,18 @@ class TheiaImport(object):
else: else:
break break
self.loginfo('Remove old custom instance scripts') self.log('Remove old custom instance scripts')
partitions_glob = os.path.join(self.instance_dir, 'slappart*') partitions_glob = os.path.join(self.instance_dir, 'slappart*')
scripts = os.path.join(partitions_glob, 'srv', 'runner-import-restore') scripts = os.path.join(partitions_glob, 'srv', 'runner-import-restore')
for f in glob.glob(scripts): for f in glob.glob(scripts):
remove(f) remove(f)
self.loginfo('Remove partition timestamps') self.log('Remove partition timestamps')
timestamps = os.path.join(partitions_glob, '.timestamp') timestamps = os.path.join(partitions_glob, '.timestamp')
for f in glob.glob(timestamps): for f in glob.glob(timestamps):
remove(f) remove(f)
self.loginfo('Build Instances') self.log('Build Instances')
cp_log = self.cp_log cp_log = self.cp_log
for i in range(3): for i in range(3):
try: try:
...@@ -209,11 +254,15 @@ class TheiaImport(object): ...@@ -209,11 +254,15 @@ class TheiaImport(object):
else: else:
break break
self.log('Verify custom backup signatures')
for signaturefile, m in custom_partition_signatures:
self.verify(signaturefile, m, True)
for custom_script in glob.glob(scripts): for custom_script in glob.glob(scripts):
self.loginfo('Running custom instance script %s' % custom_script) self.log('Running custom instance script %s' % custom_script)
sp.check_call(custom_script) print(sp.check_output(custom_script))
self.loginfo('Done') self.log('Done')
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -26,7 +26,7 @@ md5sum = 8f15263c4a27ec315eb3a12dbf7a7b34 ...@@ -26,7 +26,7 @@ md5sum = 8f15263c4a27ec315eb3a12dbf7a7b34
[template-pull-backup] [template-pull-backup]
filename = instance-pull-backup.cfg.in filename = instance-pull-backup.cfg.in
md5sum = 4425db50d551fb8a974e547308990bac md5sum = e7674770b85c983244255dd82642ebe8
[template-replicated] [template-replicated]
filename = template-replicated.cfg.in filename = template-replicated.cfg.in
......
...@@ -250,6 +250,7 @@ monitor-base-url = $${monitor-publish-parameters:monitor-base-url} ...@@ -250,6 +250,7 @@ monitor-base-url = $${monitor-publish-parameters:monitor-base-url}
monitor-url = $${monitor-publish-parameters:monitor-url} monitor-url = $${monitor-publish-parameters:monitor-url}
monitor-user = $${monitor-publish-parameters:monitor-user} monitor-user = $${monitor-publish-parameters:monitor-user}
monitor-password = $${monitor-publish-parameters:monitor-password} monitor-password = $${monitor-publish-parameters:monitor-password}
monitor-setup-url = $${monitor-publish:monitor-setup-url}
#---------------- #----------------
#-- #--
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment