Commit cf57b1f4 authored by Nicolas Wavrant's avatar Nicolas Wavrant Committed by Rafael Monnerat

Resiliency improvements

This MR brings some comfort of use and of administration to the resiliency stack and resiliency-related tasks:

   *  logrotate some logs/feeds to save disk space
   * adds more promises on export and import instances
   * improves feedback on import failures :  promises are more verbose, more information on the takeover webpage, ...
   * better idempotency for the backup time information, which is now published instead of read from the .installed.cfg

Updated on 11/22 : 

   * improve pbs recipe : makes the trap command cross-shell
   * remove infinite loops in scripts generated by pbs: they now run a bounded number of times before raising an error (which can be caught by promises)
   * runner-importer script no longer systematically fails if there is no requested SR.
   * adds promises for notifier feeds (on exporter and pbs) to check last result and check if feed is corrupted

This MR should be merged after slapos.core has been upgraded 

/reviewed-on !115
parents a7eb3cb3 b47506a6
......@@ -92,7 +92,7 @@ command =
[template]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg.in
md5sum = 110df709a7c8a5c749f93663f6ab0d28
md5sum = 061604d32cc626352dc3d221bdeaf804
output = ${buildout:directory}/template.cfg
mode = 0644
......
......@@ -5,6 +5,8 @@ parts =
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
extends = ${template-resilient-templates:output}
[switch-softwaretype]
recipe = slapos.cookbook:softwaretype
default = $${:kvm}
......@@ -137,7 +139,7 @@ context =
key eggs_directory buildout:eggs-directory
raw kvm_template $${dynamic-template-kvm:rendered}
raw template_kvm_export ${template-kvm-export-script:location}/${template-kvm-export-script:filename}
raw pbsready_export_template ${pbsready-export:output}
key pbsready_export_template template-pbsready-export:rendered
raw gzip_binary ${gzip:location}/bin/gzip
key slapparameter_dict slap-configuration:configuration
mode = 0644
......
......@@ -53,7 +53,7 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg
output = ${buildout:directory}/template.cfg
md5sum = bb7e0bf9959c4437ff1e23e645315ccf
md5sum = 06107f93ebe78905c957a4c4fc4edf16
mode = 0644
[template-runner]
......@@ -68,7 +68,7 @@ recipe = hexagonit.recipe.download
ignore-existing = true
url = ${:_profile_base_location_}/template/runner-import.sh.jinja2
download-only = true
md5sum = 3cebc5d793ff1b5c786392723babc510
md5sum = 275ae222cd9a560c08748d7502824885
filename = runner-import.sh.jinja2
mode = 0644
......@@ -76,7 +76,7 @@ mode = 0644
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-runner-import.cfg.in
output = ${buildout:directory}/instance-runner-import.cfg
md5sum = 91c34a55b7a45b14b0fac8b7faa202fe
md5sum = 9db9957f452bda370cb2d5cc2e833e85
mode = 0644
[template-runner-export-script]
......@@ -89,10 +89,10 @@ filename = runner-export.sh.jinja2
mode = 0644
[instance-runner-export]
recipe = slapos.recipe.template
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/instance-runner-export.cfg.in
output = ${buildout:directory}/instance-runner-export.cfg
md5sum = ec92773be8f8a2ad20dc0661d58d7717
filename = instance-runner-export.cfg.in
md5sum = 852a2ed99af566d27e5e4403334a3376
mode = 0644
[template-resilient]
......@@ -201,7 +201,7 @@ mode = 0644
recipe = hexagonit.recipe.download
ignore-existing = true
download-only = true
md5sum = 922498a301ab3defe412602f626e02ec
md5sum = 2451072826a9ad9425d62c9e9c7f6284
url = ${:_profile_base_location_}/template/${:filename}
filename = resilient_software_release_information.py.in
mode = 0644
......
[buildout]
extends = template-runner.cfg
${pbsready-export:output}
extends = {{ template_runner_path }}
{{ pbsready_export_template_path }}
parts +=
nginx_conf
......@@ -41,51 +41,51 @@ parts +=
recipe = slapos.cookbook:free_port
minimum = 49980
maximum = 49989
ip = $${slap-network-information:local-ipv4}
ip = ${slap-network-information:local-ipv4}
[runner-free-port]
recipe = slapos.cookbook:free_port
minimum = 50005
maximum = 50014
ip = $${slap-network-information:global-ipv6}
ip = ${slap-network-information:global-ipv6}
[slaprunner]
proxy_port = $${proxy-free-port:port}
runner_port = $${runner-free-port:port}
proxy_port = ${proxy-free-port:port}
runner_port = ${runner-free-port:port}
[supervisord-free-port]
recipe = slapos.cookbook:free_port
minimum = 39986
maximum = 39995
ip = $${slaprunner:ipv4}
ip = ${slaprunner:ipv4}
[supervisord]
port = $${supervisord-free-port:port}
port = ${supervisord-free-port:port}
[exporter]
recipe = slapos.recipe.template:jinja2
template = ${template-runner-export-script:location}/${template-runner-export-script:filename}
rendered = $${directory:bin}/$${slap-parameter:namebase}-exporter
template = {{ exporter_script_path }}
rendered = ${directory:bin}/${slap-parameter:namebase}-exporter
# backward compatibility for resilient stack
wrapper = $${:rendered}
wrapper = ${:rendered}
mode = 700
context =
section directory directory
raw output_log_file $${directory:log}/resilient.log
raw shell_binary ${bash:location}/bin/bash
raw rsync_binary ${rsync:location}/bin/rsync
raw output_log_file ${directory:log}/resilient.log
raw shell_binary {{ bash_executable_location }}
raw rsync_binary {{ rsync_executable_location }}
[monitor-httpd-free-port]
recipe = slapos.cookbook:free_port
minimum = 8437
maximum = 8446
ip = $${slap-network-information:global-ipv6}
ip = ${slap-network-information:global-ipv6}
[monitor-instance-parameter]
monitor-httpd-port = $${monitor-httpd-free-port:port}
monitor-httpd-port = ${monitor-httpd-free-port:port}
# Pass some parameters to display in the monitoring interface
instance-configuration =
httpdcors cors-domain $${slaprunner-httpd-cors:location} $${httpd-graceful-wrapper:output}
httpdcors cors-domain ${slaprunner-httpd-cors:location} ${httpd-graceful-wrapper:output}
# Extends publish section with resilient parameters
[publish-connection-information]
......@@ -93,11 +93,11 @@ instance-configuration =
[monitor-check-resilient-feed-file]
recipe = slapos.recipe.template:jinja2
template = ${template-monitor-check-resilient-feed:location}/${template-monitor-check-resilient-feed:filename}
rendered = $${monitor-directory:reports}/check-create-resilient-feed-files
template = {{ monitor_check_resilient_feed_template_path }}
rendered = ${monitor-directory:reports}/check-create-resilient-feed-files
mode = 700
context =
key input_feed_directory directory:notifier-feeds
key monitor_feed_directory monitor-directory:public
raw base_url http://[$${notifier:host}]:$${notifier:port}/get/
raw base_url http://[${notifier:host}]:${notifier:port}/get/
raw python_executable ${buildout:executable}
......@@ -79,34 +79,43 @@ rendered = $${directory:bin}/$${slap-parameter:namebase}-importer
# backward compatibility for resilient stack
wrapper = $${:rendered}
mode = 700
restore-exit-code-file=$${directory:srv}/importer-exit-code-file
restore-exit-code-file = $${directory:srv}/$${:restore-exit-code-file-basename}
restore-exit-code-file-basename = importer-exit-code-file
restore-error-message-file = $${directory:srv}/$${:restore-error-message-file-basename}
restore-error-message-file-basename = importer-error-message-file
resilient-log-basename = resilient.log
context =
key backend_url slaprunner:access-url
key ipv4 slaprunner:ipv4
key ipv6 slaprunner:ipv6
key proxy_port slaprunner:proxy_port
key instance_folder slaprunner:instance_root
section directory directory
raw output_log_file $${directory:log}/resilient.log
section supervisord supervisord
raw output_log_file $${directory:log}/$${:resilient-log-basename}
raw shell_binary ${bash:location}/bin/bash
raw rsync_binary ${rsync:location}/bin/rsync
raw restore_exit_code_file $${:restore-exit-code-file}
raw restore_error_message_file $${:restore-error-message-file}
[importer-consistency-promise]
# Test that the importer script and "after-import" subscripts
# are not older than 1 day (24h), and have succeeded
# are not older than 2 days (1 day + some slack), and have succeeded
recipe = collective.recipe.template
input = inline: #!/bin/sh
EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}")
RECENT_EXIT_CODE_FILE=$(find "$${importer:restore-exit-code-file}" -mtime -1)
if [ -z "$EXIT_CODE_FILE" ]; then
EXIT_CODE_FILE="$${importer:restore-exit-code-file}"
RECENT_EXIT_CODE_FILE=$(find $${directory:srv} -maxdepth 1 -name "$${importer:restore-exit-code-file-basename}" -mtime -2)
RESILIENT_LOG_URL=$${publish:monitor-base-url}/log/$${importer:resilient-log-basename}
if [ ! -f "$EXIT_CODE_FILE" ]; then
exit 0;
else
if [ -z "$RECENT_EXIT_CODE_FILE" ]; then
echo "Consistency check is too old.";
exit 1;
else
EXIT_CODE=$(cat $EXIT_CODE_FILE)
exit $EXIT_CODE
cat $${importer:restore-error-message-file}
echo "More information can be found here : $RESILIENT_LOG_URL";
exit $(cat $EXIT_CODE_FILE);
fi
fi
exit 1; # Something else went wrong
......@@ -135,6 +144,7 @@ mode = 755
recipe = slapos.recipe.template
url = ${template-resilient-software-release-information:destination}/${template-resilient-software-release-information:filename}
output = $${directory:cgi-bin}/resilient_software_release_information.py
resilient-log-url = $${publish:monitor-base-url}/log/$${importer:resilient-log-basename}
mode = 0600
[slap-parameter]
......
......@@ -5,6 +5,8 @@ parts =
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
extends = ${template-resilient-templates:output}
[switch_softwaretype]
recipe = slapos.cookbook:softwaretype
default = $${instance-base-runner:rendered}
......@@ -12,7 +14,7 @@ resilient = $${instance-resilient:rendered}
test = $${instance-resilient-test:rendered}
runner = $${instance-base-runner:rendered}
runner-import = ${instance-runner-import:output}
runner-export = ${instance-runner-export:output}
runner-export = $${template-runner-export:rendered}
frozen = ${instance-frozen:output}
pull-backup = ${template-pull-backup:output}
......@@ -58,10 +60,24 @@ context =
key slapparameter_dict slap-configuration:configuration
mode = 0644
[template-runner-export]
recipe = slapos.recipe.template:jinja2
template = ${instance-runner-export:target}
rendered = $${buildout:directory}/instance-runner-export.cfg
mode = 640
context =
key pbsready_export_template_path template-pbsready-export:rendered
key template_runner_path instance-base-runner:rendered
raw exporter_script_path ${template-runner-export-script:location}/${template-runner-export-script:filename}
raw monitor_check_resilient_feed_template_path ${template-monitor-check-resilient-feed:location}/${template-monitor-check-resilient-feed:filename}
raw buildout_executable_location ${buildout:executable}
raw bash_executable_location ${bash:location}/bin/bash
raw rsync_executable_location ${rsync:location}/bin/rsync
[slap-configuration]
recipe = slapos.cookbook:slapconfiguration
computer = $${slap-connection:computer-id}
partition = $${slap-connection:partition-id}
url = $${slap-connection:server-url}
key = $${slap-connection:key-file}
cert = $${slap-connection:cert-file}
cert = $${slap-connection:cert-file}
\ No newline at end of file
......@@ -2,4 +2,6 @@
# takeover interface of the Resilient stack
def main():
return {}
\ No newline at end of file
return {
'Read the log from the importer': '<a href="${:resilient-log-url}">${:resilient-log-url}</a>',
}
\ No newline at end of file
......@@ -8,7 +8,23 @@ umask 077
exec > >(tee -ai {{ output_log_file }})
exec 2>&1
echo -e "\n\nrunner-import run at : $(date)"
RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
RESTORE_ERROR_MESSAGE_FILE="{{ restore_error_message_file }}"
ERROR_MESSAGE=""
fail_with_exit_code () {
echo 1 > $RESTORE_EXIT_CODE_FILE
echo -e "Failure during step : $ERROR_MESSAGE" > $RESTORE_ERROR_MESSAGE_FILE
}
trap fail_with_exit_code ERR
log_message () {
ERROR_MESSAGE=$1
echo -e $1
}
# Delete the error message file, to not keep it even after a successful build
rm $RESTORE_ERROR_MESSAGE_FILE || true
srv_directory={{ directory['srv'] }}
restore_element () {
......@@ -27,7 +43,12 @@ restore_element () {
done
}
echo -e "\n\nrunner-import run at : $(date)"
log_message "Restoring WebRunner content..."
restore_element {{ directory['backup'] }}/runner/ $srv_directory/runner instance project proxy.db
log_message "Restoring WebRunner config (etc directory)..."
restore_element {{ directory['backup'] }}/etc/ {{ directory['etc'] }} config.json
cp -r {{ directory['backup'] }}/etc/.??* {{ directory['etc'] }};
......@@ -40,10 +61,17 @@ if [ ! -e "$runner_import_restore" ]; then
touch $runner_import_restore
chmod +x $runner_import_restore
fi
echo "Running $runner_import_restore script..."
log_message "Running $runner_import_restore..."
$srv_directory/runner-import-restore || RESTORE_EXIT_CODE=$?
echo "Updating slapproxy database, software release and instances..."
# If neither "etc/.project" nor "srv/runner/proxy.db" exists, we can safely
# assume that there is no instance deployed on runner0: record a successful
# exit code for the promise and stop here.
if [ ! -f "{{ directory['etc'] }}/.project" ] && [ ! -f "$srv_directory/runner/proxy.db" ]; then
  echo 0 > "$RESTORE_EXIT_CODE_FILE"
  exit 0
fi
log_message "Updating slapproxy database..."
HOME="{{ directory['home'] }}"
# XXX Hardcoded
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
......@@ -69,35 +97,40 @@ $SQLITE3 $DATABASE "update partition_network11 set address='$IPV6' where netmask
MASTERURL="http://{{ ipv4 }}:{{ proxy_port }}"
echo "Building newest software..."
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1 ||
$SLAPOS node software --cfg $HOME/etc/slapos.cfg --all --master-url=$MASTERURL --logfile $HOME/srv/runner/software.log --pidfile $HOME/var/run/slapos-node-software.pid >/dev/null 2>&1
log_message "Removing old supervisord service description files..."
# XXX: Path hardcoded in slapos.core
rm {{ instance_folder }}/etc/supervisord.conf.d/* || true
log_message "Building newest Software Release..."
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node software --cfg {{ supervisord['slapos-cfg'] }} --all --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-sr-log'] }} --pidfile {{ supervisord['slapgrid-sr-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-sr-log'] }} && false)
# Remove defined scripts to force buildout to recreate them to have updated paths
rm $srv_directory/runner/instance/slappart*/srv/runner-import-restore || true
echo "Running slapos node instance..."
log_message "Fixing Instances as needed after import..."
# XXX hardcoded
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1 ||
$SLAPOS node instance --cfg $HOME/etc/slapos.cfg --master-url=$MASTERURL --logfile $HOME/srv/runner/instance.log --pidfile $HOME/var/run/slapos-node-instance.pid >/dev/null 2>&1
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
$SLAPOS node instance --cfg {{ supervisord['slapos-cfg'] }} --master-url=$MASTERURL --logfile {{ supervisord['slapgrid-cp-log'] }} --pidfile {{ supervisord['slapgrid-cp-pid'] }} >/dev/null 2>&1 ||
(tail -n 200 {{ supervisord['slapgrid-cp-log'] }} && false)
# Invoke defined scripts for each partition inside of slaprunner
echo "Invoke custom import scripts defined by each instances..."
log_message "Invoke custom import scripts defined by each instances..."
for partition in $srv_directory/runner/instance/slappart*/
do
script=$partition/srv/runner-import-restore
if [ -e "$script" ]; then
echo "Running $script script..."
log_message "Running $script..."
$script || RESTORE_EXIT_CODE=$?
fi
done
# Change back slapproxy database to have all instances started
echo "Start instances..."
log_message "Set instances as to start after takeover..."
$SQLITE3 $DATABASE "update partition11 set requested_state='started';"
# Write exit code to an arbitrary file that will be checked by promise/monitor
echo "Write status file... End"
RESTORE_EXIT_CODE_FILE="{{ restore_exit_code_file }}"
log_message "Writing status file... End"
echo $RESTORE_EXIT_CODE > $RESTORE_EXIT_CODE_FILE
exit $RESTORE_EXIT_CODE
......@@ -14,6 +14,7 @@ parts =
pbsready
pbsready-import
pbsready-export
notifier-feed-promise-template
template-replicated
template-parts
instance-frozen
......@@ -41,7 +42,7 @@ eggs =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/pbsready.cfg.in
output = ${buildout:directory}/pbsready.cfg
md5sum = 3dddf84daf5db8ff4ffc3878e206b467
md5sum = 0df8fe9b69f7943c3d5a2d30d4640557
mode = 0644
[pbsready-import]
......@@ -50,23 +51,23 @@ mode = 0644
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/pbsready-import.cfg.in
output = ${buildout:directory}/pbsready-import.cfg
md5sum = 10264fe1cfb7ebe567d50ebabbd93a43
md5sum = 5d5e4ad35c1a97ea5f7a15a4f5f766a8
mode = 0644
[pbsready-export]
# An export instance has an exporter script, and communicates
# to parent PBS instances to deliver the exported dump.
recipe = slapos.recipe.template
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/pbsready-export.cfg.in
output = ${buildout:directory}/pbsready-export.cfg
md5sum = 793f1843a643b3c91b658eca2bad5abc
filename = pbsready-export.cfg.in
md5sum = 1b38292c42702f91f620cb99d1a88952
mode = 0644
[template-pull-backup]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-pull-backup.cfg.in
output = ${buildout:directory}/instance-pull-backup.cfg
md5sum = 3ef8f98ff013f06fcd81bba18872e561
md5sum = 7b4f8ac1a62680d624ac632f9601dab5
mode = 0644
[template-replicated]
......@@ -83,6 +84,14 @@ md5sum = 071b1034ee8f5cc14f79b16fdeba2813
mode = 0644
destination = ${buildout:directory}/template-parts.cfg.in
[template-resilient-templates]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/${:filename}.in
output = ${buildout:directory}/${:filename}
md5sum = be2ebf91faa6b5b131995a05a907707f
mode = 0644
filename = template-resilient-templates.cfg
[instance-frozen]
# When an instance is detected as broken, its software type is changed to "frozen".
# On the next run of slapgrid-cp, the buildout profile is replaced by instance-frozen.cfg,
......@@ -95,7 +104,7 @@ output = ${buildout:directory}/instance-frozen.cfg
[resilient-web-takeover-cgi-script-download]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/resilient-web-takeover-cgi-script.py.in
md5sum = c46c8e3e4ce4376c98ad2fc0e2ff0fe4
md5sum = 9d258d41eeef66f44f361adaa15cbd71
mode = 0644
destination = ${buildout:directory}/resilient-web-takeover-cgi-script.py.in
......@@ -107,6 +116,12 @@ output = ${buildout:directory}/template-wrapper.cfg
mode = 0644
md5sum = 8cde04bfd0c0e9bd56744b988275cfd8
[notifier-feed-promise-template]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/templates/notifier-feed-promise.py.in
md5sum = d75346911dbc4cfcdb39a21e56cd5016
mode = 0644
##################
# Monitor element
#
......
......@@ -4,6 +4,7 @@ parts =
publish-connection-information
pbs
logrotate
logrotate-entry-notifier
cron
cron-entry-logrotate
pbs-sshkeys-authority
......@@ -14,6 +15,7 @@ parts =
backup-signature-link
cron-pbs-status-feed
pull-push-stalled-promise
notifier-feed-status-promise
## Monitor for pbs
monitor-base
......@@ -128,6 +130,7 @@ directory = $${directory:pbs-backup}
cron-entries = $${cron:cron-entries}
wrappers-directory = $${directory:pbs-wrappers}
run-directory = $${basedirectory:run}
pull-push-maximum-run = 5
# XXX: this should be named "notifier-host"
notifier-url = http://[$${notifier:host}]:$${notifier:port}
slave-instance-list = $${slap-parameter:slave_instance_list}
......@@ -147,7 +150,7 @@ wrapper-path = $${rootdirectory:bin}/resilient-genstatusrss.py
<= cron
recipe = slapos.cookbook:cron.d
name = resilient-pbs-status-feed
frequency = 5 * * * *
frequency = */5 * * * *
command = $${pbs-resilient-status-feed:wrapper-path}
#----------------
......@@ -201,6 +204,18 @@ log = $${cron-simplelogger:log}
frequency = daily
rotate-num = 30
[logrotate-entry-notifier]
recipe = collective.recipe.template
mode = 600
input = inline:
$${directory:notifier-feeds}/* {
rotate 5
weekly
nocompress
missingok
olddir $${directory:logrotate-backup}
}
output = $${logrotate:logrotate-entries}/notifier
#----------------
#--
......@@ -303,10 +318,20 @@ symlink = $${directory:pbs-backup}/proof.signature = $${directory:monitor-resili
[pull-push-stalled-promise]
recipe = slapos.cookbook:wrapper
# time-buffer is 18h : cron for backup is run once a day - 6h of random sleep
command-line = ${buildout:bin-directory}/check-feed-as-promise --feed-path $${pbs-resilient-status-feed:feed-path} --title --ok-pattern 'OK' --time-buffer 64800
# # time-buffer is 24h (+1h of latitude)
command-line = ${buildout:bin-directory}/check-feed-as-promise --feed-path $${pbs-resilient-status-feed:feed-path} --title --ok-pattern 'OK' --time-buffer 90000
wrapper-path = $${basedirectory:promises}/stalled-pull-push
[notifier-feed-status-promise]
recipe = slapos.recipe.template:jinja2
template = ${notifier-feed-promise-template:target}
rendered = $${basedirectory:promises}/notifier-feed-check-malformed-or-failure.py
mode = 700
context =
key notifier_feed_directory directory:notifier-feeds
raw base_url http://[$${notifier:host}]:$${notifier:port}/get/
raw python_executable ${buildout:executable}
#----------------
#--
#-- Publish instance parameters.
......
[buildout]
extends = ${pbsready:output}
extends = {{ pbsready_template_path }}
# Explicitly define extended parts from pbsready
# then add local parts
......@@ -8,8 +8,12 @@ parts =
logrotate
logrotate-entry-cron
logrotate-entry-equeue
logrotate-entry-resilient
cron
cron-entry-logrotate
cron-entry-notifier-status-feed
notifier-feed-status-promise
notifier-stalled-promise
resilient-sshkeys-authority
sshd-raw-server
sshd-graceful
......@@ -24,9 +28,9 @@ parts =
[resilient-directory]
recipe = slapos.cookbook:mkdirectory
home = $${buildout:directory}
var = $${:home}/var
pid = $${:var}/pid
home = ${buildout:directory}
var = ${:home}/var
pid = ${:var}/pid
# Define port of ssh server. It has to be different from import so that it
# supports export/import using same IP (slaprunner, slapos-in-partition,
......@@ -35,16 +39,17 @@ pid = $${:var}/pid
recipe = slapos.cookbook:free_port
minimum = 22200
maximum = 22209
ip = $${slap-network-information:global-ipv6}
ip = ${slap-network-information:global-ipv6}
[notifier-port]
recipe = slapos.cookbook:free_port
minimum = 65526
maximum = 65535
ip = $${notifier:host}
ip = ${notifier:host}
[resilient-publish-connection-parameter]
notification-id = http://[$${notifier:host}]:$${notifier:port}/get/$${notifier-exporter:name}
notification-id = http://[${notifier:host}]:${notifier:port}/get/${notifier-exporter:name}
-extends = publish-early
[notifier-exporter]
# notifier.notify launches an (exporter) executable, and when finished,
......@@ -52,24 +57,27 @@ notification-id = http://[$${notifier:host}]:$${notifier:port}/get/$${notifier-e
<= notifier
recipe = slapos.cookbook:notifier.notify
name = exporter
title = Dumping $${slap-parameter:namebase}
executable = $${exporter:wrapper}
wrapper = $${rootdirectory:bin}/exporter
notify = $${slap-parameter:notify}
pidfile = $${resilient-directory:pid}/$${:name}.pid
title = Dumping ${slap-parameter:namebase}
executable = ${exporter:wrapper}
wrapper = ${rootdirectory:bin}/exporter
notify = ${slap-parameter:notify}
pidfile = ${resilient-directory:pid}/${:name}.pid
max-run = 3
[logrotate-entry-notifier]
output = ${rootdirectory:etc}/logrotate_notifier.conf
[notifier-exporter-promise]
recipe = slapos.recipe.template:jinja2
mode = 700
template = inline:
#!${bash:location}/bin/bash
EXPORTER_FEED="$${notifier-exporter:log-file}"
#!{{ bash_executable_location }}
EXPORTER_FEED="${notifier-exporter:log-file}"
FAILURE_PATTERN="FAILURE"
# Fail the promise when the last line of a non-empty exporter feed matches the failure pattern.
if [ -s "$EXPORTER_FEED" ]; then
tail -n 1 "$EXPORTER_FEED" | grep -vq "$FAILURE_PATTERN"
fi
rendered = $${basedirectory:promises}/exporter-status
rendered = ${basedirectory:promises}/exporter-status
[cron-entry-backup]
# Schedule the periodic database dump.
......@@ -77,9 +85,21 @@ rendered = $${basedirectory:promises}/exporter-status
<= cron
recipe = slapos.cookbook:cron.d
name = backup
frequency = $${slap-parameter:resiliency-backup-periodicity}
once-a-day = true
command = $${notifier-exporter:wrapper} --transaction-id `date +%s`
{% set resiliency_backup_periodicity = slapparameter_dict.get('resiliency-backup-periodicity') %}
{% if resiliency_backup_periodicity %}
frequency = {{ resiliency_backup_periodicity }}
{% else %}
time = ${publish-early:resiliency-backup-periodicity}
{% endif %}
command = {{ logrotate_executable_location }} -s ${basedirectory:run}/logrotate.status ${logrotate-entry-notifier:output}; ${notifier-exporter:wrapper} --transaction-id `date +%s`
[gen-resiliency-backup-periodicity]
recipe = slapos.cookbook:random.time
[publish-early]
recipe = slapos.cookbook:publish-early
-init =
resiliency-backup-periodicity gen-resiliency-backup-periodicity:time
[slap-parameter]
# In cron.d format (i.e things like */15 * * * * are accepted).
......
......@@ -8,8 +8,13 @@ parts =
logrotate
logrotate-entry-cron
logrotate-entry-equeue
logrotate-entry-notifier
logrotate-entry-resilient
cron
cron-entry-logrotate
cron-entry-notifier-status-feed
notifier-feed-status-promise
notifier-stalled-promise
resilient-sshkeys-authority
sshd-raw-server
sshd-graceful
......@@ -179,4 +184,4 @@ curl_path = ${curl:location}/bin/curl
###########
[backup-signature-link]
recipe = cns.recipe.symlink
symlink = $${post-notification-run:proof-signature-file} = $${directory:monitor-resilient}/backup.signature
\ No newline at end of file
symlink = $${post-notification-run:proof-signature-file} = $${directory:monitor-resilient}/backup.signature
......@@ -4,8 +4,13 @@ parts =
logrotate
logrotate-entry-cron
logrotate-entry-equeue
logrotate-entry-notifier
logrotate-entry-resilient
cron
cron-entry-logrotate
cron-entry-notifier-status-feed
notifier-feed-status-promise
notifier-stalled-promise
resilient-sshkeys-authority
sshd-graceful
sshkeys-sshd
......@@ -45,6 +50,7 @@ ssh = $${rootdirectory:etc}/ssh/
sshkeys = $${rootdirectory:srv}/sshkeys
notifier-feeds = $${basedirectory:notifier}/feeds
notifier-callbacks = $${basedirectory:notifier}/callbacks
notifier-status-items = $${basedirectory:notifier}/status-items
cron-entries = $${rootdirectory:etc}/cron.d
crontabs = $${rootdirectory:etc}/crontabs
cronstamps = $${rootdirectory:etc}/cronstamps
......@@ -126,6 +132,26 @@ log = $${equeue:log} $${sshd-server:log}
frequency = daily
rotate-num = 30
[logrotate-entry-notifier]
recipe = collective.recipe.template
mode = 600
input = inline:
$${notifier:feeds}/* {
rotate 5
weekly
nocompress
missingok
olddir $${directory:logrotate-backup}
}
output = $${logrotate:logrotate-entries}/notifier
[logrotate-entry-resilient]
<= logrotate
recipe = slapos.cookbook:logrotate.d
name = resilient_log
log = $${basedirectory:log}/resilient.log
frequency = weekly
rotate-num = 7
#----------------
#--
......@@ -166,9 +192,30 @@ command = ${buildout:bin-directory}/pubsubserver --callbacks $${directory:notifi
notifier-binary = ${buildout:bin-directory}/pubsubnotifier
host = $${slap-network-information:global-ipv6}
port = $${notifier-port:port}
instance-root-name = $${instance-info-parameters:root-name}
log-url = $${publish:monitor-base-url}/resilient/notifier-status-rss
status-item-directory = $${directory:notifier-status-items}
context =
key content notifier:command
[notifier-resilient-status-feed]
recipe = slapos.cookbook:wrapper
command-line = ${buildout:directory}/bin/generatefeed --output $${:feed-path} --status-item-path $${notifier:status-item-directory} --title "Status feed for $${notifier:instance-root-name}" --link $${notifier:log-url}
feed-path = $${directory:monitor-resilient}/notifier-status-rss
wrapper-path = $${rootdirectory:bin}/resilient-genstatusrss.py
[cron-entry-notifier-status-feed]
<= cron
recipe = slapos.cookbook:cron.d
name = resilient-notifier-status-feed
frequency = */5 * * * *
command = $${notifier-resilient-status-feed:wrapper-path}
[notifier-stalled-promise]
recipe = slapos.cookbook:wrapper
# time-buffer is 24h (+1h of latitude)
command-line = ${buildout:bin-directory}/check-feed-as-promise --feed-path $${notifier-resilient-status-feed:feed-path} --title --ok-pattern 'OK' --time-buffer 90000
wrapper-path = $${basedirectory:promises}/stalled-notifier-callbacks
#----------------
#--
......@@ -260,6 +307,20 @@ input = inline:#!${bash:location}/bin/bash
output = $${basedirectory:promises}/public-key-existence
mode = 700
#----------------
#--
#-- Promises
[notifier-feed-status-promise]
recipe = slapos.recipe.template:jinja2
template = ${notifier-feed-promise-template:target}
rendered = $${basedirectory:promises}/notifier-feed-check-malformed-or-failure.py
mode = 700
context =
key notifier_feed_directory directory:notifier-feeds
raw base_url http://[$${notifier:host}]:$${notifier:port}/get/
raw python_executable ${buildout:executable}
#----------------
#--
#-- Connection informations to re-use.
......
......@@ -4,6 +4,7 @@ equeue_database = '${equeue:database}'
equeue_lockfile = '${equeue:lockfile}'
takeover_script = '${resiliency-takeover-script:wrapper-takeover}'
import atexit
import cgi
import cgitb
import datetime
......@@ -21,12 +22,17 @@ else:
cgitb.enable()
def deleteTemporaryDirectory(path):
if os.path.exists(path):
shutil.rmtree(path)
def getLatestBackupDate():
"""
Get the date of the latest successful backup.
"""
# Create a copy of the db (locked by equeue process)
temporary_directory = tempfile.mkdtemp()
atexit.register(deleteTemporaryDirectory, temporary_directory)
equeue_database_copy = os.path.join(temporary_directory, 'equeue.db')
shutil.copyfile(equeue_database, equeue_database_copy)
db = gdbm.open(equeue_database_copy)
......
[template-pbsready-export]
recipe = slapos.recipe.template:jinja2
template = ${pbsready-export:target}
rendered = $${buildout:directory}/pbsready-exporter.cfg
mode = 640
context =
key slapparameter_dict slap-configuration:configuration
raw pbsready_template_path ${pbsready:output}
raw bash_executable_location ${bash:location}/bin/bash
raw logrotate_executable_location ${logrotate:location}/sbin/logrotate
#!{{ python_executable }}
# Promise script (Jinja2 template).
# For every file found in the notifier feed directory:
#   1. check that the corresponding feed URL can be fetched and read,
#   2. check that the last row of the feed file has the expected four columns
#      and that its content starts with 'OK'.
# The script exits with an error message as soon as one feed fails a check.
import csv
import os
import sys
import urllib2

# Lift the csv module's per-field size limit before parsing feed files.
csv.field_size_limit(sys.maxsize)

notifier_feed_directory = '{{ notifier_feed_directory }}'
base_url = "{{ base_url }}"

for feed_file_name in os.listdir(notifier_feed_directory):
  url = base_url + feed_file_name

  # Try feed consistency: the feed must be downloadable in full.
  # URLError is the parent class of HTTPError, so connection failures are
  # reported with the same message instead of a raw traceback.
  try:
    feed = urllib2.urlopen(url)
    try:
      feed.read()
    finally:
      feed.close()
  except urllib2.URLError as e:
    sys.exit("%s is unavailable: %s" % (feed_file_name, e))

  # Get last row because we only care about last run.
  # last_row is reset for each feed so that an empty feed is never judged
  # on the last row of the previously inspected feed.
  last_row = None
  try:
    with open(os.path.join(notifier_feed_directory, feed_file_name)) as feed_file:
      for last_row in csv.reader(feed_file):
        pass
  except csv.Error:
    sys.exit("Notifier feed %s is malformed" % feed_file_name)

  if last_row is None:
    # Empty feed: nothing has run yet, nothing to check.
    continue

  try:
    timestamp, title, content, guid = last_row
  except ValueError:
    sys.exit("Notifier feed %s is malformed" % feed_file_name)

  if not content.startswith('OK'):
    sys.exit("Last run of %s failed" % feed_file_name)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment