Commit 8cb74356 authored by Alain Takoudjou

monitor: add promise to check free disk space

parent 68e46df1
...@@ -85,6 +85,11 @@ md5sum = 1bdb4e05c6be04f4e5766c64467fbcec ...@@ -85,6 +85,11 @@ md5sum = 1bdb4e05c6be04f4e5766c64467fbcec
<= monitor-template-base <= monitor-template-base
filename = httpd-cors.cfg.in filename = httpd-cors.cfg.in
md5sum = 683ea85fc054094248baf5752dd089bf md5sum = 683ea85fc054094248baf5752dd089bf
# Template of the promise script that checks free disk space. Its
# location/filename pair is handed to the instance template as
# template_check_disk_space.
# NOTE(review): md5sum presumably has to match the content of
# check_free_disk.in -- regenerate it whenever that template is modified.
[monitor-check-free-disk-space]
<= monitor-template-base
filename = check_free_disk.in
md5sum = bc61a77f8c06615dfa687ed48893bbc1
# End templates files # End templates files
# XXX keep compatibility (with software/ipython_notebook/software.cfg ) # XXX keep compatibility (with software/ipython_notebook/software.cfg )
...@@ -97,7 +102,7 @@ recipe = slapos.recipe.template:jinja2 ...@@ -97,7 +102,7 @@ recipe = slapos.recipe.template:jinja2
filename = template-monitor.cfg filename = template-monitor.cfg
template = ${:_profile_base_location_}/instance-monitor.cfg.jinja2.in template = ${:_profile_base_location_}/instance-monitor.cfg.jinja2.in
rendered = ${buildout:directory}/template-monitor.cfg rendered = ${buildout:directory}/template-monitor.cfg
md5sum = 3a0417a9a3c2710c31be37e7a66f8a82 md5sum = 8433adc2ad0bc3a443ff941580593fb0
context = context =
key apache_location apache:location key apache_location apache:location
key gzip_location gzip:location key gzip_location gzip:location
...@@ -119,6 +124,7 @@ context = ...@@ -119,6 +124,7 @@ context =
raw python_executable ${buildout:executable} raw python_executable ${buildout:executable}
raw python_with_eggs ${buildout:directory}/bin/${extra-eggs:interpreter} raw python_with_eggs ${buildout:directory}/bin/${extra-eggs:interpreter}
raw template_wrapper ${monitor-template-wrapper:location}/${monitor-template-wrapper:filename} raw template_wrapper ${monitor-template-wrapper:location}/${monitor-template-wrapper:filename}
raw template_check_disk_space ${monitor-check-free-disk-space:location}/${monitor-check-free-disk-space:filename}
depends = depends =
${monitor-eggs:eggs} ${monitor-eggs:eggs}
......
...@@ -102,6 +102,7 @@ parameter-list = ...@@ -102,6 +102,7 @@ parameter-list =
raw monitor-user ${monitor-instance-parameter:username} raw monitor-user ${monitor-instance-parameter:username}
htpasswd monitor-password ${monitor-htpassword-file:password-file} ${monitor-instance-parameter:username} ${httpd-monitor-htpasswd:htpasswd-path} htpasswd monitor-password ${monitor-htpassword-file:password-file} ${monitor-instance-parameter:username} ${httpd-monitor-htpasswd:htpasswd-path}
file promise-timeout ${monitor-promise-timeout-file:file} file promise-timeout ${monitor-promise-timeout-file:file}
file free-disk-space-MB ${promise-check-free-disk-space:config-file}
${monitor-instance-parameter:instance-configuration} ${monitor-instance-parameter:instance-configuration}
# htpasswd entry: htpasswd key password-file username htpasswd-file # htpasswd entry: htpasswd key password-file username htpasswd-file
...@@ -345,6 +346,18 @@ input = inline:#!/bin/sh ...@@ -345,6 +346,18 @@ input = inline:#!/bin/sh
output = ${monitor-directory:promises}/buildout-${slap-connection:partition-id}-status output = ${monitor-directory:promises}/buildout-${slap-connection:partition-id}-status
mode = 700 mode = 700
# Render the free-disk-space promise script into the monitor promises
# directory. config-file is the file holding the minimum free space value;
# the same file is published as the free-disk-space-MB monitor parameter.
[promise-check-free-disk-space]
recipe = slapos.recipe.template:jinja2
template = {{ template_check_disk_space }}
rendered = ${monitor-directory:promises}/check-free-disk-space
mode = 0700
context =
key config_file :config-file
raw home_path ${buildout:directory}
raw python_bin {{ python_with_eggs }}
config-file = ${directory:etc}/min-free-disk-size
[monitor-base] [monitor-base]
# create dependencies between required monitor parts # create dependencies between required monitor parts
recipe = plone.recipe.command recipe = plone.recipe.command
......
#!{{ python_bin }}
import os
import sys
def free_space(path, fn):
    """Apply ``fn`` to the statvfs result of ``path`` or its nearest usable parent.

    ``os.statvfs`` is tried on ``path``; whenever it (or ``fn``) raises
    ``OSError`` the search moves one directory up and tries again.

    :param path: filesystem path to inspect; it does not need to exist.
    :param fn: callable receiving the ``os.statvfs`` result.
    :return: whatever ``fn`` returns, or ``None`` when no directory on the
        way up could be inspected.
    """
    while True:
        try:
            return fn(os.statvfs(path))
        except OSError:
            # Best effort: fall back to the parent directory below.
            pass
        if os.sep not in path:
            # Bare relative name: there is no parent component left to try.
            break
        parent = os.path.split(path)[0]
        if parent == path:
            # Reached the filesystem root (split('/') yields '/' again);
            # stop instead of retrying the same path forever.
            break
        path = parent
    return None
def user_free_space(path):
    """Return the free space, in bytes, on the filesystem holding ``path``.

    The value is ``f_frsize * f_bavail``: POSIX expresses ``f_bavail`` (blocks
    available to non-privileged processes) in units of the fragment size
    ``f_frsize``, not of the preferred I/O block size ``f_bsize``.

    :param path: filesystem path to inspect; parents are tried if it fails.
    :return: number of free bytes, or ``None`` if ``free_space`` could not
        inspect ``path`` or any of its parents.
    """
    def available_bytes(stat):
        return stat.f_frsize * stat.f_bavail

    return free_space(path, available_bytes)
# Size units, in bytes.
BYTES_PER_MB = 1024 * 1024
BYTES_PER_GB = 1024 * BYTES_PER_MB
# Configured thresholds below this many MB are ignored (default is used).
MIN_CONFIGURABLE_MB = 200


def min_free_size_from_config(min_size_str, default_size):
    """Convert the configured threshold (a number of MB) into bytes.

    :param min_size_str: stripped content of the configuration file.
    :param default_size: threshold in bytes to use when the configured value
        is not a plain number or is below ``MIN_CONFIGURABLE_MB``.
    :return: minimum free size in bytes.
    """
    if min_size_str.isdigit():
        value = int(min_size_str)
        if value >= MIN_CONFIGURABLE_MB:
            # Minimum value is 200 MB, it's already low.
            return value * BYTES_PER_MB
    return default_size


def main():
    """Check free disk space against the configured minimum.

    Exits with status 0 when the check passes or is disabled (configuration
    file containing ``0``), and with status 1 when free space is too low or
    cannot be determined. When the configuration file does not exist yet it
    is created with the default threshold, expressed in MB.
    """
    home_path = '{{ home_path }}'
    config_file = '{{ config_file }}'
    min_free_size = 2 * BYTES_PER_GB  # 2G by default

    if os.path.exists(config_file):
        with open(config_file) as f:
            min_size_str = f.read().strip()
        if min_size_str == '0':
            # disable check
            print("Free disk space check is disabled\n"
                  " set a number greater than 0 to enable!")
            sys.exit(0)
        min_free_size = min_free_size_from_config(min_size_str, min_free_size)
    else:
        # Publish the default using the same unit (MB) that is read back.
        with open(config_file, 'w') as f:
            f.write(str(min_free_size // BYTES_PER_MB))

    real_free_space = user_free_space(home_path)
    if real_free_space is None:
        # No directory could be inspected: report it instead of crashing
        # on the arithmetic below.
        print('Free disk space: unable to read filesystem statistics for %s'
              % home_path)
        sys.exit(1)
    if real_free_space > min_free_size:
        print("Free disk space: OK")
        sys.exit(0)

    real_space_g = round(real_free_space / float(BYTES_PER_GB), 2)
    min_space_g = round(min_free_size / float(BYTES_PER_GB), 2)
    print('Free disk space low: remaining %s G, min is %s G' % (
        real_space_g, min_space_g))
    print('You can modify minimum value on your monitor interface.')
    sys.exit(1)


if __name__ == '__main__':
    main()
  • @alain It is not ok to include python code we cannot test.... we should avoid this.

    It would be better to move it here:

    https://lab.nexedi.com/nexedi/slapos.toolbox/tree/master/slapos/promise

    and write tests for it.

  • @alain.takoudjou : is it OK to add this kind of promise directly into the monitor stack? I feel that this promise should instead be integrated into the monitor Software Release, whose role is to monitor the computer. Multiplying this promise across all instances extending the monitor stack will just create multiple tickets (thus noise) for what is in reality only one issue.

    /cc @rafael

  • Each user should be notified of the lack of space; that's why it was added everywhere. Each project (instance owner) should be able to define the acceptable amount of free space. For example, to pack a ZODB we sometimes need at least 30% of the disk free, while other instances on the same computer expect less.

  • Ok, now I see the need. But I'm wondering : real projects need custom servers, and not shared servers, because they cannot trust other users sharing the hardware. Then shouldn't we ask projects to setup the monitor SR on their dedicated server if they need to monitor the system ? Also, practically speaking, isn't it inconvenient for the vifib team to receive 20 tickets when one server gets full ?

Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment