Commit 70bf9386 authored by Justin

promise/plugin: Merged check_cpu_temperature from @jhuge

parent 4df69149
import json
import os
import psutil
import time
from .util import JSONPromise
from zope.interface import implementer
from slapos.grid.promise import interface
@implementer(interface.IPromise)
class RunPromise(JSONPromise):
    """Promise checking the CPU temperature reported by psutil.

    Two thresholds are checked: the instantaneous ("spot") temperature and
    the average temperature over a configurable duration.  Problems are
    reported through ``self.logger.error``; ``sense()`` itself always
    returns ``None``.
    """

    def __init__(self, config):
        """Read the promise configuration.

        Recognised configuration keys (with their defaults):

        - ``frequency`` (2): value passed to ``setPeriodicity``.
        - ``last-avg-computation-file`` (``'last_avg'``): file whose
          modification time records when the average was last computed.
        - ``max-spot-temp`` (90): spot temperature threshold, in °C.
        - ``max-avg-temp`` (80): average temperature threshold, in °C.
        - ``avg-temp-duration`` (600): averaging window, in seconds.
        """
        super(RunPromise, self).__init__(config)
        self.setPeriodicity(float(self.getConfig('frequency', 2)))

        self.avg_flag_file = self.getConfig('last-avg-computation-file', 'last_avg')
        self.max_spot_temp = float(self.getConfig('max-spot-temp', 90))  # °C
        self.max_avg_temp = float(self.getConfig('max-avg-temp', 80))  # °C
        self.avg_temp_duration = int(self.getConfig('avg-temp-duration', 600))  # seconds

    def sense(self):
        """Read the CPU temperature, log it and check both thresholds.

        Returns ``None`` in every case.  ``success`` only decides whether
        the final "Temperature OK" message is logged; failures are signalled
        by the ``logger.error`` calls (presumably the promise framework
        derives the result from the log level -- confirm against
        JSONPromise).
        """
        success = True

        # Get current temperature
        try:
            cpu_temp = psutil.sensors_temperatures()['coretemp'][0][1]
        except (AttributeError, KeyError, IndexError):
            # Only log at info level so the promise does not fail when
            # sensors are not supported by the OS (ex: VM).  AttributeError
            # covers platforms where psutil does not provide
            # sensors_temperatures() at all.
            self.logger.info("Could not read core temperature on VM")
            return

        # Check spot temperature
        if cpu_temp > self.max_spot_temp:
            success = False
            self.logger.error(
                "Temperature reached critical threshold: %s °C"
                " (threshold is %s °C)",
                cpu_temp, self.max_spot_temp)

        # Log temperature
        data = json.dumps({'cpu_temperature': cpu_temp})
        self.json_logger.info("Temperature data", extra={'data': data})

        # TODO: promise should compute average only with logs between interval
        # Compute average temperature, at most once per quarter of the
        # averaging window.  Divide by a float so that the period is not
        # truncated to an integer on Python 2.
        avg_computation_period = self.avg_temp_duration / 4.0
        try:
            last_computation = os.path.getmtime(self.avg_flag_file)
        except OSError:
            # No flag file yet: behave as if the average was never computed.
            last_computation = 0

        if (time.time() - last_computation) > avg_computation_period:
            # Touch the flag file to record the time of this computation.
            with open(self.avg_flag_file, 'w'):
                pass

            temp_list = self.getJsonLogDataInterval(self.avg_temp_duration)
            if temp_list:
                # float() keeps the mean exact when readings are integers
                # (true division regardless of the Python version).
                total = sum(x['cpu_temperature'] for x in temp_list)
                avg_temp = float(total) / len(temp_list)
                if avg_temp > self.max_avg_temp:
                    success = False
                    self.logger.error(
                        "Average temperature over the last %ds reached threshold: %s °C"
                        " (threshold is %s °C)",
                        self.avg_temp_duration, avg_temp, self.max_avg_temp)
            else:
                success = False
                self.logger.error("Couldn't read temperature from log")

        if success:
            self.logger.info("Temperature OK (%s °C)", cpu_temp)

    def test(self):
        """
        Called after sense() if the instance is still converging.
        Returns success or failure based on sense results.

        In this case, fail if the previous sensor result is negative.
        """
        return self._test(result_count=1, failure_amount=1)

    def anomaly(self):
        """
        Called after sense() if the instance has finished converging.
        Returns success or failure based on sense results.
        Failure signals the instance has diverged.

        In this case, fail if two out of the last three results are negative.
        """
        return self._anomaly(result_count=3, failure_amount=2)
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
import mock
import os
import time
from slapos.grid.promise import PromiseError
from slapos.promise.plugin.check_cpu_temperature import RunPromise
from . import TestPromisePluginMixin
class TestCheckCpuTemperature(TestPromisePluginMixin):
    """Tests for the check_cpu_temperature promise plugin."""

    promise_name = "monitor-cpu-temperature.py"

    def setUp(self):
        super(TestCheckCpuTemperature, self).setUp()

    def writePromise(self, **kw):
        """Write a promise file importing RunPromise, with ``kw`` as its
        ``extra_config_dict``."""
        promise_source = "from %s import %s\nextra_config_dict = %r\n" % (
            RunPromise.__module__, RunPromise.__name__, kw)
        super(TestCheckCpuTemperature, self).writePromise(
            self.promise_name, promise_source)

    def runPromise(self, summary, failed=False):
        """Run the promise with ``psutil.sensors_temperatures`` patched to
        return ``summary`` and return the resulting promise message.

        When ``failed`` is true the launcher is expected to raise
        PromiseError; in both cases the stored result's ``failed`` flag is
        checked against ``failed``.
        """
        self.configureLauncher(enable_anomaly=True, force=True)
        with mock.patch('psutil.sensors_temperatures', return_value=summary):
            if not failed:
                self.launcher.run()
            else:
                self.assertRaises(PromiseError, self.launcher.run)

        outcome = self.getPromiseResult(self.promise_name)['result']
        self.assertEqual(outcome['failed'], failed)
        return outcome['message']

    def test_temp_ok(self):
        # A 50 °C reading with thresholds of 80/100 °C reports the OK message.
        expected = "Temperature OK (50 °C)"
        config = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        }
        self.writePromise(**config)
        reported = self.runPromise({'coretemp': [[0, 50]]})
        self.assertEqual(expected, reported)

    def test_spot_critical(self):
        # A 90 °C reading exceeds the 80 °C spot threshold.
        expected = "Temperature reached critical threshold: 90 °C (threshold is 80.0 °C)"
        config = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 80,
            'max-avg-temp': 100,
        }
        self.writePromise(**config)
        reported = self.runPromise({'coretemp': [[0, 90]]})
        self.assertEqual(expected, reported)

    def test_avg_critical(self):
        # Spot threshold is set out of reach so that only the average check
        # can trigger; readings are spaced out so that (presumably) only the
        # last two fall inside the 1 s averaging window.
        expected = "Average temperature over the last 1s reached threshold: 45.0 °C (threshold is 40.0 °C)"
        config = {
            'last-avg-computation-file': 'last_avg_computation_file',
            'max-spot-temp': 99999,
            'max-avg-temp': 40,
            'avg-temp-duration': 1,
        }
        self.writePromise(**config)

        cold_reading = {'coretemp': [[0, 0]]}
        self.runPromise(cold_reading)
        time.sleep(0.6)
        self.runPromise({'coretemp': [[0, 0]]})
        time.sleep(0.5)

        reported = self.runPromise({'coretemp': [[0, 90]]})
        self.assertEqual(expected, reported)
if __name__ == '__main__':
    # unittest is not among this test module's top-level imports, so import
    # it here; otherwise running the file as a script raises NameError.
    import unittest
    unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment