Commit 3e813ce2 authored by Joanne Hugé's avatar Joanne Hugé

promise/plugin: add check_cpu_temperature promise

parent b17843bc
import json
import os
import psutil
import time
from .util import get_data_interval_json_log
from .util import JSONRunPromise
from zope.interface import implementer
from slapos.grid.promise import interface
@implementer(interface.IPromise)
class RunPromise(JSONRunPromise):
def __init__(self, config):
super(RunPromise, self).__init__(config)
self.setPeriodicity(minute=2)
self.last_avg_computation_file = self.getConfig(
'last-avg-computation-file', 'last_avg')
def sense(self):
promise_success = True
max_spot_temp = float(self.getConfig('max-spot-temp', 90))
max_avg_temp = float(self.getConfig('max-avg-temp', 80))
avg_temp_duration_sec = int(self.getConfig('avg-temp-duration-sec', 0))
if avg_temp_duration_sec:
avg_temp_duration = avg_temp_duration_sec
else:
avg_temp_duration = 60 * int(self.getConfig('avg-temp-duration', 5))
testing = self.getConfig('testing') == "True"
# For theia JHGD
#testing = True # JHGD
# Get current temperature
if testing:
from random import randint
cpu_temp = randint(40, 75)
else:
data = psutil.sensors_temperatures()
cpu_temp = data['coretemp'][0][1]
if cpu_temp > max_spot_temp:
self.logger.error("Temperature reached critical threshold: %s degrees "\
"celsius (threshold is %s degrees celsius)" % (cpu_temp, max_spot_temp))
promise_success = False
# Log temperature
data = json.dumps({'cpu_temperature': cpu_temp})
self.json_logger.info("Temperature data", extra={'data': data})
# TODO: promise should computer average only with logs between interval
# Computer average temperature
avg_computation_period = avg_temp_duration / 4
try:
t = os.path.getmtime(self.last_avg_computation_file)
except OSError:
t = 0
if (time.time() - t) > avg_computation_period:
open(self.last_avg_computation_file, 'w').close()
temp_list = get_data_interval_json_log(self.log_file, avg_temp_duration)
if temp_list:
avg_temp = sum(map(lambda x: x['cpu_temperature'], temp_list)) / len(temp_list)
if avg_temp > max_avg_temp:
self.logger.error("Average temperature over the last %s seconds "\
"reached threshold: %s degrees celsius (threshold is %s degrees "\
"celsius)" % (avg_temp_duration, avg_temp, max_avg_temp))
promise_success = False
else:
self.logger.error("Couldn't read temperature from log")
promise_success = False
if promise_success:
self.logger.info("Temperature OK")
def test(self):
"""
Called after sense() if the instance is still converging.
Returns success or failure based on sense results.
In this case, fail if the previous sensor result is negative.
"""
return self._test(result_count=1, failure_amount=1)
def anomaly(self):
"""
Called after sense() if the instance has finished converging.
Returns success or failure based on sense results.
Failure signals the instance has diverged.
In this case, fail if two out of the last three results are negative.
"""
return self._anomaly(result_count=3, failure_amount=2)
import json
import logging
import os
from dateutil import parser
from slapos.grid.promise.generic import GenericPromise
def tail_file(file_path, line_count=10): def tail_file(file_path, line_count=10):
""" """
...@@ -27,3 +32,85 @@ def tail_file(file_path, line_count=10): ...@@ -27,3 +32,85 @@ def tail_file(file_path, line_count=10):
block -= 1 block -= 1
return '\n'.join(''.join(line_list).splitlines()[-line_count:]) return '\n'.join(''.join(line_list).splitlines()[-line_count:])
class JSONRunPromise(GenericPromise):
def __init__(self, config):
self.__name = config.get('name', None)
self.__log_folder = config.get('log-folder', None)
super(JSONRunPromise, self).__init__(config)
self.__title = os.path.splitext(self.__name)[0]
self.log_file = os.path.join(self.__log_folder, '%s.json.log' % self.__title)
self.json_logger = logging.getLogger('json_logger')
self.json_logger.setLevel(logging.INFO)
handler = logging.FileHandler(self.log_file)
formatter = logging.Formatter('{"time": "%(asctime)s", "log_level": '\
'"%(levelname)s", "message": "%(message)s", "data": %(data)s}')
handler.setFormatter(formatter)
self.json_logger.addHandler(handler)
def get_data_interval_json_log(log, interval):
"""
Get all data in the last "interval" seconds from JSON log
Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)
"""
log_number = 0
latest_timestamp = 0
data_list = []
while True:
try:
f = open("{}.{}".format(log, log_number) if log_number else log, "rb")
except OSError:
return data_list
try:
f.seek(0, os.SEEK_END)
while True:
try:
while f.seek(-2, os.SEEK_CUR) and f.read(1) != b'\n':
pass
except OSError:
break
pos = f.tell()
l = json.loads(f.readline().decode().replace("'", '"'))
timestamp = parser.parse(l['time'])
data_list.append(l['data'])
if not latest_timestamp:
latest_timestamp = timestamp
if (latest_timestamp - timestamp).total_seconds() > interval:
return data_list
f.seek(pos, os.SEEK_SET)
finally:
f.close()
log_number += 1
def get_latest_timestamp_json_log(log):
"""
Get latest timestamp from JSON log
Reads rotated logs too (XX.log, XX.log.1, XX.log.2, ...)
"""
log_number = 0
while True:
try:
f = open("{}.{}".format(log, log_number) if log_number else log, "rb")
except OSError:
return 0
try:
f.seek(0, os.SEEK_END)
try:
while f.seek(-2, os.SEEK_CUR) and f.read(1) != b'\n':
pass
except OSError:
break
l = json.loads(f.readline().decode().replace("'", '"'))
return parser.parse(l['time'])
finally:
f.close()
log_number += 1
return 0
# -*- coding: utf-8 -*-
##############################################################################
# Copyright (c) 2018 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
##############################################################################
import mock
import os
import time
from slapos.grid.promise import PromiseError
from slapos.promise.plugin.check_cpu_temperature import RunPromise
from . import TestPromisePluginMixin
class TestCheckCpuTemperature(TestPromisePluginMixin):
promise_name = "monitor-cpu-temperature.py"
def setUp(self):
super(TestCheckCpuTemperature, self).setUp()
def writePromise(self, **kw):
super(TestCheckCpuTemperature, self).writePromise(self.promise_name,
"from %s import %s\nextra_config_dict = %r\n"
% (RunPromise.__module__, RunPromise.__name__, kw))
def runPromise(self, summary, failed=False):
self.configureLauncher(enable_anomaly=True, force=True)
with mock.patch('psutil.sensors_temperatures', return_value=summary):
if failed:
self.assertRaises(PromiseError, self.launcher.run)
else:
self.launcher.run()
result = self.getPromiseResult(self.promise_name)['result']
self.assertEqual(result['failed'], failed)
return result['message']
def test_temp_ok(self):
message = "Temperature OK"
self.writePromise(**{
'last-avg-computation-file':'last_avg_computation_file',
'max-spot-temp': 80,
'max-avg-temp': 100,
})
self.assertEqual(message, self.runPromise({'coretemp': [[0, 50]]}))
def test_spot_critical(self):
message = "Temperature reached critical threshold: 90 degrees celsius (threshold is 80.0 degrees celsius)"
self.writePromise(**{
'last-avg-computation-file':'last_avg_computation_file',
'max-spot-temp': 80,
'max-avg-temp': 100,
})
self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))
def test_avg_critical(self):
message = "Average temperature over the last 1 seconds reached threshold: 45.0 degrees celsius (threshold is 40.0 degrees celsius)"
self.writePromise(**{
'last-avg-computation-file':'last_avg_computation_file',
'max-spot-temp': 99999,
'max-avg-temp': 40,
'avg-temp-duration-sec': 1,
})
# TODO: promise should computer average only with logs between interval
m = self.runPromise({'coretemp': [[0, 0]]})
m = self.runPromise({'coretemp': [[0, 0]]})
time.sleep(2)
self.assertEqual(message, self.runPromise({'coretemp': [[0, 90]]}))
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment