#!{{ python_executable }}

import json
import os
import subprocess
import sys
7
import sqlite3
8
import time
9
import threading
10
from optparse import OptionParser, make_option
11

12 13 14 15 16

# Status labels stored in the database for each element and for a whole run.
FAILURE = "FAILURE"
SUCCESS = "SUCCESS"

# The "{{ ... }}" values below are placeholders substituted when this
# template is rendered (presumably by Jinja2 -- confirm against the recipe).

# SQLite database file receiving the result of every run.
db_path = "{{ monitor_parameter['db-path'] }}"
# Working directory given to the scripts that are launched.
instance_path = "{{ directory['home'] }}"
# Directory scanned for custom monitoring scripts (option -m).
monitor_dir = "{{ directory['monitor-custom-scripts'] }}"
# Directory scanned for service pid files (option -s).
pid_dir = "{{ directory['run'] }}"
# Directory scanned for promise scripts (option -p).
promise_dir = "{{ directory['promise'] }}"
# JSON file overwritten with the results of the latest run.
monitoring_file_json = "{{ monitoring_file_json }}"

# Command line switches accepted by the monitor. Each row is
# (short flag, long flag, destination attribute, help text); every switch
# is a boolean "store_true" flag.
option_list = list(
  make_option(short_flag, long_flag, action="store_true", dest=target,
              help=description)
  for short_flag, long_flag, target, description in (
    ("-a", "--all", "all",
     "test everything : promises, services, customs"),
    ("-n", "--no-write", "only_stdout",
     "just show the json output on stdout"),
    ("-m", "--monitors", "monitor",
     "add the custom monitoring file to the files to monitor"),
    ("-p", "--promises", "promise",
     "add the promises\'file to the files to monitor"),
    ("-s", "--services", "service",
     "add the file containing services\'pid to the files to monitor"),
  )
)
36

37
class Popen(subprocess.Popen):
  """subprocess.Popen with an optional watchdog that stops slow processes."""

  def set_timeout(self, timeout):
    """Arm a watchdog stopping the process after `timeout` seconds.

    A daemon thread waits up to `timeout` seconds. If `returncode` is still
    unset at that point, terminate() is called; if `returncode` is still
    unset 5 seconds later, kill() is called.

    May be called only once per instance: the method replaces itself with
    None on the instance.

    Returns a function to call once the process has been waited for (for
    example after communicate()). It stops the watchdog, joins its thread
    and returns True if terminate() was issued, False otherwise.
    """
    self.set_timeout = None # assert we're not called twice
    finished = threading.Event()
    state = {'killed': False} # mutable shared with the watchdog thread

    def watchdog():
      # do not call wait() or poll() because they're not thread-safe
      if finished.wait(timeout) or self.returncode is not None:
        return
      try:
        self.terminate()
      except OSError:
        # The process exited between the returncode check and the signal:
        # it was not stopped by us.
        return
      state['killed'] = True
      if finished.wait(5) or self.returncode is not None:
        return
      try:
        self.kill()
      except OSError:
        # Same race as for terminate(): the process is already gone.
        pass

    thread = threading.Thread(target=watchdog)
    thread.daemon = True
    thread.start()

    def killed():
      finished.set()
      thread.join()
      return state['killed']
    return killed
61

62 63
def init_db(db):
  """
  Create the monitoring tables in `db` if they do not exist yet.

  `db` must provide executescript(), as sqlite3 connections do. Two tables
  are created:
  - status: one row per run (timestamp is UNIQUE) with the global status
  - individual_status: one row per monitored element and run, with its
    status and output
  """
  db.executescript("""
CREATE TABLE IF NOT EXISTS status (
  timestamp INTEGER UNIQUE,
  status VARCHAR(255));
CREATE TABLE IF NOT EXISTS individual_status (
  timestamp INTEGER,
  status VARCHAR(255),
  element VARCHAR(255),
  output TEXT);
""")

74
def getListOfScripts(directory):
  """
  Get the list of script inside of a directory (not recursive)

  Returns the paths (`directory` joined with the entry name) of every entry
  that is executable by the current user and is not a directory, in the
  order given by os.listdir().

  Exits the program (SystemExit) with an error message when `directory`
  is not an existing directory.
  """
  if not os.path.isdir(directory):
    sys.exit("There is a problem in your directories "
             "of monitoring. Please check them")
  scripts = []
  for file_name in os.listdir(directory):
    path = os.path.join(directory, file_name)
    if os.access(path, os.X_OK) and not os.path.isdir(path):
      scripts.append(path)
  return scripts

89

90 91 92 93 94 95
def runServices(directory):
  """
  Check that the processes recorded in the pid files of `directory` exist.

  The candidate files are those returned by getListOfScripts(directory).
  Each one is read as a pid file and the pid is probed with signal 0.

  Returns a dict mapping the file base name to '' when the probe succeeds,
  or to an error message when os.kill() raises OSError. Files that cannot
  be read or do not contain an integer are skipped.
  """
  result = {}
  for service in getListOfScripts(directory):
    service_path = os.path.join(pid_dir, service)
    service_name = os.path.basename(service_path)
    try:
      with open(service_path) as pid_file:
        pid = int(pid_file.read())
    ### because apache (or others) can write sockets
    ### We also ignore not readable pid files
    except (IOError, ValueError):
      continue
    try:
      # Signal 0 only performs the existence/permission check.
      os.kill(pid, 0)
    except OSError:
      result[service_name] = "This service is not running anymore"
    else:
      result[service_name] = ''
  return result
108

109

110
def runScripts(directory):
  """
  Run every script of `directory` and collect the failures.

  Scripts are those returned by getListOfScripts(directory). Each one is
  run with instance_path as working directory, an empty environment
  (inherited on cygwin), stdin and stdout bound to os.devnull and stderr
  captured. A script is stopped after 60 seconds.

  Returns a dict mapping the script base name to:
  - '' when the script exits with status 0
  - "Time Out" when it had to be stopped
  - otherwise a non-empty description of the failure: the decoded stderr,
    or a fallback message when stderr is empty or the script could not be
    started (an empty value would be counted as a success by writeFiles)
  """
  # XXX script_timeout could be passed as parameters
  script_timeout = 60 # in seconds
  result = {}
  with open(os.devnull, 'r+') as devnull:
    for script in getListOfScripts(directory):
      # One-element argv tuple; join() leaves absolute paths unchanged.
      command = (os.path.join(promise_dir, script),)
      name = os.path.basename(script)
      result[name] = ''

      try:
        p = Popen(command, cwd=instance_path,
                  env=None if sys.platform == 'cygwin' else {},
                  stdin=devnull, stdout=devnull, stderr=subprocess.PIPE)
      except OSError as e:
        # A script that cannot be started must not abort the other checks.
        result[name] = "Unable to run script: %s" % e
        continue
      killed = p.set_timeout(script_timeout)
      stderr = p.communicate()[1]
      if killed():
        result[name] = "Time Out"
      elif p.returncode:
        # Decode leniently so that arbitrary bytes can be stored and dumped.
        message = stderr.decode('utf-8', 'replace').strip()
        result[name] = message or (
          "Script exited with status %s" % p.returncode)
  return result
130 131


132
def writeFiles(monitors):
  """
  Store the results of a run in the database and in the JSON file.

  `monitors` maps an element name to its output; an empty output means
  success, anything else a failure. One row per element is inserted in
  individual_status and one row with the global status (FAILURE as soon as
  one element failed) in status, all with the current timestamp.

  A 'datetime' key is then added to `monitors` (the caller's dict is
  modified) and the dict is dumped to monitoring_file_json.

  NOTE(review): status.timestamp is UNIQUE, so two runs within the same
  second make the second insert fail -- confirm this cannot happen.
  """
  timestamp = int(time.time())
  db = sqlite3.connect(db_path)
  try:
    init_db(db)
    status = SUCCESS
    for key, value in monitors.items():
      if value:
        element_status = status = FAILURE
      else:
        element_status = SUCCESS
      db.execute("insert into individual_status(timestamp, element, output, status) values (?, ?, ?, ?)", (timestamp, key, value, element_status))
    db.execute("insert into status(timestamp, status) values (?, ?)", (timestamp, status))
    db.commit()
  finally:
    # Release the connection even when an insert fails.
    db.close()
  monitors['datetime'] = time.ctime(timestamp)
  with open(monitoring_file_json, "w+") as json_file:
    json.dump(monitors, json_file)
148

149
def main():
  """
  Parse the command line, run the requested checks and output the result.

  At least one of -a, -m, -p, -s is required, otherwise the program exits
  with an error message. The results of the selected checks are merged in
  one dict, which is printed as JSON on stdout with -n, or stored through
  writeFiles() otherwise.
  """
  parser = OptionParser(option_list=option_list)
  options, _ = parser.parse_args()

  if not (options.monitor or options.promise
          or options.service or options.all):
    sys.exit("Please provide at least one arg in : -a, -m, -p, -s")

  monitors = {}
  if options.monitor or options.all:
    monitors.update(runScripts(monitor_dir))
  if options.promise or options.all:
    monitors.update(runScripts(promise_dir))
  if options.service or options.all:
    monitors.update(runServices(pid_dir))

  if options.only_stdout:
    # Single-argument call form: valid on both Python 2 and Python 3.
    print(json.dumps(monitors))
  else:
    writeFiles(monitors)
169

170 171 172

# Run the monitor only when this file is executed as a script.
if __name__ == "__main__":
  main()