Commit 8c8da003 authored by Jérome Perrin's avatar Jérome Perrin

software/grafana: WIP generate telegraf and loki config

parent 9763c853
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
[instance-profile] [instance-profile]
filename = instance.cfg.in filename = instance.cfg.in
md5sum = 3ccdd2299e759488545b62368c7a0b91 md5sum = 09a24413839d930420f0ac0298ef3297
[influxdb-config-file] [influxdb-config-file]
filename = influxdb-config-file.cfg.in filename = influxdb-config-file.cfg.in
...@@ -23,20 +23,20 @@ md5sum = a28972ced3e0f4aa776e43a9c44717c0 ...@@ -23,20 +23,20 @@ md5sum = a28972ced3e0f4aa776e43a9c44717c0
[telegraf-config-file] [telegraf-config-file]
filename = telegraf-config-file.cfg.in filename = telegraf-config-file.cfg.in
md5sum = a1a9c22c2a7829c66a49fc2504604d21 md5sum = 6de1faa34842e1eda095a51edecc2083
[grafana-config-file] [grafana-config-file]
filename = grafana-config-file.cfg.in filename = grafana-config-file.cfg.in
md5sum = e255dcca466f5de51698d24cbd114577 md5sum = 83a8445858eab21a12f1769c23424bea
[grafana-provisioning-config-file] [grafana-provisioning-datasources-config-file]
filename = grafana-provisioning-config-file.cfg.in filename = grafana-provisioning-datasources-config-file.cfg.in
md5sum = 3aa0f1ed752b2a59ea2b5e7c1733daf3 md5sum = 3aa0f1ed752b2a59ea2b5e7c1733daf3
[grafana-provisioning-dashboards-config-file]
filename = grafana-provisioning-dashboards-config-file.cfg.in
md5sum = 5616679a9c5c2757540175ead3f5500a
[loki-config-file] [loki-config-file]
filename = loki-config-file.cfg.in filename = loki-config-file.cfg.in
md5sum = ad2baf4599a937d7352034a41fa24814 md5sum = ad2baf4599a937d7352034a41fa24814
[promtail-config-file]
filename = promtail-config-file.cfg.in
md5sum = c8c9d815dd7b427788c066f041f04573
...@@ -154,7 +154,7 @@ reporting_enabled = true ...@@ -154,7 +154,7 @@ reporting_enabled = true
# in some UI views to notify that grafana or plugin update exists # in some UI views to notify that grafana or plugin update exists
# This option does not cause any auto updates, nor send any information # This option does not cause any auto updates, nor send any information
# only a GET request to https://grafana.com to get latest versions # only a GET request to https://grafana.com to get latest versions
check_for_updates = true check_for_updates = false
# Google Analytics universal tracking code, only enabled if you specify an id here # Google Analytics universal tracking code, only enabled if you specify an id here
google_analytics_ua_id = google_analytics_ua_id =
...@@ -345,11 +345,8 @@ user = {{ slapparameter_dict.get('smtp-username', '') }} ...@@ -345,11 +345,8 @@ user = {{ slapparameter_dict.get('smtp-username', '') }}
password = {{ slapparameter_dict.get('smtp-password', '') and '"""%s"""' % slapparameter_dict['smtp-password'] or ""}} password = {{ slapparameter_dict.get('smtp-password', '') and '"""%s"""' % slapparameter_dict['smtp-password'] or ""}}
cert_file = cert_file =
key_file = key_file =
# skip_verify is the opposite of smtp-verify-ssl; certificates are verified unless the parameter is explicitly false
#skip_verify = false skip_verify = {{ slapparameter_dict.get('smtp-verify-ssl', True) and 'false' or 'true' }}
skip_verify = {{ slapparameter_dict.get('smtp-verify-ssl', 'true').lower() == 'true' and 'false' or 'true' }}
#from_address = admin@grafana.localhost
from_address = {{ slapparameter_dict.get('email-from-address', '') }} from_address = {{ slapparameter_dict.get('email-from-address', '') }}
#from_name = Grafana
from_name = {{ slapparameter_dict.get('email-from-name', 'Grafana') }} from_name = {{ slapparameter_dict.get('email-from-name', 'Grafana') }}
ehlo_identity = ehlo_identity =
......
# https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
apiVersion: 1
providers:
- name: SlapOS
folder: ''
updateIntervalSeconds: 10
allowUiUpdates: false
options:
path: {{ dashboards_dir }}
{ {
"$schema": "http://json-schema.org/draft-04/schema#", "$schema": "http://json-schema.org/draft-07/schema#",
"description": "Parameters to instantiate Grafana", "description": "Parameters to instantiate Grafana",
"type": "object", "type": "object",
"additionalProperties": false, "additionalProperties": false,
...@@ -18,11 +18,7 @@ ...@@ -18,11 +18,7 @@
}, },
"smtp-verify-ssl": { "smtp-verify-ssl": {
"description": "Verify SSL certificate of SMTP server", "description": "Verify SSL certificate of SMTP server",
"type": "string", "type": "boolean"
"enum": [
"true",
"false"
]
}, },
"email-from-address": { "email-from-address": {
"description": "Email address used in From: header of emails", "description": "Email address used in From: header of emails",
...@@ -33,6 +29,133 @@ ...@@ -33,6 +29,133 @@
"default": "Grafana", "default": "Grafana",
"type": "string" "type": "string"
}, },
"applications": {
"description": "Applications to monitor",
"type": "array",
"items": {
"type": "object",
"required": [
"name",
"instance-root",
"partitions"
],
"properties": {
"name": {
"description": "Name of this application",
"type": "string"
},
"instance-root": {
"description": "Directory containing SlapOS partitions.",
"type": "string"
},
"urls": {
"description": "URLs to monitor for availability and certificate lifetime",
"type": "array",
"items": {
"type": "string"
}
},
"partitions": {
"description": "SlapOS partitions to monitor",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Friendly name of the partition",
"examples": [
"mariadb",
"zope-activity"
]
},
"reference": {
"type": "string",
"description": "Reference of the partition",
"examples": [
"slappart1",
"slappart2"
]
},
"type": {
"type": "string",
"description": "Type of the partition. Known types have extra metrics and logs collected",
"enum": [
"erp5/mariadb",
"erp5/balancer",
"erp5/zope-activity",
"erp5/zope-front",
"erp5/zeo",
"mariadb",
"default"
]
},
"file-path": {
"type": "string",
"description": "Glob for the files to watch. This mostly makes sense for `default` type"
},
"static-tags": {
"type": "object",
"description": "Static tags for this partition",
"examples": [
{
"region": "eu",
"data-center": "abc123"
}
]
}
},
"anyOf": [
{
"properties": {
"type": {
"const": "default"
}
},
"required": [
"name",
"file-path"
]
},
{
"properties": {
"type": {
"not": {
"const": "default"
}
}
},
"required": [
"name",
"reference"
]
}
],
"examples": [
{
"name": "zope-backoffice",
"type": "erp5/zope-front",
"reference": "slappart1",
"static-tags": {
"instance": "instance-name"
}
},
{
"name": "mariadb",
"type": "erp5/mariadb",
"reference": "slappart2"
},
{
"name": "syslog",
"type": "default",
"file-path": "/var/log/syslog"
}
]
}
}
}
}
},
"promtail-extra-scrape-config": { "promtail-extra-scrape-config": {
"description": "Raw promtail config (experimental parameter, see https://github.com/grafana/loki/blob/v0.3.0/docs/promtail.md#scrape-configs for detail)", "description": "Raw promtail config (experimental parameter, see https://github.com/grafana/loki/blob/v0.3.0/docs/promtail.md#scrape-configs for detail)",
"default": "", "default": "",
......
...@@ -8,7 +8,6 @@ develop-eggs-directory = {{ buildout['develop-eggs-directory'] }} ...@@ -8,7 +8,6 @@ develop-eggs-directory = {{ buildout['develop-eggs-directory'] }}
offline = true offline = true
[instance-parameter] [instance-parameter]
recipe = slapos.cookbook:slapconfiguration recipe = slapos.cookbook:slapconfiguration
computer = ${slap-connection:computer-id} computer = ${slap-connection:computer-id}
...@@ -42,6 +41,7 @@ grafana-plugins-dir = ${:grafana-dir}/plugins ...@@ -42,6 +41,7 @@ grafana-plugins-dir = ${:grafana-dir}/plugins
grafana-provisioning-config-dir = ${:grafana-dir}/provisioning-config grafana-provisioning-config-dir = ${:grafana-dir}/provisioning-config
grafana-provisioning-datasources-dir = ${:grafana-provisioning-config-dir}/datasources grafana-provisioning-datasources-dir = ${:grafana-provisioning-config-dir}/datasources
grafana-provisioning-dashboards-dir = ${:grafana-provisioning-config-dir}/dashboards grafana-provisioning-dashboards-dir = ${:grafana-provisioning-config-dir}/dashboards
grafana-dashboards-dir = ${:grafana-dir}/dashboards
telegraf-dir = ${:srv}/telegraf telegraf-dir = ${:srv}/telegraf
telegraf-extra-config-dir = ${:telegraf-dir}/extra-config telegraf-extra-config-dir = ${:telegraf-dir}/extra-config
loki-dir = ${:srv}/loki loki-dir = ${:srv}/loki
...@@ -150,6 +150,7 @@ logs-dir = ${directory:grafana-logs-dir} ...@@ -150,6 +150,7 @@ logs-dir = ${directory:grafana-logs-dir}
plugins-dir = ${directory:grafana-plugins-dir} plugins-dir = ${directory:grafana-plugins-dir}
provisioning-config-dir = ${directory:grafana-provisioning-config-dir} provisioning-config-dir = ${directory:grafana-provisioning-config-dir}
provisioning-datasources-dir = ${directory:grafana-provisioning-datasources-dir} provisioning-datasources-dir = ${directory:grafana-provisioning-datasources-dir}
provisioning-dashboards-dir = ${directory:grafana-provisioning-dashboards-dir}
admin-user = ${grafana-password:username} admin-user = ${grafana-password:username}
admin-password = ${grafana-password:passwd} admin-password = ${grafana-password:passwd}
secret-key = ${grafana-secret-key:passwd} secret-key = ${grafana-secret-key:passwd}
...@@ -178,22 +179,27 @@ context = ...@@ -178,22 +179,27 @@ context =
section apache_frontend apache-frontend section apache_frontend apache-frontend
key slapparameter_dict slap-configuration:configuration key slapparameter_dict slap-configuration:configuration
depends = depends =
${grafana-provisioning-config-file:rendered} ${grafana-provisioning-datasources-config-file:rendered}
${grafana-provisioning-dashboards-config-file:rendered}
[grafana-provisioning-config-file] [grafana-provisioning-datasources-config-file]
<= config-file <= config-file
rendered = ${grafana:provisioning-datasources-dir}/datasource.yaml rendered = ${grafana:provisioning-datasources-dir}/datasource.yaml
context = context =
section influxdb influxdb section influxdb influxdb
section loki loki section loki loki
[grafana-provisioning-dashboards-config-file]
<= config-file
rendered = ${grafana:provisioning-dashboards-dir}/dashboard.yaml
context =
key dashboards_dir directory:grafana-dashboards-dir
[grafana-listen-promise] [grafana-listen-promise]
<= check-port-listening-promise <= check-port-listening-promise
hostname= ${grafana:ipv6} hostname= ${grafana:ipv6}
port = ${grafana:port} port = ${grafana:port}
[telegraf] [telegraf]
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
extra-config-dir = ${directory:telegraf-extra-config-dir} extra-config-dir = ${directory:telegraf-extra-config-dir}
...@@ -207,6 +213,222 @@ wrapper-path = ${directory:service}/telegraf ...@@ -207,6 +213,222 @@ wrapper-path = ${directory:service}/telegraf
context = context =
section influxdb influxdb section influxdb influxdb
section telegraf telegraf section telegraf telegraf
section extra telegraf-config-file-extra
[telegraf-config-file-extra]
recipe = slapos.recipe.build
telegraf-input-slapos-bin = {{ telegraf_input_slapos_bin }}
slapparameter-dict = ${slap-configuration:configuration}
init =
import zc.buildout
import pkg_resources
buildout_options = self.buildout["buildout"]
zc.buildout.easy_install.install(
["toml"],
dest=None,
working_set=pkg_resources.working_set,
path=[
buildout_options["develop-eggs-directory"],
buildout_options["eggs-directory"],
],
)
import collections
import os.path
import urllib.parse
import toml
# files to create during install step
self._config_files = {}
inputs = collections.defaultdict(list)
processors = collections.defaultdict(list)
slapparameter_dict = self.options["slapparameter-dict"]
for application in slapparameter_dict.get('applications', []):
partition_mapping = {}
for partition in application.get("partitions", []):
partition.setdefault("type", "default")
if "reference" in partition:
partition_mapping[partition["reference"]] = partition["name"]
partition_directory = os.path.join(application["instance-root"], partition['reference'])
if partition["type"] in ("erp5/mariadb", "mariadb"):
partition.setdefault("username", "root")
partition.setdefault("dbname", "erp5")
dsn = f"{partition['username']}@unix({partition_directory}/var/run/mariadb.sock)/{partition['dbname']}"
inputs["mysql"].append(
{
# TODO: name here ???
"name_override": f"{partition['name']}-mysql",
"servers": [dsn],
"gather_innodb_metrics": True,
"tags": dict(partition.get("static-tags", {}), app=application["name"]),
}
)
if partition["type"] == "erp5/mariadb":
inputs["sql"].append(
{
"name_override": "erp5-mariadb-activities",
"driver": "mysql",
"dsn": dsn,
"query": [
{
"query": "select count(*) as message_count, \"message\" as queue from message",
"field_columns_include": ["message_count"],
"tag_columns_include": ["queue"],
},
{
"query": "select count(*) as message_count, \"message_queue\" as queue from message_queue",
"field_columns_include": ["message_count"],
"tag_columns_include": ["queue"],
},
{
"query": "select count(*) as failed_message_count, \"message\" as queue from message where processing_node=-2",
"field_columns_include": ["failed_message_count"],
"tag_columns_include": ["queue"],
},
{
"query": "select count(*) as failed_message_count, \"message_queue\" as queue from message_queue where processing_node=-2",
"field_columns_include": ["failed_message_count"],
"tag_columns_include": ["queue"],
},
{
"query": """
select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message.date)), 0) as int)
as waiting_time, \"message\" as queue from message
where processing_node in (-1, 0) and message not like '%after_tag%'
""",
"field_columns_include": ["waiting_time"],
"tag_columns_include": ["queue"],
},
{
"query": """
select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message_queue.date)), 0) as int)
as waiting_time, \"message_queue\" as queue from message_queue
where processing_node in (-1, 0) and message not like '%after_tag%'
""",
"field_columns_include": ["waiting_time"],
"tag_columns_include": ["queue"],
}
],
"tags": dict(partition.get("static-tags", {}), app=application["name"], partition=partition["name"]),
}
)
if partition["type"] == "erp5/balancer":
inputs["tail"].append(
{
"data_format": "grok",
"files": [f"{partition_directory}/var/log/apache-access.log"],
"grok_custom_pattern_files": [],
"grok_custom_patterns": "",
"grok_patterns": [
'%{IPORHOST:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \\[%{HTTPDATE:timestamp}\\] "(?:%{WORD:verb:tag} %{NOTSPACE:request}(?: HTTP/%{NUMBER:http_version:float})?|%{DATA})" %{NUMBER:resp_code:tag} (?:%{NUMBER:resp_bytes:int}|-) %{QS:referrer} %{QS:agent} %{NUMBER:response_time:int}'
],
"grok_timezone": "Local",
"name_override": f"{partition['name']}",
"tags": dict(partition.get("static-tags", {}), app=application["name"]),
}
)
urls = application.get("urls", [])
if urls:
inputs["http_response"].append({
"interval": "5m",
"urls": urls,
"tags": {"app": application["name"]},
})
for url in urls:
  x509_url = url
  parsed_url = urllib.parse.urlparse(url)
  # certificate lifetime can only be checked for https URLs
  if parsed_url.scheme == 'https':
    # x509_cert wants a port
    if not parsed_url.port:
      host = parsed_url.hostname
      # urlparse strips the brackets of IPv6 literals, put them back so
      # that the appended port stays unambiguous
      if ':' in host:
        host = '[' + host + ']'
      x509_url = parsed_url._replace(netloc=host + ':443').geturl()
    inputs["x509_cert"].append({
      "sources": [x509_url],
      "interval": "5h",
      # a dict literal keeps only the last value of a repeated key, so both
      # tags have to be given in a single "tags" mapping
      "tags": {"url": url, "app": application["name"]},
    })
# TODO: we don't need to run this execd plugin more than once for the
# same instance root - which can happen when configured with multiple
# applications.
telegraf_slapos_input_config_file = os.path.join(
self.options['location'],
f"telegraf-input-slapos-{application['name']}.cfg")
self._config_files[telegraf_slapos_input_config_file] = toml.dumps({
"inputs": {
"slapos": [{
"instance_root": application['instance-root']}]}})
# TODO: supervisor process finder for
# https://github.com/influxdata/telegraf/tree/master/plugins/inputs/procstat ?
telegraf_slapos_input_command = self.options['telegraf-input-slapos-bin']
inputs["execd"].append({
"name_override": "slapos-processes",
"command": [telegraf_slapos_input_command, '-config', telegraf_slapos_input_config_file],
"tags": {"app": application["name"]},
})
# normalize slapos process names, remove hash from wrappers and -on-watch suffix
processors["regex"].append({
"namepass": ["slapos-processes"],
"order": 1,
"tags": [{
"key": "name",
"pattern": "^(.*)-.{32}",
# XXX we concatenate strings so that we don't have to escape them for buildout
"replacement": "$" + "{1}",
}]})
processors["regex"].append({
"namepass": ["slapos-processes"],
"order": 2,
"tags": [{
"key": "name",
"pattern": "^(.*)-on-watch$",
"replacement": "$" + "{1}",
}]})
processors["enum"].append({
"namepass": [ "slapos-processes"],
"mapping": [{
# "tag": "group", # TODO: rename this in input plugin (in the golang code)
"tag": "slappart",
"dest": "partition",
"value_mappings": partition_mapping,
}]})
# TODOs:
# - [ ] slapos input
# - [x] friendly name of slappart
# - [x] strip hashes from -on-watch
# - [x] activity metrics
# - [ ] alert dashboard
# - [ ] include "jerome-dev" everywhere ???
# - [ ] apdex
# - [ ] check why no history list length in mysql metrics
options["extra-config"] = toml.dumps({
"inputs": inputs,
"processors": processors})
# import pdb; pdb.set_trace()
# apdex
# SELECT sum("success") / sum("all") FROM
# (SELECT count("duration") AS "all" FROM "jerome-dev-balancer" WHERE $timeFilter GROUP BY time($__interval) fill(null)),
# (SELECT count("duration") AS "success" FROM "jerome-dev-balancer" WHERE ("resp_code" = '200' ) AND $timeFilter GROUP BY time($__interval) fill(null))
#SELECT sum("success") + sum("all") FROM
# (SELECT count("duration") AS "all" FROM "jerome-dev-balancer" WHERE $timeFilter GROUP BY time($__interval) fill(0)),
# (SELECT count("duration") AS "success" FROM "jerome-dev-balancer" WHERE ("resp_code" = '200' ) AND $timeFilter GROUP BY time($__interval) fill(0))
install =
import os
os.mkdir(self.options['location'])
for fname, content in self._config_files.items():
with open(fname, 'w') as f:
f.write(content)
[loki] [loki]
...@@ -235,7 +457,7 @@ url = ${loki:url}/ready ...@@ -235,7 +457,7 @@ url = ${loki:url}/ready
[promtail] [promtail]
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
command-line = command-line =
bash -c 'nice -19 chrt --idle 0 ionice -c3 {{ promtail_bin }} -config.file=${promtail-config-file:rendered}' bash -c 'nice -19 chrt --idle 0 ionice -c3 {{ promtail_bin }} -config.file=${promtail-config-file:location}'
wrapper-path = ${directory:service}/promtail wrapper-path = ${directory:service}/promtail
dir = ${directory:promtail-dir} dir = ${directory:promtail-dir}
...@@ -245,11 +467,306 @@ ip = ${instance-parameter:ipv4-random} ...@@ -245,11 +467,306 @@ ip = ${instance-parameter:ipv4-random}
url = http://${:ip}:${:http-port} url = http://${:ip}:${:http-port}
[promtail-config-file] [promtail-config-file]
<= config-file recipe = slapos.recipe.build
context = location = ${directory:etc}/${:_buildout_section_name_}.cfg
section promtail promtail slapparameter-dict = ${slap-configuration:configuration}
section loki loki install =
key slapparameter_dict slap-configuration:configuration {% raw %}
import os
# XXX make extra eggs available to buildout
import zc.buildout
import pkg_resources
buildout_options = self.buildout['buildout']
zc.buildout.easy_install.install(
['pyyaml'],
dest=None,
working_set=pkg_resources.working_set,
path=[
buildout_options['develop-eggs-directory'],
buildout_options['eggs-directory']])
import yaml
slapparameter_dict = self.options['slapparameter-dict']
cfg = {
"server": {
"http_listen_address": self.buildout['promtail']['ip'],
"http_listen_port": int(self.buildout['promtail']['http-port']),
"grpc_listen_address": self.buildout['promtail']['ip'],
"grpc_listen_port": int(self.buildout['promtail']['grpc-port']),
"external_url": self.buildout['promtail']['url'],
},
"positions": {
"filename": "{}/positions.yaml".format(self.buildout['promtail']['dir']),
},
"clients": [
{
"url": "{}/api/prom/push".format(self.buildout['loki']['url']),
}
],
"scrape_configs": []
}
def get_job_selector(partition, job_name, application_name):
  """Build the LogQL stream selector for one scrape job.
  The result looks like '{app="name",key="value",job="job_name"}': the
  application name first, then the partition's optional `static-tags`,
  with the `job` label set to `job_name`.
  Returns the selector as a string.
  """
  def quote(value):
    # values are emitted as double-quoted strings: escape backslashes and
    # double quotes so that a value cannot end the literal early
    return '"' + str(value).replace('\\', '\\\\').replace('"', '\\"') + '"'
  labels = dict(partition.get('static-tags', {}), job=job_name)
  matchers = ['app=' + quote(application_name)]
  matchers.extend(f'{key}={quote(value)}' for key, value in labels.items())
  return "{" + ",".join(matchers) + "}"
def get_static_configs(partition, job_name, path, application):
  """Return the promtail `static_configs` list for one log file pattern.
  `path` is the file glob to watch; the literal placeholder `{directory}`
  in it is replaced by the partition directory (instance root joined with
  the partition reference), or by an empty string when the partition has
  no reference.
  The labels are the partition's optional `static-tags` plus `job`, `app`
  and `__path__`.
  """
  directory = ''
  if partition.get('reference'):
    directory = os.path.join(application['instance-root'], partition['reference'])
  labels = dict(
    partition.get('static-tags', {}),
    job=job_name,
    app=application['name'],
    # plain substitution rather than str.format: globs coming from the
    # `file-path` parameter may contain braces (for example {a,b}) which
    # str.format would try to interpret as fields.
    __path__=path.replace('{directory}', directory),
  )
  return [{"targets": ["localhost"], "labels": labels}]
for application in slapparameter_dict.get('applications', []):
for partition in application.get('partitions', []):
partition.setdefault("type", "default")
if partition['type'] in ('erp5/zope-activity', 'erp5/zope-front'):
job_name = f"{partition['name']}-event-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"pipeline_stages": [
{
"match": {
"selector": get_job_selector(partition, job_name, application['name']),
"stages": [
{
"multiline": {
"firstline": "^------",
"max_wait_time": "3s"
}
},
{
"regex": {
"expression": "^------\\n(?P<timestamp>\\d{4}-\\d{2}-\\d{2}\\s\\d{1,2}\\:\\d{2}\\:\\d{2}\\,\\d{3}) (?P<level>\\S+) (?P<component>\\S+) (?P<message>.*)"
}
},
{
  "timestamp": {
    # custom formats are Go time layouts: they must be written with Go's
    # reference time (2006-01-02 15:04:05), not with a sample log timestamp
    "format": "2006-01-02 15:04:05,000",
    "source": "timestamp"
  }
},
{
"labels": {
"level": None,
"component": None
}
}
]
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/zope-*-event.log",
application,
)})
if partition['type'] == 'erp5/zope-front':
job_name = f"{partition['name']}-access-log"
cfg['scrape_configs'].append({
"job_name": job_name,
# drop requests for haproxy health check
"pipeline_stages": [
{
"drop": {
"expression": '.* "GET / HTTP/1.0" 200 .*'
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/zope-*-Z2.log",
application,
)})
job_name = f"{partition['name']}-long-request-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"pipeline_stages": [
{
"match": {
"selector": get_job_selector(partition, job_name, application['name']),
"stages": [
{
"multiline": {
"firstline": "^\\d{4}-\\d{2}-\\d{2}\\s\\d{1,2}\\:\\d{2}\\:\\d{2}\\,\\d{3}",
"max_wait_time": "3s"
}
},
{
"regex": {
"expression": "^(?P<timestamp>.*) .*"
}
},
{
  "timestamp": {
    # custom formats are Go time layouts: they must be written with Go's
    # reference time (2006-01-02 15:04:05), not with a sample log timestamp
    "format": "2006-01-02 15:04:05,000",
    "source": "timestamp"
  }
}
]
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/zope-*-longrequest.log",
application,
)})
if partition['type'] in ('erp5/mariadb', 'mariadb'):
job_name = f"{partition['name']}-mariadb-slow-queries"
cfg['scrape_configs'].append({
"job_name": job_name,
"pipeline_stages": [
{
"match": {
"selector": get_job_selector(partition, job_name, application['name']),
"stages": [
{
"multiline": {
# TODO
#"firstline": "^# Time: \\d{2}\\d{2}\\d{2}\\s\\d{1,2}\\:\\d{2}\\:\\d{2}",
"firstline": r"^# Time: \d{2}.*",
"max_wait_time": "3s"
}
},
{
"regex": {
"expression": ".*SET timestamp=(?P<timestamp>\\d+);.*"
}
},
{
"timestamp": {
"format": "Unix",
"source": "timestamp"
}
}
]
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/mariadb_slowquery.log",
application,
)})
job_name = f"{partition['name']}-mariadb-error-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"pipeline_stages": [
{
"match": {
"selector": get_job_selector(partition, job_name, application['name']),
"stages": [
{
"timestamp": {
"format": "2021-06-05 3:55:31",
"source": "timestamp"
}
}
]
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/mariadb_error.log",
application,
)})
if partition['type'] == 'erp5/zeo':
job_name = f"{partition['name']}-zeo-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"pipeline_stages": [
{
"match": {
"selector": get_job_selector(partition, job_name, application['name']),
"stages": [
{
"multiline": {
"firstline": "^------",
"max_wait_time": "3s"
}
},
{
"regex": {
"expression": "^------\\n(?P<timestamp>\\d{4}-\\d{2}-\\d{2}\\s\\d{1,2}\\:\\d{2}\\:\\d{2}\\,\\d{3}) (?P<level>\\S+) (?P<component>\\S+) (?P<message>.*)"
}
},
{
  "timestamp": {
    # custom formats are Go time layouts: they must be written with Go's
    # reference time (2006-01-02 15:04:05), not with a sample log timestamp
    "format": "2006-01-02 15:04:05,000",
    "source": "timestamp"
  }
},
{
"labels": {
"level": None,
"component": None
}
}
]
}
}
],
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/zeo-*.log",
application,
)})
if partition['type'] == 'erp5/balancer':
job_name = f"{partition['name']}-balancer-access-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/apache-access.log",
application,
)})
job_name = f"{partition['name']}-balancer-error-log"
cfg['scrape_configs'].append({
"job_name": job_name,
"static_configs": get_static_configs(
partition,
job_name,
"{directory}/var/log/apache-error.log",
application,
)})
if partition.get('file-path'):
job_name = partition['name']
cfg['scrape_configs'].append({
"job_name": job_name,
"static_configs": get_static_configs(
partition,
job_name,
f"{partition['file-path']}",
application,
)})
with open(self.options['location'], 'w') as f:
yaml.dump(cfg, f)
{% endraw %}
[promtail-listen-promise] [promtail-listen-promise]
<= check-port-listening-promise <= check-port-listening-promise
...@@ -257,7 +774,6 @@ hostname= ${promtail:ip} ...@@ -257,7 +774,6 @@ hostname= ${promtail:ip}
port = ${promtail:http-port} port = ${promtail:http-port}
[apache-frontend] [apache-frontend]
<= slap-connection <= slap-connection
recipe = slapos.cookbook:requestoptional recipe = slapos.cookbook:requestoptional
......
server:
http_listen_address: {{ promtail['ip'] }}
http_listen_port: {{ promtail['http-port'] }}
grpc_listen_address: {{ promtail['ip'] }}
grpc_listen_port: {{ promtail['grpc-port'] }}
external_url: {{ promtail['url'] }}
positions:
filename: {{ promtail['dir'] }}/positions.yaml
clients:
- url: {{ loki['url'] }}/api/prom/push
scrape_configs:
- job_name: test
static_configs:
- targets:
- localhost
labels:
job: grafanalogs
__path__: ./var/log/*log
{{ slapparameter_dict.get('promtail-extra-scrape-config', '') }}
...@@ -9,7 +9,6 @@ extends = ...@@ -9,7 +9,6 @@ extends =
../../component/dash/buildout.cfg ../../component/dash/buildout.cfg
buildout.hash.cfg buildout.hash.cfg
versions = versions
parts = parts =
slapos-cookbook slapos-cookbook
instance-profile instance-profile
...@@ -17,9 +16,9 @@ parts = ...@@ -17,9 +16,9 @@ parts =
influxdb-config-file influxdb-config-file
telegraf-config-file telegraf-config-file
grafana-config-file grafana-config-file
grafana-provisioning-config-file grafana-provisioning-datasources-config-file
grafana-provisioning-dashboards-config-file
loki-config-file loki-config-file
promtail-config-file
[python] [python]
part = python3 part = python3
...@@ -75,6 +74,7 @@ environment = ...@@ -75,6 +74,7 @@ environment =
CGO_ENABLED = 0 CGO_ENABLED = 0
telegraf-bin = ${:bin}/telegraf telegraf-bin = ${:bin}/telegraf
telegraf-input-slapos-bin = ${:bin}/telegraf-input-slapos
influx-bin = ${:bin}/influx influx-bin = ${:bin}/influx
influxd-bin = ${:bin}/influxd influxd-bin = ${:bin}/influxd
grafana-bin = ${:bin}/grafana-server grafana-bin = ${:bin}/grafana-server
...@@ -115,15 +115,21 @@ url = ${:_profile_base_location_}/${:filename} ...@@ -115,15 +115,21 @@ url = ${:_profile_base_location_}/${:filename}
[grafana-config-file] [grafana-config-file]
<= download-file-base <= download-file-base
[grafana-provisioning-config-file] [grafana-provisioning-datasources-config-file]
<= download-file-base <= download-file-base
[loki-config-file] [grafana-provisioning-dashboards-config-file]
<= download-file-base <= download-file-base
[promtail-config-file] [loki-config-file]
<= download-file-base <= download-file-base
[instance-eggs]
recipe = zc.recipe.egg
eggs =
${python-PyYAML:egg}
toml
[instance-profile] [instance-profile]
recipe = slapos.recipe.template:jinja2 recipe = slapos.recipe.template:jinja2
template = ${:_profile_base_location_}/${:filename} template = ${:_profile_base_location_}/${:filename}
...@@ -134,6 +140,7 @@ context = ...@@ -134,6 +140,7 @@ context =
section buildout buildout section buildout buildout
key openssl_bin openssl-output:openssl key openssl_bin openssl-output:openssl
key telegraf_bin gowork:telegraf-bin key telegraf_bin gowork:telegraf-bin
key telegraf_input_slapos_bin gowork:telegraf-input-slapos-bin
key influxd_bin gowork:influxd-bin key influxd_bin gowork:influxd-bin
key influx_bin gowork:influx-bin key influx_bin gowork:influx-bin
key grafana_bin gowork:grafana-bin key grafana_bin gowork:grafana-bin
...@@ -144,6 +151,8 @@ context = ...@@ -144,6 +151,8 @@ context =
key dash_bin :dash-bin key dash_bin :dash-bin
curl-bin = ${curl:location}/bin/curl curl-bin = ${curl:location}/bin/curl
dash-bin = ${dash:location}/bin/dash dash-bin = ${dash:location}/bin/dash
depends = ${instance-eggs:eggs}
[versions] [versions]
inotifyx = 0.2.2 inotifyx = 0.2.2
toml = 0.10.2
{ {
"name": "Grafana", "name": "Grafana",
"description": "Grafana, Telegraf and Influxdb", "description": "Grafana, Telegraf and Influxdb",
"serialisation": "xml", "serialisation": "json-in-xml",
"software-type": { "software-type": {
"default": { "default": {
"title": "Default", "title": "Default",
......
...@@ -55,9 +55,6 @@ ...@@ -55,9 +55,6 @@
[outputs.influxdb] [outputs.influxdb]
# The full HTTP or UDP endpoint URL for your InfluxDB instance # The full HTTP or UDP endpoint URL for your InfluxDB instance
# Multiple urls can be specified for InfluxDB cluster support. # Multiple urls can be specified for InfluxDB cluster support.
# urls = ["udp://localhost:8089"] # UDP endpoint example
# XXX XXX XXX
#urls = ["http://localhost:8086"] # required
urls = ["{{ influxdb['url'] }}"] urls = ["{{ influxdb['url'] }}"]
insecure_skip_verify = true # because we are using a self signed certificate insecure_skip_verify = true # because we are using a self signed certificate
# The target database for metrics (telegraf will create it if not exists) # The target database for metrics (telegraf will create it if not exists)
...@@ -100,32 +97,9 @@ ...@@ -100,32 +97,9 @@
[system] [system]
{{ extra['extra-config'] }}
############################################################################### ###############################################################################
# ERP5 - PLUGINS # # To add ad-hoc config, don't edit this file directly, but place your config
###############################################################################
#
# Left here as example, don't edit this file directly, but place your config
# files in {{ telegraf['extra-config-dir'] }} # files in {{ telegraf['extra-config-dir'] }}
#
#[mysql]
# servers = ["root@unix(/srv/slapgrid/slappart12/srv/runner/instance/slappart1/var/run/mariadb.sock)/erp5"]
#[memcached]
# # XXX kumofs does not support memcached's stat command
# servers = ["10.0.248.233:2013", "10.0.248.233:2003"]
#[haproxy]
# servers = ["http://10.0.121.162:2150/haproxy", "http://10.0.121.162:2152/haproxy"]
#[[inputs.exec]]
# commands = ["/srv/slapgrid/slappart0/bin/slapsensor /srv/slapgrid/slappart0/srv/runner/instance/etc/supervisord.conf"]
# name_suffix = "_slapos"
# interval = "5s"
###############################################################################
# SERVICE PLUGINS #
############################################################################### ###############################################################################
...@@ -32,6 +32,7 @@ import os ...@@ -32,6 +32,7 @@ import os
import tempfile import tempfile
import textwrap import textwrap
import time import time
import json
import psutil import psutil
import requests import requests
...@@ -185,8 +186,93 @@ class TestTelegraf(GrafanaTestCase): ...@@ -185,8 +186,93 @@ class TestTelegraf(GrafanaTestCase):
class TestLoki(GrafanaTestCase): class TestLoki(GrafanaTestCase):
instance_max_retry = 2
@classmethod @classmethod
def getInstanceParameterDict(cls):
  """Return the instance parameters, JSON-encoded under the `_` key.

  A temporary log file is created and kept on `cls._logfile`; its path is
  used by the extra promtail scrape config.
  """
  cls._logfile = tempfile.NamedTemporaryFile(suffix='log')
  parameter_dict = {
    "applications": [
      {
        "name": "ERP5",
        "instance-root": "/srv/slapgrid/slappart4/srv/slapos/inst/",
        "urls": [
          "https://softinst12345-erp5.host.vifib.net/",
        ],
        # TODO: decide whether these partitions need "static-tags".
        # Partition names must be unique: they are used to build the
        # promtail job names.
        "partitions": [
          {
            "name": "jerome-dev-mariadb",
            "reference": "slappart6",
            "type": "erp5/mariadb",
          },
          {
            "name": "jerome-dev-zodb",
            "reference": "slappart7",
            "type": "erp5/zeo",
          },
          {
            "name": "jerome-dev-balancer",
            "reference": "slappart9",
            "type": "erp5/balancer",
          },
          {
            "name": "jerome-dev-zope-front",
            "reference": "slappart8",
            "type": "erp5/zope-front",
          },
          {
            "name": "jerome-dev-zope-activity",
            "reference": "slappart13",
            "type": "erp5/zope-activity",
          },
        ],
      }
    ],
    # TODO: drop this
    'promtail-extra-scrape-config':
      textwrap.dedent(r'''
        - job_name: {cls.__name__}
          pipeline_stages:
            - match:
                selector: '{{job="{cls.__name__}"}}'
                stages:
                  - multiline:
                      firstline: '^\d{{4}}-\d{{2}}-\d{{2}}\s\d{{1,2}}\:\d{{2}}\:\d{{2}}\,\d{{3}}'
                      max_wait_time: 3s
                  - regex:
                      expression: '^(?P<timestamp>.*) - (?P<name>\S+) - (?P<level>\S+) - (?P<message>.*)'
                  - timestamp:
                      format: 2006-01-02T15:04:05Z00:00
                      source: timestamp
                  - labels:
                      level:
                      name:
          static_configs:
            - targets:
                - localhost
              labels:
                job: {cls.__name__}
                __path__: {cls._logfile.name}
      ''').format(cls=cls),
  }
  return {'_': json.dumps(parameter_dict)}
def xgetInstanceParameterDict(cls):
cls._logfile = tempfile.NamedTemporaryFile(suffix='log') cls._logfile = tempfile.NamedTemporaryFile(suffix='log')
return { return {
'promtail-extra-scrape-config': 'promtail-extra-scrape-config':
...@@ -227,6 +313,7 @@ class TestLoki(GrafanaTestCase): ...@@ -227,6 +313,7 @@ class TestLoki(GrafanaTestCase):
)['loki-url'] )['loki-url']
def test_loki_available(self): def test_loki_available(self):
self.assertEqual( self.assertEqual(
requests.codes.ok, requests.codes.ok,
requests.get('{self.loki_url}/ready'.format(**locals()), requests.get('{self.loki_url}/ready'.format(**locals()),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment