Commit 09d10836 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch 'alerts-for-built-in-metrics-ee' into 'master'

Import common metrics into database (EE)

Closes #6948

See merge request gitlab-org/gitlab-ee!7175
parents f16cfb7a e6c87595
# frozen_string_literal: true
# A Prometheus metric definition stored in the database.
#
# A record is either attached to a project, or flagged `common`, in which
# case the paired `project` validations below require it to have no project.
class PrometheusMetric < ActiveRecord::Base
  prepend EE::PrometheusMetric

  belongs_to :project, validate: true, inverse_of: :prometheus_metrics

  # Built-in groups use negative values, custom/user groups start at zero.
  enum group: {
    # built-in groups
    nginx_ingress: -1,
    ha_proxy: -2,
    aws_elb: -3,
    nginx: -4,
    kubernetes: -5,

    # custom/user groups
    business: 0,
    response: 1,
    system: 2
  }

  validates :title, presence: true
  validates :query, presence: true
  validates :group, presence: true
  validates :y_label, presence: true
  validates :unit, presence: true
  validates :project, presence: true, unless: :common?
  validates :project, absence: true, if: :common?

  scope :common, -> { where(common: true) }

  # Human-readable (translated) title for every group key.
  GROUP_TITLES = {
    # built-in groups
    nginx_ingress: _('Response metrics (NGINX Ingress)'),
    ha_proxy: _('Response metrics (HA Proxy)'),
    aws_elb: _('Response metrics (AWS ELB)'),
    nginx: _('Response metrics (NGINX)'),
    kubernetes: _('System metrics (Kubernetes)'),

    # custom/user groups
    business: _('Business metrics (Custom)'),
    response: _('Response metrics (Custom)'),
    system: _('System metrics (Custom)')
  }.freeze

  # Prometheus series names listed per built-in group; custom groups have
  # no entry here.
  REQUIRED_METRICS = {
    nginx_ingress: %w(nginx_upstream_responses_total nginx_upstream_response_msecs_avg),
    ha_proxy: %w(haproxy_frontend_http_requests_total haproxy_frontend_http_responses_total),
    aws_elb: %w(aws_elb_request_count_sum aws_elb_latency_average aws_elb_httpcode_backend_5_xx_sum),
    nginx: %w(nginx_server_requests nginx_server_requestMsec),
    kubernetes: %w(container_memory_usage_bytes container_cpu_usage_seconds_total)
  }.freeze

  # Returns the title registered in GROUP_TITLES for this record's group.
  def group_title
    group_key = group.to_sym
    GROUP_TITLES[group_key]
  end

  # Returns the required series names for this record's group as strings;
  # an empty array when the group has no REQUIRED_METRICS entry.
  def required_metrics
    Array(REQUIRED_METRICS[group.to_sym]).map(&:to_s)
  end

  # Wraps this record in a Gitlab::Prometheus::Metric with a fixed weight of 0.
  def to_query_metric
    Gitlab::Prometheus::Metric.new(
      id: id,
      title: title,
      required_metrics: required_metrics,
      weight: 0,
      y_label: y_label,
      queries: queries
    )
  end

  # Returns a one-element array describing this metric's query. Keys whose
  # value is nil are dropped from the hash.
  def queries
    single_query = {
      query_range: query,
      unit: unit,
      label: legend,
      series: query_series
    }

    [single_query.compact]
  end

  # Returns the status-code colour series when the legend is 'Status Code',
  # nil for every other legend.
  def query_series
    return unless legend == 'Status Code'

    colours = [
      { value: '2xx', color: 'green' },
      { value: '4xx', color: 'orange' },
      { value: '5xx', color: 'red' }
    ]

    [{ label: 'status_code', when: colours }]
  end
end
---
title: Import all common metrics into database
merge_request: 21459
author:
type: changed
......@@ -7,7 +7,8 @@
- nginx_upstream_responses_total
weight: 1
queries:
- query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
- id: response_metrics_nginx_ingress_throughput_status_code
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
unit: req / sec
label: Status Code
series:
......@@ -25,7 +26,8 @@
- nginx_upstream_response_msecs_avg
weight: 1
queries:
- query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})'
- id: response_metrics_nginx_ingress_latency_pod_average
query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})'
label: Pod average
unit: ms
- title: "HTTP Error Rate"
......@@ -34,7 +36,8 @@
- nginx_upstream_responses_total
weight: 1
queries:
- query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100'
- id: response_metrics_nginx_ingress_http_error_rate
query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100'
label: 5xx Errors
unit: "%"
- group: Response metrics (HA Proxy)
......@@ -46,10 +49,12 @@
- haproxy_frontend_http_requests_total
weight: 1
queries:
- query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
- id: response_metrics_ha_proxy_throughput_status_code
query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
unit: req / sec
label: Status Code
series:
- label: code
- label: status_code
when:
- value: 2xx
color: green
......@@ -63,7 +68,8 @@
- haproxy_frontend_http_responses_total
weight: 1
queries:
- query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))'
- id: response_metrics_ha_proxy_http_error_rate
query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))'
label: HTTP Errors
unit: "%"
- group: Response metrics (AWS ELB)
......@@ -75,7 +81,8 @@
- aws_elb_request_count_sum
weight: 1
queries:
- query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60'
- id: response_metrics_aws_elb_throughput_requests
query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60'
label: Total
unit: req / sec
- title: "Latency"
......@@ -84,7 +91,8 @@
- aws_elb_latency_average
weight: 1
queries:
- query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000'
- id: response_metrics_aws_elb_latency_average
query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000'
label: Average
unit: ms
- title: "HTTP Error Rate"
......@@ -94,7 +102,8 @@
- aws_elb_httpcode_backend_5_xx_sum
weight: 1
queries:
- query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})'
- id: response_metrics_aws_elb_http_error_rate
query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})'
label: HTTP Errors
unit: "%"
- group: Response metrics (NGINX)
......@@ -106,7 +115,8 @@
- nginx_server_requests
weight: 1
queries:
- query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)'
- id: response_metrics_nginx_throughput_status_code
query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)'
unit: req / sec
label: Status Code
series:
......@@ -124,7 +134,8 @@
- nginx_server_requestMsec
weight: 1
queries:
- query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})'
- id: response_metrics_nginx_latency
query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})'
label: Upstream
unit: ms
- title: "HTTP Error Rate"
......@@ -133,7 +144,8 @@
- nginx_server_requests
weight: 1
queries:
- query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
- id: response_metrics_nginx_http_error_rate
query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))'
label: HTTP Errors
unit: "errors / sec"
- group: System metrics (Kubernetes)
......@@ -145,7 +157,8 @@
- container_memory_usage_bytes
weight: 4
queries:
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024'
- id: system_metrics_kubernetes_container_memory_total
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024'
label: Total
unit: GB
- title: "Core Usage (Total)"
......@@ -154,7 +167,8 @@
- container_cpu_usage_seconds_total
weight: 3
queries:
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
- id: system_metrics_kubernetes_container_cores_total
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)'
label: Total
unit: "cores"
- title: "Memory Usage (Pod Average)"
......@@ -163,10 +177,18 @@
- container_memory_usage_bytes
weight: 2
queries:
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
- id: system_metrics_kubernetes_container_memory_average
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
label: Pod average
unit: MB
- query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
- title: "Canary: Memory Usage (Pod Average)"
y_label: "Memory Used per Pod"
required_metrics:
- container_memory_usage_bytes
weight: 2
queries:
- id: system_metrics_kubernetes_container_memory_average_canary
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024'
label: Pod average
unit: MB
track: canary
......@@ -176,11 +198,19 @@
- container_cpu_usage_seconds_total
weight: 1
queries:
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
- id: system_metrics_kubernetes_container_core_usage
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
label: Pod average
unit: "cores"
- query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
- title: "Canary: Core Usage (Pod Average)"
y_label: "Cores per Pod"
required_metrics:
- container_cpu_usage_seconds_total
weight: 1
queries:
- id: system_metrics_kubernetes_container_core_usage_canary
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))'
label: Pod average
unit: "cores"
track: canary
# frozen_string_literal: true
# Load the common metrics importer from db/importers and run it with its
# default arguments.
require Rails.root.join('db/importers/common_metrics_importer.rb')
::Importers::CommonMetricsImporter.new.execute
# frozen_string_literal: true
# Load the common metrics importer from db/importers and run it with its
# default arguments.
require Rails.root.join('db/importers/common_metrics_importer.rb')
::Importers::CommonMetricsImporter.new.execute
# frozen_string_literal: true
module Importers
  # Model used only by the importer. Its `group` enum and GROUP_TITLES mirror
  # ::PrometheusMetric; the importer spec compares the two, so keep them in
  # sync when either side changes.
  # NOTE(review): presumably kept separate so that importing does not depend
  # on the application model's validations — confirm.
  class PrometheusMetric < ActiveRecord::Base
    enum group: {
      # built-in groups
      nginx_ingress: -1,
      ha_proxy: -2,
      aws_elb: -3,
      nginx: -4,
      kubernetes: -5,

      # custom groups
      business: 0,
      response: 1,
      system: 2
    }

    scope :common, -> { where(common: true) }

    GROUP_TITLES = {
      business: _('Business metrics (Custom)'),
      response: _('Response metrics (Custom)'),
      system: _('System metrics (Custom)'),

      nginx_ingress: _('Response metrics (NGINX Ingress)'),
      ha_proxy: _('Response metrics (HA Proxy)'),
      aws_elb: _('Response metrics (AWS ELB)'),
      nginx: _('Response metrics (NGINX)'),
      kubernetes: _('System metrics (Kubernetes)')
    }.freeze
  end

  # Reads a YAML file of metric groups and creates or updates one common
  # PrometheusMetric row per query, keyed by the query's `id`.
  class CommonMetricsImporter
    # Raised when a query in the YAML file has no `id`.
    MissingQueryId = Class.new(StandardError)

    # Raised when a group title in the YAML file has no GROUP_TITLES entry.
    UnknownGroupTitle = Class.new(StandardError)

    attr_reader :content

    # file - path of the YAML file to import.
    def initialize(file = 'config/prometheus/common_metrics.yml')
      @content = YAML.load_file(file)
    end

    # Creates or updates a metric for every query found in the file.
    #
    # Raises MissingQueryId or UnknownGroupTitle for malformed entries, and
    # whatever `update!` raises when a record cannot be saved.
    def execute
      process_content do |id, attributes|
        find_or_build_metric!(id)
          .update!(**attributes)
      end
    end

    private

    def process_content(&blk)
      content.map do |group|
        process_group(group, &blk)
      end
    end

    # Resolves the group title once, then visits every metric of the group.
    def process_group(group, &blk)
      attributes = {
        group: find_group_title_key(group['group'])
      }

      group['metrics'].map do |metric|
        process_metric(metric, attributes, &blk)
      end
    end

    def process_metric(metric, attributes, &blk)
      attributes = attributes.merge(
        title: metric['title'],
        y_label: metric['y_label'])

      metric['queries'].map do |query|
        process_metric_query(query, attributes, &blk)
      end
    end

    # Yields the query id together with the full attribute set for the row.
    def process_metric_query(query, attributes, &blk)
      attributes = attributes.merge(
        legend: query['label'],
        query: query['query_range'],
        unit: query['unit'])

      yield(query['id'], attributes)
    end

    # Returns the existing common metric with this identifier, or a new
    # unsaved one. Raises MissingQueryId when no id was given.
    def find_or_build_metric!(id)
      raise MissingQueryId, 'every query in the metrics file must have an `id`' unless id

      PrometheusMetric.common.find_by(identifier: id) ||
        PrometheusMetric.new(common: true, identifier: id)
    end

    # Maps a group title from the file to the integer stored in the `group`
    # column.
    #
    # An unknown title used to resolve to nil and only fail later, when the
    # record was saved without a group; fail here with the offending title.
    # NOTE(review): GROUP_TITLES values pass through `_()`, so this lookup
    # assumes they are evaluated in the same language as the file — confirm.
    def find_group_title_key(title)
      group_name = find_group_title(title)
      raise UnknownGroupTitle, "unknown metric group title: #{title.inspect}" unless group_name

      PrometheusMetric.groups[group_name]
    end

    # Returns the group key whose title equals `title`, or nil.
    def find_group_title(title)
      PrometheusMetric::GROUP_TITLES.invert[title]
    end
  end
end
# frozen_string_literal: true
class CreatePrometheusMetrics < ActiveRecord::Migration
DOWNTIME = false
......
# frozen_string_literal: true
# Allows NULL in `prometheus_metrics.project_id`.
class ChangeProjectIdForPrometheusMetrics < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  def change
    change_column_null(:prometheus_metrics, :project_id, true)
  end
end
# frozen_string_literal: true
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
require Rails.root.join('db/migrate/prometheus_metrics_limits_to_mysql')
# Runs the `up` step of PrometheusMetricsLimitsToMysql as a regular migration.
class FixPrometheusMetricQueryLimits < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  def up
    limits_migration = PrometheusMetricsLimitsToMysql.new
    limits_migration.up
  end

  # Nothing to revert.
  def down
  end
end
# frozen_string_literal: true
# Adds the boolean `common` column (default: false) to `prometheus_metrics`.
class AddCommonToPrometheusMetrics < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # NOTE(review): presumably required by `add_column_with_default` — confirm.
  disable_ddl_transaction!

  def up
    add_column_with_default :prometheus_metrics, :common, :boolean, default: false
  end

  def down
    remove_column :prometheus_metrics, :common
  end
end
# frozen_string_literal: true
# Adds an index on `prometheus_metrics.common` using the concurrent helpers.
class AddIndexOnCommonForPrometheusMetrics < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  def up
    add_concurrent_index(:prometheus_metrics, :common)
  end

  def down
    remove_concurrent_index(:prometheus_metrics, :common)
  end
end
# frozen_string_literal: true
# Adds the string column `identifier` to `prometheus_metrics`.
class AddIdentifierToPrometheusMetric < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  def change
    add_column(:prometheus_metrics, :identifier, :string)
  end
end
# frozen_string_literal: true
# Adds a unique index on `prometheus_metrics.identifier` using the
# concurrent helpers.
class AddIndexForIdentifierToPrometheusMetric < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  def up
    add_concurrent_index(:prometheus_metrics, :identifier, unique: true)
  end

  def down
    remove_concurrent_index(:prometheus_metrics, :identifier, unique: true)
  end
end
# frozen_string_literal: true
# Runs the common metrics importer with its default arguments.
class ImportCommonMetrics < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  require Rails.root.join('db/importers/common_metrics_importer.rb')

  DOWNTIME = false

  def up
    importer = Importers::CommonMetricsImporter.new
    importer.execute
  end

  # Nothing to revert.
  def down
  end
end
# On MySQL only, changes `prometheus_metrics.query` to a text column with a
# limit of 4096 and no default. Does nothing on other databases.
class PrometheusMetricsLimitsToMysql < ActiveRecord::Migration
  DOWNTIME = false

  def up
    if Gitlab::Database.mysql?
      change_column(:prometheus_metrics, :query, :text, limit: 4096, default: nil)
    end
  end

  # Intentionally empty: the change is not reverted.
  def down
  end
end
......@@ -2272,10 +2272,11 @@ ActiveRecord::Schema.define(version: 20180901171833) do
end
add_index "prometheus_alerts", ["environment_id"], name: "index_prometheus_alerts_on_environment_id", using: :btree
add_index "prometheus_alerts", ["prometheus_metric_id"], name: "index_prometheus_alerts_on_prometheus_metric_id", unique: true, using: :btree
add_index "prometheus_alerts", ["project_id", "prometheus_metric_id"], name: "index_prometheus_alerts_on_project_id_and_prometheus_metric_id", unique: true, using: :btree
add_index "prometheus_alerts", ["prometheus_metric_id"], name: "index_prometheus_alerts_on_prometheus_metric_id", using: :btree
create_table "prometheus_metrics", force: :cascade do |t|
t.integer "project_id", null: false
t.integer "project_id"
t.string "title", null: false
t.string "query", null: false
t.string "y_label"
......@@ -2284,9 +2285,13 @@ ActiveRecord::Schema.define(version: 20180901171833) do
t.integer "group", null: false
t.datetime_with_timezone "created_at", null: false
t.datetime_with_timezone "updated_at", null: false
t.boolean "common", default: false, null: false
t.string "identifier"
end
add_index "prometheus_metrics", ["common"], name: "index_prometheus_metrics_on_common", using: :btree
add_index "prometheus_metrics", ["group"], name: "index_prometheus_metrics_on_group", using: :btree
add_index "prometheus_metrics", ["identifier"], name: "index_prometheus_metrics_on_identifier", unique: true, using: :btree
add_index "prometheus_metrics", ["project_id"], name: "index_prometheus_metrics_on_project_id", using: :btree
create_table "protected_branch_merge_access_levels", force: :cascade do |t|
......
......@@ -49,6 +49,7 @@ description: 'Learn how to contribute to GitLab.'
- [Working with the GitHub importer](github_importer.md)
- [Elasticsearch integration docs](elasticsearch.md)
- [Working with Merge Request diffs](diffs.md)
- [Prometheus metrics](prometheus_metrics.md)
## Performance guides
......
# Working with Prometheus Metrics
## Adding to the library
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `common_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/common_metrics.yml) file.
### Query identifier
When adding a new metric, every query must have a unique identifier (`id`).
The identifier is used to find and update the metric later, when its definition changes.
```yaml
- group: Response metrics (NGINX Ingress)
metrics:
- title: "Throughput"
y_label: "Requests / Sec"
queries:
- id: response_metrics_nginx_ingress_throughput_status_code
query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
unit: req / sec
label: Status Code
```
### Update existing metrics
After you add a new _common_ metric or change an existing one, you must create a new database migration that will query and update all existing metrics.
**Note: If a query metric (which is identified by `id:`) is removed, it will not be removed from the database by default.**
**You might want to add an additional database migration that decides what to do with the removed metric.**
**For example, you might want to migrate all dependent data to a different metric.**
```ruby
class ImportCommonMetrics < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
require Rails.root.join('db/importers/common_metrics_importer.rb')
DOWNTIME = false
def up
Importers::CommonMetricsImporter.new.execute
end
def down
# no-op
end
end
```
......@@ -17,9 +17,3 @@ GitLab retrieves performance data from the configured Prometheus server, and att
In order to isolate and only display relevant metrics for a given environment, GitLab needs a method to detect which labels are associated. To do that,
GitLab uses the defined queries and fills in the environment specific variables. Typically this involves looking for the [$CI_ENVIRONMENT_SLUG](../../../../ci/variables/README.md#predefined-variables-environment-variables), but may also include other information such as the project's Kubernetes namespace. Each search query is defined in the [exporter specific documentation](#prometheus-metrics-library).
## Adding to the library
We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `additional_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/additional_metrics.yml) file.
> Note: The library is only for monitoring public, common, system services which all customers can benefit from. Support for monitoring [customer proprietary metrics](https://gitlab.com/gitlab-org/gitlab-ee/issues/2273) will be added in a subsequent release.
......@@ -82,7 +82,7 @@ module Projects
end
def alert
@alert ||= project.prometheus_alerts.find_by(prometheus_metric: params[:id]) || render_404
@alert ||= project.prometheus_alerts.find_by(prometheus_metric_id: params[:id]) || render_404
end
def application
......
module EE
  # Prepended into ::PrometheusMetric; adds the `prometheus_alerts`
  # association to it.
  module PrometheusMetric
    extend ActiveSupport::Concern
    extend ::Gitlab::Utils::Override

    prepended do
      has_many(:prometheus_alerts, inverse_of: :prometheus_metric)
    end
  end
end
......@@ -7,11 +7,14 @@ class PrometheusAlert < ActiveRecord::Base
belongs_to :environment, required: true, validate: true, inverse_of: :prometheus_alerts
belongs_to :project, required: true, validate: true, inverse_of: :prometheus_alerts
belongs_to :prometheus_metric, required: true, validate: true, inverse_of: :prometheus_alert
belongs_to :prometheus_metric, required: true, validate: true, inverse_of: :prometheus_alerts
after_save :clear_prometheus_adapter_cache!
after_destroy :clear_prometheus_adapter_cache!
validate :require_valid_environment_project!
validate :require_valid_metric_project!
enum operator: [:lt, :eq, :gt]
delegate :title, :query, to: :prometheus_metric
......@@ -45,4 +48,17 @@ class PrometheusAlert < ActiveRecord::Base
# Clears the environment's Prometheus reactive cache for
# `:additional_metrics_environment`. Registered above as both an
# after_save and an after_destroy callback.
def clear_prometheus_adapter_cache!
environment.clear_prometheus_reactive_cache!(:additional_metrics_environment)
end
# Validation: the alert's project must be the project of its environment.
# A missing environment counts as a mismatch unless the project is nil too.
def require_valid_environment_project!
  errors.add(:environment, "invalid project") unless project == environment&.project
end
# Validation: a common metric is always accepted; any other metric must
# belong to the same project as the alert.
def require_valid_metric_project!
  metric = prometheus_metric
  return if metric&.common? || project == metric&.project

  errors.add(:prometheus_metric, "invalid project")
end
end
# A project-scoped Prometheus metric with at most one alert attached.
class PrometheusMetric < ActiveRecord::Base
  belongs_to :project, required: true, validate: true, inverse_of: :prometheus_metrics
  has_one :prometheus_alert, inverse_of: :prometheus_metric

  enum group: %i[business response system]

  validates :title, presence: true
  validates :query, presence: true
  validates :group, presence: true
  validates :y_label, presence: true
  validates :unit, presence: true

  # Human-readable (translated) title for every group key.
  GROUP_TITLES = {
    business: _('Business metrics (Custom)'),
    response: _('Response metrics (Custom)'),
    system: _('System metrics (Custom)')
  }.freeze

  # Returns the title registered in GROUP_TITLES for this record's group.
  def group_title
    group_key = group.to_sym
    GROUP_TITLES[group_key]
  end

  # Wraps this record in a Gitlab::Prometheus::Metric with no required
  # metrics and a fixed weight of 0.
  def to_query_metric
    Gitlab::Prometheus::Metric.new(
      id: id,
      title: title,
      required_metrics: [],
      weight: 0,
      y_label: y_label,
      queries: build_queries
    )
  end

  private

  # Returns a one-element array describing this metric's query.
  def build_queries
    [{ query_range: query, unit: unit, label: legend }]
  end
end
......@@ -2,6 +2,8 @@ module Projects
module Prometheus
module Metrics
class BaseService
include Gitlab::Utils::StrongMemoize
def initialize(metric, params = {})
@metric = metric
@project = metric.project
......@@ -13,15 +15,25 @@ module Projects
attr_reader :metric, :project, :params
def application
metric.prometheus_alert.environment.cluster_prometheus_adapter
alert.environment.cluster_prometheus_adapter
end
def schedule_alert_update
::Clusters::Applications::ScheduleUpdateService.new(application, project).execute
return unless alert
return unless alert.environment
::Clusters::Applications::ScheduleUpdateService.new(
alert.environment.cluster_prometheus_adapter, project).execute
end
# Looks up the metric's alert for the service's project and memoizes the
# result under the `:alert` key.
def alert
  strong_memoize(:alert) { metric.prometheus_alerts.find_by(project: project) }
end
def has_alert?
metric.prometheus_alert.present?
alert.present?
end
end
end
......
# Replaces the unique index on `prometheus_alerts.prometheus_metric_id` with
# a plain index plus a unique index on (project_id, prometheus_metric_id).
class AllowManyPrometheusAlerts < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  disable_ddl_transaction!

  # Makes `prometheus_metric_id` non-unique and enforces uniqueness on the
  # project + metric pair instead.
  def up
    rebuild_foreign_key do
      remove_concurrent_index(:prometheus_alerts, :prometheus_metric_id, unique: true)
      add_concurrent_index(:prometheus_alerts, :prometheus_metric_id)
      add_concurrent_index(:prometheus_alerts, %i[project_id prometheus_metric_id], unique: true)
    end
  end

  # Restores the single unique index on `prometheus_metric_id`.
  def down
    rebuild_foreign_key do
      remove_concurrent_index(:prometheus_alerts, %i[project_id prometheus_metric_id], unique: true)
      remove_concurrent_index(:prometheus_alerts, :prometheus_metric_id)
      add_concurrent_index(:prometheus_alerts, :prometheus_metric_id, unique: true)
    end
  end

  private

  # MySQL needs the foreign keys dropped while the indexes are re-created,
  # so on MySQL they are removed before the block and added back after it.
  def rebuild_foreign_key
    drop_alert_foreign_keys if Gitlab::Database.mysql?

    yield

    restore_alert_foreign_keys if Gitlab::Database.mysql?
  end

  # Removes both foreign keys of `prometheus_alerts`.
  def drop_alert_foreign_keys
    remove_foreign_key_without_error(:prometheus_alerts, :prometheus_metrics)
    remove_foreign_key_without_error(:prometheus_alerts, :projects)
  end

  # Re-adds both foreign keys of `prometheus_alerts` with cascading deletes.
  def restore_alert_foreign_keys
    add_concurrent_foreign_key(:prometheus_alerts, :prometheus_metrics,
      column: :prometheus_metric_id, on_delete: :cascade)
    add_concurrent_foreign_key(:prometheus_alerts, :projects,
      column: :project_id, on_delete: :cascade)
  end
end
module EE
  module Gitlab
    module Prometheus
      # EE class-level additions for Gitlab::Prometheus::MetricGroup: the
      # project's own metrics are appended to whatever `for_project`
      # already returns.
      module MetricGroup
        extend ActiveSupport::Concern

        class_methods do
          extend ::Gitlab::Utils::Override

          # Builds one MetricGroup (priority 0) per group title from the
          # project's Prometheus metrics.
          def custom_metrics(project)
            metrics_by_title = project.prometheus_metrics.all.group_by(&:group_title)

            metrics_by_title.map do |name, metrics|
              ::Gitlab::Prometheus::MetricGroup.new(
                name: name,
                priority: 0,
                metrics: metrics.map(&:to_query_metric)
              )
            end
          end

          override :for_project
          def for_project(project)
            super + custom_metrics(project)
          end
        end
      end
    end
  end
end
# Factory for PrometheusAlert.
#
# `environment` and `prometheus_metric` are each declared exactly once, in
# block form, so both are built for the alert's own project. Declaring them a
# second time as bare implicit associations makes factory_bot reject the
# factory with "Attribute already defined".
FactoryBot.define do
  factory :prometheus_alert do
    project
    operator :gt
    threshold 1

    environment do |alert|
      build(:environment, project: alert.project)
    end

    prometheus_metric do |alert|
      build(:prometheus_metric, project: alert.project)
    end
  end
end
# frozen_string_literal: true
require 'rails_helper'
# Checks which metric ids `.for_project` exposes for a given project.
describe Gitlab::Prometheus::MetricGroup do
describe '.for_project' do
# Both records are created eagerly (`let!`) so they exist in every example:
# one ordinary metric and one created with the `:common` trait.
let!(:project_metric) { create(:prometheus_metric) }
let!(:common_metric) { create(:prometheus_metric, :common, group: :aws_elb) }
# Flattens the returned groups into a plain list of metric ids.
subject do
described_class.for_project(project)
.map(&:metrics).flatten
.map(&:id)
end
context 'for current project' do
let(:project) { project_metric.project }
it 'returns metrics for given project and common ones' do
is_expected.to contain_exactly(project_metric.id, common_metric.id)
end
end
context 'for other project' do
# A fresh project that the ordinary metric above was not created for.
let(:project) { create(:project) }
it 'returns metrics only common ones' do
is_expected.to contain_exactly(common_metric.id)
end
end
end
end
require 'spec_helper'
describe PrometheusAlert do
let(:metric) { create(:prometheus_metric) }
set(:project) { build(:project) }
let(:metric) { build(:prometheus_metric) }
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:environment) }
end
describe 'project validations' do
let(:environment) { build(:environment, project: project) }
let(:metric) { build(:prometheus_metric, project: project) }
subject do
build(:prometheus_alert, prometheus_metric: metric, environment: environment, project: project)
end
context 'when environment and metric belongs same project' do
it { is_expected.to be_valid }
end
context 'when environment belongs to different project' do
let(:environment) { build(:environment) }
it { is_expected.not_to be_valid }
end
context 'when metric belongs to different project' do
let(:metric) { build(:prometheus_metric) }
it { is_expected.not_to be_valid }
end
context 'when metric is common' do
let(:metric) { build(:prometheus_metric, :common) }
it { is_expected.to be_valid }
end
end
describe '#full_query' do
it 'returns the concatenated query' do
before do
subject.operator = "gt"
subject.threshold = 1
subject.prometheus_metric_id = metric.id
subject.prometheus_metric = metric
end
it 'returns the concatenated query' do
expect(subject.full_query).to eq("#{metric.query} > 1.0")
end
end
describe '#to_param' do
it 'returns the params of the prometheus alert' do
before do
subject.operator = "gt"
subject.threshold = 1
subject.prometheus_metric_id = metric.id
subject.prometheus_metric = metric
end
alert_params = {
it 'returns the params of the prometheus alert' do
expect(subject.to_param).to eq(
"alert" => metric.title,
"expr" => "#{metric.query} > 1.0",
"for" => "5m",
"labels" => {
"gitlab" => "hook",
"gitlab_alert_id" => metric.id
}
}
expect(subject.to_param).to eq(alert_params)
})
end
end
end
......@@ -13,7 +13,7 @@ describe Projects::Prometheus::Metrics::DestroyService do
context 'when metric has a prometheus alert associated' do
it 'schedules a prometheus alert update' do
create(:prometheus_alert, prometheus_metric: metric)
create(:prometheus_alert, project: metric.project, prometheus_metric: metric)
schedule_update_service = spy
allow(::Clusters::Applications::ScheduleUpdateService).to receive(:new).and_return(schedule_update_service)
......
......@@ -13,7 +13,7 @@ describe Projects::Prometheus::Metrics::UpdateService do
let(:schedule_update_service) { spy }
before do
create(:prometheus_alert, prometheus_metric: metric)
create(:prometheus_alert, project: metric.project, prometheus_metric: metric)
allow(::Clusters::Applications::ScheduleUpdateService).to receive(:new).and_return(schedule_update_service)
end
......
......@@ -120,6 +120,9 @@ excluded_attributes:
- :description_html
- :repository_languages
- :packages_enabled
prometheus_metrics:
- :common
- :identifier
snippets:
- :expired_at
merge_request_diff:
......
......@@ -5,7 +5,7 @@ module Gitlab
MUTEX = Mutex.new
extend self
def load_groups_from_yaml(file_name = 'additional_metrics.yml')
def load_groups_from_yaml(file_name)
yaml_metrics_raw(file_name).map(&method(:group_from_entry))
end
......
module Gitlab
module Prometheus
class MetricGroup
prepend EE::Gitlab::Prometheus::MetricGroup
include ActiveModel::Model
attr_accessor :name, :priority, :metrics
validates :name, :priority, :metrics, presence: true
def self.common_metrics
AdditionalMetricsParser.load_groups_from_yaml
end
def self.for_project(project)
common_metrics + custom_metrics(project)
end
def self.custom_metrics(project)
project.prometheus_metrics.all.group_by(&:group_title).map do |name, metrics|
::PrometheusMetric.common.group_by(&:group_title).map do |name, metrics|
MetricGroup.new(name: name, priority: 0, metrics: metrics.map(&:to_query_metric))
end
end
# EE only
def self.for_project(_)
common_metrics
end
end
end
end
......@@ -3,6 +3,7 @@ require Rails.root.join('db/migrate/markdown_cache_limits_to_mysql')
require Rails.root.join('db/migrate/merge_request_diff_file_limits_to_mysql')
require Rails.root.join('db/migrate/limits_ci_build_trace_chunks_raw_data_for_mysql')
require Rails.root.join('db/migrate/gpg_keys_limits_to_mysql')
require Rails.root.join('db/migrate/prometheus_metrics_limits_to_mysql')
desc "GitLab | Add limits to strings in mysql database"
task add_limits_mysql: :environment do
......@@ -12,4 +13,5 @@ task add_limits_mysql: :environment do
MergeRequestDiffFileLimitsToMysql.new.up
LimitsCiBuildTraceChunksRawDataForMysql.new.up
IncreaseMysqlTextLimitForGpgKeys.new.up
PrometheusMetricsLimitsToMysql.new.up
end
......@@ -6235,9 +6235,21 @@ msgstr ""
msgid "Resolve discussion"
msgstr ""
msgid "Response metrics (AWS ELB)"
msgstr ""
msgid "Response metrics (Custom)"
msgstr ""
msgid "Response metrics (HA Proxy)"
msgstr ""
msgid "Response metrics (NGINX Ingress)"
msgstr ""
msgid "Response metrics (NGINX)"
msgstr ""
msgid "Resume"
msgstr ""
......@@ -6976,6 +6988,9 @@ msgstr ""
msgid "System metrics (Custom)"
msgstr ""
msgid "System metrics (Kubernetes)"
msgstr ""
msgid "Tag (%{tag_count})"
msgid_plural "Tags (%{tag_count})"
msgstr[0] ""
......
# frozen_string_literal: true
require 'spec_helper'
# Loads the development seed file and checks that the set of common
# Prometheus metrics goes from empty to populated.
describe 'Import metrics on development seed' do
  subject do
    load Rails.root.join('db', 'fixtures', 'development', '99_common_metrics.rb')
  end

  it "imports all prometheus metrics" do
    # Evaluating `subject` is what triggers the seed load.
    expect { subject }
      .to change { PrometheusMetric.common.empty? }.from(true).to(false)
  end
end
# frozen_string_literal: true
require 'rails_helper'
require Rails.root.join("db", "importers", "common_metrics_importer.rb")
# Guards against drift between the importer's own PrometheusMetric class and
# the application model: both must expose the same group enum and titles.
describe Importers::PrometheusMetric do
  it 'group enum equals ::PrometheusMetric' do
    importer_groups = described_class.groups

    expect(importer_groups).to eq(::PrometheusMetric.groups)
  end

  it 'GROUP_TITLES equals ::PrometheusMetric' do
    importer_titles = described_class::GROUP_TITLES

    expect(importer_titles).to eq(::PrometheusMetric::GROUP_TITLES)
  end
end
# Specs for Importers::CommonMetricsImporter. The first context runs the
# importer against whatever `content` it reports by default; the second stubs
# `content` with a single hand-written group to check how fields are stored.
describe Importers::CommonMetricsImporter do
  subject { described_class.new }

  context "does import common_metrics.yml" do
    # Flattened views of the importer's content: groups -> metrics -> queries.
    let(:groups) { subject.content }
    let(:metrics) { groups.flat_map { |group| group['metrics'] } }
    let(:queries) { metrics.flat_map { |metric| metric['queries'] } }
    let(:query_ids) { queries.map { |query| query['id'] } }

    before do
      subject.execute
    end

    it "has the same amount of groups" do
      # A grouped count has one entry per distinct group value.
      expect(PrometheusMetric.common.group(:group).count.count).to eq(groups.count)
    end

    it "has the same amount of metrics" do
      # Rows sharing group and title are counted as one metric.
      expect(PrometheusMetric.common.group(:group, :title).count.count).to eq(metrics.count)
    end

    it "has the same amount of queries" do
      expect(PrometheusMetric.common.count).to eq(queries.count)
    end

    it "does not have duplicate IDs" do
      expect(query_ids).to eq(query_ids.uniq)
    end

    it "imports all IDs" do
      expect(PrometheusMetric.common.pluck(:identifier)).to contain_exactly(*query_ids)
    end
  end

  context 'does import properly all fields' do
    let(:query_identifier) { 'response-metric' }

    # Minimal payload: one group holding one metric with one query.
    let(:group) do
      {
        group: 'Response metrics (NGINX Ingress)',
        metrics: [{
          title: "Throughput",
          y_label: "Requests / Sec",
          queries: [{
            id: query_identifier,
            query_range: 'my-query',
            unit: 'my-unit',
            label: 'status code'
          }]
        }]
      }
    end

    # Stub rather than set a message expectation: this hook only prepares the
    # importer's input, the examples below make the actual assertions.
    before do
      allow(subject).to receive(:content) { [group.deep_stringify_keys] }
    end

    shared_examples 'stores metric' do
      let(:metric) { PrometheusMetric.find_by(identifier: query_identifier) }

      it 'with all data' do
        expect(metric.group).to eq('nginx_ingress')
        expect(metric.title).to eq('Throughput')
        expect(metric.y_label).to eq('Requests / Sec')
        expect(metric.unit).to eq('my-unit')
        expect(metric.legend).to eq('status code')
        expect(metric.query).to eq('my-query')
      end
    end

    context 'if ID is missing' do
      let(:query_identifier) { nil }

      it 'raises exception' do
        expect { subject.execute }.to raise_error(described_class::MissingQueryId)
      end
    end

    context 'for existing common metric with different ID' do
      let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'my-existing-metric') }

      before do
        subject.execute
      end

      it_behaves_like 'stores metric' do
        it 'and existing metric is not changed' do
          expect(metric).not_to eq(existing_metric)
        end
      end
    end

    context 'when metric with ID exists ' do
      let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'response-metric') }

      before do
        subject.execute
      end

      it_behaves_like 'stores metric' do
        it 'and existing metric is changed' do
          expect(metric).to eq(existing_metric)
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Loads the production seed file and checks that the set of common
# Prometheus metrics goes from empty to populated.
describe 'Import metrics on production seed' do
  subject do
    load Rails.root.join('db', 'fixtures', 'production', '999_common_metrics.rb')
  end

  it "imports all prometheus metrics" do
    # Evaluating `subject` is what triggers the seed load.
    expect { subject }
      .to change { PrometheusMetric.common.empty? }.from(true).to(false)
  end
end
# frozen_string_literal: true
FactoryBot.define do
factory :prometheus_metric, class: PrometheusMetric do
title 'title'
......@@ -7,5 +9,10 @@ FactoryBot.define do
group :business
project
legend 'legend'
trait :common do
common true
project nil
end
end
end
......@@ -355,7 +355,9 @@ priorities:
- label
prometheus_metrics:
- project
- prometheus_alert
- prometheus_alerts
prometheus_alerts:
- project
timelogs:
- issue
- merge_request
......
......@@ -594,6 +594,14 @@ PrometheusMetric:
- title
- query
- group
- common
- identifier
PrometheusAlert:
- threshold
- operator
- environment_id
- project_id
- prometheus_metric_id
Badge:
- id
- link_url
......
......@@ -6,7 +6,7 @@ describe Gitlab::Prometheus::AdditionalMetricsParser do
let(:parser_error_class) { Gitlab::Prometheus::ParsingError }
describe '#load_groups_from_yaml' do
subject { described_class.load_groups_from_yaml }
subject { described_class.load_groups_from_yaml('dummy.yaml') }
describe 'parsing sample yaml' do
let(:sample_yaml) do
......
# frozen_string_literal: true
require 'rails_helper'
# Checks which stored metrics Gitlab::Prometheus::MetricGroup exposes, both
# through .common_metrics and through .for_project.
describe Gitlab::Prometheus::MetricGroup do
  describe '.common_metrics' do
    subject { described_class.common_metrics }

    # Created without the :common trait; the examples expect it to be left out.
    let!(:project_metric) { create(:prometheus_metric) }
    let!(:elb_metric) { create(:prometheus_metric, :common, group: :aws_elb) }
    let!(:kubernetes_metric_one) { create(:prometheus_metric, :common, group: :kubernetes) }
    let!(:kubernetes_metric_two) { create(:prometheus_metric, :common, group: :kubernetes) }

    it 'returns exactly two groups' do
      group_names = subject.map(&:name)

      expect(group_names).to contain_exactly(
        'Response metrics (AWS ELB)', 'System metrics (Kubernetes)')
    end

    it 'returns exactly three metric queries' do
      metric_ids = subject.map(&:metrics).flatten.map(&:id)

      expect(metric_ids).to contain_exactly(
        elb_metric.id, kubernetes_metric_one.id,
        kubernetes_metric_two.id)
    end
  end

  describe '.for_project' do
    subject do
      groups = described_class.for_project(other_project)
      groups.map(&:metrics).flatten.map(&:id)
    end

    let!(:other_project) { create(:project) }
    let!(:project_metric) { create(:prometheus_metric) }
    let!(:common_metric) { create(:prometheus_metric, :common, group: :aws_elb) }

    it 'returns exactly one common metric' do
      expect(subject).to contain_exactly(common_metric.id)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'migrate', '20180831164910_import_common_metrics.rb')
# Runs the ImportCommonMetrics migration and checks that the set of common
# Prometheus metrics goes from empty to populated.
describe ImportCommonMetrics, :migration do
  describe '#up' do
    it "imports all prometheus metrics" do
      expect { migrate! }
        .to change { PrometheusMetric.common.empty? }.from(true).to(false)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe PrometheusMetric do
subject { build(:prometheus_metric) }
let(:other_project) { build(:project) }
it { is_expected.to belong_to(:project) }
it { is_expected.to validate_presence_of(:title) }
it { is_expected.to validate_presence_of(:query) }
it { is_expected.to validate_presence_of(:group) }
describe 'common metrics' do
using RSpec::Parameterized::TableSyntax
where(:common, :project, :result) do
false | other_project | true
false | nil | false
true | other_project | false
true | nil | true
end
with_them do
before do
subject.common = common
subject.project = project
end
it { expect(subject.valid?).to eq(result) }
end
end
describe '#query_series' do
using RSpec::Parameterized::TableSyntax
where(:legend, :type) do
'Some other legend' | NilClass
'Status Code' | Array
end
with_them do
before do
subject.legend = legend
end
it { expect(subject.query_series).to be_a(type) }
end
end
describe '#group_title' do
shared_examples 'group_title' do |group, title|
subject { build(:prometheus_metric, group: group).group_title }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment