Commit 5a1329a4 authored by Dylan Griffith

Add Elasticsearch to Sidekiq ServerMetrics

This will allow us to add Grafana graphs for these Prometheus
metrics. After this is deployed, we will add them to the Sidekiq detail
graphs to see how Sidekiq jobs are impacted by Elasticsearch requests.
parent ff4425ba
...@@ -126,10 +126,12 @@ configuration option in `gitlab.yml`. These metrics are served from the ...@@ -126,10 +126,12 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `sidekiq_jobs_db_seconds` | Histogram | 12.9 | Seconds of DB time to run Sidekiq job | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` | | `sidekiq_jobs_db_seconds` | Histogram | 12.9 | Seconds of DB time to run Sidekiq job | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_jobs_gitaly_seconds` | Histogram | 12.9 | Seconds of Gitaly time to run Sidekiq job | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` | | `sidekiq_jobs_gitaly_seconds` | Histogram | 12.9 | Seconds of Gitaly time to run Sidekiq job | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_redis_requests_duration_seconds` | Histogram | 13.1 | Duration in seconds that a Sidekiq job spent querying a Redis server | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` | | `sidekiq_redis_requests_duration_seconds` | Histogram | 13.1 | Duration in seconds that a Sidekiq job spent querying a Redis server | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_elasticsearch_requests_duration_seconds` | Histogram | 13.1 | Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_jobs_queue_duration_seconds` | Histogram | 12.5 | Duration in seconds that a Sidekiq job was queued before being executed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` | | `sidekiq_jobs_queue_duration_seconds` | Histogram | 12.5 | Duration in seconds that a Sidekiq job was queued before being executed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` |
| `sidekiq_jobs_failed_total` | Counter | 12.2 | Sidekiq jobs failed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` | | `sidekiq_jobs_failed_total` | Counter | 12.2 | Sidekiq jobs failed | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` |
| `sidekiq_jobs_retried_total` | Counter | 12.2 | Sidekiq jobs retried | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` | | `sidekiq_jobs_retried_total` | Counter | 12.2 | Sidekiq jobs retried | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` |
| `sidekiq_redis_requests_total` | Counter | 13.1 | Redis requests during a Sidekiq job execution | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` | | `sidekiq_redis_requests_total` | Counter | 13.1 | Redis requests during a Sidekiq job execution | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_elasticsearch_requests_total` | Counter | 13.1 | Elasticsearch requests during a Sidekiq job execution | `queue`, `boundary`, `external_dependencies`, `feature_category`, `job_status`, `urgency` |
| `sidekiq_running_jobs` | Gauge | 12.2 | Number of Sidekiq jobs running | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` | | `sidekiq_running_jobs` | Gauge | 12.2 | Number of Sidekiq jobs running | `queue`, `boundary`, `external_dependencies`, `feature_category`, `urgency` |
| `sidekiq_concurrency` | Gauge | 12.5 | Maximum number of Sidekiq jobs | | | `sidekiq_concurrency` | Gauge | 12.5 | Maximum number of Sidekiq jobs | |
| `geo_db_replication_lag_seconds` | Gauge | 10.2 | Database replication lag (seconds) | `url` | | `geo_db_replication_lag_seconds` | Gauge | 10.2 | Database replication lag (seconds) | `url` |
......
---
title: Add Elasticsearch to Sidekiq ServerMetrics
merge_request: 32937
author:
type: added
...@@ -49,6 +49,8 @@ module Gitlab ...@@ -49,6 +49,8 @@ module Gitlab
@metrics[:sidekiq_jobs_gitaly_seconds].observe(labels, get_gitaly_time(job)) @metrics[:sidekiq_jobs_gitaly_seconds].observe(labels, get_gitaly_time(job))
@metrics[:sidekiq_redis_requests_total].increment(labels, get_redis_calls(job)) @metrics[:sidekiq_redis_requests_total].increment(labels, get_redis_calls(job))
@metrics[:sidekiq_redis_requests_duration_seconds].observe(labels, get_redis_time(job)) @metrics[:sidekiq_redis_requests_duration_seconds].observe(labels, get_redis_time(job))
@metrics[:sidekiq_elasticsearch_requests_total].increment(labels, get_elasticsearch_calls(job))
@metrics[:sidekiq_elasticsearch_requests_duration_seconds].observe(labels, get_elasticsearch_time(job))
end end
end end
...@@ -62,9 +64,11 @@ module Gitlab ...@@ -62,9 +64,11 @@ module Gitlab
sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_gitaly_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_gitaly_seconds, 'Seconds of Gitaly time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS), sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS), sidekiq_redis_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_redis_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent requests a Redis server', {}, Gitlab::Instrumentation::Redis::QUERY_TIME_BUCKETS),
sidekiq_elasticsearch_requests_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_elasticsearch_requests_duration_seconds, 'Duration in seconds that a Sidekiq job spent in requests to an Elasticsearch server', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'), sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'), sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'), sidekiq_redis_requests_total: ::Gitlab::Metrics.counter(:sidekiq_redis_requests_total, 'Redis requests during a Sidekiq job execution'),
sidekiq_elasticsearch_requests_total: ::Gitlab::Metrics.counter(:sidekiq_elasticsearch_requests_total, 'Elasticsearch requests during a Sidekiq job execution'),
sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all), sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all) sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
} }
...@@ -82,6 +86,14 @@ module Gitlab ...@@ -82,6 +86,14 @@ module Gitlab
job.fetch(:redis_calls, 0) job.fetch(:redis_calls, 0)
end end
# Reads the Elasticsearch request duration recorded on the job payload.
#
# @param job [#fetch] job payload, looked up with the symbol key
#   +:elasticsearch_duration_s+
# @return [Object] the stored value, or 0 when the key is absent
def get_elasticsearch_time(job)
  # Block form of fetch: the fallback is only produced on a missing key.
  job.fetch(:elasticsearch_duration_s) { 0 }
end
# Reads the number of Elasticsearch requests recorded on the job payload.
#
# @param job [#fetch] job payload, looked up with the symbol key
#   +:elasticsearch_calls+
# @return [Object] the stored value, or 0 when the key is absent
def get_elasticsearch_calls(job)
  # Block form of fetch: the fallback is only produced on a missing key.
  job.fetch(:elasticsearch_calls) { 0 }
end
def get_gitaly_time(job) def get_gitaly_time(job)
job.fetch(:gitaly_duration_s, 0) job.fetch(:gitaly_duration_s, 0)
end end
......
...@@ -34,6 +34,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -34,6 +34,8 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
let(:redis_requests_total) { double('redis calls total metric') } let(:redis_requests_total) { double('redis calls total metric') }
let(:running_jobs_metric) { double('running jobs metric') } let(:running_jobs_metric) { double('running jobs metric') }
let(:redis_seconds_metric) { double('redis seconds metric') } let(:redis_seconds_metric) { double('redis seconds metric') }
let(:elasticsearch_seconds_metric) { double('elasticsearch seconds metric') }
let(:elasticsearch_requests_total) { double('elasticsearch calls total metric') }
before do before do
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_queue_duration_seconds, anything, anything, anything).and_return(queue_duration_seconds) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_queue_duration_seconds, anything, anything, anything).and_return(queue_duration_seconds)
...@@ -42,9 +44,11 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -42,9 +44,11 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_db_seconds, anything, anything, anything).and_return(db_seconds_metric) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_db_seconds, anything, anything, anything).and_return(db_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_gitaly_seconds, anything, anything, anything).and_return(gitaly_seconds_metric) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_gitaly_seconds, anything, anything, anything).and_return(gitaly_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_redis_requests_duration_seconds, anything, anything, anything).and_return(redis_seconds_metric) allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_redis_requests_duration_seconds, anything, anything, anything).and_return(redis_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_elasticsearch_requests_duration_seconds, anything, anything, anything).and_return(elasticsearch_seconds_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_redis_requests_total, anything).and_return(redis_requests_total) allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_redis_requests_total, anything).and_return(redis_requests_total)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_elasticsearch_requests_total, anything).and_return(elasticsearch_requests_total)
allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :all).and_return(running_jobs_metric) allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :all).and_return(running_jobs_metric)
allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_concurrency, anything, {}, :all).and_return(concurrency_metric) allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_concurrency, anything, {}, :all).and_return(concurrency_metric)
...@@ -76,6 +80,9 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -76,6 +80,9 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
let(:redis_calls) { 2 } let(:redis_calls) { 2 }
let(:redis_duration) { 0.01 } let(:redis_duration) { 0.01 }
let(:elasticsearch_calls) { 8 }
let(:elasticsearch_duration) { 0.54 }
before do before do
allow(subject).to receive(:get_thread_cputime).and_return(thread_cputime_before, thread_cputime_after) allow(subject).to receive(:get_thread_cputime).and_return(thread_cputime_before, thread_cputime_after)
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after) allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after)
...@@ -86,14 +93,19 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -86,14 +93,19 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
job[:redis_calls] = redis_calls job[:redis_calls] = redis_calls
job[:redis_duration_s] = redis_duration job[:redis_duration_s] = redis_duration
job[:elasticsearch_calls] = elasticsearch_calls
job[:elasticsearch_duration_s] = elasticsearch_duration
allow(running_jobs_metric).to receive(:increment) allow(running_jobs_metric).to receive(:increment)
allow(redis_requests_total).to receive(:increment) allow(redis_requests_total).to receive(:increment)
allow(elasticsearch_requests_total).to receive(:increment)
allow(queue_duration_seconds).to receive(:observe) allow(queue_duration_seconds).to receive(:observe)
allow(user_execution_seconds_metric).to receive(:observe) allow(user_execution_seconds_metric).to receive(:observe)
allow(db_seconds_metric).to receive(:observe) allow(db_seconds_metric).to receive(:observe)
allow(gitaly_seconds_metric).to receive(:observe) allow(gitaly_seconds_metric).to receive(:observe)
allow(completion_seconds_metric).to receive(:observe) allow(completion_seconds_metric).to receive(:observe)
allow(redis_seconds_metric).to receive(:observe) allow(redis_seconds_metric).to receive(:observe)
allow(elasticsearch_seconds_metric).to receive(:observe)
end end
it 'yields block' do it 'yields block' do
...@@ -109,7 +121,9 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -109,7 +121,9 @@ describe Gitlab::SidekiqMiddleware::ServerMetrics do
expect(gitaly_seconds_metric).to receive(:observe).with(labels_with_job_status, gitaly_duration) expect(gitaly_seconds_metric).to receive(:observe).with(labels_with_job_status, gitaly_duration)
expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, monotonic_time_duration) expect(completion_seconds_metric).to receive(:observe).with(labels_with_job_status, monotonic_time_duration)
expect(redis_seconds_metric).to receive(:observe).with(labels_with_job_status, redis_duration) expect(redis_seconds_metric).to receive(:observe).with(labels_with_job_status, redis_duration)
expect(elasticsearch_seconds_metric).to receive(:observe).with(labels_with_job_status, elasticsearch_duration)
expect(redis_requests_total).to receive(:increment).with(labels_with_job_status, redis_calls) expect(redis_requests_total).to receive(:increment).with(labels_with_job_status, redis_calls)
expect(elasticsearch_requests_total).to receive(:increment).with(labels_with_job_status, elasticsearch_calls)
subject.call(worker, job, :test) { nil } subject.call(worker, job, :test) { nil }
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment