Commit cfea48df authored by Ryan Cobb's avatar Ryan Cobb Committed by Kamil Trzciński

Adds direct monitoring for sidekiq metrics

This adds diirect monitoring for sidekiq metrics. This is done via
sidekiq middleware and a sampler to pull from sidekiqs api.
parent 3bb3ac3d
...@@ -33,6 +33,7 @@ Sidekiq.configure_server do |config| ...@@ -33,6 +33,7 @@ Sidekiq.configure_server do |config|
config.redis = queues_config_hash config.redis = queues_config_hash
config.server_middleware do |chain| config.server_middleware do |chain|
chain.add Gitlab::SidekiqMiddleware::Metrics if Settings.monitoring.sidekiq_exporter
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if ENV['SIDEKIQ_LOG_ARGUMENTS'] && !enable_json_logs
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS'] chain.add Gitlab::SidekiqMiddleware::MemoryKiller if ENV['SIDEKIQ_MEMORY_KILLER_MAX_RSS']
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0' chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware unless ENV['SIDEKIQ_REQUEST_STORE'] == '0'
......
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
class Metrics
def initialize
@metrics = init_metrics
end
def call(_worker, job, queue)
labels = create_labels(queue)
@metrics[:sidekiq_running_jobs].increment(labels, 1)
if job['retry_count'].present?
@metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
end
realtime = Benchmark.realtime do
yield
end
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime)
rescue Exception # rubocop: disable Lint/RescueException
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
raise
ensure
@metrics[:sidekiq_running_jobs].increment(labels, -1)
end
private
def init_metrics
{
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job'),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :livesum)
}
end
def create_labels(queue)
{
queue: queue
}
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::SidekiqMiddleware::Metrics do
describe '#call' do
let(:middleware) { described_class.new }
let(:worker) { double(:worker) }
let(:completion_seconds_metric) { double('completion seconds metric') }
let(:failed_total_metric) { double('failed total metric') }
let(:retried_total_metric) { double('retried total metric') }
let(:running_jobs_metric) { double('running jobs metric') }
before do
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything).and_return(completion_seconds_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)
allow(running_jobs_metric).to receive(:increment)
end
it 'yields block' do
allow(completion_seconds_metric).to receive(:observe)
expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
end
it 'sets metrics' do
labels = { queue: :test }
expect(running_jobs_metric).to receive(:increment).with(labels, 1)
expect(running_jobs_metric).to receive(:increment).with(labels, -1)
expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))
middleware.call(worker, {}, :test) { nil }
end
context 'when job is retried' do
it 'sets sidekiq_jobs_retried_total metric' do
allow(completion_seconds_metric).to receive(:observe)
expect(retried_total_metric).to receive(:increment)
middleware.call(worker, { 'retry_count' => 1 }, :test) { nil }
end
end
context 'when error is raised' do
it 'sets sidekiq_jobs_failed_total and reraises' do
expect(failed_total_metric).to receive(:increment)
expect { middleware.call(worker, {}, :test) { raise } }.to raise_error
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment