Commit 1d138989 authored by Qingyu Zhao's avatar Qingyu Zhao

Add Sidekiq job CPU time prometheus metrics

parent 670fa76a
......@@ -19,10 +19,16 @@ module Gitlab
@metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
end
job_thread_cputime_start = get_thread_cputime
realtime = Benchmark.realtime do
yield
end
job_thread_cputime_end = get_thread_cputime
job_thread_cputime = job_thread_cputime_end - job_thread_cputime_start
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, realtime)
rescue Exception # rubocop: disable Lint/RescueException
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1)
......@@ -35,6 +41,7 @@ module Gitlab
def init_metrics
{
sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
......@@ -47,6 +54,10 @@ module Gitlab
queue: queue
}
end
def get_thread_cputime
defined?(Process::CLOCK_THREAD_CPUTIME_ID) ? Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) : 0
end
end
end
end
......@@ -8,12 +8,14 @@ describe Gitlab::SidekiqMiddleware::Metrics do
let(:worker) { double(:worker) }
let(:completion_seconds_metric) { double('completion seconds metric') }
let(:user_execution_seconds_metric) { double('user execution seconds metric') }
let(:failed_total_metric) { double('failed total metric') }
let(:retried_total_metric) { double('retried total metric') }
let(:running_jobs_metric) { double('running jobs metric') }
before do
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_completion_seconds, anything, anything, anything).and_return(completion_seconds_metric)
allow(Gitlab::Metrics).to receive(:histogram).with(:sidekiq_jobs_cpu_seconds, anything, anything, anything).and_return(user_execution_seconds_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_failed_total, anything).and_return(failed_total_metric)
allow(Gitlab::Metrics).to receive(:counter).with(:sidekiq_jobs_retried_total, anything).and_return(retried_total_metric)
allow(Gitlab::Metrics).to receive(:gauge).with(:sidekiq_running_jobs, anything, {}, :livesum).and_return(running_jobs_metric)
......@@ -23,13 +25,16 @@ describe Gitlab::SidekiqMiddleware::Metrics do
it 'yields block' do
allow(completion_seconds_metric).to receive(:observe)
allow(user_execution_seconds_metric).to receive(:observe)
expect { |b| middleware.call(worker, {}, :test, &b) }.to yield_control.once
end
it 'sets metrics' do
labels = { queue: :test }
allow(middleware).to receive(:get_thread_cputime).and_return(1, 3)
expect(user_execution_seconds_metric).to receive(:observe).with(labels, 2)
expect(running_jobs_metric).to receive(:increment).with(labels, 1)
expect(running_jobs_metric).to receive(:increment).with(labels, -1)
expect(completion_seconds_metric).to receive(:observe).with(labels, kind_of(Numeric))
......@@ -37,9 +42,17 @@ describe Gitlab::SidekiqMiddleware::Metrics do
middleware.call(worker, {}, :test) { nil }
end
it 'ignore user execution when measured 0' do
allow(completion_seconds_metric).to receive(:observe)
allow(middleware).to receive(:get_thread_cputime).and_return(0, 0)
expect(user_execution_seconds_metric).not_to receive(:observe)
end
context 'when job is retried' do
it 'sets sidekiq_jobs_retried_total metric' do
allow(completion_seconds_metric).to receive(:observe)
expect(user_execution_seconds_metric).to receive(:observe)
expect(retried_total_metric).to receive(:increment)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment