Commit 50a2078c authored by Oswaldo Ferreira's avatar Oswaldo Ferreira

Record enqueuing rate for Sidekiq jobs

It introduces a MetricsClient middleware which
records the job being enqueued in a Counter
in Prometheus.
parent 0276f654
...@@ -10,7 +10,7 @@ module Gitlab ...@@ -10,7 +10,7 @@ module Gitlab
def self.server_configurator(metrics: true, arguments_logger: true, memory_killer: true, request_store: true) def self.server_configurator(metrics: true, arguments_logger: true, memory_killer: true, request_store: true)
lambda do |chain| lambda do |chain|
chain.add Gitlab::SidekiqMiddleware::Monitor chain.add Gitlab::SidekiqMiddleware::Monitor
chain.add Gitlab::SidekiqMiddleware::Metrics if metrics chain.add Gitlab::SidekiqMiddleware::ServerMetrics if metrics
chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger chain.add Gitlab::SidekiqMiddleware::ArgumentsLogger if arguments_logger
chain.add Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer chain.add Gitlab::SidekiqMiddleware::MemoryKiller if memory_killer
chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware if request_store chain.add Gitlab::SidekiqMiddleware::RequestStoreMiddleware if request_store
...@@ -27,6 +27,7 @@ module Gitlab ...@@ -27,6 +27,7 @@ module Gitlab
def self.client_configurator def self.client_configurator
lambda do |chain| lambda do |chain|
chain.add Gitlab::SidekiqStatus::ClientMiddleware chain.add Gitlab::SidekiqStatus::ClientMiddleware
chain.add Gitlab::SidekiqMiddleware::ClientMetrics
chain.add Labkit::Middleware::Sidekiq::Client chain.add Labkit::Middleware::Sidekiq::Client
end end
end end
......
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
class ClientMetrics < SidekiqMiddleware::Metrics
ENQUEUED = :sidekiq_enqueued_jobs_total
def initialize
@metrics = init_metrics
end
def call(worker, _job, queue, _redis_pool)
labels = create_labels(worker.class, queue)
@metrics.fetch(ENQUEUED).increment(labels, 1)
yield
end
private
def init_metrics
{
ENQUEUED => ::Gitlab::Metrics.counter(ENQUEUED, 'Sidekiq jobs enqueued')
}
end
end
end
end
...@@ -3,68 +3,11 @@ ...@@ -3,68 +3,11 @@
module Gitlab module Gitlab
module SidekiqMiddleware module SidekiqMiddleware
class Metrics class Metrics
# SIDEKIQ_LATENCY_BUCKETS are latency histogram buckets better suited to Sidekiq
# timeframes than the DEFAULT_BUCKET definition. Defined in seconds.
SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze
TRUE_LABEL = "yes" TRUE_LABEL = "yes"
FALSE_LABEL = "no" FALSE_LABEL = "no"
def initialize
@metrics = init_metrics
@metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
end
def call(worker, job, queue)
labels = create_labels(worker.class, queue)
queue_duration = ::Gitlab::InstrumentationHelper.queue_duration_for_job(job)
@metrics[:sidekiq_jobs_queue_duration_seconds].observe(labels, queue_duration) if queue_duration
@metrics[:sidekiq_running_jobs].increment(labels, 1)
if job['retry_count'].present?
@metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
end
job_succeeded = false
monotonic_time_start = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_start = get_thread_cputime
begin
yield
job_succeeded = true
ensure
monotonic_time_end = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_end = get_thread_cputime
monotonic_time = monotonic_time_end - monotonic_time_start
job_thread_cputime = job_thread_cputime_end - job_thread_cputime_start
# sidekiq_running_jobs, sidekiq_jobs_failed_total should not include the job_status label
@metrics[:sidekiq_running_jobs].increment(labels, -1)
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1) unless job_succeeded
# job_status: done, fail match the job_status attribute in structured logging
labels[:job_status] = job_succeeded ? "done" : "fail"
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time)
end
end
private private
def init_metrics
{
sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
}
end
def create_labels(worker_class, queue) def create_labels(worker_class, queue)
labels = { queue: queue.to_s, latency_sensitive: FALSE_LABEL, external_dependencies: FALSE_LABEL, feature_category: "", boundary: "" } labels = { queue: queue.to_s, latency_sensitive: FALSE_LABEL, external_dependencies: FALSE_LABEL, feature_category: "", boundary: "" }
return labels unless worker_class.include? WorkerAttributes return labels unless worker_class.include? WorkerAttributes
...@@ -84,10 +27,6 @@ module Gitlab ...@@ -84,10 +27,6 @@ module Gitlab
def bool_as_label(value) def bool_as_label(value)
value ? TRUE_LABEL : FALSE_LABEL value ? TRUE_LABEL : FALSE_LABEL
end end
def get_thread_cputime
defined?(Process::CLOCK_THREAD_CPUTIME_ID) ? Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) : 0
end
end end
end end
end end
# frozen_string_literal: true
module Gitlab
module SidekiqMiddleware
class ServerMetrics < SidekiqMiddleware::Metrics
# SIDEKIQ_LATENCY_BUCKETS are latency histogram buckets better suited to Sidekiq
# timeframes than the DEFAULT_BUCKET definition. Defined in seconds.
SIDEKIQ_LATENCY_BUCKETS = [0.1, 0.25, 0.5, 1, 2.5, 5, 10, 60, 300, 600].freeze
def initialize
@metrics = init_metrics
@metrics[:sidekiq_concurrency].set({}, Sidekiq.options[:concurrency].to_i)
end
def call(worker, job, queue)
labels = create_labels(worker.class, queue)
queue_duration = ::Gitlab::InstrumentationHelper.queue_duration_for_job(job)
@metrics[:sidekiq_jobs_queue_duration_seconds].observe(labels, queue_duration) if queue_duration
@metrics[:sidekiq_running_jobs].increment(labels, 1)
if job['retry_count'].present?
@metrics[:sidekiq_jobs_retried_total].increment(labels, 1)
end
job_succeeded = false
monotonic_time_start = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_start = get_thread_cputime
begin
yield
job_succeeded = true
ensure
monotonic_time_end = Gitlab::Metrics::System.monotonic_time
job_thread_cputime_end = get_thread_cputime
monotonic_time = monotonic_time_end - monotonic_time_start
job_thread_cputime = job_thread_cputime_end - job_thread_cputime_start
# sidekiq_running_jobs, sidekiq_jobs_failed_total should not include the job_status label
@metrics[:sidekiq_running_jobs].increment(labels, -1)
@metrics[:sidekiq_jobs_failed_total].increment(labels, 1) unless job_succeeded
# job_status: done, fail match the job_status attribute in structured logging
labels[:job_status] = job_succeeded ? "done" : "fail"
@metrics[:sidekiq_jobs_cpu_seconds].observe(labels, job_thread_cputime)
@metrics[:sidekiq_jobs_completion_seconds].observe(labels, monotonic_time)
end
end
private
def init_metrics
{
sidekiq_jobs_cpu_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_cpu_seconds, 'Seconds of cpu time to run Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_completion_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_completion_seconds, 'Seconds to complete Sidekiq job', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_queue_duration_seconds: ::Gitlab::Metrics.histogram(:sidekiq_jobs_queue_duration_seconds, 'Duration in seconds that a Sidekiq job was queued before being executed', {}, SIDEKIQ_LATENCY_BUCKETS),
sidekiq_jobs_failed_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_failed_total, 'Sidekiq jobs failed'),
sidekiq_jobs_retried_total: ::Gitlab::Metrics.counter(:sidekiq_jobs_retried_total, 'Sidekiq jobs retried'),
sidekiq_running_jobs: ::Gitlab::Metrics.gauge(:sidekiq_running_jobs, 'Number of Sidekiq jobs running', {}, :all),
sidekiq_concurrency: ::Gitlab::Metrics.gauge(:sidekiq_concurrency, 'Maximum number of Sidekiq jobs', {}, :all)
}
end
def get_thread_cputime
defined?(Process::CLOCK_THREAD_CPUTIME_ID) ? Process.clock_gettime(Process::CLOCK_THREAD_CPUTIME_ID) : 0
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::SidekiqMiddleware::ClientMetrics do
context "with worker attribution" do
subject { described_class.new }
let(:queue) { :test }
let(:worker_class) { worker.class }
let(:job) { {} }
let(:default_labels) { { queue: queue.to_s, boundary: "", external_dependencies: "no", feature_category: "", latency_sensitive: "no" } }
shared_examples "a metrics client middleware" do
context "with mocked prometheus" do
let(:enqueued_jobs_metric) { double('enqueued jobs metric', increment: true) }
before do
allow(Gitlab::Metrics).to receive(:counter).with(described_class::ENQUEUED, anything).and_return(enqueued_jobs_metric)
end
describe '#call' do
it 'yields block' do
expect { |b| subject.call(worker, job, :test, double, &b) }.to yield_control.once
end
it 'increments enqueued jobs metric' do
expect(enqueued_jobs_metric).to receive(:increment).with(labels, 1)
subject.call(worker, job, :test, double) { nil }
end
end
end
end
context "when workers are not attributed" do
class TestNonAttributedWorker
include Sidekiq::Worker
end
it_behaves_like "a metrics client middleware" do
let(:worker) { TestNonAttributedWorker.new }
let(:labels) { default_labels }
end
end
context "when workers are attributed" do
def create_attributed_worker_class(latency_sensitive, external_dependencies, resource_boundary, category)
Class.new do
include Sidekiq::Worker
include WorkerAttributes
latency_sensitive_worker! if latency_sensitive
worker_has_external_dependencies! if external_dependencies
worker_resource_boundary resource_boundary unless resource_boundary == :unknown
feature_category category unless category.nil?
end
end
let(:latency_sensitive) { false }
let(:external_dependencies) { false }
let(:resource_boundary) { :unknown }
let(:feature_category) { nil }
let(:worker_class) { create_attributed_worker_class(latency_sensitive, external_dependencies, resource_boundary, feature_category) }
let(:worker) { worker_class.new }
context "latency sensitive" do
it_behaves_like "a metrics client middleware" do
let(:latency_sensitive) { true }
let(:labels) { default_labels.merge(latency_sensitive: "yes") }
end
end
context "external dependencies" do
it_behaves_like "a metrics client middleware" do
let(:external_dependencies) { true }
let(:labels) { default_labels.merge(external_dependencies: "yes") }
end
end
context "cpu boundary" do
it_behaves_like "a metrics client middleware" do
let(:resource_boundary) { :cpu }
let(:labels) { default_labels.merge(boundary: "cpu") }
end
end
context "memory boundary" do
it_behaves_like "a metrics client middleware" do
let(:resource_boundary) { :memory }
let(:labels) { default_labels.merge(boundary: "memory") }
end
end
context "feature category" do
it_behaves_like "a metrics client middleware" do
let(:feature_category) { :authentication }
let(:labels) { default_labels.merge(feature_category: "authentication") }
end
end
context "combined" do
it_behaves_like "a metrics client middleware" do
let(:latency_sensitive) { true }
let(:external_dependencies) { true }
let(:resource_boundary) { :cpu }
let(:feature_category) { :authentication }
let(:labels) { default_labels.merge(latency_sensitive: "yes", external_dependencies: "yes", boundary: "cpu", feature_category: "authentication") }
end
end
end
end
end
# frozen_string_literal: true # frozen_string_literal: true
require 'fast_spec_helper' require 'spec_helper'
describe Gitlab::SidekiqMiddleware::Metrics do describe Gitlab::SidekiqMiddleware::ServerMetrics do
context "with worker attribution" do context "with worker attribution" do
subject { described_class.new } subject { described_class.new }
......
...@@ -41,7 +41,7 @@ describe Gitlab::SidekiqMiddleware do ...@@ -41,7 +41,7 @@ describe Gitlab::SidekiqMiddleware do
Labkit::Middleware::Sidekiq::Server, Labkit::Middleware::Sidekiq::Server,
Gitlab::SidekiqMiddleware::InstrumentationLogger, Gitlab::SidekiqMiddleware::InstrumentationLogger,
Gitlab::SidekiqStatus::ServerMiddleware, Gitlab::SidekiqStatus::ServerMiddleware,
Gitlab::SidekiqMiddleware::Metrics, Gitlab::SidekiqMiddleware::ServerMetrics,
Gitlab::SidekiqMiddleware::ArgumentsLogger, Gitlab::SidekiqMiddleware::ArgumentsLogger,
Gitlab::SidekiqMiddleware::MemoryKiller, Gitlab::SidekiqMiddleware::MemoryKiller,
Gitlab::SidekiqMiddleware::RequestStoreMiddleware Gitlab::SidekiqMiddleware::RequestStoreMiddleware
...@@ -74,7 +74,7 @@ describe Gitlab::SidekiqMiddleware do ...@@ -74,7 +74,7 @@ describe Gitlab::SidekiqMiddleware do
let(:request_store) { false } let(:request_store) { false }
let(:disabled_sidekiq_middlewares) do let(:disabled_sidekiq_middlewares) do
[ [
Gitlab::SidekiqMiddleware::Metrics, Gitlab::SidekiqMiddleware::ServerMetrics,
Gitlab::SidekiqMiddleware::ArgumentsLogger, Gitlab::SidekiqMiddleware::ArgumentsLogger,
Gitlab::SidekiqMiddleware::MemoryKiller, Gitlab::SidekiqMiddleware::MemoryKiller,
Gitlab::SidekiqMiddleware::RequestStoreMiddleware Gitlab::SidekiqMiddleware::RequestStoreMiddleware
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment