Commit 4d34cc9f authored by Sean McGivern's avatar Sean McGivern

Merge branch...

Merge branch 'qmnguyen0711/640-sidekiq-jobretry-skip-should-not-mask-the-underlying-sidekiq-error' into 'master'

Unwrap Sidekiq exceptions and jobs

See merge request gitlab-org/gitlab!53006
parents 9b900fa7 96fdb1b0
---
title: Unwrap Sidekiq exceptions and jobs in the structured logs and metrics
merge_request: 53006
author:
type: changed
...@@ -31,9 +31,9 @@ Sidekiq.configure_server do |config| ...@@ -31,9 +31,9 @@ Sidekiq.configure_server do |config|
Sidekiq.logger.formatter = Gitlab::SidekiqLogging::JSONFormatter.new Sidekiq.logger.formatter = Gitlab::SidekiqLogging::JSONFormatter.new
config.options[:job_logger] = Gitlab::SidekiqLogging::StructuredLogger config.options[:job_logger] = Gitlab::SidekiqLogging::StructuredLogger
# Remove the default-provided handler # Remove the default-provided handler. The exception is logged inside
# Gitlab::SidekiqLogging::StructuredLogger
config.error_handlers.reject! { |handler| handler.is_a?(Sidekiq::ExceptionHandler::Logger) } config.error_handlers.reject! { |handler| handler.is_a?(Sidekiq::ExceptionHandler::Logger) }
config.error_handlers << Gitlab::SidekiqLogging::ExceptionHandler.new
end end
config.redis = queues_config_hash config.redis = queues_config_hash
......
...@@ -6,7 +6,7 @@ module Gitlab ...@@ -6,7 +6,7 @@ module Gitlab
include ::Gitlab::SidekiqMiddleware::MetricsHelper include ::Gitlab::SidekiqMiddleware::MetricsHelper
def handler(job, _exception) def handler(job, _exception)
labels = create_labels(job['class'].constantize, job['queue']) labels = create_labels(job['class'].constantize, job['queue'], job)
counter.increment(labels) counter.increment(labels)
end end
......
# frozen_string_literal: true
module Gitlab
module SidekiqLogging
class ExceptionHandler
def call(job_exception, context)
data = {
error_class: job_exception.class.name,
error_message: job_exception.message
}
if context.is_a?(Hash)
data.merge!(context)
# correlation_id, jid, and class are available inside the job
# Hash, so promote these arguments to the root tree so that
# can be searched alongside other Sidekiq log messages.
job_data = data.delete(:job)
data.merge!(job_data) if job_data.present?
end
data[:error_backtrace] = Rails.backtrace_cleaner.clean(job_exception.backtrace) if job_exception.backtrace.present?
Sidekiq.logger.warn(data)
end
end
end
end
...@@ -12,6 +12,7 @@ module Gitlab ...@@ -12,6 +12,7 @@ module Gitlab
# Error information from the previous try is in the payload for # Error information from the previous try is in the payload for
# displaying in the Sidekiq UI, but is very confusing in logs! # displaying in the Sidekiq UI, but is very confusing in logs!
job = job.except('error_backtrace', 'error_class', 'error_message') job = job.except('error_backtrace', 'error_class', 'error_message')
job['class'] = job.delete('wrapped') if job['wrapped'].present?
# Add process id params # Add process id params
job['pid'] = ::Process.pid job['pid'] = ::Process.pid
......
...@@ -18,6 +18,17 @@ module Gitlab ...@@ -18,6 +18,17 @@ module Gitlab
yield yield
Sidekiq.logger.info log_job_done(job, started_time, base_payload) Sidekiq.logger.info log_job_done(job, started_time, base_payload)
rescue Sidekiq::JobRetry::Handled => job_exception
# Sidekiq::JobRetry::Handled is raised by the internal Sidekiq
# processor. It is a wrapper around real exception indicating an
# exception is already handled by the Job retrier. The real exception
# should be unwrapped before being logged.
#
# For more information:
# https://github.com/mperham/sidekiq/blob/v5.2.7/lib/sidekiq/processor.rb#L173
Sidekiq.logger.warn log_job_done(job, started_time, base_payload, job_exception.cause || job_exception)
raise
rescue => job_exception rescue => job_exception
Sidekiq.logger.warn log_job_done(job, started_time, base_payload, job_exception) Sidekiq.logger.warn log_job_done(job, started_time, base_payload, job_exception)
...@@ -68,6 +79,7 @@ module Gitlab ...@@ -68,6 +79,7 @@ module Gitlab
payload['job_status'] = 'fail' payload['job_status'] = 'fail'
payload['error_message'] = job_exception.message payload['error_message'] = job_exception.message
payload['error_class'] = job_exception.class.name payload['error_class'] = job_exception.class.name
add_exception_backtrace!(job_exception, payload)
else else
payload['message'] = "#{message}: done: #{payload['duration_s']} sec" payload['message'] = "#{message}: done: #{payload['duration_s']} sec"
payload['job_status'] = 'done' payload['job_status'] = 'done'
...@@ -88,6 +100,12 @@ module Gitlab ...@@ -88,6 +100,12 @@ module Gitlab
payload['completed_at'] = Time.now.utc.to_f payload['completed_at'] = Time.now.utc.to_f
end end
def add_exception_backtrace!(job_exception, payload)
return if job_exception.backtrace.blank?
payload['error_backtrace'] = Rails.backtrace_cleaner.clean(job_exception.backtrace)
end
def elapsed(t0) def elapsed(t0)
t1 = get_time t1 = get_time
{ {
......
...@@ -11,10 +11,10 @@ module Gitlab ...@@ -11,10 +11,10 @@ module Gitlab
@metrics = init_metrics @metrics = init_metrics
end end
def call(worker_class, _job, queue, _redis_pool) def call(worker_class, job, queue, _redis_pool)
# worker_class can either be the string or class of the worker being enqueued. # worker_class can either be the string or class of the worker being enqueued.
worker_class = worker_class.safe_constantize if worker_class.respond_to?(:safe_constantize) worker_class = worker_class.safe_constantize if worker_class.respond_to?(:safe_constantize)
labels = create_labels(worker_class, queue) labels = create_labels(worker_class, queue, job)
@metrics.fetch(ENQUEUED).increment(labels, 1) @metrics.fetch(ENQUEUED).increment(labels, 1)
......
...@@ -8,9 +8,11 @@ module Gitlab ...@@ -8,9 +8,11 @@ module Gitlab
private private
def create_labels(worker_class, queue) def create_labels(worker_class, queue, job)
worker_name = (job['wrapped'].presence || worker_class).to_s
labels = { queue: queue.to_s, labels = { queue: queue.to_s,
worker: worker_class.to_s, worker: worker_name,
urgency: "", urgency: "",
external_dependencies: FALSE_LABEL, external_dependencies: FALSE_LABEL,
feature_category: "", feature_category: "",
......
...@@ -20,7 +20,7 @@ module Gitlab ...@@ -20,7 +20,7 @@ module Gitlab
# in metrics and can use them in the `ThreadsSampler` for setting a label # in metrics and can use them in the `ThreadsSampler` for setting a label
Thread.current.name ||= Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME Thread.current.name ||= Gitlab::Metrics::Samplers::ThreadsSampler::SIDEKIQ_WORKER_THREAD_NAME
labels = create_labels(worker.class, queue) labels = create_labels(worker.class, queue, job)
queue_duration = ::Gitlab::InstrumentationHelper.queue_duration_for_job(job) queue_duration = ::Gitlab::InstrumentationHelper.queue_duration_for_job(job)
@metrics[:sidekiq_jobs_queue_duration_seconds].observe(labels, queue_duration) if queue_duration @metrics[:sidekiq_jobs_queue_duration_seconds].observe(labels, queue_duration) if queue_duration
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::SidekiqLogging::ExceptionHandler do
describe '#call' do
let(:job) do
{
"class" => "TestWorker",
"args" => [1234, 'hello'],
"retry" => false,
"queue" => "cronjob:test_queue",
"queue_namespace" => "cronjob",
"jid" => "da883554ee4fe414012f5f42",
"correlation_id" => 'cid'
}
end
let(:exception_message) { 'An error was thrown' }
let(:backtrace) { caller }
let(:exception) { RuntimeError.new(exception_message) }
let(:logger) { double }
before do
allow(Sidekiq).to receive(:logger).and_return(logger)
allow(exception).to receive(:backtrace).and_return(backtrace)
end
subject { described_class.new.call(exception, { context: 'Test', job: job }) }
it 'logs job data into root tree' do
expected_data = job.merge(
error_class: 'RuntimeError',
error_message: exception_message,
context: 'Test',
error_backtrace: Rails.backtrace_cleaner.clean(backtrace)
)
expect(logger).to receive(:warn).with(expected_data)
subject
end
end
end
...@@ -59,7 +59,8 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do ...@@ -59,7 +59,8 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: fail: 0.0 sec', 'message' => 'TestWorker JID-da883554ee4fe414012f5f42: fail: 0.0 sec',
'job_status' => 'fail', 'job_status' => 'fail',
'error_class' => 'ArgumentError', 'error_class' => 'ArgumentError',
'error_message' => 'some exception' 'error_message' => 'Something went wrong',
'error_backtrace' => be_a(Array).and(be_present)
) )
end end
...@@ -89,21 +90,92 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do ...@@ -89,21 +90,92 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end end
end end
it 'logs real job wrapped by active job worker' do
wrapped_job = job.merge(
"class" => "ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper",
"wrapped" => "TestWorker"
)
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload).ordered
expect(logger).to receive(:info).with(end_payload).ordered
expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original
subject.call(wrapped_job, 'test_queue') {}
end
end
it 'logs an exception in job' do it 'logs an exception in job' do
Timecop.freeze(timestamp) do Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload) expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(hash_including(exception_payload)) expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original expect(subject).to receive(:log_job_done).and_call_original
expect do expect do
subject.call(job, 'test_queue') do subject.call(job, 'test_queue') do
raise ArgumentError, 'some exception' raise ArgumentError, 'Something went wrong'
end end
end.to raise_error(ArgumentError) end.to raise_error(ArgumentError)
end end
end end
it 'logs the root cause of an Sidekiq::JobRetry::Skip exception in the job' do
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original
expect do
subject.call(job, 'test_queue') do
raise ArgumentError, 'Something went wrong'
rescue
raise Sidekiq::JobRetry::Skip
end
end.to raise_error(Sidekiq::JobRetry::Skip)
end
end
it 'logs the root cause of an Sidekiq::JobRetry::Handled exception in the job' do
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(include(exception_payload))
expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original
expect do
subject.call(job, 'test_queue') do
raise ArgumentError, 'Something went wrong'
rescue
raise Sidekiq::JobRetry::Handled
end
end.to raise_error(Sidekiq::JobRetry::Handled)
end
end
it 'keeps Sidekiq::JobRetry::Handled exception if the cause does not exist' do
Timecop.freeze(timestamp) do
expect(logger).to receive(:info).with(start_payload)
expect(logger).to receive(:warn).with(
include(
'message' => 'TestWorker JID-da883554ee4fe414012f5f42: fail: 0.0 sec',
'job_status' => 'fail',
'error_class' => 'Sidekiq::JobRetry::Skip',
'error_message' => 'Sidekiq::JobRetry::Skip'
)
)
expect(subject).to receive(:log_job_start).and_call_original
expect(subject).to receive(:log_job_done).and_call_original
expect do
subject.call(job, 'test_queue') do
raise Sidekiq::JobRetry::Skip
end
end.to raise_error(Sidekiq::JobRetry::Skip)
end
end
it 'does not modify the job' do it 'does not modify the job' do
Timecop.freeze(timestamp) do Timecop.freeze(timestamp) do
job_copy = job.deep_dup job_copy = job.deep_dup
...@@ -117,6 +189,24 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do ...@@ -117,6 +189,24 @@ RSpec.describe Gitlab::SidekiqLogging::StructuredLogger do
end end
end end
end end
it 'does not modify the wrapped job' do
Timecop.freeze(timestamp) do
wrapped_job = job.merge(
"class" => "ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper",
"wrapped" => "TestWorker"
)
job_copy = wrapped_job.deep_dup
allow(logger).to receive(:info)
allow(subject).to receive(:log_job_start).and_call_original
allow(subject).to receive(:log_job_done).and_call_original
subject.call(wrapped_job, 'test_queue') do
expect(wrapped_job).to eq(job_copy)
end
end
end
end end
context 'with SIDEKIQ_LOG_ARGUMENTS disabled' do context 'with SIDEKIQ_LOG_ARGUMENTS disabled' do
......
...@@ -60,6 +60,27 @@ RSpec.describe Gitlab::SidekiqMiddleware::ClientMetrics do ...@@ -60,6 +60,27 @@ RSpec.describe Gitlab::SidekiqMiddleware::ClientMetrics do
end end
end end
context "when a worker is wrapped into ActiveJob" do
before do
stub_const('TestWrappedWorker', Class.new)
TestWrappedWorker.class_eval do
include Sidekiq::Worker
end
end
it_behaves_like "a metrics client middleware" do
let(:job) do
{
"class" => ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper,
"wrapped" => TestWrappedWorker
}
end
let(:worker) { TestWrappedWorker.new }
let(:labels) { default_labels.merge(urgency: "") }
end
end
context "when workers are attributed" do context "when workers are attributed" do
def create_attributed_worker_class(urgency, external_dependencies, resource_boundary, category) def create_attributed_worker_class(urgency, external_dependencies, resource_boundary, category)
klass = Class.new do klass = Class.new do
......
...@@ -198,6 +198,28 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do ...@@ -198,6 +198,28 @@ RSpec.describe Gitlab::SidekiqMiddleware::ServerMetrics do
it_behaves_like "a metrics middleware" it_behaves_like "a metrics middleware"
end end
context "when a worker is wrapped into ActiveJob" do
before do
stub_const('TestWrappedWorker', Class.new)
TestWrappedWorker.class_eval do
include Sidekiq::Worker
end
end
let(:job) do
{
"class" => ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper,
"wrapped" => TestWrappedWorker
}
end
let(:worker) { TestWrappedWorker.new }
let(:worker_class) { TestWrappedWorker }
let(:labels) { default_labels.merge(urgency: "") }
it_behaves_like "a metrics middleware"
end
context "when workers are attributed" do context "when workers are attributed" do
def create_attributed_worker_class(urgency, external_dependencies, resource_boundary, category) def create_attributed_worker_class(urgency, external_dependencies, resource_boundary, category)
Class.new do Class.new do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment