Commit d48fc458 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch '215613-handle-ci-minutes-reset-errors' into 'master'

Allow ci minutes reset service to continue in case of failure

See merge request gitlab-org/gitlab!32867
parents 246f7469 3c558b16
...@@ -7,12 +7,21 @@ module Ci ...@@ -7,12 +7,21 @@ module Ci
BATCH_SIZE = 1000.freeze BATCH_SIZE = 1000.freeze
def initialize
@errors = []
end
def execute!(ids_range: nil, batch_size: BATCH_SIZE) def execute!(ids_range: nil, batch_size: BATCH_SIZE)
relation = Namespace relation = Namespace
relation = relation.id_in(ids_range) if ids_range relation = relation.id_in(ids_range) if ids_range
relation.each_batch(of: batch_size) do |namespaces| relation.each_batch(of: batch_size) do |namespaces|
reset_ci_minutes!(namespaces) reset_ci_minutes!(namespaces)
end end
if @errors.any?
exception = BatchNotResetError.new('Some namespace shared runner minutes were not reset.')
Gitlab::ErrorTracking.track_and_raise_exception(exception, namespace_ranges: @errors)
end
end end
private private
...@@ -27,13 +36,7 @@ module Ci ...@@ -27,13 +36,7 @@ module Ci
reset_ci_minutes_notifications!(namespaces) reset_ci_minutes_notifications!(namespaces)
end end
rescue ActiveRecord::ActiveRecordError rescue ActiveRecord::ActiveRecordError
# We don't need to print a thousand of namespace_ids @errors << { count: namespaces.size, first_id: namespaces.first.id, last_id: namespaces.last.id }
# in the message if all batches failed.
# A small batch would be sufficient for investigation.
failed_namespace_ids = namespaces.limit(10).ids # rubocop: disable CodeReuse/ActiveRecord
raise BatchNotResetError.new(
"#{namespaces.size} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{failed_namespace_ids}")
end end
def recalculate_extra_shared_runners_minutes_limits!(namespaces) def recalculate_extra_shared_runners_minutes_limits!(namespaces)
......
---
title: Allow ci minutes reset service to continue in case of failure
merge_request: 32867
author:
type: other
...@@ -95,14 +95,29 @@ RSpec.describe Ci::Minutes::BatchResetService do ...@@ -95,14 +95,29 @@ RSpec.describe Ci::Minutes::BatchResetService do
let(:ids_range) { nil } let(:ids_range) { nil }
before do before do
allow(Namespace).to receive(:transaction).and_raise(ActiveRecord::ActiveRecordError) expect(Namespace).to receive(:transaction).once.ordered.and_raise(ActiveRecord::ActiveRecordError)
expect(Namespace).to receive(:transaction).once.ordered.and_call_original
end end
it 'decorates the error with more information' do it 'continues its progress' do
expect { subject } expect(service).to receive(:reset_ci_minutes!).with([namespace_1, namespace_2, namespace_3]).and_call_original
.to raise_error( expect(service).to receive(:reset_ci_minutes!).with([namespace_4, namespace_5, namespace_6]).and_call_original
Ci::Minutes::BatchResetService::BatchNotResetError,
'3 namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: [1, 2, 3]') expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception)
subject
end
it 'raises exception with namespace details' do
expect(Gitlab::ErrorTracking).to receive(
:track_and_raise_exception
).with(
Ci::Minutes::BatchResetService::BatchNotResetError.new(
'Some namespace shared runner minutes were not reset.'
),
{ namespace_ranges: [{ count: 3, first_id: 1, last_id: 3 }] }
).once.and_call_original
expect { subject }.to raise_error(Ci::Minutes::BatchResetService::BatchNotResetError)
end end
end end
end end
......
...@@ -50,7 +50,8 @@ RSpec.describe ClearSharedRunnersMinutesWorker do ...@@ -50,7 +50,8 @@ RSpec.describe ClearSharedRunnersMinutesWorker do
it 'raises an exception' do it 'raises an exception' do
expect { worker.perform }.to raise_error( expect { worker.perform }.to raise_error(
Ci::Minutes::BatchResetService::BatchNotResetError, Ci::Minutes::BatchResetService::BatchNotResetError,
"#{namespace_ids.count} namespace shared runner minutes were not reset and the transaction was rolled back. Namespace Ids: #{namespace_ids}") 'Some namespace shared runner minutes were not reset.'
)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment