Commit 7c386e84 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Release build trace lock after build status commit

This commit makes it possible to run hooks precisely after we commit a
build status change to a database, but before we run after commit hooks
that schedule asynchronous workers. This makes it possible to avoid race
conditions.
parent 47475322
...@@ -327,6 +327,8 @@ module Ci ...@@ -327,6 +327,8 @@ module Ci
after_transition any => [:success, :failed, :canceled] do |build| after_transition any => [:success, :failed, :canceled] do |build|
build.run_after_commit do build.run_after_commit do
build.run_status_commit_hooks!
BuildFinishedWorker.perform_async(id) BuildFinishedWorker.perform_async(id)
end end
end end
...@@ -963,8 +965,24 @@ module Ci ...@@ -963,8 +965,24 @@ module Ci
pending_state.try(:delete) pending_state.try(:delete)
end end
def run_on_status_commit(&block)
status_commit_hooks.push(block)
end
protected
def run_status_commit_hooks!
status_commit_hooks.reverse_each do |hook|
instance_eval(&hook)
end
end
private private
def status_commit_hooks
@status_commit_hooks ||= []
end
def auto_retry def auto_retry
strong_memoize(:auto_retry) do strong_memoize(:auto_retry) do
Gitlab::Ci::Build::AutoRetry.new(self) Gitlab::Ci::Build::AutoRetry.new(self)
......
...@@ -151,9 +151,27 @@ module Ci ...@@ -151,9 +151,27 @@ module Ci
build.pending_state build.pending_state
end end
##
# This method is releasing an exclusive lock on a build trace the moment we
# conclude that build status has been written and the build state update
# has been committed to the database.
#
# Because a build state machine schedules a bunch of workers to run after
# build status transition to complete, we do not want to keep the lease
# until all the workers are scheduled because it opens a possibility of
# race conditions happening.
#
# Instead of keeping the lease until the transition is fully done and
# workers are scheduled, we immediately release the lock after the database
# commit happens.
#
def in_build_trace_lock(&block) def in_build_trace_lock(&block)
build.trace.lock(&block) # rubocop:disable CodeReuse/ActiveRecord build.trace.lock do |_, lease| # rubocop:disable CodeReuse/ActiveRecord
rescue FailedToObtainLockError build.run_on_status_commit { lease.cancel }
yield
end
rescue ::Gitlab::Ci::Trace::LockedError
metrics.increment_trace_operation(operation: :locked) metrics.increment_trace_operation(operation: :locked)
accept_build_state! accept_build_state!
......
...@@ -35,7 +35,7 @@ module Gitlab ...@@ -35,7 +35,7 @@ module Gitlab
lease.obtain(1 + retries) lease.obtain(1 + retries)
yield(lease.retried?) yield(lease.retried?, lease)
ensure ensure
lease&.cancel lease&.cancel
end end
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::Ci::Trace::Checksum do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::Ci::Trace::Checksum do
subject { described_class.new(build) } subject { described_class.new(build) }
context 'when build pending state exits' do context 'when build pending state exists' do
before do before do
create(:ci_build_pending_state, build: build, trace_checksum: 'crc32:3564598592') create(:ci_build_pending_state, build: build, trace_checksum: 'crc32:3564598592')
end end
......
...@@ -4652,4 +4652,24 @@ RSpec.describe Ci::Build do ...@@ -4652,4 +4652,24 @@ RSpec.describe Ci::Build do
it { is_expected.to be_nil } it { is_expected.to be_nil }
end end
end end
describe '#run_on_status_commit' do
it 'runs provided hook after status commit' do
action = spy('action')
build.run_on_status_commit { action.perform! }
build.success!
expect(action).to have_received(:perform!).once
end
it 'does not run hooks when status has not changed' do
action = spy('action')
build.run_on_status_commit { action.perform! }
build.save!
expect(action).not_to have_received(:perform!)
end
end
end end
...@@ -120,19 +120,15 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do ...@@ -120,19 +120,15 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
context 'when runner retries request after receiving 202' do context 'when runner retries request after receiving 202' do
it 'responds with 202 and then with 200', :sidekiq_inline do it 'responds with 202 and then with 200', :sidekiq_inline do
perform_enqueued_jobs do
update_job(state: 'success', checksum: 'crc32:12345678') update_job(state: 'success', checksum: 'crc32:12345678')
end
expect(job.reload.pending_state).to be_present
expect(response).to have_gitlab_http_status(:accepted) expect(response).to have_gitlab_http_status(:accepted)
expect(job.reload.pending_state).to be_present
perform_enqueued_jobs do
update_job(state: 'success', checksum: 'crc32:12345678') update_job(state: 'success', checksum: 'crc32:12345678')
end
expect(job.reload.pending_state).not_to be_present
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(job.reload.pending_state).not_to be_present
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment