Commit 68a9e9a7 authored by Grzegorz Bizon's avatar Grzegorz Bizon Committed by Marius Bobin

Remove pending builds from the queue on conflict

When the new denormalized pending builds table is enabled,
database-level consistency guarantees are reduced because of the
denormalization.

Here we are improving our resiliency mechanisms by properly reconciling
the database state in case of a conflicting data being detected. We
remove a queuing entry of a build that has already been processed.

Changelog: fixed
parent 378ed082
......@@ -156,6 +156,18 @@ module Ci
def process_build(build, params)
unless build.pending?
@metrics.increment_queue_operation(:build_not_pending)
if Feature.enabled?(:ci_pending_builds_table_resiliency, default_enabled: :yaml)
##
# If this build can not be picked because we had stale data in
# `ci_pending_builds` table, we need to respond with 409 to retry
# this operation.
#
if ::Ci::UpdateBuildQueueService.new.remove!(build)
return Result.new(nil, nil, false)
end
end
return
end
......
......@@ -37,14 +37,19 @@ module Ci
raise InvalidQueueTransition unless transition.from == 'pending'
transition.within_transaction do
removed = build.all_queuing_entries.delete_all
transition.within_transaction { remove!(build) }
end
if removed > 0
metrics.increment_queue_operation(:build_queue_pop)
##
# Force recemove build from the queue, without checking a transition state
#
def remove!(build)
removed = build.all_queuing_entries.delete_all
build.id
end
if removed > 0
metrics.increment_queue_operation(:build_queue_pop)
build.id
end
end
......
---
name: ci_pending_builds_table_resiliency
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/84359
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/357988
milestone: '14.10'
type: development
group: group::pipeline execution
default_enabled: true
......@@ -785,6 +785,25 @@ module Ci
include_examples 'handles runner assignment'
end
context 'when a conflicting data is stored in denormalized table' do
let!(:specific_runner) { create(:ci_runner, :project, projects: [project], tag_list: %w[conflict]) }
let!(:pending_job) { create(:ci_build, :pending, :queued, pipeline: pipeline, tag_list: %w[conflict]) }
before do
pending_job.update_column(:status, :running)
end
it 'removes queuing entry upon build assignment attempt' do
expect(pending_job.reload).to be_running
expect(pending_job.queuing_entry).to be_present
result = described_class.new(specific_runner).execute
expect(result).not_to be_valid
expect(pending_job.reload.queuing_entry).not_to be_present
end
end
end
context 'when not using pending builds table' do
......
......@@ -103,6 +103,28 @@ RSpec.describe Ci::UpdateBuildQueueService do
end
end
end
describe '#remove!' do
context 'when pending build exists' do
before do
create(:ci_pending_build, build: build, project: build.project)
end
it 'removes pending build in a transaction' do
dequeued = subject.remove!(build)
expect(dequeued).to eq build.id
end
end
context 'when pending build does not exist' do
it 'does nothing if there is no pending build to remove' do
dequeued = subject.remove!(build)
expect(dequeued).to be_nil
end
end
end
end
describe 'shared runner builds tracking' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment