Commit 8ab6cefe authored by Allison Browne

Further optimize pending stuck job query

Solve for high cold cache times using a lookback window in
the StuckCiJobsWorker.
parent 056675c6
......@@ -57,6 +57,9 @@ class CommitStatus < ApplicationRecord
# Statuses whose pipeline is one of the given pipelines.
scope :in_pipelines, ->(pipeline_set) { where(pipeline: pipeline_set) }
# Eager-loads the pipeline together with the project, its namespace and that
# namespace's route.
scope :eager_load_pipeline, -> do
  eager_load(:pipeline, project: { namespace: :route })
end
# Joins the pipeline association.
scope :with_pipeline, -> { joins(:pipeline) }
# Restricts to rows whose ci_builds.created_at AND ci_builds.updated_at both
# fall between `lookback` and `timeout` (SQL BETWEEN, so both bounds are
# inclusive). The same pair of bounds is bound twice, once per column.
scope :updated_before, ->(lookback:, timeout:) do
  window = [lookback, timeout]

  where('(ci_builds.created_at BETWEEN ? AND ?) AND (ci_builds.updated_at BETWEEN ? AND ?)', *window, *window)
end
scope :for_project_paths, -> (paths) do
where(project: Project.where_full_path_in(Array(paths)))
......
......@@ -15,43 +15,24 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
# Passed (as `.ago`) for `timeout:` when #perform selects pending builds to drop.
BUILD_PENDING_OUTDATED_TIMEOUT = 1.day
# Scheduled builds whose scheduled_at is older than this are selected for
# dropping with the :stale_schedule failure reason.
BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour
# Passed (as `.ago`) for `timeout:` when #perform selects pending builds for drop_stuck.
BUILD_PENDING_STUCK_TIMEOUT = 1.hour
# Passed (as `.ago`) for `lookback:` in both pending-build queries, i.e. the
# lower bound of the created_at/updated_at window they scan.
BUILD_LOOKBACK = 5.days
def perform
return unless try_obtain_lease
Gitlab::AppLogger.info "#{self.class}: Cleaning stuck builds"
drop(
Ci::Build.running.where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.updated_at < ?',
BUILD_RUNNING_OUTDATED_TIMEOUT.ago
),
failure_reason: :stuck_or_timeout_failure
)
drop(running_timed_out_builds, failure_reason: :stuck_or_timeout_failure)
drop(
Ci::Build.pending.where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.created_at < ? AND ci_builds.updated_at < ?',
BUILD_PENDING_OUTDATED_TIMEOUT.ago,
BUILD_PENDING_OUTDATED_TIMEOUT.ago
),
Ci::Build.pending.updated_before(lookback: BUILD_LOOKBACK.ago, timeout: BUILD_PENDING_OUTDATED_TIMEOUT.ago),
failure_reason: :stuck_or_timeout_failure
)
drop(
Ci::Build.where(status: :scheduled).where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.scheduled_at IS NOT NULL AND ci_builds.scheduled_at < ?',
BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago
),
failure_reason: :stale_schedule
)
drop(scheduled_timed_out_builds, failure_reason: :stale_schedule)
drop_stuck(
Ci::Build.pending.where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.created_at < ? AND ci_builds.updated_at < ?',
BUILD_PENDING_STUCK_TIMEOUT.ago,
BUILD_PENDING_STUCK_TIMEOUT.ago
),
Ci::Build.pending.updated_before(lookback: BUILD_LOOKBACK.ago, timeout: BUILD_PENDING_STUCK_TIMEOUT.ago),
failure_reason: :stuck_or_timeout_failure
)
......@@ -60,6 +41,20 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
private
# Builds in the `scheduled` status that have a scheduled_at value older than
# BUILD_SCHEDULED_OUTDATED_TIMEOUT.
def scheduled_timed_out_builds
  scheduled_builds = Ci::Build.where(status: :scheduled) # rubocop: disable CodeReuse/ActiveRecord
  cutoff = BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago

  scheduled_builds.where('ci_builds.scheduled_at IS NOT NULL AND ci_builds.scheduled_at < ?', cutoff) # rubocop: disable CodeReuse/ActiveRecord
end
# Running builds whose updated_at is older than BUILD_RUNNING_OUTDATED_TIMEOUT.
def running_timed_out_builds
  running_builds = Ci::Build.running
  cutoff = BUILD_RUNNING_OUTDATED_TIMEOUT.ago

  running_builds.where('ci_builds.updated_at < ?', cutoff) # rubocop: disable CodeReuse/ActiveRecord
end
# Tries to obtain the exclusive lease keyed by EXCLUSIVE_LEASE_KEY (30 minute
# timeout) and remembers the result in @uuid. #perform returns early when the
# value returned here is falsy.
def try_obtain_lease
  lease = Gitlab::ExclusiveLease.new(EXCLUSIVE_LEASE_KEY, timeout: 30.minutes)
  @uuid = lease.try_obtain
end
......
......@@ -79,6 +79,32 @@ RSpec.describe CommitStatus do
end
end
describe '.updated_before' do
  # Bounds of the window handed to the scope.
  let!(:lookback) { 5.days.ago }
  let!(:timeout) { 1.day.ago }
  # Timestamps one hour on either side of each bound.
  let!(:before_lookback) { lookback - 1.hour }
  let!(:after_lookback) { lookback + 1.hour }
  let!(:before_timeout) { timeout - 1.hour }
  let!(:after_timeout) { timeout + 1.hour }

  subject { described_class.updated_before(lookback: lookback, timeout: timeout) }

  # Creates a build with the clock moved to `created_at`, then marks it failed
  # with the clock moved to `updated_at`, and returns the build.
  def create_build_with_set_timestamps(created_at:, updated_at:)
    build = travel_to(created_at) { create(:ci_build, created_at: Time.current) }
    travel_to(updated_at) { build.update!(status: :failed) }
    build
  end

  it 'finds builds updated and created in the window between lookback and timeout' do
    inside_window = create_build_with_set_timestamps(created_at: after_lookback, updated_at: before_timeout)
    created_too_early = create_build_with_set_timestamps(created_at: before_lookback, updated_at: before_timeout)
    updated_too_late = create_build_with_set_timestamps(created_at: after_lookback, updated_at: after_timeout)

    expect(subject).to contain_exactly(inside_window)
    expect(subject).not_to include(created_too_early, updated_too_late)
  end
end
describe '#processed' do
subject { commit_status.processed }
......
......@@ -82,6 +82,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is dropped'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
context 'when job was updated less than 1 day ago' do
......@@ -98,6 +104,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is unchanged'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
context 'when job was updated more than 1 hour ago' do
......@@ -114,6 +126,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is unchanged'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
end
......@@ -136,6 +154,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is dropped'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
context 'when job was updated in less than 1 hour ago' do
......@@ -152,6 +176,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is unchanged'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
end
end
......@@ -162,17 +192,7 @@ RSpec.describe StuckCiJobsWorker do
context 'when job was updated_at more than an hour ago' do
let(:updated_at) { 2.hours.ago }
context 'when created_at is the same as updated_at' do
let(:created_at) { 2.hours.ago }
it_behaves_like 'job is dropped'
end
context 'when created_at is before updated_at' do
let(:created_at) { 3.days.ago }
it_behaves_like 'job is dropped'
end
it_behaves_like 'job is dropped'
end
context 'when job was updated in less than 1 hour ago' do
......@@ -198,6 +218,12 @@ RSpec.describe StuckCiJobsWorker do
it_behaves_like 'job is unchanged'
end
context 'when created_at is outside lookback window' do
  # BUILD_LOOKBACK is a duration; `.ago` converts it to the timestamp where the
  # lookback window starts, so created_at lands one day before the window.
  let(:created_at) { described_class::BUILD_LOOKBACK.ago - 1.day }

  it_behaves_like 'job is unchanged'
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment