Commit 8bcd5b5d authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch 'drop-scheduled-builds' into 'master'

Introduce StuckBuilds::DropScheduledService

See merge request gitlab-org/gitlab!71229
parents 7202bb88 111c4a27
# frozen_string_literal: true
module Ci
module StuckBuilds
class DropScheduledService
include DropHelpers
BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour
def execute
Gitlab::AppLogger.info "#{self.class}: Cleaning scheduled, timed-out builds"
drop(scheduled_timed_out_builds, failure_reason: :stale_schedule)
end
private
def scheduled_timed_out_builds
Ci::Build.where(status: :scheduled).where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.scheduled_at IS NOT NULL AND ci_builds.scheduled_at < ?',
BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago
)
end
end
end
end
......@@ -6,7 +6,6 @@ module Ci
include DropHelpers
BUILD_PENDING_OUTDATED_TIMEOUT = 1.day
BUILD_SCHEDULED_OUTDATED_TIMEOUT = 1.hour
BUILD_PENDING_STUCK_TIMEOUT = 1.hour
BUILD_LOOKBACK = 5.days
......@@ -18,8 +17,6 @@ module Ci
failure_reason: :stuck_or_timeout_failure
)
drop(scheduled_timed_out_builds, failure_reason: :stale_schedule)
drop_stuck(
pending_builds(BUILD_PENDING_STUCK_TIMEOUT.ago),
failure_reason: :stuck_or_timeout_failure
......@@ -40,13 +37,6 @@ module Ci
end
end
# rubocop: enable CodeReuse/ActiveRecord
def scheduled_timed_out_builds
Ci::Build.where(status: :scheduled).where( # rubocop: disable CodeReuse/ActiveRecord
'ci_builds.scheduled_at IS NOT NULL AND ci_builds.scheduled_at < ?',
BUILD_SCHEDULED_OUTDATED_TIMEOUT.ago
)
end
end
end
end
......@@ -237,6 +237,15 @@
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:ci_stuck_builds_drop_scheduled
:worker_name: Ci::StuckBuilds::DropScheduledWorker
:feature_category: :continuous_integration
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:container_expiration_policy
:worker_name: ContainerExpirationPolicyWorker
:feature_category: :container_registry
......
# frozen_string_literal: true
module Ci
module StuckBuilds
class DropScheduledWorker
include ApplicationWorker
include ExclusiveLeaseGuard
idempotent!
# rubocop:disable Scalability/CronWorkerContext
# This is an instance-wide cleanup query, so there's no meaningful
# scope to consider this in the context of.
include CronjobQueue
# rubocop:enable Scalability/CronWorkerContext
data_consistency :always
feature_category :continuous_integration
def perform
try_obtain_lease do
Ci::StuckBuilds::DropScheduledService.new.execute
end
end
private
def lease_timeout
30.minutes
end
end
end
end
......@@ -18,6 +18,7 @@ class StuckCiJobsWorker # rubocop:disable Scalability/IdempotentWorker
def perform
Ci::StuckBuilds::DropRunningWorker.perform_in(20.minutes)
Ci::StuckBuilds::DropScheduledWorker.perform_in(40.minutes)
return unless try_obtain_lease
......
......@@ -22,7 +22,7 @@ RSpec.describe Ci::StuckBuilds::DropRunningService do
let(:created_at) { outdated_time }
let(:updated_at) { outdated_time }
it_behaves_like 'job is dropped'
it_behaves_like 'job is dropped with failure reason', 'stuck_or_timeout_failure'
end
context 'when job is fresh' do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::StuckBuilds::DropScheduledService do
let_it_be(:runner) { create :ci_runner }
let!(:job) { create :ci_build, :scheduled, scheduled_at: scheduled_at, runner: runner }
subject(:service) { described_class.new }
context 'when job is scheduled' do
context 'for more than an hour ago' do
let(:scheduled_at) { 2.hours.ago }
it_behaves_like 'job is dropped with failure reason', 'stale_schedule'
end
context 'for less than 1 hour ago' do
let(:scheduled_at) { 30.minutes.ago }
it_behaves_like 'job is unchanged'
end
end
%w(success skipped failed canceled running pending).each do |status|
context "when job is #{status}" do
before do
job.update!(status: status)
end
context 'and scheduled for more than an hour ago' do
let(:scheduled_at) { 2.hours.ago }
it_behaves_like 'job is unchanged'
end
context 'and scheduled for less than 1 hour ago' do
let(:scheduled_at) { 30.minutes.ago }
it_behaves_like 'job is unchanged'
end
end
end
context 'when there are no stale scheduled builds' do
let(:job) { }
it 'does not drop the stale scheduled build yet' do
expect { service.execute }.not_to raise_error
end
end
end
......@@ -33,13 +33,13 @@ RSpec.describe Ci::StuckBuilds::DropService do
context 'when created_at is the same as updated_at' do
let(:created_at) { 1.5.days.ago }
it_behaves_like 'job is dropped'
it_behaves_like 'job is dropped with failure reason', 'stuck_or_timeout_failure'
end
context 'when created_at is before updated_at' do
let(:created_at) { 3.days.ago }
it_behaves_like 'job is dropped'
it_behaves_like 'job is dropped with failure reason', 'stuck_or_timeout_failure'
end
context 'when created_at is outside lookback window' do
......@@ -107,13 +107,13 @@ RSpec.describe Ci::StuckBuilds::DropService do
context 'when created_at is the same as updated_at' do
let(:created_at) { 1.5.hours.ago }
it_behaves_like 'job is dropped'
it_behaves_like 'job is dropped with failure reason', 'stuck_or_timeout_failure'
end
context 'when created_at is before updated_at' do
let(:created_at) { 3.days.ago }
it_behaves_like 'job is dropped'
it_behaves_like 'job is dropped with failure reason', 'stuck_or_timeout_failure'
end
context 'when created_at is outside lookback window' do
......@@ -198,45 +198,4 @@ RSpec.describe Ci::StuckBuilds::DropService do
it_behaves_like 'job is unchanged'
end
describe 'drop stale scheduled builds' do
let(:status) { 'scheduled' }
let(:updated_at) { }
context 'when scheduled at 2 hours ago but it is not executed yet' do
let!(:job) { create(:ci_build, :scheduled, scheduled_at: 2.hours.ago) }
it 'drops the stale scheduled build' do
expect(Ci::Build.scheduled.count).to eq(1)
expect(job).to be_scheduled
service.execute
job.reload
expect(Ci::Build.scheduled.count).to eq(0)
expect(job).to be_failed
expect(job).to be_stale_schedule
end
end
context 'when scheduled at 30 minutes ago but it is not executed yet' do
let!(:job) { create(:ci_build, :scheduled, scheduled_at: 30.minutes.ago) }
it 'does not drop the stale scheduled build yet' do
expect(Ci::Build.scheduled.count).to eq(1)
expect(job).to be_scheduled
service.execute
expect(Ci::Build.scheduled.count).to eq(1)
expect(job).to be_scheduled
end
end
context 'when there are no stale scheduled builds' do
it 'does not drop the stale scheduled build yet' do
expect { service.execute }.not_to raise_error
end
end
end
end
# frozen_string_literal: true
RSpec.shared_examples 'job is dropped' do
RSpec.shared_examples 'job is dropped with failure reason' do |failure_reason|
it 'changes status' do
service.execute
job.reload
expect(job).to be_failed
expect(job).to be_stuck_or_timeout_failure
expect(job.failure_reason).to eq(failure_reason)
end
context 'when job has data integrity problem' do
......@@ -23,16 +23,13 @@ RSpec.shared_examples 'job is dropped' do
job.reload
expect(job).to be_failed
expect(job).to be_data_integrity_failure
expect(job.failure_reason).to eq('data_integrity_failure')
end
end
end
RSpec.shared_examples 'job is unchanged' do
it 'does not change status' do
service.execute
job.reload
expect(job.status).to eq(status)
expect { service.execute }.not_to change(job, :status)
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::StuckBuilds::DropScheduledWorker do
include ExclusiveLeaseHelpers
let(:worker) { described_class.new }
let(:lease_uuid) { SecureRandom.uuid }
describe '#perform' do
subject { worker.perform }
it_behaves_like 'an idempotent worker'
it 'executes an instance of Ci::StuckBuilds::DropScheduledService with an exclusive lease' do
expect_to_obtain_exclusive_lease(worker.lease_key, lease_uuid)
expect_next_instance_of(Ci::StuckBuilds::DropScheduledService) do |service|
expect(service).to receive(:execute).exactly(:once)
end
expect_to_cancel_exclusive_lease(worker.lease_key, lease_uuid)
subject
end
end
end
......@@ -22,6 +22,12 @@ RSpec.describe StuckCiJobsWorker do
worker.perform
end
it 'enqueues a Ci::StuckBuilds::DropScheduledWorker job' do
expect(Ci::StuckBuilds::DropScheduledWorker).to receive(:perform_in).with(40.minutes).exactly(:once)
worker.perform
end
it 'executes an instance of Ci::StuckBuilds::DropService' do
expect_next_instance_of(Ci::StuckBuilds::DropService) do |service|
expect(service).to receive(:execute).exactly(:once)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment