Commit 48a0acb9 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'enable-ci-deleted-objects-ff' into 'master'

Rollout ci_deleted_objects feature

See merge request gitlab-org/gitlab!46971
parents 2cd3043f a10774a1
......@@ -4,24 +4,23 @@ module Ci
class DestroyExpiredJobArtifactsService
include ::Gitlab::ExclusiveLeaseHelpers
include ::Gitlab::LoopHelpers
include ::Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100
LOOP_TIMEOUT = 5.minutes
LEGACY_LOOP_TIMEOUT = 45.minutes
LOOP_LIMIT = 1000
EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock'
LOCK_TIMEOUT = 10.minutes
LEGACY_LOCK_TIMEOUT = 50.minutes
LOCK_TIMEOUT = 6.minutes
##
# Destroy expired job artifacts on GitLab instance
#
# This destroy process cannot run for more than 10 minutes. This is for
# This destroy process cannot run for more than 6 minutes. This is for
# preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently,
# which is scheduled at every hour.
# which is scheduled every 7 minutes.
def execute
in_lock(EXCLUSIVE_LOCK_KEY, ttl: lock_timeout, retries: 1) do
loop_until(timeout: loop_timeout, limit: LOOP_LIMIT) do
in_lock(EXCLUSIVE_LOCK_KEY, ttl: LOCK_TIMEOUT, retries: 1) do
loop_until(timeout: LOOP_TIMEOUT, limit: LOOP_LIMIT) do
destroy_artifacts_batch
end
end
......@@ -42,30 +41,19 @@ module Ci
return false if artifacts.empty?
if parallel_destroy?
parallel_destroy_batch(artifacts)
else
legacy_destroy_batch(artifacts)
destroy_related_records_for(artifacts)
end
parallel_destroy_batch(artifacts)
true
end
# TODO: Make sure this can also be parallelized
# https://gitlab.com/gitlab-org/gitlab/-/issues/270973
def destroy_pipeline_artifacts_batch
artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a
return false if artifacts.empty?
legacy_destroy_batch(artifacts)
true
end
def parallel_destroy?
::Feature.enabled?(:ci_delete_objects)
end
def legacy_destroy_batch(artifacts)
artifacts.each(&:destroy!)
true
end
def parallel_destroy_batch(job_artifacts)
......@@ -77,6 +65,7 @@ module Ci
# This is executed outside of the transaction because it depends on Redis
update_statistics_for(job_artifacts)
destroyed_artifacts_counter.increment({}, job_artifacts.size)
end
# This method is implemented in EE and it must do only database work
......@@ -91,19 +80,12 @@ module Ci
end
end
def loop_timeout
if parallel_destroy?
LOOP_TIMEOUT
else
LEGACY_LOOP_TIMEOUT
end
end
def destroyed_artifacts_counter
strong_memoize(:destroyed_artifacts_counter) do
name = :destroyed_job_artifacts_count_total
comment = 'Counter of destroyed expired job artifacts'
def lock_timeout
if parallel_destroy?
LOCK_TIMEOUT
else
LEGACY_LOCK_TIMEOUT
::Gitlab::Metrics.counter(name, comment)
end
end
end
......
......@@ -18,14 +18,12 @@ module Ci
end
def max_running_jobs
if ::Feature.enabled?(:ci_delete_objects_low_concurrency)
2
elsif ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
if ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
20
elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency)
50
else
0
2
end
end
......
---
title: Parallelize the removal of expired job artifacts
merge_request: 46971
author:
type: performance
---
name: ci_delete_objects
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42242
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
---
name: ci_delete_objects_low_concurrency
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39464
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
......@@ -435,7 +435,7 @@ production: &base
cron: "19 * * * *"
# Remove expired build artifacts
expire_build_artifacts_worker:
cron: "50 * * * *"
cron: "*/7 * * * *"
# Remove files from object storage
ci_schedule_delete_objects_worker:
cron: "*/16 * * * *"
......
......@@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *'
Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker'
Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *'
Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '*/7 * * * *'
Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker'
Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *'
......
......@@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` |
| `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` |
| `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` |
| `destroyed_job_artifacts_count_total` | Counter | 13.6 | Number of destroyed expired job artifacts | |
## Database load balancing metrics **(PREMIUM ONLY)**
......
......@@ -61,35 +61,17 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
end
context 'when failed to destroy artifact' do
context 'with ci_delete_objects disabled' do
before do
stub_feature_flags(ci_delete_objects: false)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
allow_next_found_instance_of(Ci::JobArtifact) do |artifact|
allow(artifact).to receive(:destroy!).and_raise(ActiveRecord::RecordNotDestroyed)
end
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::DeletedObject)
.to receive(:bulk_import)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
context 'with ci_delete_objects enabled' do
before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::DeletedObject)
.to receive(:bulk_import)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
end
......
......@@ -75,6 +75,14 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
it 'does not remove the files' do
expect { subject }.not_to change { artifact.file.exists? }
end
it 'reports metrics for destroyed artifacts' do
counter = service.send(:destroyed_artifacts_counter)
expect(counter).to receive(:increment).with({}, 1).and_call_original
subject
end
end
end
......@@ -114,49 +122,31 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
end
context 'with ci_delete_objects disabled' do
context 'when the import fails' do
before do
stub_feature_flags(ci_delete_objects: false)
allow_any_instance_of(Ci::JobArtifact)
.to receive(:destroy!)
expect(Ci::DeletedObject)
.to receive(:bulk_import)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::JobArtifact.count }.from(1)
end
end
context 'with ci_delete_objects enabled' do
context 'when the import fails' do
before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::DeletedObject)
.to receive(:bulk_import)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::JobArtifact.count }.from(1)
end
context 'when the delete fails' do
before do
expect(Ci::JobArtifact)
.to receive(:id_in)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
context 'when the delete fails' do
before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::JobArtifact)
.to receive(:id_in)
.once
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception rolls back the insert' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::DeletedObject.count }.from(0)
end
it 'raises an exception rolls back the insert' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Ci::DeletedObject.count }.from(0)
end
end
end
......
......@@ -28,7 +28,6 @@ RSpec.describe Ci::DeleteObjectsWorker do
before do
stub_feature_flags(
ci_delete_objects_low_concurrency: low,
ci_delete_objects_medium_concurrency: medium,
ci_delete_objects_high_concurrency: high
)
......@@ -36,13 +35,11 @@ RSpec.describe Ci::DeleteObjectsWorker do
subject(:max_running_jobs) { worker.max_running_jobs }
where(:low, :medium, :high, :expected) do
false | false | false | 0
true | true | true | 2
true | false | false | 2
false | true | false | 20
false | true | true | 20
false | false | true | 50
where(:medium, :high, :expected) do
false | false | 2
true | false | 20
true | true | 20
false | true | 50
end
with_them do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment