Commit 48a0acb9 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'enable-ci-deleted-objects-ff' into 'master'

Rollout ci_deleted_objects feature

See merge request gitlab-org/gitlab!46971
parents 2cd3043f a10774a1
...@@ -4,24 +4,23 @@ module Ci ...@@ -4,24 +4,23 @@ module Ci
class DestroyExpiredJobArtifactsService class DestroyExpiredJobArtifactsService
include ::Gitlab::ExclusiveLeaseHelpers include ::Gitlab::ExclusiveLeaseHelpers
include ::Gitlab::LoopHelpers include ::Gitlab::LoopHelpers
include ::Gitlab::Utils::StrongMemoize
BATCH_SIZE = 100 BATCH_SIZE = 100
LOOP_TIMEOUT = 5.minutes LOOP_TIMEOUT = 5.minutes
LEGACY_LOOP_TIMEOUT = 45.minutes
LOOP_LIMIT = 1000 LOOP_LIMIT = 1000
EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock' EXCLUSIVE_LOCK_KEY = 'expired_job_artifacts:destroy:lock'
LOCK_TIMEOUT = 10.minutes LOCK_TIMEOUT = 6.minutes
LEGACY_LOCK_TIMEOUT = 50.minutes
## ##
# Destroy expired job artifacts on GitLab instance # Destroy expired job artifacts on GitLab instance
# #
# This destroy process cannot run for more than 10 minutes. This is for # This destroy process cannot run for more than 6 minutes. This is for
# preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently, # preventing multiple `ExpireBuildArtifactsWorker` CRON jobs run concurrently,
# which is scheduled at every hour. # which is scheduled every 7 minutes.
def execute def execute
in_lock(EXCLUSIVE_LOCK_KEY, ttl: lock_timeout, retries: 1) do in_lock(EXCLUSIVE_LOCK_KEY, ttl: LOCK_TIMEOUT, retries: 1) do
loop_until(timeout: loop_timeout, limit: LOOP_LIMIT) do loop_until(timeout: LOOP_TIMEOUT, limit: LOOP_LIMIT) do
destroy_artifacts_batch destroy_artifacts_batch
end end
end end
...@@ -42,30 +41,19 @@ module Ci ...@@ -42,30 +41,19 @@ module Ci
return false if artifacts.empty? return false if artifacts.empty?
if parallel_destroy?
parallel_destroy_batch(artifacts) parallel_destroy_batch(artifacts)
else
legacy_destroy_batch(artifacts)
destroy_related_records_for(artifacts)
end
true true
end end
# TODO: Make sure this can also be parallelized
# https://gitlab.com/gitlab-org/gitlab/-/issues/270973
def destroy_pipeline_artifacts_batch def destroy_pipeline_artifacts_batch
artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a artifacts = Ci::PipelineArtifact.expired(BATCH_SIZE).to_a
return false if artifacts.empty? return false if artifacts.empty?
legacy_destroy_batch(artifacts)
true
end
def parallel_destroy?
::Feature.enabled?(:ci_delete_objects)
end
def legacy_destroy_batch(artifacts)
artifacts.each(&:destroy!) artifacts.each(&:destroy!)
true
end end
def parallel_destroy_batch(job_artifacts) def parallel_destroy_batch(job_artifacts)
...@@ -77,6 +65,7 @@ module Ci ...@@ -77,6 +65,7 @@ module Ci
# This is executed outside of the transaction because it depends on Redis # This is executed outside of the transaction because it depends on Redis
update_statistics_for(job_artifacts) update_statistics_for(job_artifacts)
destroyed_artifacts_counter.increment({}, job_artifacts.size)
end end
# This method is implemented in EE and it must do only database work # This method is implemented in EE and it must do only database work
...@@ -91,19 +80,12 @@ module Ci ...@@ -91,19 +80,12 @@ module Ci
end end
end end
def loop_timeout def destroyed_artifacts_counter
if parallel_destroy? strong_memoize(:destroyed_artifacts_counter) do
LOOP_TIMEOUT name = :destroyed_job_artifacts_count_total
else comment = 'Counter of destroyed expired job artifacts'
LEGACY_LOOP_TIMEOUT
end
end
def lock_timeout ::Gitlab::Metrics.counter(name, comment)
if parallel_destroy?
LOCK_TIMEOUT
else
LEGACY_LOCK_TIMEOUT
end end
end end
end end
......
...@@ -18,14 +18,12 @@ module Ci ...@@ -18,14 +18,12 @@ module Ci
end end
def max_running_jobs def max_running_jobs
if ::Feature.enabled?(:ci_delete_objects_low_concurrency) if ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
2
elsif ::Feature.enabled?(:ci_delete_objects_medium_concurrency)
20 20
elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency) elsif ::Feature.enabled?(:ci_delete_objects_high_concurrency)
50 50
else else
0 2
end end
end end
......
---
title: Parallelize the removal of expired job artifacts
merge_request: 46971
author:
type: performance
---
name: ci_delete_objects
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42242
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
---
name: ci_delete_objects_low_concurrency
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/39464
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247103
group: group::continuous integration
type: development
default_enabled: false
\ No newline at end of file
...@@ -435,7 +435,7 @@ production: &base ...@@ -435,7 +435,7 @@ production: &base
cron: "19 * * * *" cron: "19 * * * *"
# Remove expired build artifacts # Remove expired build artifacts
expire_build_artifacts_worker: expire_build_artifacts_worker:
cron: "50 * * * *" cron: "*/7 * * * *"
# Remove files from object storage # Remove files from object storage
ci_schedule_delete_objects_worker: ci_schedule_delete_objects_worker:
cron: "*/16 * * * *" cron: "*/16 * * * *"
......
...@@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker'] ||= Settingslogic.new({}) ...@@ -415,7 +415,7 @@ Settings.cron_jobs['pipeline_schedule_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *' Settings.cron_jobs['pipeline_schedule_worker']['cron'] ||= '19 * * * *'
Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker' Settings.cron_jobs['pipeline_schedule_worker']['job_class'] = 'PipelineScheduleWorker'
Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['expire_build_artifacts_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '50 * * * *' Settings.cron_jobs['expire_build_artifacts_worker']['cron'] ||= '*/7 * * * *'
Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker' Settings.cron_jobs['expire_build_artifacts_worker']['job_class'] = 'ExpireBuildArtifactsWorker'
Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['ci_schedule_delete_objects_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *' Settings.cron_jobs['ci_schedule_delete_objects_worker']['cron'] ||= '*/16 * * * *'
......
...@@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the ...@@ -208,6 +208,7 @@ configuration option in `gitlab.yml`. These metrics are served from the
| `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` | | `limited_capacity_worker_running_jobs` | Gauge | 13.5 | Number of running jobs | `worker` |
| `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` | | `limited_capacity_worker_max_running_jobs` | Gauge | 13.5 | Maximum number of running jobs | `worker` |
| `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` | | `limited_capacity_worker_remaining_work_count` | Gauge | 13.5 | Number of jobs waiting to be enqueued | `worker` |
| `destroyed_job_artifacts_count_total` | Counter | 13.6 | Number of destroyed expired job artifacts | |
## Database load balancing metrics **(PREMIUM ONLY)** ## Database load balancing metrics **(PREMIUM ONLY)**
......
...@@ -61,23 +61,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -61,23 +61,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
end end
context 'when failed to destroy artifact' do context 'when failed to destroy artifact' do
context 'with ci_delete_objects disabled' do
before do
stub_feature_flags(ci_delete_objects: false)
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
allow_next_found_instance_of(Ci::JobArtifact) do |artifact|
allow(artifact).to receive(:destroy!).and_raise(ActiveRecord::RecordNotDestroyed)
end
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
.and not_change { Security::Finding.count }.from(1)
end
end
context 'with ci_delete_objects enabled' do
before do before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::DeletedObject) expect(Ci::DeletedObject)
...@@ -91,7 +74,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -91,7 +74,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
.and not_change { Security::Finding.count }.from(1) .and not_change { Security::Finding.count }.from(1)
end end
end end
end
context 'when there are artifacts more than batch sizes' do context 'when there are artifacts more than batch sizes' do
before do before do
......
...@@ -75,6 +75,14 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -75,6 +75,14 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
it 'does not remove the files' do it 'does not remove the files' do
expect { subject }.not_to change { artifact.file.exists? } expect { subject }.not_to change { artifact.file.exists? }
end end
it 'reports metrics for destroyed artifacts' do
counter = service.send(:destroyed_artifacts_counter)
expect(counter).to receive(:increment).with({}, 1).and_call_original
subject
end
end end
end end
...@@ -114,24 +122,8 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -114,24 +122,8 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10) stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
end end
context 'with ci_delete_objects disabled' do
before do
stub_feature_flags(ci_delete_objects: false)
allow_any_instance_of(Ci::JobArtifact)
.to receive(:destroy!)
.and_raise(ActiveRecord::RecordNotDestroyed)
end
it 'raises an exception and stop destroying' do
expect { subject }.to raise_error(ActiveRecord::RecordNotDestroyed)
end
end
context 'with ci_delete_objects enabled' do
context 'when the import fails' do context 'when the import fails' do
before do before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::DeletedObject) expect(Ci::DeletedObject)
.to receive(:bulk_import) .to receive(:bulk_import)
.once .once
...@@ -146,7 +138,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -146,7 +138,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
context 'when the delete fails' do context 'when the delete fails' do
before do before do
stub_const('Ci::DestroyExpiredJobArtifactsService::LOOP_LIMIT', 10)
expect(Ci::JobArtifact) expect(Ci::JobArtifact)
.to receive(:id_in) .to receive(:id_in)
.once .once
...@@ -159,7 +150,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared ...@@ -159,7 +150,6 @@ RSpec.describe Ci::DestroyExpiredJobArtifactsService, :clean_gitlab_redis_shared
end end
end end
end end
end
context 'when exclusive lease has already been taken by the other instance' do context 'when exclusive lease has already been taken by the other instance' do
before do before do
......
...@@ -28,7 +28,6 @@ RSpec.describe Ci::DeleteObjectsWorker do ...@@ -28,7 +28,6 @@ RSpec.describe Ci::DeleteObjectsWorker do
before do before do
stub_feature_flags( stub_feature_flags(
ci_delete_objects_low_concurrency: low,
ci_delete_objects_medium_concurrency: medium, ci_delete_objects_medium_concurrency: medium,
ci_delete_objects_high_concurrency: high ci_delete_objects_high_concurrency: high
) )
...@@ -36,13 +35,11 @@ RSpec.describe Ci::DeleteObjectsWorker do ...@@ -36,13 +35,11 @@ RSpec.describe Ci::DeleteObjectsWorker do
subject(:max_running_jobs) { worker.max_running_jobs } subject(:max_running_jobs) { worker.max_running_jobs }
where(:low, :medium, :high, :expected) do where(:medium, :high, :expected) do
false | false | false | 0 false | false | 2
true | true | true | 2 true | false | 20
true | false | false | 2 true | true | 20
false | true | false | 20 false | true | 50
false | true | true | 20
false | false | true | 50
end end
with_them do with_them do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment