Commit e5487660 authored by Fabio Pitino's avatar Fabio Pitino

Merge branch '353128-resume-clean-up-expired-artifact' into 'master'

Detect and fix artifacts with backfilled expire_at

See merge request gitlab-org/gitlab!82084
parents 62ad1a2f bbc5329a
...@@ -12,7 +12,7 @@ module Ci ...@@ -12,7 +12,7 @@ module Ci
def destroy_records def destroy_records
@job_artifacts_relation.each_batch(of: BATCH_SIZE) do |relation| @job_artifacts_relation.each_batch(of: BATCH_SIZE) do |relation|
service = Ci::JobArtifacts::DestroyBatchService.new(relation, pick_up_at: Time.current) service = Ci::JobArtifacts::DestroyBatchService.new(relation, pick_up_at: Time.current, fix_expire_at: false)
result = service.execute(update_stats: false) result = service.execute(update_stats: false)
updates = result[:statistics_updates] updates = result[:statistics_updates]
......
...@@ -17,13 +17,18 @@ module Ci ...@@ -17,13 +17,18 @@ module Ci
# +pick_up_at+:: When to pick up for deletion of files # +pick_up_at+:: When to pick up for deletion of files
# Returns: # Returns:
# +Hash+:: A hash with status and destroyed_artifacts_count keys # +Hash+:: A hash with status and destroyed_artifacts_count keys
def initialize(job_artifacts, pick_up_at: nil) def initialize(job_artifacts, pick_up_at: nil, fix_expire_at: fix_expire_at?)
@job_artifacts = job_artifacts.with_destroy_preloads.to_a @job_artifacts = job_artifacts.with_destroy_preloads.to_a
@pick_up_at = pick_up_at @pick_up_at = pick_up_at
@fix_expire_at = fix_expire_at
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def execute(update_stats: true) def execute(update_stats: true)
# Detect and fix artifacts that had `expire_at` wrongly backfilled by migration
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/47723
detect_and_fix_wrongly_expired_artifacts
return success(destroyed_artifacts_count: 0, statistics_updates: {}) if @job_artifacts.empty? return success(destroyed_artifacts_count: 0, statistics_updates: {}) if @job_artifacts.empty?
destroy_related_records(@job_artifacts) destroy_related_records(@job_artifacts)
...@@ -89,6 +94,55 @@ module Ci ...@@ -89,6 +94,55 @@ module Ci
@job_artifacts.sum { |artifact| artifact.try(:size) || 0 } @job_artifacts.sum { |artifact| artifact.try(:size) || 0 }
end end
end end
# This detects and fixes job artifacts that have `expire_at` wrongly backfilled by the migration
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/47723.
# These job artifacts will not be deleted and will have their `expire_at` removed.
#
# The migration would have backfilled `expire_at`
# to midnight on the 22nd of the month of the local timezone,
# storing it as UTC time in the database.
#
# If the timezone setting has changed since the migration,
# the `expire_at` stored in the database could have changed to a different local time other than midnight.
# For example:
# - changing timezone from UTC+02:00 to UTC+02:30 would change the `expire_at` in local time 00:00:00 to 00:30:00.
# - changing timezone from UTC+00:00 to UTC-01:00 would change the `expire_at` in local time 00:00:00 to 23:00:00 on the previous day (21st).
#
# Therefore job artifacts that have `expire_at` exactly on the 00, 30 or 45 minute mark
# on the dates 21, 22, 23 of the month will not be deleted.
# https://en.wikipedia.org/wiki/List_of_UTC_time_offsets
def detect_and_fix_wrongly_expired_artifacts
return unless @fix_expire_at
wrongly_expired_artifacts, @job_artifacts = @job_artifacts.partition { |artifact| wrongly_expired?(artifact) }
remove_expire_at(wrongly_expired_artifacts)
end
def fix_expire_at?
Feature.enabled?(:ci_detect_wrongly_expired_artifacts, default_enabled: :yaml)
end
def wrongly_expired?(artifact)
return false unless artifact.expire_at.present?
match_date?(artifact.expire_at) && match_time?(artifact.expire_at)
end
def match_date?(expire_at)
[21, 22, 23].include?(expire_at.day)
end
def match_time?(expire_at)
%w[00:00.000 30:00.000 45:00.000].include?(expire_at.strftime('%M:%S.%L'))
end
def remove_expire_at(artifacts)
Ci::JobArtifact.id_in(artifacts).update_all(expire_at: nil)
Gitlab::AppLogger.info(message: "Fixed expire_at from artifacts.", fixed_artifacts_expire_at_count: artifacts.count)
end
end end
end end
end end
......
---
name: ci_detect_wrongly_expired_artifacts
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/82084
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/354955
milestone: '14.9'
type: development
group: group::pipeline insights
default_enabled: false
...@@ -102,5 +102,81 @@ RSpec.describe Ci::JobArtifacts::DestroyBatchService do ...@@ -102,5 +102,81 @@ RSpec.describe Ci::JobArtifacts::DestroyBatchService do
is_expected.to eq(destroyed_artifacts_count: 0, statistics_updates: {}, status: :success) is_expected.to eq(destroyed_artifacts_count: 0, statistics_updates: {}, status: :success)
end end
end end
context 'with artifacts that has backfilled expire_at' do
let!(:created_on_00_30_45_minutes_on_21_22_23) do
[
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-21 00:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-21 01:30:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-22 12:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-22 12:30:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-23 23:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-23 23:30:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-23 06:45:00.000'))
]
end
let!(:created_close_to_00_or_30_minutes) do
[
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-21 00:00:00.001')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-21 00:30:00.999'))
]
end
let!(:created_on_00_or_30_minutes_on_other_dates) do
[
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-01 00:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-19 12:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-24 23:30:00.000'))
]
end
let!(:created_at_other_times) do
[
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-19 00:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-19 00:30:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-24 00:00:00.000')),
create(:ci_job_artifact, expire_at: Time.zone.parse('2022-01-24 00:30:00.000'))
]
end
let(:artifacts_to_keep) { created_on_00_30_45_minutes_on_21_22_23 }
let(:artifacts_to_delete) { created_close_to_00_or_30_minutes + created_on_00_or_30_minutes_on_other_dates + created_at_other_times }
let(:all_artifacts) { artifacts_to_keep + artifacts_to_delete }
let(:artifacts) { Ci::JobArtifact.where(id: all_artifacts.map(&:id)) }
it 'deletes job artifacts that do not have expire_at on 00, 30 or 45 minute of 21, 22, 23 of the month' do
expect { subject }.to change { Ci::JobArtifact.count }.by(artifacts_to_delete.size * -1)
end
it 'keeps job artifacts that have expire_at on 00, 30 or 45 minute of 21, 22, 23 of the month' do
expect { subject }.not_to change { Ci::JobArtifact.where(id: artifacts_to_keep.map(&:id)).count }
end
it 'removes expire_at on job artifacts that have expire_at on 00, 30 or 45 minute of 21, 22, 23 of the month' do
subject
expect(artifacts_to_keep.all? { |artifact| artifact.reload.expire_at.nil? }).to be(true)
end
context 'when feature flag is disabled' do
before do
stub_feature_flags(ci_detect_wrongly_expired_artifacts: false)
end
it 'deletes all job artifacts' do
expect { subject }.to change { Ci::JobArtifact.count }.by(all_artifacts.size * -1)
end
end
context 'when fix_expire_at is false' do
let(:service) { described_class.new(artifacts, pick_up_at: Time.current, fix_expire_at: false) }
it 'deletes all job artifacts' do
expect { subject }.to change { Ci::JobArtifact.count }.by(all_artifacts.size * -1)
end
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment