Commit 32c28c9a authored by Sean McGivern

Merge branch '245263-mr-refs-cleanup-scheduler' into 'master'

Schedule clean up of merge request refs efficiently

See merge request gitlab-org/gitlab!46758
parents 07f59609 54631a2e
......@@ -56,6 +56,7 @@ class MergeRequest < ApplicationRecord
has_one :merge_request_diff,
-> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request
has_one :cleanup_schedule, inverse_of: :merge_request
belongs_to :latest_merge_request_diff, class_name: 'MergeRequestDiff'
manual_inverse_association :latest_merge_request_diff, :merge_request
......
# frozen_string_literal: true
class MergeRequest::CleanupSchedule < ApplicationRecord
belongs_to :merge_request, inverse_of: :cleanup_schedule
validates :scheduled_at, presence: true
def self.scheduled_merge_request_ids(limit)
where('completed_at IS NULL AND scheduled_at <= NOW()')
.order('scheduled_at DESC')
.limit(limit)
.pluck(:merge_request_id)
end
end
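For reference, a rough sketch of what this scope boils down to (illustrative only, not part of the commit; exact quoting depends on the Rails version):
MergeRequest::CleanupSchedule.scheduled_merge_request_ids(100)
# Generates SQL along the lines of:
#
#   SELECT merge_request_id
#   FROM merge_request_cleanup_schedules
#   WHERE completed_at IS NULL AND scheduled_at <= NOW()
#   ORDER BY scheduled_at DESC
#   LIMIT 100
#
# => [42, 17, 9, ...] -- merge requests whose refs are due for cleanup. The
#    partial index added in the migration further down is meant to serve this
#    query.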
......@@ -9,7 +9,7 @@ module MergeRequests
attr_reader :merge_request
def self.schedule(merge_request)
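# Previously this enqueued a delayed MergeRequestCleanupRefsWorker per merge
# request; it now only records a cleanup schedule row, which the new
# ScheduleMergeRequestCleanupRefsWorker cron job picks up in batches.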
MergeRequestCleanupRefsWorker.perform_in(TIME_THRESHOLD, merge_request.id)
merge_request.create_cleanup_schedule(scheduled_at: TIME_THRESHOLD.from_now)
end
def initialize(merge_request)
......@@ -22,6 +22,7 @@ module MergeRequests
end
def execute
return error("Merge request is not scheduled to be cleaned up yet.") unless scheduled?
return error("Merge request has not been closed nor merged for #{TIME_THRESHOLD.inspect}.") unless eligible?
# Ensure that commit shas of refs are kept around so we won't lose them when GC runs.
......@@ -31,6 +32,9 @@ module MergeRequests
return error('Failed to cache merge ref sha.') unless cache_merge_ref_sha
delete_refs
return error('Failed to update schedule.') unless update_schedule
success
end
......@@ -38,6 +42,10 @@ module MergeRequests
attr_reader :repository, :ref_path, :merge_ref_path, :ref_head_sha, :merge_ref_sha
def scheduled?
merge_request.cleanup_schedule.present? && merge_request.cleanup_schedule.scheduled_at <= Time.current
end
def eligible?
return met_time_threshold?(merge_request.metrics&.latest_closed_at) if merge_request.closed?
......@@ -71,5 +79,9 @@ module MergeRequests
def delete_refs
repository.delete_refs(ref_path, merge_ref_path)
end
def update_schedule
merge_request.cleanup_schedule.update(completed_at: Time.current)
end
end
end
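As a usage sketch, a hypothetical caller of this service (the per-merge-request MergeRequestCleanupRefsWorker itself is not part of this diff) would do roughly the following:
# Hypothetical worker body, shown only for illustration.
merge_request = MergeRequest.find_by_id(merge_request_id)
return unless merge_request
result = MergeRequests::CleanupRefsService.new(merge_request).execute
Sidekiq.logger.warn(result[:message]) unless result[:status] == :success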
......@@ -15,6 +15,7 @@ module MergeRequests
invalidate_cache_counts(merge_request, users: merge_request.assignees)
merge_request.update_project_counter_caches
merge_request.cache_merge_request_closes_issues!(current_user)
merge_request.cleanup_schedule&.destroy
end
merge_request
......
......@@ -379,6 +379,14 @@
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:schedule_merge_request_cleanup_refs
:feature_category: :source_code_management
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:schedule_migrate_external_diffs
:feature_category: :source_code_management
:has_external_dependencies:
......
# frozen_string_literal: true
class ScheduleMergeRequestCleanupRefsWorker
include ApplicationWorker
include CronjobQueue # rubocop:disable Scalability/CronWorkerContext
feature_category :source_code_management
idempotent!
# Based on existing data, MergeRequestCleanupRefsWorker can run 3 jobs per
# second. Since this scheduler runs every minute, that means up to 180 jobs
# (3 * 60) can be performed per run, but since there are some spikes from time
# to time, it's better to give it some allowance.
LIMIT = 180
DELAY = 10.seconds
BATCH_SIZE = 30
def perform
return if Gitlab::Database.read_only?
ids = MergeRequest::CleanupSchedule.scheduled_merge_request_ids(LIMIT).map { |id| [id] }
MergeRequestCleanupRefsWorker.bulk_perform_in(DELAY, ids, batch_size: BATCH_SIZE) # rubocop:disable Scalability/BulkPerformWithContext
log_extra_metadata_on_done(:merge_requests_count, ids.size)
end
end
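A small sketch of why each ID gets wrapped in its own array (assuming Sidekiq-style bulk scheduling, where bulk_perform_in takes one argument array per job):
# Illustrative only, not part of the commit.
ids = [1, 2, 3].map { |id| [id] }
# => [[1], [2], [3]] -- one argument list per MergeRequestCleanupRefsWorker job
MergeRequestCleanupRefsWorker.bulk_perform_in(10.seconds, ids, batch_size: 30)
# Roughly equivalent to enqueueing perform(1), perform(2) and perform(3) after
# a 10-second delay; batch_size caps how many jobs are bulk-pushed at a time.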
---
title: Schedule clean up of merge request refs efficiently
merge_request: 46758
author:
type: performance
......@@ -531,6 +531,9 @@ Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['jo
Settings.cron_jobs['member_invitation_reminder_emails_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['member_invitation_reminder_emails_worker']['cron'] ||= '0 0 * * *'
Settings.cron_jobs['member_invitation_reminder_emails_worker']['job_class'] = 'MemberInvitationReminderEmailsWorker'
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker']['cron'] ||= '* * * * *'
Settings.cron_jobs['schedule_merge_request_cleanup_refs_worker']['job_class'] = 'ScheduleMergeRequestCleanupRefsWorker'
Gitlab.ee do
Settings.cron_jobs['active_user_count_threshold_worker'] ||= Settingslogic.new({})
......
# frozen_string_literal: true
class CreateMergeRequestCleanupSchedules < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
def up
with_lock_retries do
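# Note: `id: false` together with `primary_key: true` on the reference makes
# merge_request_id the table's primary key, so a merge request can have at
# most one cleanup schedule row.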
create_table :merge_request_cleanup_schedules, id: false do |t|
t.references :merge_request, primary_key: true, index: { unique: true }, null: false, foreign_key: { on_delete: :cascade }
t.datetime_with_timezone :scheduled_at, null: false
t.datetime_with_timezone :completed_at, null: true
t.timestamps_with_timezone
t.index :scheduled_at, where: 'completed_at IS NULL', name: 'index_mr_cleanup_schedules_timestamps'
end
end
end
def down
with_lock_retries do
drop_table :merge_request_cleanup_schedules
end
end
end
f4fb466c74e1366d5980a54d6e5fc42fe78237cae33d8cdaf5573d2fe75f8a5a
\ No newline at end of file
......@@ -13463,6 +13463,23 @@ CREATE SEQUENCE merge_request_blocks_id_seq
ALTER SEQUENCE merge_request_blocks_id_seq OWNED BY merge_request_blocks.id;
CREATE TABLE merge_request_cleanup_schedules (
merge_request_id bigint NOT NULL,
scheduled_at timestamp with time zone NOT NULL,
completed_at timestamp with time zone,
created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL
);
CREATE SEQUENCE merge_request_cleanup_schedules_merge_request_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE merge_request_cleanup_schedules_merge_request_id_seq OWNED BY merge_request_cleanup_schedules.merge_request_id;
CREATE TABLE merge_request_context_commit_diff_files (
sha bytea NOT NULL,
relative_order integer NOT NULL,
......@@ -17934,6 +17951,8 @@ ALTER TABLE ONLY merge_request_assignees ALTER COLUMN id SET DEFAULT nextval('me
ALTER TABLE ONLY merge_request_blocks ALTER COLUMN id SET DEFAULT nextval('merge_request_blocks_id_seq'::regclass);
ALTER TABLE ONLY merge_request_cleanup_schedules ALTER COLUMN merge_request_id SET DEFAULT nextval('merge_request_cleanup_schedules_merge_request_id_seq'::regclass);
ALTER TABLE ONLY merge_request_context_commits ALTER COLUMN id SET DEFAULT nextval('merge_request_context_commits_id_seq'::regclass);
ALTER TABLE ONLY merge_request_diff_details ALTER COLUMN merge_request_diff_id SET DEFAULT nextval('merge_request_diff_details_merge_request_diff_id_seq'::regclass);
......@@ -19146,6 +19165,9 @@ ALTER TABLE ONLY merge_request_assignees
ALTER TABLE ONLY merge_request_blocks
ADD CONSTRAINT merge_request_blocks_pkey PRIMARY KEY (id);
ALTER TABLE ONLY merge_request_cleanup_schedules
ADD CONSTRAINT merge_request_cleanup_schedules_pkey PRIMARY KEY (merge_request_id);
ALTER TABLE ONLY merge_request_context_commits
ADD CONSTRAINT merge_request_context_commits_pkey PRIMARY KEY (id);
......@@ -21086,6 +21108,8 @@ CREATE INDEX index_merge_request_assignees_on_user_id ON merge_request_assignees
CREATE INDEX index_merge_request_blocks_on_blocked_merge_request_id ON merge_request_blocks USING btree (blocked_merge_request_id);
CREATE UNIQUE INDEX index_merge_request_cleanup_schedules_on_merge_request_id ON merge_request_cleanup_schedules USING btree (merge_request_id);
CREATE INDEX index_merge_request_diff_commits_on_sha ON merge_request_diff_commits USING btree (sha);
CREATE INDEX index_merge_request_diff_details_on_merge_request_diff_id ON merge_request_diff_details USING btree (merge_request_diff_id);
......@@ -21200,6 +21224,8 @@ CREATE INDEX index_mirror_data_on_next_execution_and_retry_count ON project_mirr
CREATE UNIQUE INDEX index_mr_blocks_on_blocking_and_blocked_mr_ids ON merge_request_blocks USING btree (blocking_merge_request_id, blocked_merge_request_id);
CREATE INDEX index_mr_cleanup_schedules_timestamps ON merge_request_cleanup_schedules USING btree (scheduled_at) WHERE (completed_at IS NULL);
CREATE UNIQUE INDEX index_mr_context_commits_on_merge_request_id_and_sha ON merge_request_context_commits USING btree (merge_request_id, sha);
CREATE UNIQUE INDEX index_namespace_aggregation_schedules_on_namespace_id ON namespace_aggregation_schedules USING btree (namespace_id);
......@@ -23961,6 +23987,9 @@ ALTER TABLE ONLY project_error_tracking_settings
ALTER TABLE ONLY list_user_preferences
ADD CONSTRAINT fk_rails_916d72cafd FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;
ALTER TABLE ONLY merge_request_cleanup_schedules
ADD CONSTRAINT fk_rails_92dd0e705c FOREIGN KEY (merge_request_id) REFERENCES merge_requests(id) ON DELETE CASCADE;
ALTER TABLE ONLY board_labels
ADD CONSTRAINT fk_rails_9374a16edd FOREIGN KEY (board_id) REFERENCES boards(id) ON DELETE CASCADE;
......
# frozen_string_literal: true
FactoryBot.define do
factory :merge_request_cleanup_schedule, class: 'MergeRequest::CleanupSchedule' do
merge_request
scheduled_at { Time.current }
end
end
......@@ -179,6 +179,7 @@ merge_requests:
- user_mentions
- system_note_metadata
- note_authors
- cleanup_schedule
external_pull_requests:
- project
merge_request_diff:
......@@ -195,6 +196,8 @@ merge_request_diff_files:
merge_request_context_commits:
- merge_request
- diff_files
cleanup_schedule:
- merge_request
ci_pipelines:
- project
- user
......
......@@ -878,3 +878,9 @@ PushRule:
- reject_unsigned_commits
- commit_committer_check
- regexp_uses_re2
MergeRequest::CleanupSchedule:
- id
- scheduled_at
- completed_at
- created_at
- updated_at
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe MergeRequest::CleanupSchedule do
describe 'associations' do
it { is_expected.to belong_to(:merge_request) }
end
describe 'validations' do
it { is_expected.to validate_presence_of(:scheduled_at) }
end
describe '.scheduled_merge_request_ids' do
let_it_be(:mr_cleanup_schedule_1) { create(:merge_request_cleanup_schedule, scheduled_at: 2.days.ago) }
let_it_be(:mr_cleanup_schedule_2) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.ago) }
let_it_be(:mr_cleanup_schedule_3) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.ago, completed_at: Time.current) }
let_it_be(:mr_cleanup_schedule_4) { create(:merge_request_cleanup_schedule, scheduled_at: 4.days.ago) }
let_it_be(:mr_cleanup_schedule_5) { create(:merge_request_cleanup_schedule, scheduled_at: 3.days.ago) }
let_it_be(:mr_cleanup_schedule_6) { create(:merge_request_cleanup_schedule, scheduled_at: 1.day.from_now) }
let_it_be(:mr_cleanup_schedule_7) { create(:merge_request_cleanup_schedule, scheduled_at: 5.days.ago) }
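# Expected result: schedule_6 is excluded (scheduled in the future), schedule_3
# is excluded (already completed), and with scheduled_at DESC ordering the
# limit of 4 keeps schedules 2, 1, 5 and 4, cutting off schedule_7 (5 days ago).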
it 'only includes incomplete schedules within the specified limit' do
expect(described_class.scheduled_merge_request_ids(4)).to eq([
mr_cleanup_schedule_2.merge_request_id,
mr_cleanup_schedule_1.merge_request_id,
mr_cleanup_schedule_5.merge_request_id,
mr_cleanup_schedule_4.merge_request_id
])
end
end
end
......@@ -30,6 +30,7 @@ RSpec.describe MergeRequest, factory_default: :keep do
it { is_expected.to have_many(:resource_state_events) }
it { is_expected.to have_many(:draft_notes) }
it { is_expected.to have_many(:reviews).inverse_of(:merge_request) }
it { is_expected.to have_one(:cleanup_schedule).inverse_of(:merge_request) }
context 'for forks' do
let!(:project) { create(:project) }
......
......@@ -4,14 +4,15 @@ require 'spec_helper'
RSpec.describe MergeRequests::CleanupRefsService do
describe '.schedule' do
let(:merge_request) { build(:merge_request) }
let(:merge_request) { create(:merge_request) }
it 'schedules MergeRequestCleanupRefsWorker' do
expect(MergeRequestCleanupRefsWorker)
.to receive(:perform_in)
.with(described_class::TIME_THRESHOLD, merge_request.id)
it 'creates a merge request cleanup schedule' do
freeze_time do
described_class.schedule(merge_request)
described_class.schedule(merge_request)
expect(merge_request.reload.cleanup_schedule.scheduled_at)
.to eq(described_class::TIME_THRESHOLD.from_now)
end
end
end
......@@ -20,6 +21,8 @@ RSpec.describe MergeRequests::CleanupRefsService do
# Need to re-enable this as it's being stubbed in spec_helper for
# performance reasons, but it needs to run for this test.
allow(Gitlab::Git::KeepAround).to receive(:execute).and_call_original
merge_request.create_cleanup_schedule(scheduled_at: described_class::TIME_THRESHOLD.ago)
end
subject(:result) { described_class.new(merge_request).execute }
......@@ -32,6 +35,7 @@ RSpec.describe MergeRequests::CleanupRefsService do
expect(result[:status]).to eq(:success)
expect(kept_around?(old_ref_head)).to be_truthy
expect(ref_head).to be_nil
expect(merge_request.cleanup_schedule.completed_at).to be_present
end
end
......@@ -43,6 +47,7 @@ RSpec.describe MergeRequests::CleanupRefsService do
it 'does not fail' do
expect(result[:status]).to eq(:success)
expect(merge_request.cleanup_schedule.completed_at).to be_present
end
end
......@@ -85,6 +90,31 @@ RSpec.describe MergeRequests::CleanupRefsService do
it_behaves_like 'service that does not clean up merge request refs'
end
context 'when cleanup schedule fails to update' do
before do
allow(merge_request.cleanup_schedule).to receive(:update).and_return(false)
end
it 'creates keep around ref and deletes merge request refs' do
old_ref_head = ref_head
aggregate_failures do
expect(result[:status]).to eq(:error)
expect(kept_around?(old_ref_head)).to be_truthy
expect(ref_head).to be_nil
expect(merge_request.cleanup_schedule.completed_at).not_to be_present
end
end
end
context 'when merge request is not scheduled to be cleaned up yet' do
before do
merge_request.cleanup_schedule.update!(scheduled_at: 1.day.from_now)
end
it_behaves_like 'service that does not clean up merge request refs'
end
end
shared_examples_for 'service that does not clean up merge request refs' do
......@@ -92,6 +122,7 @@ RSpec.describe MergeRequests::CleanupRefsService do
aggregate_failures do
expect(result[:status]).to eq(:error)
expect(ref_head).to be_present
expect(merge_request.cleanup_schedule.completed_at).not_to be_present
end
end
end
......
......@@ -23,6 +23,7 @@ RSpec.describe MergeRequests::ReopenService do
before do
allow(service).to receive(:execute_hooks)
merge_request.create_cleanup_schedule(scheduled_at: Time.current)
perform_enqueued_jobs do
service.execute(merge_request)
......@@ -43,6 +44,10 @@ RSpec.describe MergeRequests::ReopenService do
expect(email.subject).to include(merge_request.title)
end
it 'destroys cleanup schedule record' do
expect(merge_request.reload.cleanup_schedule).to be_nil
end
context 'note creation' do
it 'creates resource state event about merge_request reopen' do
event = merge_request.resource_state_events.last
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ScheduleMergeRequestCleanupRefsWorker do
subject(:worker) { described_class.new }
describe '#perform' do
before do
allow(MergeRequest::CleanupSchedule)
.to receive(:scheduled_merge_request_ids)
.with(described_class::LIMIT)
.and_return([1, 2, 3, 4])
end
it 'does nothing if the database is read-only' do
allow(Gitlab::Database).to receive(:read_only?).and_return(true)
expect(MergeRequestCleanupRefsWorker).not_to receive(:bulk_perform_in)
worker.perform
end
include_examples 'an idempotent worker' do
it 'schedules MergeRequestCleanupRefsWorker to be performed by batch' do
expect(MergeRequestCleanupRefsWorker)
.to receive(:bulk_perform_in)
.with(
described_class::DELAY,
[[1], [2], [3], [4]],
batch_size: described_class::BATCH_SIZE
)
expect(worker).to receive(:log_extra_metadata_on_done).with(:merge_requests_count, 4)
worker.perform
end
end
end
end