Commit bcf1f223 authored by Yorick Peterse's avatar Yorick Peterse

Split diff commit migrations into smaller chunks

On GitLab.com we are running into issues where batch sizes for the
migration MigrateMergeRequestDiffCommitUsers are too large, resulting in
Sidekiq hosts running out of memory. See issue
https://gitlab.com/gitlab-com/gl-infra/infrastructure/-/issues/14097 for
more details on this.

To work around this, we add a migration that slices the existing jobs
into smaller chunks. This done such that jobs using the old ranges
won't be processed any more. We also change the number of database rows
loaded into memory when migrating data, in an attempt to further reduce
the amount of memory that's necessary.

See https://gitlab.com/gitlab-org/gitlab/-/issues/334394 for more
information.

Changelog: added
parent 3e46f22b
# frozen_string_literal: true
class SliceMergeRequestDiffCommitMigrations < ActiveRecord::Migration[6.1]
include Gitlab::Database::MigrationHelpers
BATCH_SIZE = 5_000
MIGRATION_CLASS = 'MigrateMergeRequestDiffCommitUsers'
STEAL_MIGRATION_CLASS = 'StealMigrateMergeRequestDiffCommitUsers'
def up
old_jobs = Gitlab::Database::BackgroundMigrationJob
.for_migration_class(MIGRATION_CLASS)
.pending
.to_a
return if old_jobs.empty?
# This ensures we stop processing the old ranges, as the background
# migrations skip already processed jobs.
Gitlab::Database::BackgroundMigrationJob
.for_migration_class(MIGRATION_CLASS)
.pending
.update_all(status: :succeeded)
rows = []
old_jobs.each do |job|
min, max = job.arguments
while min < max
rows << {
class_name: MIGRATION_CLASS,
arguments: [min, min + BATCH_SIZE],
created_at: Time.now.utc,
updated_at: Time.now.utc
}
min += BATCH_SIZE
end
end
Gitlab::Database::BackgroundMigrationJob.insert_all!(rows)
job = Gitlab::Database::BackgroundMigrationJob
.for_migration_class(MIGRATION_CLASS)
.pending
.first
migrate_in(1.hour, STEAL_MIGRATION_CLASS, job.arguments)
end
def down
# no-op
end
end
34287b86616026b94374856991c793ad869c52badddc09be923984002c6214bd
\ No newline at end of file
......@@ -14,7 +14,7 @@ module Gitlab
# The number of rows in merge_request_diff_commits to get in a single
# query.
COMMIT_ROWS_PER_QUERY = 10_000
COMMIT_ROWS_PER_QUERY = 1_000
# The number of rows in merge_request_diff_commits to update in a single
# query.
......
......@@ -15,13 +15,10 @@ module Gitlab
end
def schedule_next_job
# We process jobs in reverse order, so that (hopefully) we are less
# likely to process jobs that the regular background migration job is
# also processing.
next_job = Database::BackgroundMigrationJob
.for_migration_class('MigrateMergeRequestDiffCommitUsers')
.pending
.last
.first
return unless next_job
......
......@@ -23,7 +23,7 @@ RSpec.describe Gitlab::BackgroundMigration::StealMigrateMergeRequestDiffCommitUs
end
describe '#schedule_next_job' do
it 'schedules the next job in reverse order' do
it 'schedules the next job in ascending order' do
Gitlab::Database::BackgroundMigrationJob.create!(
class_name: 'MigrateMergeRequestDiffCommitUsers',
arguments: [10, 20]
......@@ -36,7 +36,7 @@ RSpec.describe Gitlab::BackgroundMigration::StealMigrateMergeRequestDiffCommitUs
expect(BackgroundMigrationWorker)
.to receive(:perform_in)
.with(5.minutes, 'StealMigrateMergeRequestDiffCommitUsers', [40, 50])
.with(5.minutes, 'StealMigrateMergeRequestDiffCommitUsers', [10, 20])
migration.schedule_next_job
end
......
# frozen_string_literal: true
require 'spec_helper'
require_migration! 'slice_merge_request_diff_commit_migrations'
RSpec.describe SliceMergeRequestDiffCommitMigrations, :migration do
let(:migration) { described_class.new }
describe '#up' do
context 'when there are no jobs to process' do
it 'does nothing' do
expect(migration).not_to receive(:migrate_in)
expect(Gitlab::Database::BackgroundMigrationJob).not_to receive(:create!)
migration.up
end
end
context 'when there are pending jobs' do
let!(:job1) do
Gitlab::Database::BackgroundMigrationJob.create!(
class_name: described_class::MIGRATION_CLASS,
arguments: [1, 10_001]
)
end
let!(:job2) do
Gitlab::Database::BackgroundMigrationJob.create!(
class_name: described_class::MIGRATION_CLASS,
arguments: [10_001, 20_001]
)
end
it 'marks the old jobs as finished' do
migration.up
job1.reload
job2.reload
expect(job1).to be_succeeded
expect(job2).to be_succeeded
end
it 'the jobs are slices into smaller ranges' do
migration.up
new_jobs = Gitlab::Database::BackgroundMigrationJob
.for_migration_class(described_class::MIGRATION_CLASS)
.pending
.to_a
expect(new_jobs.map(&:arguments)).to eq([
[1, 5_001],
[5_001, 10_001],
[10_001, 15_001],
[15_001, 20_001]
])
end
it 'schedules a background migration for the first job' do
expect(migration)
.to receive(:migrate_in)
.with(1.hour, described_class::STEAL_MIGRATION_CLASS, [1, 5_001])
migration.up
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment