Commit 472be7fe authored by Sean McGivern's avatar Sean McGivern

Reschedule merge request diff background migration

The first attempt didn't migrate all rows on GitLab.com, due to a couple of
issues:

1. Some rows in merge_request_diffs had truly huge numbers of commits and diffs
   serialised - one in particular had 26,000 commits!
2. The jobs were sometimes on Sidekiq hosts with frequent OOM errors, leading to
   the job being lost.

The previous commit adds more logging, and a more robust insertion method. This
commit reschedules the jobs, with a generous pause between each.
parent 1507ff8a
---
title: Reschedule merge request diff background migrations to catch failures from
9.5 run
merge_request:
author:
type: fixed
class ScheduleMergeRequestDiffMigrationsTakeTwo < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 500
MIGRATION = 'DeserializeMergeRequestDiffsAndCommits'
DELAY_INTERVAL = 10.minutes
disable_ddl_transaction!
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
include ::EachBatch
default_scope { where('st_commits IS NOT NULL OR st_diffs IS NOT NULL') }
end
# By this point, we assume ScheduleMergeRequestDiffMigrations - the first
# version of this - has already run. On GitLab.com, we have ~220k un-migrated
# rows, but these rows will, in general, take a long time.
#
# With a gap of 10 minutes per batch, and 500 rows per batch, these migrations
# are scheduled over 220_000 / 500 / 6 ~= 74 hours, which is a little over
# three days.
def up
queue_background_migration_jobs_by_range_at_intervals(MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
end
def down
end
end
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170926150348_schedule_merge_request_diff_migrations_take_two')
describe ScheduleMergeRequestDiffMigrationsTakeTwo, :migration, :sidekiq do
matcher :be_scheduled_migration do |time, *expected|
match do |migration|
BackgroundMigrationWorker.jobs.any? do |job|
job['args'] == [migration, expected] &&
job['at'].to_i == time.to_i
end
end
failure_message do |migration|
"Migration `#{migration}` with args `#{expected.inspect}` not scheduled!"
end
end
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:projects) { table(:projects) }
before do
stub_const("#{described_class.name}::BATCH_SIZE", 1)
projects.create!(id: 1, name: 'gitlab', path: 'gitlab')
merge_requests.create!(id: 1, target_project_id: 1, source_project_id: 1, target_branch: 'feature', source_branch: 'master')
merge_request_diffs.create!(id: 1, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: nil)
merge_request_diffs.create!(id: 2, merge_request_id: 1, st_commits: nil, st_diffs: YAML.dump([]))
merge_request_diffs.create!(id: 3, merge_request_id: 1, st_commits: nil, st_diffs: nil)
merge_request_diffs.create!(id: 4, merge_request_id: 1, st_commits: YAML.dump([]), st_diffs: YAML.dump([]))
end
it 'correctly schedules background migrations' do
Sidekiq::Testing.fake! do
Timecop.freeze do
migrate!
expect(described_class::MIGRATION).to be_scheduled_migration(10.minutes.from_now, 1, 1)
expect(described_class::MIGRATION).to be_scheduled_migration(20.minutes.from_now, 2, 2)
expect(described_class::MIGRATION).to be_scheduled_migration(30.minutes.from_now, 4, 4)
expect(BackgroundMigrationWorker.jobs.size).to eq 3
end
end
end
it 'migrates the data' do
Sidekiq::Testing.inline! do
non_empty = 'st_commits IS NOT NULL OR st_diffs IS NOT NULL'
expect(merge_request_diffs.where(non_empty).count).to eq 3
migrate!
expect(merge_request_diffs.where(non_empty).count).to eq 0
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment