Commit 1507ff8a authored by Sean McGivern's avatar Sean McGivern

Make MR diff background migration less likely to time out

This version does not use transactions, but individual statements. As we have
unique constraints on the target tables for the inserts, we can just ignore
uniqueness violations there (as long as we always insert the same batch size, in
the same order).

This means the spec now must use truncation, not a transaction, as the
uniqueness violation means that the whole transaction for that spec would be
invalid, which isn't what we'd want. In real-world use, this isn't run in a
transaction anyway.

This commit also wraps unhandled exceptions, for easier finding in Sentry, and
logs with a consistent format, for easier searching.
parent 91719415
...@@ -3,6 +3,12 @@ module Gitlab ...@@ -3,6 +3,12 @@ module Gitlab
class DeserializeMergeRequestDiffsAndCommits class DeserializeMergeRequestDiffsAndCommits
attr_reader :diff_ids, :commit_rows, :file_rows attr_reader :diff_ids, :commit_rows, :file_rows
class Error < StandardError
def backtrace
cause.backtrace
end
end
class MergeRequestDiff < ActiveRecord::Base class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs' self.table_name = 'merge_request_diffs'
end end
...@@ -34,6 +40,10 @@ module Gitlab ...@@ -34,6 +40,10 @@ module Gitlab
end end
flush_buffers! flush_buffers!
rescue => e
Rails.logger.info("#{self.class.name}: failed for IDs #{merge_request_diffs.map(&:id)} with #{e.class.name}")
raise Error.new(e.inspect)
end end
private private
...@@ -46,22 +56,28 @@ module Gitlab ...@@ -46,22 +56,28 @@ module Gitlab
def flush_buffers! def flush_buffers!
if diff_ids.any? if diff_ids.any?
MergeRequestDiff.transaction do commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice|
commit_rows.each_slice(BUFFER_ROWS).each do |commit_rows_slice| bulk_insert('merge_request_diff_commits', commit_rows_slice)
Gitlab::Database.bulk_insert('merge_request_diff_commits', commit_rows_slice) end
end
file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
Gitlab::Database.bulk_insert('merge_request_diff_files', file_rows_slice)
end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil) file_rows.each_slice(DIFF_FILE_BUFFER_ROWS).each do |file_rows_slice|
bulk_insert('merge_request_diff_files', file_rows_slice)
end end
MergeRequestDiff.where(id: diff_ids).update_all(st_commits: nil, st_diffs: nil)
end end
reset_buffers! reset_buffers!
end end
def bulk_insert(table, rows)
Gitlab::Database.bulk_insert(table, rows)
rescue ActiveRecord::RecordNotUnique
ids = rows.map { |row| row[:merge_request_diff_id] }.uniq.sort
Rails.logger.info("#{self.class.name}: rows inserted twice for IDs #{ids}")
end
def single_diff_rows(merge_request_diff) def single_diff_rows(merge_request_diff)
sha_attribute = Gitlab::Database::ShaAttribute.new sha_attribute = Gitlab::Database::ShaAttribute.new
commits = YAML.load(merge_request_diff.st_commits) rescue [] commits = YAML.load(merge_request_diff.st_commits) rescue []
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits, :truncate do
describe '#perform' do describe '#perform' do
set(:merge_request) { create(:merge_request) } let(:merge_request) { create(:merge_request) }
set(:merge_request_diff) { merge_request.merge_request_diff } let(:merge_request_diff) { merge_request.merge_request_diff }
let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) } let(:updated_merge_request_diff) { MergeRequestDiff.find(merge_request_diff.id) }
def diffs_to_hashes(diffs) def diffs_to_hashes(diffs)
...@@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -84,12 +84,6 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
allow(Gitlab::Database).to receive(:bulk_insert).and_call_original allow(Gitlab::Database).to receive(:bulk_insert).and_call_original
end end
it 'updates and continues' do
expect(described_class::MergeRequestDiff).to receive(:transaction).twice
subject.perform(start_id, stop_id)
end
it 'inserts commit rows in chunks of BUFFER_ROWS' do it 'inserts commit rows in chunks of BUFFER_ROWS' do
# There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9. # There are 29 commits in each diff, so we should have slices of 20 + 9 + 20 + 9.
stub_const("#{described_class}::BUFFER_ROWS", 20) stub_const("#{described_class}::BUFFER_ROWS", 20)
...@@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do ...@@ -119,27 +113,87 @@ describe Gitlab::BackgroundMigration::DeserializeMergeRequestDiffsAndCommits do
context 'when BUFFER_ROWS is not exceeded' do context 'when BUFFER_ROWS is not exceeded' do
it 'only updates once' do it 'only updates once' do
expect(described_class::MergeRequestDiff).to receive(:transaction).once expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_commits', anything)
.once
.and_call_original
expect(Gitlab::Database).to receive(:bulk_insert)
.with('merge_request_diff_files', anything)
.once
.and_call_original
subject.perform(start_id, stop_id) subject.perform(start_id, stop_id)
end end
end end
end
context 'when the merge request diff update fails' do context 'when some rows were already inserted due to a previous failure' do
before do before do
allow(described_class::MergeRequestDiff) subject.perform(start_id, stop_id)
.to receive(:update_all).and_raise(ActiveRecord::Rollback)
end
it 'does not add any diff commits' do convert_to_yaml(start_id, merge_request_diff.commits, diffs_to_hashes(merge_request_diff.merge_request_diff_files))
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } convert_to_yaml(stop_id, updated_merge_request_diff.commits, diffs_to_hashes(updated_merge_request_diff.merge_request_diff_files))
.not_to change { MergeRequestDiffCommit.count } end
it 'does not raise' do
expect { subject.perform(start_id, stop_id) }.not_to raise_exception
end
it 'logs a message' do
expect(Rails.logger).to receive(:info)
.with(
a_string_matching(described_class.name).and(matching([start_id, stop_id].inspect))
)
.twice
subject.perform(start_id, stop_id)
end
it 'ends up with the correct rows' do
expect(updated_merge_request_diff.commits.count).to eq(29)
expect(updated_merge_request_diff.raw_diffs.count).to eq(20)
end
end end
it 'does not add any diff files' do context 'when the merge request diff update fails' do
expect { subject.perform(merge_request_diff.id, merge_request_diff.id) } let(:exception) { ActiveRecord::RecordNotFound }
.not_to change { MergeRequestDiffFile.count }
let(:perform_ignoring_exceptions) do
begin
subject.perform(start_id, stop_id)
rescue described_class::Error
end
end
before do
allow_any_instance_of(described_class::MergeRequestDiff::ActiveRecord_Relation)
.to receive(:update_all).and_raise(exception)
end
it 'raises an error' do
expect { subject.perform(start_id, stop_id) }
.to raise_exception(described_class::Error)
end
it 'logs the error' do
expect(Rails.logger).to receive(:info).with(
a_string_matching(described_class.name)
.and(matching([start_id, stop_id].inspect))
.and(matching(exception.name))
)
perform_ignoring_exceptions
end
it 'still adds diff commits' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffCommit.count }
end
it 'still adds diff files' do
expect { perform_ignoring_exceptions }
.to change { MergeRequestDiffFile.count }
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment