Commit 5caf3e49 authored by Nick Thomas's avatar Nick Thomas Committed by Adam Hegyi

Start tracking how many files are in a merge request diff

The `merge_request_diffs` table is very large, and joining against the
`merge_request_diff_files` table to learn this is too slow to be useful
parent f2b05afe
......@@ -149,6 +149,7 @@ class MergeRequestDiff < ApplicationRecord
# All diff information is collected from repository after object is created.
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
after_create :set_count_columns
after_create_commit :set_as_latest_diff, unless: :importing?
after_save :update_external_diff_store
......@@ -642,6 +643,7 @@ class MergeRequestDiff < ApplicationRecord
rows = build_merge_request_diff_files(diff_collection)
create_merge_request_diff_files(rows)
self.class.uncached { merge_request_diff_files.reset }
end
# Set our state to 'overflow' to make the #empty? and #collected?
......@@ -657,12 +659,14 @@ class MergeRequestDiff < ApplicationRecord
def save_commits
MergeRequestDiffCommit.create_bulk(self.id, compare.commits.reverse)
self.class.uncached { merge_request_diff_commits.reset }
end
# merge_request_diff_commits.reset is preferred way to reload associated
# objects but it returns cached result for some reason in this case
# we can circumvent that by specifying that we need an uncached reload
commits = self.class.uncached { merge_request_diff_commits.reset }
self.commits_count = commits.size
def set_count_columns
update_columns(
commits_count: merge_request_diff_commits.size,
files_count: merge_request_diff_files.size
)
end
def repository
......
---
title: Add a cache column for the number of changed files in a merge request diff
merge_request: 38936
author:
type: changed
# frozen_string_literal: true
class AddFileCountToMergeRequestDiffs < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false
disable_ddl_transaction!
def up
with_lock_retries do
add_column :merge_request_diffs, :files_count, :smallint
end
end
def down
with_lock_retries do
remove_column :merge_request_diffs, :files_count, :smallint
end
end
end
# frozen_string_literal: true
class BackfillMergeRequestDiffsFilesCounts < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
# There are ~72 million records on GitLab.com at time of writing, so go fast
BATCH_SIZE = 10_000
DELAY_INTERVAL = 2.minutes.to_i
MIGRATION = 'SetMergeRequestDiffFilesCount'
disable_ddl_transaction!
class MergeRequestDiff < ActiveRecord::Base
include EachBatch
self.table_name = 'merge_request_diffs'
end
def up
queue_background_migration_jobs_by_range_at_intervals(
MergeRequestDiff, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE
)
end
def down
# no-op
end
end
fdb18abe24003a75c76019fccf5ca0aa65a900bc2a86eb578012bc5a032730cf
\ No newline at end of file
5152e094538b498fbe28f3fd11f6c1b9a9c77dc89ac0079cb39a6090a567db68
\ No newline at end of file
......@@ -13068,7 +13068,8 @@ CREATE TABLE public.merge_request_diffs (
commits_count integer,
external_diff character varying,
external_diff_store integer DEFAULT 1,
stored_externally boolean
stored_externally boolean,
files_count smallint
);
CREATE SEQUENCE public.merge_request_diffs_id_seq
......
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Sets the MergeRequestDiff#files_count value for old rows
class SetMergeRequestDiffFilesCount
COUNT_SUBQUERY = <<~SQL
files_count = (
SELECT count(*)
FROM merge_request_diff_files
WHERE merge_request_diff_files.merge_request_diff_id = merge_request_diffs.id
)
SQL
class MergeRequestDiff < ActiveRecord::Base # rubocop:disable Style/Documentation
include EachBatch
self.table_name = 'merge_request_diffs'
end
def perform(start_id, end_id)
MergeRequestDiff.where(id: start_id..end_id).each_batch do |relation|
relation.update_all(COUNT_SUBQUERY)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::SetMergeRequestDiffFilesCount, schema: 20200807152315 do
let(:merge_request_diff_files) { table(:merge_request_diff_files) }
let(:merge_request_diffs) { table(:merge_request_diffs) }
let(:merge_requests) { table(:merge_requests) }
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:namespace) { namespaces.create!(name: 'foo', path: 'foo') }
let(:project) { projects.create!(namespace_id: namespace.id) }
let(:merge_request) { merge_requests.create!(source_branch: 'x', target_branch: 'master', target_project_id: project.id) }
it 'fills the files_count column' do
empty_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
filled_diff = merge_request_diffs.create!(merge_request_id: merge_request.id)
3.times do |n|
merge_request_diff_files.create!(
merge_request_diff_id: filled_diff.id,
relative_order: n,
new_file: false,
renamed_file: false,
deleted_file: false,
too_large: false,
a_mode: '',
b_mode: '',
old_path: '',
new_path: ''
)
end
described_class.new.perform(empty_diff.id, filled_diff.id)
expect(empty_diff.reload.files_count).to eq(0)
expect(filled_diff.reload.files_count).to eq(3)
end
end
......@@ -217,6 +217,7 @@ MergeRequestDiff:
- head_commit_sha
- start_commit_sha
- commits_count
- files_count
MergeRequestDiffCommit:
- merge_request_diff_id
- relative_order
......
......@@ -672,6 +672,12 @@ RSpec.describe MergeRequestDiff do
end
end
describe '#files_count' do
it 'returns number of diff files' do
expect(diff_with_commits.files_count).to eq(diff_with_commits.merge_request_diff_files.count)
end
end
describe '#first_commit' do
it 'returns first commit' do
expect(diff_with_commits.first_commit.sha).to eq(diff_with_commits.merge_request_diff_commits.last.sha)
......@@ -721,10 +727,12 @@ RSpec.describe MergeRequestDiff do
describe '#modified_paths' do
subject do
diff = create(:merge_request_diff)
create(:merge_request_diff).tap do |diff|
create(:merge_request_diff_file, :new_file, merge_request_diff: diff)
create(:merge_request_diff_file, :renamed_file, merge_request_diff: diff)
diff
diff.merge_request_diff_files.reset
end
end
it 'returns affected file paths' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment