Commit 828f2ac4 authored by Toon Claes's avatar Toon Claes

Merge branch '227570-use-files-count-for-external-diff-migration' into 'master'

Use more-efficient indexing for the MergeRequestDiff storage migration

Closes #227570

See merge request gitlab-org/gitlab!39470
parents f2ce671b 5d9a1326
...@@ -51,14 +51,16 @@ class MergeRequestDiff < ApplicationRecord ...@@ -51,14 +51,16 @@ class MergeRequestDiff < ApplicationRecord
scope :by_commit_sha, ->(sha) do scope :by_commit_sha, ->(sha) do
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil) joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
end end
scope :has_diff_files, -> { where(id: MergeRequestDiffFile.select(:merge_request_diff_id)) }
scope :by_project_id, -> (project_id) do scope :by_project_id, -> (project_id) do
joins(:merge_request).where(merge_requests: { target_project_id: project_id }) joins(:merge_request).where(merge_requests: { target_project_id: project_id })
end end
scope :recent, -> { order(id: :desc).limit(100) } scope :recent, -> { order(id: :desc).limit(100) }
scope :files_in_database, -> { where(stored_externally: [false, nil]) }
scope :files_in_database, -> do
where(stored_externally: [false, nil]).where(arel_table[:files_count].gt(0))
end
scope :not_latest_diffs, -> do scope :not_latest_diffs, -> do
merge_requests = MergeRequest.arel_table merge_requests = MergeRequest.arel_table
...@@ -115,29 +117,28 @@ class MergeRequestDiff < ApplicationRecord ...@@ -115,29 +117,28 @@ class MergeRequestDiff < ApplicationRecord
end end
def ids_for_external_storage_migration_strategy_always(limit:) def ids_for_external_storage_migration_strategy_always(limit:)
has_diff_files.files_in_database.limit(limit).pluck(:id) files_in_database.limit(limit).pluck(:id)
end end
def ids_for_external_storage_migration_strategy_outdated(limit:) def ids_for_external_storage_migration_strategy_outdated(limit:)
# Outdated is too complex to be a single SQL query, so split into three # Outdated is too complex to be a single SQL query, so split into three
before = EXTERNAL_DIFF_CUTOFF.ago before = EXTERNAL_DIFF_CUTOFF.ago
potentials = has_diff_files.files_in_database
ids = potentials ids = files_in_database
.old_merged_diffs(before) .old_merged_diffs(before)
.limit(limit) .limit(limit)
.pluck(:id) .pluck(:id)
return ids if ids.size >= limit return ids if ids.size >= limit
ids += potentials ids += files_in_database
.old_closed_diffs(before) .old_closed_diffs(before)
.limit(limit - ids.size) .limit(limit - ids.size)
.pluck(:id) .pluck(:id)
return ids if ids.size >= limit return ids if ids.size >= limit
ids + potentials ids + files_in_database
.not_latest_diffs .not_latest_diffs
.limit(limit - ids.size) .limit(limit - ids.size)
.pluck(:id) .pluck(:id)
......
---
title: Use more-efficient indexing for the MergeRequestDiff storage migration
merge_request: 39470
author:
type: performance
# frozen_string_literal: true
class AddNewExternalDiffMigrationIndex < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_merge_request_diffs_by_id_partial'
disable_ddl_transaction!
def up
add_concurrent_index(
:merge_request_diffs,
:id,
name: INDEX_NAME,
where: 'files_count > 0 AND ((NOT stored_externally) OR (stored_externally IS NULL))'
)
end
def down
remove_concurrent_index_by_name(:merge_request_diffs, INDEX_NAME)
end
end
# frozen_string_literal: true
class RemoveOldExternalDiffMigrationIndex < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
remove_concurrent_index_by_name(
:merge_request_diffs,
'index_merge_request_diffs_on_merge_request_id_and_id_partial'
)
end
def down
add_concurrent_index(
:merge_request_diffs,
[:merge_request_id, :id],
where: { stored_externally: [nil, false] }
)
end
end
12bb243862adf930fc68f2f7641ee7cc6170bfbcc3aae2b98bfa262dacfeba49
\ No newline at end of file
7c4fec044a278fa51fdd23320a372528c420e8a650a26770fccf0cabe91163c5
\ No newline at end of file
...@@ -20003,14 +20003,14 @@ CREATE INDEX index_merge_request_diff_commits_on_sha ON public.merge_request_dif ...@@ -20003,14 +20003,14 @@ CREATE INDEX index_merge_request_diff_commits_on_sha ON public.merge_request_dif
CREATE UNIQUE INDEX index_merge_request_diff_files_on_mr_diff_id_and_order ON public.merge_request_diff_files USING btree (merge_request_diff_id, relative_order); CREATE UNIQUE INDEX index_merge_request_diff_files_on_mr_diff_id_and_order ON public.merge_request_diff_files USING btree (merge_request_diff_id, relative_order);
CREATE INDEX index_merge_request_diffs_by_id_partial ON public.merge_request_diffs USING btree (id) WHERE ((files_count > 0) AND ((NOT stored_externally) OR (stored_externally IS NULL)));
CREATE INDEX index_merge_request_diffs_external_diff_store_is_null ON public.merge_request_diffs USING btree (id) WHERE (external_diff_store IS NULL); CREATE INDEX index_merge_request_diffs_external_diff_store_is_null ON public.merge_request_diffs USING btree (id) WHERE (external_diff_store IS NULL);
CREATE INDEX index_merge_request_diffs_on_external_diff_store ON public.merge_request_diffs USING btree (external_diff_store); CREATE INDEX index_merge_request_diffs_on_external_diff_store ON public.merge_request_diffs USING btree (external_diff_store);
CREATE INDEX index_merge_request_diffs_on_merge_request_id_and_id ON public.merge_request_diffs USING btree (merge_request_id, id); CREATE INDEX index_merge_request_diffs_on_merge_request_id_and_id ON public.merge_request_diffs USING btree (merge_request_id, id);
CREATE INDEX index_merge_request_diffs_on_merge_request_id_and_id_partial ON public.merge_request_diffs USING btree (merge_request_id, id) WHERE ((NOT stored_externally) OR (stored_externally IS NULL));
CREATE INDEX index_merge_request_metrics_on_first_deployed_to_production_at ON public.merge_request_metrics USING btree (first_deployed_to_production_at); CREATE INDEX index_merge_request_metrics_on_first_deployed_to_production_at ON public.merge_request_metrics USING btree (first_deployed_to_production_at);
CREATE INDEX index_merge_request_metrics_on_latest_closed_at ON public.merge_request_metrics USING btree (latest_closed_at) WHERE (latest_closed_at IS NOT NULL); CREATE INDEX index_merge_request_metrics_on_latest_closed_at ON public.merge_request_metrics USING btree (latest_closed_at) WHERE (latest_closed_at IS NOT NULL);
......
...@@ -103,6 +103,8 @@ RSpec.describe MergeRequestDiff do ...@@ -103,6 +103,8 @@ RSpec.describe MergeRequestDiff do
it 'ignores diffs with 0 files' do it 'ignores diffs with 0 files' do
MergeRequestDiffFile.where(merge_request_diff_id: [closed_recently.id, merged_recently.id]).delete_all MergeRequestDiffFile.where(merge_request_diff_id: [closed_recently.id, merged_recently.id]).delete_all
closed_recently.update!(files_count: 0)
merged_recently.update!(files_count: 0)
is_expected.to contain_exactly(outdated.id, latest.id, closed.id, merged.id) is_expected.to contain_exactly(outdated.id, latest.id, closed.id, merged.id)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment