Commit 5d9a1326 authored by Nick Thomas's avatar Nick Thomas

Use more-efficient indexing for the MergeRequestDiff storage migration

Specifying everything that's needed in the index allows us to pluck new
records with an index-only scan. The `files_count` encapsulates
everything we want to know in the `when: always` case.

The `when: outdated` case is probably also helped by this, but might
need some additional indexes to help out at scale. We aren't using it
so I haven't investigated this.
parent 55753de8
......@@ -51,14 +51,16 @@ class MergeRequestDiff < ApplicationRecord
scope :by_commit_sha, ->(sha) do
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
end
scope :has_diff_files, -> { where(id: MergeRequestDiffFile.select(:merge_request_diff_id)) }
scope :by_project_id, -> (project_id) do
joins(:merge_request).where(merge_requests: { target_project_id: project_id })
end
scope :recent, -> { order(id: :desc).limit(100) }
scope :files_in_database, -> { where(stored_externally: [false, nil]) }
scope :files_in_database, -> do
where(stored_externally: [false, nil]).where(arel_table[:files_count].gt(0))
end
scope :not_latest_diffs, -> do
merge_requests = MergeRequest.arel_table
......@@ -115,29 +117,28 @@ class MergeRequestDiff < ApplicationRecord
end
def ids_for_external_storage_migration_strategy_always(limit:)
has_diff_files.files_in_database.limit(limit).pluck(:id)
files_in_database.limit(limit).pluck(:id)
end
def ids_for_external_storage_migration_strategy_outdated(limit:)
# Outdated is too complex to be a single SQL query, so split into three
before = EXTERNAL_DIFF_CUTOFF.ago
potentials = has_diff_files.files_in_database
ids = potentials
ids = files_in_database
.old_merged_diffs(before)
.limit(limit)
.pluck(:id)
return ids if ids.size >= limit
ids += potentials
ids += files_in_database
.old_closed_diffs(before)
.limit(limit - ids.size)
.pluck(:id)
return ids if ids.size >= limit
ids + potentials
ids + files_in_database
.not_latest_diffs
.limit(limit - ids.size)
.pluck(:id)
......
---
title: Use more-efficient indexing for the MergeRequestDiff storage migration
merge_request: 39470
author:
type: performance
# frozen_string_literal: true
class AddNewExternalDiffMigrationIndex < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_merge_request_diffs_by_id_partial'
disable_ddl_transaction!
def up
add_concurrent_index(
:merge_request_diffs,
:id,
name: INDEX_NAME,
where: 'files_count > 0 AND ((NOT stored_externally) OR (stored_externally IS NULL))'
)
end
def down
remove_concurrent_index_by_name(:merge_request_diffs, INDEX_NAME)
end
end
# frozen_string_literal: true
class RemoveOldExternalDiffMigrationIndex < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
remove_concurrent_index_by_name(
:merge_request_diffs,
'index_merge_request_diffs_on_merge_request_id_and_id_partial'
)
end
def down
add_concurrent_index(
:merge_request_diffs,
[:merge_request_id, :id],
where: { stored_externally: [nil, false] }
)
end
end
12bb243862adf930fc68f2f7641ee7cc6170bfbcc3aae2b98bfa262dacfeba49
\ No newline at end of file
7c4fec044a278fa51fdd23320a372528c420e8a650a26770fccf0cabe91163c5
\ No newline at end of file
......@@ -19972,14 +19972,14 @@ CREATE INDEX index_merge_request_diff_commits_on_sha ON public.merge_request_dif
CREATE UNIQUE INDEX index_merge_request_diff_files_on_mr_diff_id_and_order ON public.merge_request_diff_files USING btree (merge_request_diff_id, relative_order);
CREATE INDEX index_merge_request_diffs_by_id_partial ON public.merge_request_diffs USING btree (id) WHERE ((files_count > 0) AND ((NOT stored_externally) OR (stored_externally IS NULL)));
CREATE INDEX index_merge_request_diffs_external_diff_store_is_null ON public.merge_request_diffs USING btree (id) WHERE (external_diff_store IS NULL);
CREATE INDEX index_merge_request_diffs_on_external_diff_store ON public.merge_request_diffs USING btree (external_diff_store);
CREATE INDEX index_merge_request_diffs_on_merge_request_id_and_id ON public.merge_request_diffs USING btree (merge_request_id, id);
CREATE INDEX index_merge_request_diffs_on_merge_request_id_and_id_partial ON public.merge_request_diffs USING btree (merge_request_id, id) WHERE ((NOT stored_externally) OR (stored_externally IS NULL));
CREATE INDEX index_merge_request_metrics_on_first_deployed_to_production_at ON public.merge_request_metrics USING btree (first_deployed_to_production_at);
CREATE INDEX index_merge_request_metrics_on_latest_closed_at ON public.merge_request_metrics USING btree (latest_closed_at) WHERE (latest_closed_at IS NOT NULL);
......
......@@ -103,6 +103,8 @@ RSpec.describe MergeRequestDiff do
it 'ignores diffs with 0 files' do
MergeRequestDiffFile.where(merge_request_diff_id: [closed_recently.id, merged_recently.id]).delete_all
closed_recently.update!(files_count: 0)
merged_recently.update!(files_count: 0)
is_expected.to contain_exactly(outdated.id, latest.id, closed.id, merged.id)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment