Commit f85ceac9 authored by Terri Chu's avatar Terri Chu

Reduce N+1 queries for Search API commits scope

Add preloading for commits and associated projects
returned in Elastic Search results.

Two fixes were added:

1. New scope in Project to load required data
for the commits scope. The scope is passed down
through the search_results method and used where
Project preloading was already happening.

2. Use CommitCollection to preload last_pipeline
and status for each commit.
parent 39694dce
......@@ -320,7 +320,9 @@ module Ci
# ref - The ref to scope the data to (e.g. "master"). If the ref is not
# given we simply get the latest pipelines for the commits, regardless
# of what refs the pipelines belong to.
def self.latest_pipeline_per_commit(commits, ref = nil)
# project_key - Support `commits` from different projects, returns results
# keyed by `hash[project_id][commit_id]`
def self.latest_pipeline_per_commit(commits, ref = nil, project_key: false)
p1 = arel_table
p2 = arel_table.alias
......@@ -341,7 +343,13 @@ module Ci
relation = relation.where(ref: ref) if ref
relation.each_with_object({}) do |pipeline, hash|
hash[pipeline.sha] = pipeline
commits = if project_key
hash[pipeline.project_id] ||= {}
else
hash
end
commits[pipeline.sha] = pipeline
end
end
......
# frozen_string_literal: true
# A collection of Commit instances for a specific container and Git reference.
# A collection of Commit instances for a specific Git reference.
class CommitCollection
include Enumerable
include Gitlab::Utils::StrongMemoize
attr_reader :container, :ref, :commits
delegate :repository, to: :container, allow_nil: true
delegate :project, to: :repository, allow_nil: true
# container - The object the commits belong to.
# container - The object the commits belong to (each commit project will be used if not provided).
# commits - The Commit instances to store.
# ref - The name of the ref (e.g. "master").
def initialize(container, commits, ref = nil)
......@@ -42,12 +39,13 @@ class CommitCollection
# Setting the pipeline for each commit ahead of time removes the need for running
# a query for every commit we're displaying.
def with_latest_pipeline(ref = nil)
return self unless project
pipelines = project.ci_pipelines.latest_pipeline_per_commit(map(&:id), ref)
# since commit ids are not unique across all projects, use project_key = true to get commits by project
pipelines = ::Ci::Pipeline.ci_sources.latest_pipeline_per_commit(map(&:id), ref, project_key: true)
# set the pipeline for each commit by project_id and commit for the latest pipeline for ref
each do |commit|
commit.set_latest_pipeline_for_ref(ref, pipelines[commit.id])
project_id = container&.id || commit.project_id
commit.set_latest_pipeline_for_ref(ref, pipelines.dig(project_id, commit.id))
end
self
......@@ -64,16 +62,19 @@ class CommitCollection
# Batch load any commits that are not backed by full gitaly data, and
# replace them in the collection.
def enrich!
return self if fully_enriched?
# Batch load full Commits from the repository
# and map to a Hash of id => Commit
# A container is needed in order to fetch data from gitaly. Containers
# can be absent from commits in certain rare situations (like when
# viewing a MR of a deleted fork). In these cases, assume that the
# enriched data is not needed.
return self if container.blank? || fully_enriched?
# Batch load full Commits from the repository
# and map to a Hash of id => Commit
replacements = Hash[unenriched.map do |c|
[c.id, Commit.lazy(container, c.id)]
commits_to_enrich = unenriched.select { |c| container.present? || c.container.present? }
replacements = Hash[commits_to_enrich.map do |c|
commit_container = container || c.container
[c.id, Commit.lazy(commit_container, c.id)]
end.compact]
# Replace the commits, keeping the same order
......
......@@ -520,6 +520,10 @@ class Project < ApplicationRecord
group: :ip_restrictions, namespace: [:route, :owner])
}
# Preloads the associations required to serialize commits through the API
# (the commit entity reads project_feature, the project route, and the
# namespace's route/owner), so rendering N commits from N projects does not
# issue one query per project.
scope :with_api_commit_entity_associations, -> {
  preload(:project_feature, :route, namespace: [:route, :owner])
}
enum auto_cancel_pending_pipelines: { disabled: 0, enabled: 1 }
chronic_duration_attr :build_timeout_human_readable, :build_timeout,
......
---
title: Fix N+1 queries for Elastic Search commits scope.
merge_request: 32892
author:
type: performance
......@@ -181,7 +181,7 @@ module Elastic
# Wrap returned results into GitLab model objects and paginate
#
# @return [Kaminari::PaginatableArray]
def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, &blk)
def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, preload_method: nil, &blk)
response = elastic_search(
query,
type: type,
......@@ -190,17 +190,19 @@ module Elastic
options: options
)[type.pluralize.to_sym][:results]
items, total_count = yield_each_search_result(response, type, &blk)
items, total_count = yield_each_search_result(response, type, preload_method, &blk)
# Before "map" we had a paginated array so we need to recover it
offset = per * ((page || 1) - 1)
Kaminari.paginate_array(items, total_count: total_count, limit: per, offset: offset)
end
def yield_each_search_result(response, type)
def yield_each_search_result(response, type, preload_method)
# Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map { |result| project_id_for_commit_or_blob(result, type) }.uniq
projects = Project.with_route.id_in(project_ids).index_by(&:id)
projects = Project.with_route.id_in(project_ids)
projects = projects.public_send(preload_method) if preload_method # rubocop:disable GitlabSecurity/PublicSend
projects = projects.index_by(&:id)
total_count = response.total_count
items = response.map do |result|
......
......@@ -10,8 +10,8 @@ module Elastic
end
# @return [Kaminari::PaginatableArray]
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {})
elastic_search_and_wrap(query, type: 'commit', page: page, per: per_page, options: options) do |result, project|
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}, preload_method: nil)
elastic_search_and_wrap(query, type: 'commit', page: page, per: per_page, options: options, preload_method: preload_method) do |result, project|
raw_commit = Gitlab::Git::Commit.new(
project.repository.raw,
prepare_commit(result['_source']['commit']),
......
......@@ -8,7 +8,7 @@ module Elastic
delegate :project, to: :target
delegate :id, to: :project, prefix: true
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20)
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, preload_method: nil)
response = elastic_search(query, type: 'commit', page: page, per: per_page)[:commits][:results]
commits = response.map do |result|
......
......@@ -61,7 +61,7 @@ module Gitlab
Note.elastic_search(query, options: opt)
end
def commits(page: 1, per_page: DEFAULT_PER_PAGE)
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project)
if project.empty_repo? || query.blank?
......@@ -72,7 +72,8 @@ module Gitlab
project.repository.find_commits_by_message_with_elastic(
query,
page: (page || 1).to_i,
per_page: per_page
per_page: per_page,
preload_method: preload_method
)
else
offset = per_page * ((page || 1) - 1)
......
......@@ -43,7 +43,7 @@ module Gitlab
when 'wiki_blobs'
wiki_blobs(page: page, per_page: per_page)
when 'commits'
commits(page: page, per_page: per_page)
commits(page: page, per_page: per_page, preload_method: preload_method)
when 'users'
users.page(page).per(per_page)
else
......@@ -270,7 +270,7 @@ module Gitlab
# hitting ES twice for any page that's not page 1, and that's something we want to avoid.
#
# It is safe to memoize the page we get here because this method is _always_ called before `#commits_count`
def commits(page: 1, per_page: DEFAULT_PER_PAGE)
def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
return Kaminari.paginate_array([]) if query.blank?
strong_memoize(:commits) do
......@@ -282,7 +282,8 @@ module Gitlab
query,
page: (page || 1).to_i,
per_page: per_page,
options: options
options: options,
preload_method: preload_method
)
end
end
......
......@@ -102,7 +102,7 @@ RSpec.describe API::Search do
it_behaves_like 'pagination', scope: 'wiki_blobs'
end
context 'for commits scope', :sidekiq_might_not_need_inline do
context 'for commits scope', :sidekiq_inline do
before do
project.repository.index_commits_and_blobs
ensure_elasticsearch_index!
......@@ -113,6 +113,20 @@ RSpec.describe API::Search do
it_behaves_like 'response is correct', schema: 'public_api/v4/commits_details', size: 2
it_behaves_like 'pagination', scope: 'commits'
it 'avoids N+1 queries' do
# Baseline: query count for a commits search whose results all belong to
# the one already-indexed project.
control = ActiveRecord::QueryRecorder.new { get api(endpoint, user), params: { scope: 'commits', search: 'folder' } }
project_2 = create(:project, :public, :repository, :wiki_repo, name: 'awesome project 2')
project_3 = create(:project, :public, :repository, :wiki_repo, name: 'awesome project 3')
project_2.repository.index_commits_and_blobs
project_3.repository.index_commits_and_blobs
ensure_elasticsearch_index!
# Some N+1 queries still exist
# — hence the fixed "+ 2" allowance: adding two more projects to the result
# set must not grow the query count beyond the baseline plus that slack.
expect { get api(endpoint, user), params: { scope: 'commits', search: 'folder' } }.not_to exceed_query_limit(control.count + 2)
end
end
context 'for blobs scope', :sidekiq_might_not_need_inline do
......
......@@ -8,14 +8,15 @@ module API
expose :project_id
expose :last_pipeline do |commit, options|
pipeline = commit.last_pipeline if can_read_pipeline?
pipeline = commit.latest_pipeline if can_read_pipeline?
::API::Entities::PipelineBasic.represent(pipeline, options)
end
private
def can_read_pipeline?
Ability.allowed?(options[:current_user], :read_pipeline, object.last_pipeline)
Ability.allowed?(options[:current_user], :read_pipeline, object.latest_pipeline)
end
end
end
......
......@@ -24,7 +24,8 @@ module API
merge_requests: :with_api_entity_associations,
projects: :with_api_entity_associations,
issues: :with_api_entity_associations,
milestones: :with_api_entity_associations
milestones: :with_api_entity_associations,
commits: :with_api_commit_entity_associations
}.freeze
def search(additional_params = {})
......@@ -38,6 +39,9 @@ module API
results = SearchService.new(current_user, search_params).search_objects(preload_method)
# preload commit data
results = CommitCollection.new(nil, results).with_latest_pipeline if params[:scope].to_sym == :commits
Gitlab::UsageDataCounters::SearchCounter.count(:all_searches)
paginate(results)
......
......@@ -1923,6 +1923,37 @@ RSpec.describe Ci::Pipeline, :mailer do
end
end
# Two projects each have a pipeline for the same sha ('456'); verifies how
# latest_pipeline_per_commit disambiguates (or doesn't) across projects.
context 'when there are two pipelines for a ref, sha across multiple projects' do
let(:project_2) { create(:project) }
# A second pipeline with the same sha as project's commit_456_ref_test,
# but belonging to project_2.
let!(:commit_456_project_2_ref_test) do
create(
:ci_empty_pipeline,
status: 'success',
ref: 'test',
sha: '456',
project: project_2
)
end
context 'when project_key is false' do
it 'returns the latest pipeline' do
result = described_class.latest_pipeline_per_commit(%w[456])
# Flat hash keyed by sha only: project_2's pipeline wins for the shared
# sha — presumably last-writer-wins over the iteration order; this is
# exactly the cross-project ambiguity project_key: true exists to avoid.
expect(result).to match('456' => commit_456_project_2_ref_test)
end
end
context 'when project_key is true' do
it 'returns the latest pipeline per project' do
result = described_class.latest_pipeline_per_commit(%w[456], project_key: true)
# Nested hash[project_id][sha]: both projects' pipelines are retained.
expect(result[project.id]).to match('456' => commit_456_ref_test)
expect(result[project_2.id]).to match('456' => commit_456_project_2_ref_test)
end
end
end
context 'with a ref' do
it 'only includes the pipelines for the given ref' do
result = described_class.latest_pipeline_per_commit(%w[123 456], 'master')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment