Commit f85ceac9 authored by Terri Chu's avatar Terri Chu

Reduce N+1 queries for Search API commits scope

Add preloading for commits and associated projects
returned in Elastic Search results.

Two fixes were added:

1. New scope in Project to load required data
for the commits scope. The scope is passed down
through the search_results method and used where
Project preloading was already happening.

2. Use CommitCollection to preload last_pipeline
and status for each commit.
parent 39694dce
...@@ -320,7 +320,9 @@ module Ci ...@@ -320,7 +320,9 @@ module Ci
# ref - The ref to scope the data to (e.g. "master"). If the ref is not # ref - The ref to scope the data to (e.g. "master"). If the ref is not
# given we simply get the latest pipelines for the commits, regardless # given we simply get the latest pipelines for the commits, regardless
# of what refs the pipelines belong to. # of what refs the pipelines belong to.
def self.latest_pipeline_per_commit(commits, ref = nil) # project_key - Support `commits` from different projects, returns results
# keyed by `hash[project_id][commit_id]`
def self.latest_pipeline_per_commit(commits, ref = nil, project_key: false)
p1 = arel_table p1 = arel_table
p2 = arel_table.alias p2 = arel_table.alias
...@@ -341,7 +343,13 @@ module Ci ...@@ -341,7 +343,13 @@ module Ci
relation = relation.where(ref: ref) if ref relation = relation.where(ref: ref) if ref
relation.each_with_object({}) do |pipeline, hash| relation.each_with_object({}) do |pipeline, hash|
hash[pipeline.sha] = pipeline commits = if project_key
hash[pipeline.project_id] ||= {}
else
hash
end
commits[pipeline.sha] = pipeline
end end
end end
......
# frozen_string_literal: true # frozen_string_literal: true
# A collection of Commit instances for a specific container and Git reference. # A collection of Commit instances for a specific Git reference.
class CommitCollection class CommitCollection
include Enumerable include Enumerable
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
attr_reader :container, :ref, :commits attr_reader :container, :ref, :commits
delegate :repository, to: :container, allow_nil: true # container - The object the commits belong to (each commit project will be used if not provided).
delegate :project, to: :repository, allow_nil: true
# container - The object the commits belong to.
# commits - The Commit instances to store. # commits - The Commit instances to store.
# ref - The name of the ref (e.g. "master"). # ref - The name of the ref (e.g. "master").
def initialize(container, commits, ref = nil) def initialize(container, commits, ref = nil)
...@@ -42,12 +39,13 @@ class CommitCollection ...@@ -42,12 +39,13 @@ class CommitCollection
# Setting the pipeline for each commit ahead of time removes the need for running # Setting the pipeline for each commit ahead of time removes the need for running
# a query for every commit we're displaying. # a query for every commit we're displaying.
def with_latest_pipeline(ref = nil) def with_latest_pipeline(ref = nil)
return self unless project # since commit ids are not unique across all projects, use project_key = true to get commits by project
pipelines = ::Ci::Pipeline.ci_sources.latest_pipeline_per_commit(map(&:id), ref, project_key: true)
pipelines = project.ci_pipelines.latest_pipeline_per_commit(map(&:id), ref)
# set the pipeline for each commit by project_id and commit for the latest pipeline for ref
each do |commit| each do |commit|
commit.set_latest_pipeline_for_ref(ref, pipelines[commit.id]) project_id = container&.id || commit.project_id
commit.set_latest_pipeline_for_ref(ref, pipelines.dig(project_id, commit.id))
end end
self self
...@@ -64,16 +62,19 @@ class CommitCollection ...@@ -64,16 +62,19 @@ class CommitCollection
# Batch load any commits that are not backed by full gitaly data, and # Batch load any commits that are not backed by full gitaly data, and
# replace them in the collection. # replace them in the collection.
def enrich! def enrich!
return self if fully_enriched?
# Batch load full Commits from the repository
# and map to a Hash of id => Commit
# A container is needed in order to fetch data from gitaly. Containers # A container is needed in order to fetch data from gitaly. Containers
# can be absent from commits in certain rare situations (like when # can be absent from commits in certain rare situations (like when
# viewing a MR of a deleted fork). In these cases, assume that the # viewing a MR of a deleted fork). In these cases, assume that the
# enriched data is not needed. # enriched data is not needed.
return self if container.blank? || fully_enriched? commits_to_enrich = unenriched.select { |c| container.present? || c.container.present? }
replacements = Hash[commits_to_enrich.map do |c|
# Batch load full Commits from the repository commit_container = container || c.container
# and map to a Hash of id => Commit [c.id, Commit.lazy(commit_container, c.id)]
replacements = Hash[unenriched.map do |c|
[c.id, Commit.lazy(container, c.id)]
end.compact] end.compact]
# Replace the commits, keeping the same order # Replace the commits, keeping the same order
......
...@@ -520,6 +520,10 @@ class Project < ApplicationRecord ...@@ -520,6 +520,10 @@ class Project < ApplicationRecord
group: :ip_restrictions, namespace: [:route, :owner]) group: :ip_restrictions, namespace: [:route, :owner])
} }
scope :with_api_commit_entity_associations, -> {
preload(:project_feature, :route, namespace: [:route, :owner])
}
enum auto_cancel_pending_pipelines: { disabled: 0, enabled: 1 } enum auto_cancel_pending_pipelines: { disabled: 0, enabled: 1 }
chronic_duration_attr :build_timeout_human_readable, :build_timeout, chronic_duration_attr :build_timeout_human_readable, :build_timeout,
......
---
title: Fix N+1 queries for Elastic Search commits scope.
merge_request: 32892
author:
type: performance
...@@ -181,7 +181,7 @@ module Elastic ...@@ -181,7 +181,7 @@ module Elastic
# Wrap returned results into GitLab model objects and paginate # Wrap returned results into GitLab model objects and paginate
# #
# @return [Kaminari::PaginatableArray] # @return [Kaminari::PaginatableArray]
def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, &blk) def elastic_search_and_wrap(query, type:, page: 1, per: 20, options: {}, preload_method: nil, &blk)
response = elastic_search( response = elastic_search(
query, query,
type: type, type: type,
...@@ -190,17 +190,19 @@ module Elastic ...@@ -190,17 +190,19 @@ module Elastic
options: options options: options
)[type.pluralize.to_sym][:results] )[type.pluralize.to_sym][:results]
items, total_count = yield_each_search_result(response, type, &blk) items, total_count = yield_each_search_result(response, type, preload_method, &blk)
# Before "map" we had a paginated array so we need to recover it # Before "map" we had a paginated array so we need to recover it
offset = per * ((page || 1) - 1) offset = per * ((page || 1) - 1)
Kaminari.paginate_array(items, total_count: total_count, limit: per, offset: offset) Kaminari.paginate_array(items, total_count: total_count, limit: per, offset: offset)
end end
def yield_each_search_result(response, type) def yield_each_search_result(response, type, preload_method)
# Avoid one SELECT per result by loading all projects into a hash # Avoid one SELECT per result by loading all projects into a hash
project_ids = response.map { |result| project_id_for_commit_or_blob(result, type) }.uniq project_ids = response.map { |result| project_id_for_commit_or_blob(result, type) }.uniq
projects = Project.with_route.id_in(project_ids).index_by(&:id) projects = Project.with_route.id_in(project_ids)
projects = projects.public_send(preload_method) if preload_method # rubocop:disable GitlabSecurity/PublicSend
projects = projects.index_by(&:id)
total_count = response.total_count total_count = response.total_count
items = response.map do |result| items = response.map do |result|
......
...@@ -10,8 +10,8 @@ module Elastic ...@@ -10,8 +10,8 @@ module Elastic
end end
# @return [Kaminari::PaginatableArray] # @return [Kaminari::PaginatableArray]
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}) def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, options: {}, preload_method: nil)
elastic_search_and_wrap(query, type: 'commit', page: page, per: per_page, options: options) do |result, project| elastic_search_and_wrap(query, type: 'commit', page: page, per: per_page, options: options, preload_method: preload_method) do |result, project|
raw_commit = Gitlab::Git::Commit.new( raw_commit = Gitlab::Git::Commit.new(
project.repository.raw, project.repository.raw,
prepare_commit(result['_source']['commit']), prepare_commit(result['_source']['commit']),
......
...@@ -8,7 +8,7 @@ module Elastic ...@@ -8,7 +8,7 @@ module Elastic
delegate :project, to: :target delegate :project, to: :target
delegate :id, to: :project, prefix: true delegate :id, to: :project, prefix: true
def find_commits_by_message_with_elastic(query, page: 1, per_page: 20) def find_commits_by_message_with_elastic(query, page: 1, per_page: 20, preload_method: nil)
response = elastic_search(query, type: 'commit', page: page, per: per_page)[:commits][:results] response = elastic_search(query, type: 'commit', page: page, per: per_page)[:commits][:results]
commits = response.map do |result| commits = response.map do |result|
......
...@@ -61,7 +61,7 @@ module Gitlab ...@@ -61,7 +61,7 @@ module Gitlab
Note.elastic_search(query, options: opt) Note.elastic_search(query, options: opt)
end end
def commits(page: 1, per_page: DEFAULT_PER_PAGE) def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project) return Kaminari.paginate_array([]) unless Ability.allowed?(@current_user, :download_code, project)
if project.empty_repo? || query.blank? if project.empty_repo? || query.blank?
...@@ -72,7 +72,8 @@ module Gitlab ...@@ -72,7 +72,8 @@ module Gitlab
project.repository.find_commits_by_message_with_elastic( project.repository.find_commits_by_message_with_elastic(
query, query,
page: (page || 1).to_i, page: (page || 1).to_i,
per_page: per_page per_page: per_page,
preload_method: preload_method
) )
else else
offset = per_page * ((page || 1) - 1) offset = per_page * ((page || 1) - 1)
......
...@@ -43,7 +43,7 @@ module Gitlab ...@@ -43,7 +43,7 @@ module Gitlab
when 'wiki_blobs' when 'wiki_blobs'
wiki_blobs(page: page, per_page: per_page) wiki_blobs(page: page, per_page: per_page)
when 'commits' when 'commits'
commits(page: page, per_page: per_page) commits(page: page, per_page: per_page, preload_method: preload_method)
when 'users' when 'users'
users.page(page).per(per_page) users.page(page).per(per_page)
else else
...@@ -270,7 +270,7 @@ module Gitlab ...@@ -270,7 +270,7 @@ module Gitlab
# hitting ES twice for any page that's not page 1, and that's something we want to avoid. # hitting ES twice for any page that's not page 1, and that's something we want to avoid.
# #
# It is safe to memoize the page we get here because this method is _always_ called before `#commits_count` # It is safe to memoize the page we get here because this method is _always_ called before `#commits_count`
def commits(page: 1, per_page: DEFAULT_PER_PAGE) def commits(page: 1, per_page: DEFAULT_PER_PAGE, preload_method: nil)
return Kaminari.paginate_array([]) if query.blank? return Kaminari.paginate_array([]) if query.blank?
strong_memoize(:commits) do strong_memoize(:commits) do
...@@ -282,7 +282,8 @@ module Gitlab ...@@ -282,7 +282,8 @@ module Gitlab
query, query,
page: (page || 1).to_i, page: (page || 1).to_i,
per_page: per_page, per_page: per_page,
options: options options: options,
preload_method: preload_method
) )
end end
end end
......
...@@ -102,7 +102,7 @@ RSpec.describe API::Search do ...@@ -102,7 +102,7 @@ RSpec.describe API::Search do
it_behaves_like 'pagination', scope: 'wiki_blobs' it_behaves_like 'pagination', scope: 'wiki_blobs'
end end
context 'for commits scope', :sidekiq_might_not_need_inline do context 'for commits scope', :sidekiq_inline do
before do before do
project.repository.index_commits_and_blobs project.repository.index_commits_and_blobs
ensure_elasticsearch_index! ensure_elasticsearch_index!
...@@ -113,6 +113,20 @@ RSpec.describe API::Search do ...@@ -113,6 +113,20 @@ RSpec.describe API::Search do
it_behaves_like 'response is correct', schema: 'public_api/v4/commits_details', size: 2 it_behaves_like 'response is correct', schema: 'public_api/v4/commits_details', size: 2
it_behaves_like 'pagination', scope: 'commits' it_behaves_like 'pagination', scope: 'commits'
it 'avoids N+1 queries' do
control = ActiveRecord::QueryRecorder.new { get api(endpoint, user), params: { scope: 'commits', search: 'folder' } }
project_2 = create(:project, :public, :repository, :wiki_repo, name: 'awesome project 2')
project_3 = create(:project, :public, :repository, :wiki_repo, name: 'awesome project 3')
project_2.repository.index_commits_and_blobs
project_3.repository.index_commits_and_blobs
ensure_elasticsearch_index!
# Some N+1 queries still exist
expect { get api(endpoint, user), params: { scope: 'commits', search: 'folder' } }.not_to exceed_query_limit(control.count + 2)
end
end end
context 'for blobs scope', :sidekiq_might_not_need_inline do context 'for blobs scope', :sidekiq_might_not_need_inline do
......
...@@ -8,14 +8,15 @@ module API ...@@ -8,14 +8,15 @@ module API
expose :project_id expose :project_id
expose :last_pipeline do |commit, options| expose :last_pipeline do |commit, options|
pipeline = commit.last_pipeline if can_read_pipeline? pipeline = commit.latest_pipeline if can_read_pipeline?
::API::Entities::PipelineBasic.represent(pipeline, options) ::API::Entities::PipelineBasic.represent(pipeline, options)
end end
private private
def can_read_pipeline? def can_read_pipeline?
Ability.allowed?(options[:current_user], :read_pipeline, object.last_pipeline) Ability.allowed?(options[:current_user], :read_pipeline, object.latest_pipeline)
end end
end end
end end
......
...@@ -24,7 +24,8 @@ module API ...@@ -24,7 +24,8 @@ module API
merge_requests: :with_api_entity_associations, merge_requests: :with_api_entity_associations,
projects: :with_api_entity_associations, projects: :with_api_entity_associations,
issues: :with_api_entity_associations, issues: :with_api_entity_associations,
milestones: :with_api_entity_associations milestones: :with_api_entity_associations,
commits: :with_api_commit_entity_associations
}.freeze }.freeze
def search(additional_params = {}) def search(additional_params = {})
...@@ -38,6 +39,9 @@ module API ...@@ -38,6 +39,9 @@ module API
results = SearchService.new(current_user, search_params).search_objects(preload_method) results = SearchService.new(current_user, search_params).search_objects(preload_method)
# preload commit data
results = CommitCollection.new(nil, results).with_latest_pipeline if params[:scope].to_sym == :commits
Gitlab::UsageDataCounters::SearchCounter.count(:all_searches) Gitlab::UsageDataCounters::SearchCounter.count(:all_searches)
paginate(results) paginate(results)
......
...@@ -1923,6 +1923,37 @@ RSpec.describe Ci::Pipeline, :mailer do ...@@ -1923,6 +1923,37 @@ RSpec.describe Ci::Pipeline, :mailer do
end end
end end
context 'when there are two pipelines for a ref, sha across multiple projects' do
let(:project_2) { create(:project) }
let!(:commit_456_project_2_ref_test) do
create(
:ci_empty_pipeline,
status: 'success',
ref: 'test',
sha: '456',
project: project_2
)
end
context 'when project_key is false' do
it 'returns the latest pipeline' do
result = described_class.latest_pipeline_per_commit(%w[456])
expect(result).to match('456' => commit_456_project_2_ref_test)
end
end
context 'when project_key is true' do
it 'returns the latest pipeline per project' do
result = described_class.latest_pipeline_per_commit(%w[456], project_key: true)
expect(result[project.id]).to match('456' => commit_456_ref_test)
expect(result[project_2.id]).to match('456' => commit_456_project_2_ref_test)
end
end
end
context 'with a ref' do context 'with a ref' do
it 'only includes the pipelines for the given ref' do it 'only includes the pipelines for the given ref' do
result = described_class.latest_pipeline_per_commit(%w[123 456], 'master') result = described_class.latest_pipeline_per_commit(%w[123 456], 'master')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment