Commit 3405f757 authored by Dmitry Gruzd, committed by Dylan Griffith

Fix blobs search API degradation

When a blobs search API request produces a large number of hits,
it becomes slower and slower. This commit fixes this performance
issue by adding a limit to the Gitaly results.
parent 92c62ed1
...@@ -993,10 +993,10 @@ class Repository ...@@ -993,10 +993,10 @@ class Repository
raw_repository.ls_files(actual_ref) raw_repository.ls_files(actual_ref)
end end
def search_files_by_content(query, ref) def search_files_by_content(query, ref, options = {})
return [] if empty? || query.blank? return [] if empty? || query.blank?
raw_repository.search_files_by_content(query, ref) raw_repository.search_files_by_content(query, ref, options)
end end
def search_files_by_name(query, ref) def search_files_by_name(query, ref)
......
---
title: Fix blobs search API degradation
merge_request: 24607
author:
type: fixed
...@@ -13,14 +13,15 @@ module Gitlab ...@@ -13,14 +13,15 @@ module Gitlab
@ref = ref @ref = ref
end end
def find(query) def find(query, content_match_cutoff: nil)
query = Gitlab::Search::Query.new(query, encode_binary: true) do query = Gitlab::Search::Query.new(query, encode_binary: true) do
filter :filename, matcher: ->(filter, blob) { blob.binary_path =~ /#{filter[:regex_value]}$/i } filter :filename, matcher: ->(filter, blob) { blob.binary_path =~ /#{filter[:regex_value]}$/i }
filter :path, matcher: ->(filter, blob) { blob.binary_path =~ /#{filter[:regex_value]}/i } filter :path, matcher: ->(filter, blob) { blob.binary_path =~ /#{filter[:regex_value]}/i }
filter :extension, matcher: ->(filter, blob) { blob.binary_path =~ /\.#{filter[:regex_value]}$/i } filter :extension, matcher: ->(filter, blob) { blob.binary_path =~ /\.#{filter[:regex_value]}$/i }
end end
files = find_by_path(query.term) + find_by_content(query.term) content_match_cutoff = nil if query.filters.any?
files = find_by_path(query.term) + find_by_content(query.term, { limit: content_match_cutoff })
files = query.filter_results(files) if query.filters.any? files = query.filter_results(files) if query.filters.any?
...@@ -29,8 +30,8 @@ module Gitlab ...@@ -29,8 +30,8 @@ module Gitlab
private private
def find_by_content(query) def find_by_content(query, options)
repository.search_files_by_content(query, ref).map do |result| repository.search_files_by_content(query, ref, options).map do |result|
Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository) Gitlab::Search::FoundBlob.new(content_match: result, project: project, ref: ref, repository: repository)
end end
end end
......
...@@ -956,13 +956,13 @@ module Gitlab ...@@ -956,13 +956,13 @@ module Gitlab
gitaly_ref_client.tag_names_contains_sha(sha) gitaly_ref_client.tag_names_contains_sha(sha)
end end
def search_files_by_content(query, ref) def search_files_by_content(query, ref, options = {})
return [] if empty? || query.blank? return [] if empty? || query.blank?
safe_query = Regexp.escape(query) safe_query = Regexp.escape(query)
ref ||= root_ref ref ||= root_ref
gitaly_repository_client.search_files_by_content(ref, safe_query) gitaly_repository_client.search_files_by_content(ref, safe_query, options)
end end
def can_be_merged?(source_sha, target_branch) def can_be_merged?(source_sha, target_branch)
......
...@@ -332,11 +332,11 @@ module Gitlab ...@@ -332,11 +332,11 @@ module Gitlab
GitalyClient.call(@storage, :repository_service, :search_files_by_name, request, timeout: GitalyClient.fast_timeout).flat_map(&:files) GitalyClient.call(@storage, :repository_service, :search_files_by_name, request, timeout: GitalyClient.fast_timeout).flat_map(&:files)
end end
def search_files_by_content(ref, query) def search_files_by_content(ref, query, options = {})
request = Gitaly::SearchFilesByContentRequest.new(repository: @gitaly_repo, ref: ref, query: query) request = Gitaly::SearchFilesByContentRequest.new(repository: @gitaly_repo, ref: ref, query: query)
response = GitalyClient.call(@storage, :repository_service, :search_files_by_content, request, timeout: GitalyClient.default_timeout) response = GitalyClient.call(@storage, :repository_service, :search_files_by_content, request, timeout: GitalyClient.default_timeout)
search_results_from_response(response) search_results_from_response(response, options)
end end
def disconnect_alternates def disconnect_alternates
...@@ -361,18 +361,24 @@ module Gitlab ...@@ -361,18 +361,24 @@ module Gitlab
private private
def search_results_from_response(gitaly_response) def search_results_from_response(gitaly_response, options = {})
limit = options[:limit]
matches = [] matches = []
matches_count = 0
current_match = +"" current_match = +""
gitaly_response.each do |message| gitaly_response.each do |message|
next if message.nil? next if message.nil?
break if limit && matches_count >= limit
current_match << message.match_data current_match << message.match_data
if message.end_of_match if message.end_of_match
matches << current_match matches << current_match
current_match = +"" current_match = +""
matches_count += 1
end end
end end
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
module Gitlab module Gitlab
class ProjectSearchResults < SearchResults class ProjectSearchResults < SearchResults
attr_reader :project, :repository_ref attr_reader :project, :repository_ref, :per_page
def initialize(current_user, project, query, repository_ref = nil, per_page: 20) def initialize(current_user, project, query, repository_ref = nil, per_page: 20)
@current_user = current_user @current_user = current_user
...@@ -17,7 +17,7 @@ module Gitlab ...@@ -17,7 +17,7 @@ module Gitlab
when 'notes' when 'notes'
notes.page(page).per(per_page) notes.page(page).per(per_page)
when 'blobs' when 'blobs'
paginated_blobs(blobs, page) paginated_blobs(blobs(page), page)
when 'wiki_blobs' when 'wiki_blobs'
paginated_blobs(wiki_blobs, page) paginated_blobs(wiki_blobs, page)
when 'commits' when 'commits'
...@@ -32,7 +32,7 @@ module Gitlab ...@@ -32,7 +32,7 @@ module Gitlab
def formatted_count(scope) def formatted_count(scope)
case scope case scope
when 'blobs' when 'blobs'
blobs_count.to_s formatted_limited_count(limited_blobs_count)
when 'notes' when 'notes'
formatted_limited_count(limited_notes_count) formatted_limited_count(limited_notes_count)
when 'wiki_blobs' when 'wiki_blobs'
...@@ -48,8 +48,8 @@ module Gitlab ...@@ -48,8 +48,8 @@ module Gitlab
super.where(id: @project.team.members) # rubocop:disable CodeReuse/ActiveRecord super.where(id: @project.team.members) # rubocop:disable CodeReuse/ActiveRecord
end end
def blobs_count def limited_blobs_count
@blobs_count ||= blobs.count @limited_blobs_count ||= blobs.count
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
...@@ -81,7 +81,7 @@ module Gitlab ...@@ -81,7 +81,7 @@ module Gitlab
counts = %i(limited_milestones_count limited_notes_count counts = %i(limited_milestones_count limited_notes_count
limited_merge_requests_count limited_issues_count limited_merge_requests_count limited_issues_count
blobs_count wiki_blobs_count) limited_blobs_count wiki_blobs_count)
counts.all? { |count_method| public_send(count_method).zero? } # rubocop:disable GitlabSecurity/PublicSend counts.all? { |count_method| public_send(count_method).zero? } # rubocop:disable GitlabSecurity/PublicSend
end end
...@@ -95,10 +95,16 @@ module Gitlab ...@@ -95,10 +95,16 @@ module Gitlab
results results
end end
def blobs def limit_up_to_page(page)
current_page = page&.to_i || 1
offset = per_page * (current_page - 1)
count_limit + offset
end
def blobs(page = 1)
return [] unless Ability.allowed?(@current_user, :download_code, @project) return [] unless Ability.allowed?(@current_user, :download_code, @project)
@blobs ||= Gitlab::FileFinder.new(project, repository_project_ref).find(query) @blobs ||= Gitlab::FileFinder.new(project, repository_project_ref).find(query, content_match_cutoff: limit_up_to_page(page))
end end
def wiki_blobs def wiki_blobs
......
...@@ -31,7 +31,7 @@ describe Gitlab::ProjectSearchResults do ...@@ -31,7 +31,7 @@ describe Gitlab::ProjectSearchResults do
let(:results) { described_class.new(user, project, query) } let(:results) { described_class.new(user, project, query) }
where(:scope, :count_method, :expected) do where(:scope, :count_method, :expected) do
'blobs' | :blobs_count | '1234' 'blobs' | :limited_blobs_count | max_limited_count
'notes' | :limited_notes_count | max_limited_count 'notes' | :limited_notes_count | max_limited_count
'wiki_blobs' | :wiki_blobs_count | '1234' 'wiki_blobs' | :wiki_blobs_count | '1234'
'commits' | :commits_count | '1234' 'commits' | :commits_count | '1234'
...@@ -141,9 +141,9 @@ describe Gitlab::ProjectSearchResults do ...@@ -141,9 +141,9 @@ describe Gitlab::ProjectSearchResults do
describe 'blob search' do describe 'blob search' do
let(:project) { create(:project, :public, :repository) } let(:project) { create(:project, :public, :repository) }
let(:blob_type) { 'blobs' }
it_behaves_like 'general blob search', 'repository', 'blobs' do it_behaves_like 'general blob search', 'repository', 'blobs' do
let(:blob_type) { 'blobs' }
let(:disabled_project) { create(:project, :public, :repository, :repository_disabled) } let(:disabled_project) { create(:project, :public, :repository, :repository_disabled) }
let(:private_project) { create(:project, :public, :repository, :repository_private) } let(:private_project) { create(:project, :public, :repository, :repository_private) }
let(:expected_file_by_path) { 'files/images/wm.svg' } let(:expected_file_by_path) { 'files/images/wm.svg' }
...@@ -151,9 +151,36 @@ describe Gitlab::ProjectSearchResults do ...@@ -151,9 +151,36 @@ describe Gitlab::ProjectSearchResults do
end end
it_behaves_like 'blob search repository ref', 'project' do it_behaves_like 'blob search repository ref', 'project' do
let(:blob_type) { 'blobs' }
let(:entity) { project } let(:entity) { project }
end end
context 'pagination' do
let(:per_page) { 20 }
let(:count_limit) { described_class::COUNT_LIMIT }
let(:file_finder) { instance_double('Gitlab::FileFinder') }
let(:results) { described_class.new(user, project, query, per_page: per_page) }
let(:repository_ref) { 'master' }
before do
allow(file_finder).to receive(:find).and_return([])
expect(Gitlab::FileFinder).to receive(:new).with(project, repository_ref).and_return(file_finder)
end
it 'limits search results based on the first page' do
expect(file_finder).to receive(:find).with(query, content_match_cutoff: count_limit)
results.objects(blob_type, 1)
end
it 'limits search results based on the second page' do
expect(file_finder).to receive(:find).with(query, content_match_cutoff: count_limit + per_page)
results.objects(blob_type, 2)
end
it 'limits search results based on the third page' do
expect(file_finder).to receive(:find).with(query, content_match_cutoff: count_limit + per_page * 2)
results.objects(blob_type, 3)
end
end
end end
describe 'wiki search' do describe 'wiki search' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment