Commit d4765096 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'track-ambiguous-refs' into 'master'

Optimise ExtractsRef when repository does not have ambiguous refs

See merge request gitlab-org/gitlab!38484
parents 004c51b7 04a5b342
...@@ -43,7 +43,7 @@ class Repository ...@@ -43,7 +43,7 @@ class Repository
gitlab_ci_yml branch_names tag_names branch_count gitlab_ci_yml branch_names tag_names branch_count
tag_count avatar exists? root_ref merged_branch_names tag_count avatar exists? root_ref merged_branch_names
has_visible_content? issue_template_names merge_request_template_names has_visible_content? issue_template_names merge_request_template_names
user_defined_metrics_dashboard_paths xcode_project?).freeze user_defined_metrics_dashboard_paths xcode_project? has_ambiguous_refs?).freeze
# Methods that use cache_method but only memoize the value # Methods that use cache_method but only memoize the value
MEMOIZED_CACHED_METHODS = %i(license).freeze MEMOIZED_CACHED_METHODS = %i(license).freeze
...@@ -196,6 +196,31 @@ class Repository ...@@ -196,6 +196,31 @@ class Repository
tag_exists?(ref) && branch_exists?(ref) tag_exists?(ref) && branch_exists?(ref)
end end
# It's possible for a tag name to be a prefix (including slash) of a branch
# name, or vice versa. For instance, a tag named `foo` means we can't create a
# tag `foo/bar`, but we _can_ create a branch `foo/bar`.
#
# If we know a repository has no refs of this type (which is the common case)
# then separating refs from paths - as in ExtractsRef - can be faster.
#
# This method only checks one level deep, so only prefixes that contain no
# slashes are considered. If a repository has a tag `foo/bar` and a branch
# `foo/bar/baz`, it will return false.
def has_ambiguous_refs?
return false unless branch_names.present? && tag_names.present?
with_slash, no_slash = (branch_names + tag_names).partition { |ref| ref.include?('/') }
return false if with_slash.empty?
prefixes = no_slash.map { |ref| Regexp.escape(ref) }.join('|')
prefix_regex = %r{^#{prefixes}/}
with_slash.any? do |ref|
prefix_regex.match?(ref)
end
end
def expand_ref(ref) def expand_ref(ref)
if tag_exists?(ref) if tag_exists?(ref)
Gitlab::Git::TAG_REF_PREFIX + ref Gitlab::Git::TAG_REF_PREFIX + ref
......
---
title: Speed up commit lists and file blob pages on repositories with huge amounts
of branches or tags
merge_request: 38484
author:
type: performance
...@@ -60,18 +60,24 @@ module ExtractsRef ...@@ -60,18 +60,24 @@ module ExtractsRef
id = [id, '/'].join id = [id, '/'].join
end end
valid_refs = ref_names.select { |v| id.start_with?("#{v}/") } first_path_segment, rest = id.split('/', 2)
if valid_refs.empty? if use_first_path_segment?(first_path_segment)
# No exact ref match, so just try our best pair = [first_path_segment, rest]
pair = id.match(%r{([^/]+)(.*)}).captures
else else
# There is a distinct possibility that multiple refs prefix the ID. valid_refs = ref_names.select { |v| id.start_with?("#{v}/") }
# Use the longest match to maximize the chance that we have the
# right ref. if valid_refs.empty?
best_match = valid_refs.max_by(&:length) # No exact ref match, so just try our best
# Partition the string into the ref and the path, ignoring the empty first value pair = id.match(%r{([^/]+)(.*)}).captures
pair = id.partition(best_match)[1..-1] else
# There is a distinct possibility that multiple refs prefix the ID.
# Use the longest match to maximize the chance that we have the
# right ref.
best_match = valid_refs.max_by(&:length)
# Partition the string into the ref and the path, ignoring the empty first value
pair = id.partition(best_match)[1..-1]
end
end end
end end
...@@ -111,6 +117,15 @@ module ExtractsRef ...@@ -111,6 +117,15 @@ module ExtractsRef
private private
def use_first_path_segment?(ref)
return false unless ::Feature.enabled?(:extracts_path_optimization)
return false unless repository_container
return false if repository_container.repository.has_ambiguous_refs?
repository_container.repository.branch_names_include?(ref) ||
repository_container.repository.tag_names_include?(ref)
end
# overridden in subclasses, do not remove # overridden in subclasses, do not remove
def get_id def get_id
id = [params[:id] || params[:ref]] id = [params[:id] || params[:ref]]
......
...@@ -1245,6 +1245,32 @@ RSpec.describe Repository do ...@@ -1245,6 +1245,32 @@ RSpec.describe Repository do
end end
end end
describe '#has_ambiguous_refs?' do
using RSpec::Parameterized::TableSyntax
where(:branch_names, :tag_names, :result) do
nil | nil | false
%w() | %w() | false
%w(a b) | %w() | false
%w() | %w(c d) | false
%w(a b) | %w(c d) | false
%w(a/b) | %w(c/d) | false
%w(a b) | %w(c d a/z) | true
%w(a b c/z) | %w(c d) | true
%w(a/b/z) | %w(a/b) | false # we only consider refs ambiguous before the first slash
%w(a/b/z) | %w(a/b a) | true
end
with_them do
it do
allow(repository).to receive(:branch_names).and_return(branch_names)
allow(repository).to receive(:tag_names).and_return(tag_names)
expect(repository.has_ambiguous_refs?).to eq(result)
end
end
end
describe '#expand_ref' do describe '#expand_ref' do
let(:ref) { 'ref' } let(:ref) { 'ref' }
...@@ -1927,7 +1953,8 @@ RSpec.describe Repository do ...@@ -1927,7 +1953,8 @@ RSpec.describe Repository do
:issue_template_names, :issue_template_names,
:merge_request_template_names, :merge_request_template_names,
:user_defined_metrics_dashboard_paths, :user_defined_metrics_dashboard_paths,
:xcode_project? :xcode_project?,
:has_ambiguous_refs?
]) ])
repository.after_change_head repository.after_change_head
......
...@@ -125,6 +125,56 @@ RSpec.shared_examples 'extracts refs' do ...@@ -125,6 +125,56 @@ RSpec.shared_examples 'extracts refs' do
expect(extract_ref('release/app/v1.0.0/README.md')).to eq( expect(extract_ref('release/app/v1.0.0/README.md')).to eq(
['release/app/v1.0.0', 'README.md']) ['release/app/v1.0.0', 'README.md'])
end end
context 'when the repository does not have ambiguous refs' do
before do
allow(container.repository).to receive(:has_ambiguous_refs?).and_return(false)
end
it 'does not fetch all ref names when the first path component is a ref' do
expect(self).not_to receive(:ref_names)
expect(container.repository).to receive(:branch_names_include?).with('v1.0.0').and_return(false)
expect(container.repository).to receive(:tag_names_include?).with('v1.0.0').and_return(true)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
it 'fetches all ref names when the first path component is not a ref' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).to receive(:branch_names_include?).with('release').and_return(false)
expect(container.repository).to receive(:tag_names_include?).with('release').and_return(false)
expect(extract_ref('release/app/doc/README.md')).to eq(['release/app', 'doc/README.md'])
end
context 'when the extracts_path_optimization feature flag is disabled' do
before do
stub_feature_flags(extracts_path_optimization: false)
end
it 'always fetches all ref names' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).not_to receive(:branch_names_include?)
expect(container.repository).not_to receive(:tag_names_include?)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
end
end
context 'when the repository has ambiguous refs' do
before do
allow(container.repository).to receive(:has_ambiguous_refs?).and_return(true)
end
it 'always fetches all ref names' do
expect(self).to receive(:ref_names).and_call_original
expect(container.repository).not_to receive(:branch_names_include?)
expect(container.repository).not_to receive(:tag_names_include?)
expect(extract_ref('v1.0.0/doc/README.md')).to eq(['v1.0.0', 'doc/README.md'])
end
end
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment