Commit a0c2636e authored by Thong Kuah's avatar Thong Kuah

Merge branch '195667-limit-commits-for-git-hook-services' into 'master'

Never fetch more than 101 commits when processing a git push

See merge request gitlab-org/gitlab!67491
parents 718e32ca 81be7217
......@@ -161,8 +161,8 @@ class Repository
CommitCollection.new(container, commits, ref)
end
def commits_between(from, to)
commits = Gitlab::Git::Commit.between(raw_repository, from, to)
def commits_between(from, to, limit: nil)
commits = Gitlab::Git::Commit.between(raw_repository, from, to, limit: limit)
commits = Commit.decorate(commits, container) if commits.present?
commits
end
......
......@@ -25,17 +25,13 @@ module Git
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
# The changeset, ordered with the newest commit last
def commits
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
# This should return PROCESS_COMMIT_LIMIT commits, ordered with newest last
def limited_commits
@limited_commits ||= commits.last(PROCESS_COMMIT_LIMIT)
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
def commits_count
commits.count
raise NotImplementedError, "Please implement #{self.class}##{__method__}"
end
def event_message
......
......@@ -18,20 +18,25 @@ module Git
:push_hooks
end
def commits
strong_memoize(:commits) do
def limited_commits
strong_memoize(:limited_commits) { threshold_commits.last(PROCESS_COMMIT_LIMIT) }
end
# Taking limit+1 commits allows us to detect when the limit is in effect
def threshold_commits
strong_memoize(:threshold_commits) do
if creating_default_branch?
# The most recent PROCESS_COMMIT_LIMIT commits in the default branch.
# They are returned newest-to-oldest, but we need to present them oldest-to-newest
project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT).reverse
project.repository.commits(newrev, limit: PROCESS_COMMIT_LIMIT + 1).reverse!
elsif creating_branch?
# Use the pushed commits that aren't reachable by the default branch
# as a heuristic. This may include more commits than are actually
# pushed, but that shouldn't matter because we check for existing
# cross-references later.
project.repository.commits_between(project.default_branch, newrev)
project.repository.commits_between(project.default_branch, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
elsif updating_branch?
project.repository.commits_between(oldrev, newrev)
project.repository.commits_between(oldrev, newrev, limit: PROCESS_COMMIT_LIMIT + 1)
else # removing branch
[]
end
......@@ -39,9 +44,21 @@ module Git
end
def commits_count
return count_commits_in_branch if creating_default_branch?
strong_memoize(:commits_count) do
next threshold_commits.count if
strong_memoized?(:threshold_commits) &&
threshold_commits.count <= PROCESS_COMMIT_LIMIT
super
if creating_default_branch?
project.repository.commit_count_for_ref(ref)
elsif creating_branch?
project.repository.count_commits_between(project.default_branch, newrev)
elsif updating_branch?
project.repository.count_commits_between(oldrev, newrev)
else # removing branch
0
end
end
end
override :invalidated_file_types
......@@ -179,12 +196,6 @@ module Git
creating_branch? && default_branch?
end
def count_commits_in_branch
strong_memoize(:count_commits_in_branch) do
project.repository.commit_count_for_ref(ref)
end
end
def default_branch?
strong_memoize(:default_branch) do
[nil, branch_name].include?(project.default_branch)
......
......@@ -8,10 +8,14 @@ module Git
:tag_push_hooks
end
def commits
def limited_commits
[tag_commit].compact
end
def commits_count
limited_commits.count
end
def event_message
tag&.message
end
......
......@@ -108,17 +108,26 @@ module Gitlab
# See also #repository.commits_between
#
# Ex.
# Commit.between(repo, '29eda46b', 'master')
# Commit.between(repo, '29eda46b', 'master') # all commits, ordered oldest to newest
# Commit.between(repo, '29eda46b', 'master', limit: 100) # 100 newest commits, ordered oldest to newest
#
def between(repo, base, head)
def between(repo, base, head, limit: nil)
# In either of these cases, we are guaranteed to return no commits, so
# shortcut the RPC call
return [] if Gitlab::Git.blank_ref?(base) || Gitlab::Git.blank_ref?(head)
wrapped_gitaly_errors do
revisions = [head, "^#{base}"] # base..head
repo.gitaly_commit_client.list_commits(revisions, reverse: true)
client = repo.gitaly_commit_client
# We must return the commits in chronological order but using both
# limit and reverse in the Gitaly RPC would return the oldest N,
# rather than newest N, commits, so reorder in Ruby with limit
if limit
client.list_commits(revisions, pagination_params: { limit: limit }).reverse!
else
client.list_commits(revisions, reverse: true)
end
end
end
......
......@@ -255,11 +255,12 @@ module Gitlab
consume_commits_response(response)
end
def list_commits(revisions, reverse: false)
def list_commits(revisions, reverse: false, pagination_params: nil)
request = Gitaly::ListCommitsRequest.new(
repository: @gitaly_repo,
revisions: Array.wrap(revisions),
reverse: reverse
reverse: reverse,
pagination_params: pagination_params
)
response = GitalyClient.call(@repository.storage, :commit_service, :list_commits, request, timeout: GitalyClient.medium_timeout)
......
......@@ -364,12 +364,40 @@ RSpec.describe Gitlab::Git::Commit, :seed_helper do
end
describe '.between' do
subject do
commits = described_class.between(repository, SeedRepo::Commit::PARENT_ID, SeedRepo::Commit::ID)
commits.map { |c| c.id }
let(:limit) { nil }
let(:commit_ids) { commits.map(&:id) }
subject(:commits) { described_class.between(repository, from, to, limit: limit) }
context 'requesting a single commit' do
let(:from) { SeedRepo::Commit::PARENT_ID }
let(:to) { SeedRepo::Commit::ID }
it { expect(commit_ids).to contain_exactly(to) }
end
it { is_expected.to contain_exactly(SeedRepo::Commit::ID) }
context 'requesting a commit range' do
let(:from) { 'v1.0.0' }
let(:to) { 'v1.2.0' }
let(:commits_in_range) do
%w[
570e7b2abdd848b95f2f578043fc23bd6f6fd24d
5937ac0a7beb003549fc5fd26fc247adbce4a52e
eb49186cfa5c4338011f5f590fac11bd66c5c631
]
end
context 'no limit' do
it { expect(commit_ids).to eq(commits_in_range) }
end
context 'limited' do
let(:limit) { 2 }
it { expect(commit_ids).to eq(commits_in_range.last(2)) }
end
end
end
describe '.shas_with_signatures' do
......
......@@ -311,6 +311,10 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
end
describe '#list_commits' do
let(:revisions) { 'master' }
let(:reverse) { false }
let(:pagination_params) { nil }
shared_examples 'a ListCommits request' do
before do
::Gitlab::GitalyClient.clear_stubs!
......@@ -318,26 +322,35 @@ RSpec.describe Gitlab::GitalyClient::CommitService do
it 'sends a list_commits message' do
expect_next_instance_of(Gitaly::CommitService::Stub) do |service|
expect(service)
.to receive(:list_commits)
.with(gitaly_request_with_params(expected_params), kind_of(Hash))
.and_return([])
expected_request = gitaly_request_with_params(
Array.wrap(revisions),
reverse: reverse,
pagination_params: pagination_params
)
expect(service).to receive(:list_commits).with(expected_request, kind_of(Hash)).and_return([])
end
client.list_commits(revisions)
client.list_commits(revisions, reverse: reverse, pagination_params: pagination_params)
end
end
context 'with a single revision' do
let(:revisions) { 'master' }
let(:expected_params) { %w[master] }
it_behaves_like 'a ListCommits request'
context 'with multiple revisions' do
let(:revisions) { %w[master --not --all] }
it_behaves_like 'a ListCommits request'
end
context 'with reverse: true' do
let(:reverse) { true }
it_behaves_like 'a ListCommits request'
end
context 'with multiple revisions' do
let(:revisions) { %w[master --not --all] }
let(:expected_params) { %w[master --not --all] }
context 'with pagination params' do
let(:pagination_params) { { limit: 1, page_token: 'foo' } }
it_behaves_like 'a ListCommits request'
end
......
......@@ -470,6 +470,29 @@ RSpec.describe Repository do
end
end
describe '#commits_between' do
let(:commit) { project.commit }
it 'delegates to Gitlab::Git::Commit#between, returning decorated commits' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: 5)
.and_call_original
result = repository.commits_between(commit.parent_id, commit.id, limit: 5)
expect(result).to contain_exactly(instance_of(Commit), instance_of(Commit))
end
it 'defaults to no limit' do
expect(Gitlab::Git::Commit)
.to receive(:between)
.with(repository.raw_repository, commit.parent_id, commit.id, limit: nil)
repository.commits_between(commit.parent_id, commit.id)
end
end
describe '#find_commits_by_message' do
it 'returns commits with messages containing a given string' do
commit_ids = repository.find_commits_by_message('submodule').map(&:id)
......
......@@ -19,9 +19,13 @@ RSpec.describe Git::BaseHooksService do
:push_hooks
end
def commits
def limited_commits
[]
end
def commits_count
0
end
end
end
......
......@@ -362,6 +362,9 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
end
end
let(:commits_count) { service.send(:commits_count) }
let(:threshold_limit) { described_class::PROCESS_COMMIT_LIMIT + 1 }
let(:oldrev) { project.commit(commit_ids.first).parent_id }
let(:newrev) { commit_ids.last }
......@@ -373,17 +376,31 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits)
.with(newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.commit_count_for_ref(newrev))
end
end
context 'updating the default branch' do
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end
end
......@@ -391,9 +408,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute
expect(commits_count).to eq(0)
end
end
......@@ -402,9 +423,16 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(project.default_branch, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(project.default_branch, branch))
end
end
......@@ -412,9 +440,15 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:branch) { 'fix' }
it 'processes a limited number of commit messages' do
expect(project.repository)
.to receive(:commits_between)
.with(oldrev, newrev, limit: threshold_limit)
.and_call_original
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
expect(commits_count).to eq(project.repository.count_commits_between(oldrev, newrev))
end
end
......@@ -423,9 +457,13 @@ RSpec.describe Git::BranchHooksService, :clean_gitlab_redis_shared_state do
let(:newrev) { Gitlab::Git::BLANK_SHA }
it 'does not process commit messages' do
expect(project.repository).not_to receive(:commits)
expect(project.repository).not_to receive(:commits_between)
expect(ProcessCommitWorker).not_to receive(:perform_async)
service.execute
expect(commits_count).to eq(0)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment