Commit 97c2564f authored by Douwe Maan's avatar Douwe Maan

Look up upstream commits once before queuing ProcessCommitWorkers

Instead of checking if a commit already exists in the upstream project
in its ProcessCommitWorker and bailing out if it does, we check the
existence of all commits in bulk in Git::BranchHooksService, so that we
can skip scheduling ProcessCommitWorker jobs for those commits
that already exist upstream entirely.
parent df35d772
......@@ -83,8 +83,16 @@ module Git
# Schedules processing of commit messages
def enqueue_process_commit_messages
limited_commits.each do |commit|
next unless commit.matches_cross_reference_regex?
referencing_commits = limited_commits.select(&:matches_cross_reference_regex?)
upstream_commit_ids = upstream_commit_ids(referencing_commits)
referencing_commits.each do |commit|
# Avoid reprocessing commits that already exist upstream if the project
# is a fork. This will prevent duplicated/superfluous system notes on
# mentionables referenced by a commit that is pushed to the upstream,
# that is then also pushed to forks when these get synced by users.
next if upstream_commit_ids.include?(commit.id)
ProcessCommitWorker.perform_async(
project.id,
......@@ -142,5 +150,18 @@ module Git
def branch_name
strong_memoize(:branch_name) { Gitlab::Git.ref_name(params[:ref]) }
end
def upstream_commit_ids(commits)
set = Set.new
upstream_project = project.fork_source
if upstream_project
upstream_project
.commits_by(oids: commits.map(&:id))
.each { |commit| set << commit.id }
end
set
end
end
end
......@@ -2,7 +2,8 @@
# Worker for processing individual commit messages pushed to a repository.
#
# Jobs for this worker are scheduled for every commit that is being pushed. As a
# Jobs for this worker are scheduled for every commit that contains mentionable
# references in its message and does not exist in the upstream project. As a
# result of this the workload of this worker should be kept to a bare minimum.
# Consider using an extra worker if you need to add any extra (and potentially
# slow) processing of commits.
......@@ -19,7 +20,6 @@ class ProcessCommitWorker
project = Project.find_by(id: project_id)
return unless project
return if commit_exists_in_upstream?(project, commit_hash)
user = User.find_by(id: user_id)
......@@ -77,17 +77,4 @@ class ProcessCommitWorker
Commit.from_hash(hash, project)
end
private
# Avoid reprocessing commits that already exist in the upstream
# when project is forked. This will also prevent duplicated system notes.
def commit_exists_in_upstream?(project, commit_hash)
upstream_project = project.fork_source
return false unless upstream_project
commit_id = commit_hash.with_indifferent_access[:id]
upstream_project.commit(commit_id).present?
end
end
---
title: Look up upstream commits once before queuing ProcessCommitWorkers
merge_request:
author:
type: performance
......@@ -4,6 +4,7 @@ require 'spec_helper'
describe Git::BranchHooksService do
include RepoHelpers
include ProjectForksHelper
let(:project) { create(:project, :repository) }
let(:user) { project.creator }
......@@ -266,10 +267,10 @@ describe Git::BranchHooksService do
end
describe 'Processing commit messages' do
# Create 4 commits, 2 of which have references. Limiting to 2 commits, we
# expect to see one commit message processor enqueued.
let(:commit_ids) do
Array.new(4) do |i|
# Create 6 commits, 3 of which have references. Limiting to 4 commits, we
# expect to see two commit message processors enqueued.
let!(:commit_ids) do
Array.new(6) do |i|
message = "Issue #{'#' if i.even?}#{i}"
project.repository.update_file(
user, 'README.md', '', message: message, branch_name: branch
......@@ -277,18 +278,18 @@ describe Git::BranchHooksService do
end
end
let(:oldrev) { commit_ids.first }
let(:oldrev) { project.commit(commit_ids.first).parent_id }
let(:newrev) { commit_ids.last }
before do
stub_const("::Git::BaseHooksService::PROCESS_COMMIT_LIMIT", 2)
stub_const("::Git::BaseHooksService::PROCESS_COMMIT_LIMIT", 4)
end
context 'creating the default branch' do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(ProcessCommitWorker).to receive(:perform_async).once
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
......@@ -296,7 +297,7 @@ describe Git::BranchHooksService do
context 'updating the default branch' do
it 'processes a limited number of commit messages' do
expect(ProcessCommitWorker).to receive(:perform_async).once
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
......@@ -317,7 +318,7 @@ describe Git::BranchHooksService do
let(:oldrev) { Gitlab::Git::BLANK_SHA }
it 'processes a limited number of commit messages' do
expect(ProcessCommitWorker).to receive(:perform_async).once
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
......@@ -327,7 +328,7 @@ describe Git::BranchHooksService do
let(:branch) { 'fix' }
it 'processes a limited number of commit messages' do
expect(ProcessCommitWorker).to receive(:perform_async).once
expect(ProcessCommitWorker).to receive(:perform_async).twice
service.execute
end
......@@ -343,6 +344,55 @@ describe Git::BranchHooksService do
service.execute
end
end
context 'when the project is forked' do
let(:upstream_project) { project }
let(:forked_project) { fork_project(upstream_project, user, repository: true) }
let!(:forked_service) do
described_class.new(forked_project, user, oldrev: oldrev, newrev: newrev, ref: ref)
end
context 'when commits already exists in the upstream project' do
it 'does not process commit messages' do
expect(ProcessCommitWorker).not_to receive(:perform_async)
forked_service.execute
end
end
context 'when a commit does not exist in the upstream repo' do
# On top of the existing 6 commits, 3 of which have references,
# create 2 more, 1 of which has a reference. Limiting to 4 commits, we
# expect to see one commit message processor enqueued.
let!(:forked_commit_ids) do
Array.new(2) do |i|
message = "Issue #{'#' if i.even?}#{i}"
forked_project.repository.update_file(
user, 'README.md', '', message: message, branch_name: branch
)
end
end
let(:newrev) { forked_commit_ids.last }
it 'processes the commit message' do
expect(ProcessCommitWorker).to receive(:perform_async).once
forked_service.execute
end
end
context 'when the upstream project no longer exists' do
it 'processes the commit messages' do
upstream_project.destroy!
expect(ProcessCommitWorker).to receive(:perform_async).twice
forked_service.execute
end
end
end
end
describe 'New branch detection' do
......
......@@ -3,8 +3,6 @@
require 'spec_helper'
describe ProcessCommitWorker do
include ProjectForksHelper
let(:worker) { described_class.new }
let(:user) { create(:user) }
let(:project) { create(:project, :public, :repository) }
......@@ -35,44 +33,6 @@ describe ProcessCommitWorker do
worker.perform(project.id, user.id, commit.to_hash)
end
context 'when the project is forked' do
context 'when commit already exists in the upstream project' do
it 'does not process the commit message' do
forked = fork_project(project, user, repository: true)
expect(worker).not_to receive(:process_commit_message)
worker.perform(forked.id, user.id, forked.commit.to_hash)
end
end
context 'when the commit does not exist in the upstream project' do
it 'processes the commit message' do
empty_project = create(:project, :public)
forked = fork_project(empty_project, user, repository: true)
TestEnv.copy_repo(forked,
bare_repo: TestEnv.factory_repo_path_bare,
refs: TestEnv::BRANCH_SHA)
expect(worker).to receive(:process_commit_message)
worker.perform(forked.id, user.id, forked.commit.to_hash)
end
end
context 'when the upstream project no longer exists' do
it 'processes the commit message' do
forked = fork_project(project, user, repository: true)
project.destroy!
expect(worker).to receive(:process_commit_message)
worker.perform(forked.id, user.id, forked.commit.to_hash)
end
end
end
end
describe '#process_commit_message' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment