Commit f14647fd authored by Stan Hu's avatar Stan Hu Committed by Douwe Maan

Expire project caches once per push instead of once per ref

Previously `ProjectCacheWorker` would be scheduled once per ref, which
would generate unnecessary I/O and load on Sidekiq, especially if many
tags or branches were pushed at once. `ProjectCacheWorker` would expire
three items:

1. Repository size: This only needs to be updated once per push.
2. Commit count: This only needs to be updated if the default branch
   is updated.
3. Project method caches: This only needs to be updated if the default
   branch changes, but only if certain files change (e.g. README,
   CHANGELOG, etc.).

Because the third item requires looking at the actual changes in the
commit deltas, we schedule one `ProjectCacheWorker` to handle the first
two cases, and schedule a separate `ProjectCacheWorker` for the third
case if it is needed. As a result, this brings down the number of
`ProjectCacheWorker` jobs from N to 2.

Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52046
parent b3145bc9
...@@ -389,11 +389,15 @@ class Repository ...@@ -389,11 +389,15 @@ class Repository
expire_statistics_caches expire_statistics_caches
end end
# Runs code after a repository has been created. def expire_status_cache
def after_create
expire_exists_cache expire_exists_cache
expire_root_ref_cache expire_root_ref_cache
expire_emptiness_caches expire_emptiness_caches
end
# Runs code after a repository has been created.
def after_create
expire_status_cache
repository_event(:create_repository) repository_event(:create_repository)
end end
......
...@@ -8,8 +8,6 @@ module Git ...@@ -8,8 +8,6 @@ module Git
PROCESS_COMMIT_LIMIT = 100 PROCESS_COMMIT_LIMIT = 100
def execute def execute
project.repository.after_create if project.empty_repo?
create_events create_events
create_pipelines create_pipelines
execute_project_hooks execute_project_hooks
...@@ -70,11 +68,11 @@ module Git ...@@ -70,11 +68,11 @@ module Git
end end
def enqueue_invalidate_cache def enqueue_invalidate_cache
ProjectCacheWorker.perform_async( file_types = invalidated_file_types
project.id,
invalidated_file_types, return unless file_types.present?
[:commit_count, :repository_size]
) ProjectCacheWorker.perform_async(project.id, file_types, [], false)
end end
def base_params def base_params
......
...@@ -42,10 +42,8 @@ class PostReceive ...@@ -42,10 +42,8 @@ class PostReceive
user = identify_user(post_received) user = identify_user(post_received)
return false unless user return false unless user
# Expire the branches cache so we have updated data for this push # We only need to expire certain caches once per push
post_received.project.repository.expire_branches_cache if post_received.includes_branches? expire_caches(post_received)
# We only need to expire tags once per push
post_received.project.repository.expire_caches_for_tags if post_received.includes_tags?
post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index| post_received.enum_for(:changes_refs).with_index do |(oldrev, newrev, ref), index|
service_klass = service_klass =
...@@ -74,6 +72,30 @@ class PostReceive ...@@ -74,6 +72,30 @@ class PostReceive
after_project_changes_hooks(post_received, user, refs.to_a, changes) after_project_changes_hooks(post_received, user, refs.to_a, changes)
end end
# Expire the project, branch, and tag cache once per push. Schedule an
# update for the repository size and commit count if necessary.
def expire_caches(post_received)
project = post_received.project
project.repository.expire_status_cache if project.empty_repo?
project.repository.expire_branches_cache if post_received.includes_branches?
project.repository.expire_caches_for_tags if post_received.includes_tags?
enqueue_repository_cache_update(post_received)
end
def enqueue_repository_cache_update(post_received)
stats_to_invalidate = [:repository_size]
stats_to_invalidate << :commit_count if post_received.includes_default_branch?
ProjectCacheWorker.perform_async(
post_received.project.id,
[],
stats_to_invalidate,
true
)
end
def after_project_changes_hooks(post_received, user, refs, changes) def after_project_changes_hooks(post_received, user, refs, changes)
hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs) hook_data = Gitlab::DataBuilder::Repository.update(post_received.project, user, changes, refs)
SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks) SystemHooksService.new.execute_hooks(hook_data, :repository_update_hooks)
......
...@@ -12,13 +12,15 @@ class ProjectCacheWorker ...@@ -12,13 +12,15 @@ class ProjectCacheWorker
# CHANGELOG. # CHANGELOG.
# statistics - An Array containing columns from ProjectStatistics to # statistics - An Array containing columns from ProjectStatistics to
# refresh, if empty all columns will be refreshed # refresh, if empty all columns will be refreshed
# refresh_statistics - A boolean that determines whether project statistics should
# be updated.
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, files = [], statistics = []) def perform(project_id, files = [], statistics = [], refresh_statistics = true)
project = Project.find_by(id: project_id) project = Project.find_by(id: project_id)
return unless project return unless project
update_statistics(project, statistics) update_statistics(project, statistics) if refresh_statistics
return unless project.repository.exists? return unless project.repository.exists?
......
---
title: Expire project caches once per push instead of once per ref
merge_request: 31876
author:
type: performance
...@@ -39,6 +39,17 @@ module Gitlab ...@@ -39,6 +39,17 @@ module Gitlab
end end
end end
def includes_default_branch?
# If the branch doesn't have a default branch yet, we presume the
# first branch pushed will be the default.
return true unless project.default_branch.present?
enum_for(:changes_refs).any? do |_oldrev, _newrev, ref|
Gitlab::Git.branch_ref?(ref) &&
Gitlab::Git.branch_name(ref) == project.default_branch
end
end
private private
def deserialize_changes(changes) def deserialize_changes(changes)
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
require 'spec_helper' require 'spec_helper'
describe ::Gitlab::GitPostReceive do describe ::Gitlab::GitPostReceive do
let(:project) { create(:project) } set(:project) { create(:project, :repository) }
subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) } subject { described_class.new(project, "project-#{project.id}", changes.dup, {}) }
...@@ -92,4 +92,47 @@ describe ::Gitlab::GitPostReceive do ...@@ -92,4 +92,47 @@ describe ::Gitlab::GitPostReceive do
end end
end end
end end
describe '#includes_default_branch?' do
context 'with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
654322 210986 refs/tags/#{project.default_branch}
654323 210985 refs/heads/test3
EOF
end
it 'returns false' do
expect(subject.includes_default_branch?).to be_falsey
end
end
context 'with a project with no default branch' do
let(:changes) do
<<~EOF
654321 210987 refs/heads/test1
EOF
end
it 'returns true' do
expect(project).to receive(:default_branch).and_return(nil)
expect(subject.includes_default_branch?).to be_truthy
end
end
context 'with default branch' do
let(:changes) do
<<~EOF
654322 210986 refs/heads/test1
654321 210987 refs/tags/test2
654323 210985 refs/heads/#{project.default_branch}
EOF
end
it 'returns true' do
expect(subject.includes_default_branch?).to be_truthy
end
end
end
end end
...@@ -1815,22 +1815,36 @@ describe Repository do ...@@ -1815,22 +1815,36 @@ describe Repository do
end end
describe '#after_create' do describe '#after_create' do
it 'calls expire_status_cache' do
expect(repository).to receive(:expire_status_cache)
repository.after_create
end
it 'logs an event' do
expect(repository).to receive(:repository_event).with(:create_repository)
repository.after_create
end
end
describe '#expire_status_cache' do
it 'flushes the exists cache' do it 'flushes the exists cache' do
expect(repository).to receive(:expire_exists_cache) expect(repository).to receive(:expire_exists_cache)
repository.after_create repository.expire_status_cache
end end
it 'flushes the root ref cache' do it 'flushes the root ref cache' do
expect(repository).to receive(:expire_root_ref_cache) expect(repository).to receive(:expire_root_ref_cache)
repository.after_create repository.expire_status_cache
end end
it 'flushes the emptiness caches' do it 'flushes the emptiness caches' do
expect(repository).to receive(:expire_emptiness_caches) expect(repository).to receive(:expire_emptiness_caches)
repository.after_create repository.expire_status_cache
end end
end end
......
...@@ -158,9 +158,13 @@ describe Git::BranchHooksService do ...@@ -158,9 +158,13 @@ describe Git::BranchHooksService do
let(:blank_sha) { Gitlab::Git::BLANK_SHA } let(:blank_sha) { Gitlab::Git::BLANK_SHA }
def clears_cache(extended: []) def clears_cache(extended: [])
expect(ProjectCacheWorker) expect(service).to receive(:invalidated_file_types).and_return(extended)
.to receive(:perform_async)
.with(project.id, extended, %i[commit_count repository_size]) if extended.present?
expect(ProjectCacheWorker)
.to receive(:perform_async)
.with(project.id, extended, [], false)
end
service.execute service.execute
end end
......
...@@ -37,6 +37,29 @@ describe PostReceive do ...@@ -37,6 +37,29 @@ describe PostReceive do
end end
describe "#process_project_changes" do describe "#process_project_changes" do
context 'with an empty project' do
let(:empty_project) { create(:project, :empty_repo) }
let(:changes) { "123456 789012 refs/heads/tést1\n" }
before do
allow_any_instance_of(Gitlab::GitPostReceive).to receive(:identify).and_return(empty_project.owner)
allow(Gitlab::GlRepository).to receive(:parse).and_return([empty_project, Gitlab::GlRepository::PROJECT])
end
it 'expire the status cache' do
expect(empty_project.repository).to receive(:expire_status_cache)
perform
end
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(empty_project.id, [], [:repository_size, :commit_count], true)
perform
end
end
context 'empty changes' do context 'empty changes' do
it "does not call any PushService but runs after project hooks" do it "does not call any PushService but runs after project hooks" do
expect(Git::BranchPushService).not_to receive(:new) expect(Git::BranchPushService).not_to receive(:new)
...@@ -67,15 +90,22 @@ describe PostReceive do ...@@ -67,15 +90,22 @@ describe PostReceive do
context "branches" do context "branches" do
let(:changes) do let(:changes) do
<<~EOF <<~EOF
'123456 789012 refs/heads/tést1' 123456 789012 refs/heads/tést1
'123456 789012 refs/heads/tést2' 123456 789012 refs/heads/tést2
EOF EOF
end end
it 'expires the branches cache' do it 'expires the branches cache' do
expect(project.repository).to receive(:expire_branches_cache).once expect(project.repository).to receive(:expire_branches_cache).once
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end
it 'expires the status cache' do
expect(project).to receive(:empty_repo?).and_return(true)
expect(project.repository).to receive(:expire_status_cache)
perform
end end
it 'calls Git::BranchPushService' do it 'calls Git::BranchPushService' do
...@@ -87,6 +117,30 @@ describe PostReceive do ...@@ -87,6 +117,30 @@ describe PostReceive do
perform perform
end end
it 'schedules a cache update for repository size only' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
context 'with a default branch' do
let(:changes) do
<<~EOF
123456 789012 refs/heads/tést1
123456 789012 refs/heads/tést2
678912 123455 refs/heads/#{project.default_branch}
EOF
end
it 'schedules a cache update for commit count and size' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size, :commit_count], true)
perform
end
end
end end
context "tags" do context "tags" do
...@@ -107,7 +161,7 @@ describe PostReceive do ...@@ -107,7 +161,7 @@ describe PostReceive do
it 'does not expire branches cache' do it 'does not expire branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache) expect(project.repository).not_to receive(:expire_branches_cache)
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end end
it "only invalidates tags once" do it "only invalidates tags once" do
...@@ -115,7 +169,7 @@ describe PostReceive do ...@@ -115,7 +169,7 @@ describe PostReceive do
expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original expect(project.repository).to receive(:expire_caches_for_tags).once.and_call_original
expect(project.repository).to receive(:expire_tags_cache).once.and_call_original expect(project.repository).to receive(:expire_tags_cache).once.and_call_original
described_class.new.perform(gl_repository, key_id, base64_changes) perform
end end
it "calls Git::TagPushService" do it "calls Git::TagPushService" do
...@@ -129,6 +183,13 @@ describe PostReceive do ...@@ -129,6 +183,13 @@ describe PostReceive do
perform perform
end end
it 'schedules a single ProjectCacheWorker update' do
expect(ProjectCacheWorker).to receive(:perform_async)
.with(project.id, [], [:repository_size], true)
perform
end
end end
context "merge-requests" do context "merge-requests" do
......
...@@ -49,6 +49,16 @@ describe ProjectCacheWorker do ...@@ -49,6 +49,16 @@ describe ProjectCacheWorker do
worker.perform(project.id, %w(readme)) worker.perform(project.id, %w(readme))
end end
context 'with statistics disabled' do
let(:statistics) { [] }
it 'does not update the project statistics' do
expect(worker).not_to receive(:update_statistics)
worker.perform(project.id, [], [], false)
end
end
context 'with statistics' do context 'with statistics' do
let(:statistics) { %w(repository_size) } let(:statistics) { %w(repository_size) }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment