Commit ffb9b3ef authored by Yorick Peterse's avatar Yorick Peterse

Refactor cache refreshing/expiring

This refactors repository caching so it's possible to selectively
refresh certain caches, instead of just expiring and refreshing
everything.

To allow this the various methods that were cached (e.g. "tag_count" and
"readme") use a similar pattern that makes expiring and refreshing
their data much easier.

In this new setup caches are refreshed as follows:

1. After a commit (but before running ProjectCacheWorker) we expire some
   basic caches such as the commit count and repository size.

2. ProjectCacheWorker will recalculate the commit count, repository
   size, then refresh a specific set of caches based on the list of
   files changed in a push payload.

This requires a bunch of changes to the various methods that may be
cached. For one, data should not be cached if a branch used or the
entire repository does not exist. To prevent all these methods from
handling this manually this is taken care of in
Repository#cache_method_output. Some methods still manually check for
the existence of a repository but this result is also cached.

With selective flushing implemented ProjectCacheWorker no longer uses an
exclusive lease for all of its work. Instead this worker only uses a
lease to limit the number of times the repository size is updated as
this is a fairly expensive operation.
parent 6f393877
...@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base ...@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base
"refs/heads/#{branch}", "refs/heads/#{branch}",
force: true) force: true)
repository.copy_gitattributes(branch) repository.copy_gitattributes(branch)
repository.expire_avatar_cache(branch) repository.expire_avatar_cache
reload_default_branch reload_default_branch
end end
......
This diff is collapsed.
...@@ -18,7 +18,7 @@ class GitPushService < BaseService ...@@ -18,7 +18,7 @@ class GitPushService < BaseService
# #
def execute def execute
@project.repository.after_create if @project.empty_repo? @project.repository.after_create if @project.empty_repo?
@project.repository.after_push_commit(branch_name, params[:newrev]) @project.repository.after_push_commit(branch_name)
if push_remove_branch? if push_remove_branch?
@project.repository.after_remove_branch @project.repository.after_remove_branch
...@@ -51,12 +51,32 @@ class GitPushService < BaseService ...@@ -51,12 +51,32 @@ class GitPushService < BaseService
execute_related_hooks execute_related_hooks
perform_housekeeping perform_housekeeping
update_caches
end end
def update_gitattributes def update_gitattributes
@project.repository.copy_gitattributes(params[:ref]) @project.repository.copy_gitattributes(params[:ref])
end end
def update_caches
if is_default_branch?
paths = Set.new
@push_commits.each do |commit|
commit.raw_diffs(deltas_only: true).each do |diff|
paths << diff.new_path
end
end
types = Gitlab::FileDetector.types_in_paths(paths.to_a)
else
types = []
end
ProjectCacheWorker.perform_async(@project.id, types)
end
protected protected
def execute_related_hooks def execute_related_hooks
...@@ -70,7 +90,6 @@ class GitPushService < BaseService ...@@ -70,7 +90,6 @@ class GitPushService < BaseService
@project.execute_hooks(build_push_data.dup, :push_hooks) @project.execute_hooks(build_push_data.dup, :push_hooks)
@project.execute_services(build_push_data.dup, :push_hooks) @project.execute_services(build_push_data.dup, :push_hooks)
Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute
ProjectCacheWorker.perform_async(@project.id)
if push_remove_branch? if push_remove_branch?
AfterBranchDeleteService AfterBranchDeleteService
......
# Worker for updating any project specific caches. # Worker for updating any project specific caches.
#
# This worker runs at most once every 15 minutes per project. This is to ensure
# that multiple instances of jobs for this worker don't hammer the underlying
# storage engine as much.
class ProjectCacheWorker class ProjectCacheWorker
include Sidekiq::Worker include Sidekiq::Worker
include DedicatedSidekiqQueue include DedicatedSidekiqQueue
LEASE_TIMEOUT = 15.minutes.to_i LEASE_TIMEOUT = 15.minutes.to_i
def self.lease_for(project_id) # project_id - The ID of the project for which to flush the cache.
Gitlab::ExclusiveLease. # refresh - An Array containing extra types of data to refresh such as
new("project_cache_worker:#{project_id}", timeout: LEASE_TIMEOUT) # `:readme` to flush the README and `:changelog` to flush the
end # CHANGELOG.
def perform(project_id, refresh = [])
project = Project.find_by(id: project_id)
# Overwrite Sidekiq's implementation so we only schedule when actually needed. return unless project && project.repository.exists?
def self.perform_async(project_id)
# If a lease for this project is still being held there's no point in
# scheduling a new job.
super unless lease_for(project_id).exists?
end
def perform(project_id) update_repository_size(project)
if try_obtain_lease_for(project_id) project.update_commit_count
Rails.logger.
info("Obtained ProjectCacheWorker lease for project #{project_id}")
else
Rails.logger.
info("Could not obtain ProjectCacheWorker lease for project #{project_id}")
return
end
update_caches(project_id) project.repository.refresh_method_caches(refresh.map(&:to_sym))
end end
def update_caches(project_id) def update_repository_size(project)
project = Project.find(project_id) return unless try_obtain_lease_for(project.id, :update_repository_size)
return unless project.repository.exists? Rails.logger.info("Updating repository size for project #{project.id}")
project.update_repository_size project.update_repository_size
project.update_commit_count
if project.repository.root_ref
project.repository.build_cache
end
end end
def try_obtain_lease_for(project_id) private
self.class.lease_for(project_id).try_obtain
def try_obtain_lease_for(project_id, section)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}:#{section}", timeout: LEASE_TIMEOUT).
try_obtain
end end
end end
...@@ -1572,7 +1572,7 @@ describe Project, models: true do ...@@ -1572,7 +1572,7 @@ describe Project, models: true do
end end
it 'expires the avatar cache' do it 'expires the avatar cache' do
expect(project.repository).to receive(:expire_avatar_cache).with(project.default_branch) expect(project.repository).to receive(:expire_avatar_cache)
project.change_head(project.default_branch) project.change_head(project.default_branch)
end end
......
This diff is collapsed.
...@@ -14,7 +14,7 @@ describe API::API, api: true do ...@@ -14,7 +14,7 @@ describe API::API, api: true do
describe "GET /projects/:id/repository/branches" do describe "GET /projects/:id/repository/branches" do
it "returns an array of project branches" do it "returns an array of project branches" do
project.repository.expire_cache project.repository.expire_all_method_caches
get api("/projects/#{project.id}/repository/branches", user) get api("/projects/#{project.id}/repository/branches", user)
expect(response).to have_http_status(200) expect(response).to have_http_status(200)
......
...@@ -27,27 +27,14 @@ describe GitPushService, services: true do ...@@ -27,27 +27,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes general cached data' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_push_commit).with('master')
with('master', newrev)
subject subject
end end
it 'flushes the visible content cache' do it 'calls the after_create_branch hook' do
expect(project.repository).to receive(:expire_has_visible_content_cache) expect(project.repository).to receive(:after_create_branch)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
subject subject
end end
...@@ -56,21 +43,8 @@ describe GitPushService, services: true do ...@@ -56,21 +43,8 @@ describe GitPushService, services: true do
context 'existing branch' do context 'existing branch' do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes general cached data' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_push_commit).with('master')
with('master', newrev)
subject
end
it 'does not flush the branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
subject
end
it 'does not flush the branch count cache' do
expect(project.repository).not_to receive(:expire_branch_count_cache)
subject subject
end end
...@@ -81,27 +55,14 @@ describe GitPushService, services: true do ...@@ -81,27 +55,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes the visible content cache' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_has_visible_content_cache) expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
subject subject
end end
it 'flushes general cached data' do it 'calls the after_remove_branch hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_remove_branch)
with('master', newrev)
subject subject
end end
...@@ -598,6 +559,51 @@ describe GitPushService, services: true do ...@@ -598,6 +559,51 @@ describe GitPushService, services: true do
end end
end end
describe '#update_caches' do
let(:service) do
described_class.new(project,
user,
oldrev: sample_commit.parent_id,
newrev: sample_commit.id,
ref: 'refs/heads/master')
end
context 'on the default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(true)
end
it 'flushes the caches of any special files that have been changed' do
commit = double(:commit)
diff = double(:diff, new_path: 'README.md')
expect(commit).to receive(:raw_diffs).with(deltas_only: true).
and_return([diff])
service.push_commits = [commit]
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, %i(readme))
service.update_caches
end
end
context 'on a non-default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(false)
end
it 'does not flush any conditional caches' do
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, []).
and_call_original
service.update_caches
end
end
end
def execute_service(project, user, oldrev, newrev, ref) def execute_service(project, user, oldrev, newrev, ref)
service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref ) service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref )
service.execute service.execute
......
...@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do ...@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do
end end
it 'flushes general cached data' do it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache) expect(project.repository).to receive(:before_push_tag)
subject subject
end end
...@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do ...@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do
subject subject
end end
it 'flushes the tag count cache' do
expect(project.repository).to receive(:expire_tag_count_cache)
subject
end
end end
describe "Git Tag Push Data" do describe "Git Tag Push Data" do
......
...@@ -2,62 +2,78 @@ require 'spec_helper' ...@@ -2,62 +2,78 @@ require 'spec_helper'
describe ProjectCacheWorker do describe ProjectCacheWorker do
let(:project) { create(:project) } let(:project) { create(:project) }
let(:worker) { described_class.new }
subject { described_class.new } describe '#perform' do
before do
describe '.perform_async' do allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
it 'schedules the job when no lease exists' do and_return(true)
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). end
and_return(false)
expect_any_instance_of(described_class).to receive(:perform) context 'with a non-existing project' do
it 'does nothing' do
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id) worker.perform(-1)
end
end end
it 'does not schedule the job when a lease exists' do context 'with an existing project without a repository' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). it 'does nothing' do
and_return(true) allow_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(described_class).not_to receive(:perform) expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id) worker.perform(project.id)
end
end end
end
describe '#perform' do context 'with an existing project' do
context 'when an exclusive lease can be obtained' do it 'updates the repository size' do
before do expect(worker).to receive(:update_repository_size).and_call_original
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
and_return(true)
end
it 'updates project cache data' do worker.perform(project.id)
expect_any_instance_of(Repository).to receive(:size) end
expect_any_instance_of(Repository).to receive(:commit_count)
expect_any_instance_of(Project).to receive(:update_repository_size) it 'updates the commit count' do
expect_any_instance_of(Project).to receive(:update_commit_count) expect_any_instance_of(Project).to receive(:update_commit_count).
and_call_original
subject.perform(project.id) worker.perform(project.id)
end end
it 'handles missing repository data' do it 'refreshes the method caches' do
expect_any_instance_of(Repository).to receive(:exists?).and_return(false) expect_any_instance_of(Repository).to receive(:refresh_method_caches).
expect_any_instance_of(Repository).not_to receive(:size) with(%i(readme)).
and_call_original
subject.perform(project.id) worker.perform(project.id, %i(readme))
end end
end end
end
context 'when an exclusive lease can not be obtained' do describe '#update_repository_size' do
it 'does nothing' do context 'when a lease could not be obtained' do
allow(subject).to receive(:try_obtain_lease_for).with(project.id). it 'does not update the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(false) and_return(false)
expect(subject).not_to receive(:update_caches) expect(project).not_to receive(:update_repository_size)
worker.update_repository_size(project)
end
end
context 'when a lease could be obtained' do
it 'updates the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(true)
expect(project).to receive(:update_repository_size).and_call_original
subject.perform(project.id) worker.update_repository_size(project)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment