Commit 1f99589a authored by Sean McGivern's avatar Sean McGivern Committed by Alejandro Rodríguez

Merge branch 'smarter-cache-invalidation' into 'master'

Smarter cache invalidation

Fixes https://gitlab.com/gitlab-org/gitlab-ce/issues/23550

See merge request !7360
parent f2522c0c
...@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base ...@@ -1086,7 +1086,7 @@ class Project < ActiveRecord::Base
"refs/heads/#{branch}", "refs/heads/#{branch}",
force: true) force: true)
repository.copy_gitattributes(branch) repository.copy_gitattributes(branch)
repository.expire_avatar_cache(branch) repository.expire_avatar_cache
reload_default_branch reload_default_branch
end end
......
This diff is collapsed.
...@@ -18,7 +18,9 @@ class Tree ...@@ -18,7 +18,9 @@ class Tree
def readme def readme
return @readme if defined?(@readme) return @readme if defined?(@readme)
available_readmes = blobs.select(&:readme?) available_readmes = blobs.select do |blob|
Gitlab::FileDetector.type_of(blob.name) == :readme
end
previewable_readmes = available_readmes.select do |blob| previewable_readmes = available_readmes.select do |blob|
previewable?(blob.name) previewable?(blob.name)
......
...@@ -18,7 +18,7 @@ class GitPushService < BaseService ...@@ -18,7 +18,7 @@ class GitPushService < BaseService
# #
def execute def execute
@project.repository.after_create if @project.empty_repo? @project.repository.after_create if @project.empty_repo?
@project.repository.after_push_commit(branch_name, params[:newrev]) @project.repository.after_push_commit(branch_name)
if push_remove_branch? if push_remove_branch?
@project.repository.after_remove_branch @project.repository.after_remove_branch
...@@ -51,12 +51,32 @@ class GitPushService < BaseService ...@@ -51,12 +51,32 @@ class GitPushService < BaseService
execute_related_hooks execute_related_hooks
perform_housekeeping perform_housekeeping
update_caches
end end
def update_gitattributes def update_gitattributes
@project.repository.copy_gitattributes(params[:ref]) @project.repository.copy_gitattributes(params[:ref])
end end
def update_caches
if is_default_branch?
paths = Set.new
@push_commits.each do |commit|
commit.raw_diffs(deltas_only: true).each do |diff|
paths << diff.new_path
end
end
types = Gitlab::FileDetector.types_in_paths(paths.to_a)
else
types = []
end
ProjectCacheWorker.perform_async(@project.id, types)
end
protected protected
def execute_related_hooks def execute_related_hooks
...@@ -70,7 +90,6 @@ class GitPushService < BaseService ...@@ -70,7 +90,6 @@ class GitPushService < BaseService
@project.execute_hooks(build_push_data.dup, :push_hooks) @project.execute_hooks(build_push_data.dup, :push_hooks)
@project.execute_services(build_push_data.dup, :push_hooks) @project.execute_services(build_push_data.dup, :push_hooks)
Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute Ci::CreatePipelineService.new(@project, current_user, build_push_data).execute
ProjectCacheWorker.perform_async(@project.id)
if push_remove_branch? if push_remove_branch?
AfterBranchDeleteService AfterBranchDeleteService
......
# Worker for updating any project specific caches. # Worker for updating any project specific caches.
#
# This worker runs at most once every 15 minutes per project. This is to ensure
# that multiple instances of jobs for this worker don't hammer the underlying
# storage engine as much.
class ProjectCacheWorker class ProjectCacheWorker
include Sidekiq::Worker include Sidekiq::Worker
include DedicatedSidekiqQueue include DedicatedSidekiqQueue
LEASE_TIMEOUT = 15.minutes.to_i LEASE_TIMEOUT = 15.minutes.to_i
def self.lease_for(project_id) # project_id - The ID of the project for which to flush the cache.
Gitlab::ExclusiveLease. # refresh - An Array containing extra types of data to refresh such as
new("project_cache_worker:#{project_id}", timeout: LEASE_TIMEOUT) # `:readme` to flush the README and `:changelog` to flush the
end # CHANGELOG.
def perform(project_id, refresh = [])
# Overwrite Sidekiq's implementation so we only schedule when actually needed. project = Project.find_by(id: project_id)
def self.perform_async(project_id)
# If a lease for this project is still being held there's no point in
# scheduling a new job.
super unless lease_for(project_id).exists?
end
def perform(project_id) return unless project && project.repository.exists?
if try_obtain_lease_for(project_id)
Rails.logger.
info("Obtained ProjectCacheWorker lease for project #{project_id}")
else
Rails.logger.
info("Could not obtain ProjectCacheWorker lease for project #{project_id}")
return update_repository_size(project)
end project.update_commit_count
update_caches(project_id) project.repository.refresh_method_caches(refresh.map(&:to_sym))
end end
def update_caches(project_id) def update_repository_size(project)
project = Project.find(project_id) return unless try_obtain_lease_for(project.id, :update_repository_size)
return unless project.repository.exists? Rails.logger.info("Updating repository size for project #{project.id}")
project.update_repository_size project.update_repository_size
project.update_commit_count
if project.repository.root_ref
project.repository.build_cache
end
end end
def try_obtain_lease_for(project_id) private
self.class.lease_for(project_id).try_obtain
def try_obtain_lease_for(project_id, section)
Gitlab::ExclusiveLease.
new("project_cache_worker:#{project_id}:#{section}", timeout: LEASE_TIMEOUT).
try_obtain
end end
end end
---
title: Rework cache invalidation so only changed data is refreshed
merge_request: 7360
author:
require 'set'
module Gitlab
# Module that can be used to detect if a path points to a special file such as
# a README or a CONTRIBUTING file.
module FileDetector
PATTERNS = {
readme: /\Areadme/i,
changelog: /\A(changelog|history|changes|news)/i,
license: /\A(licen[sc]e|copying)(\..+|\z)/i,
contributing: /\Acontributing/i,
version: 'version',
gitignore: '.gitignore',
koding: '.koding.yml',
gitlab_ci: '.gitlab-ci.yml',
avatar: /\Alogo\.(png|jpg|gif)\z/
}
# Returns an Array of file types based on the given paths.
#
# This method can be used to check if a list of file paths (e.g. of changed
# files) involve any special files such as a README or a LICENSE file.
#
# Example:
#
# types_in_paths(%w{README.md foo/bar.txt}) # => [:readme]
def self.types_in_paths(paths)
types = Set.new
paths.each do |path|
type = type_of(path)
types << type if type
end
types.to_a
end
# Returns the type of a file path, or nil if none could be detected.
#
# Returned types are Symbols such as `:readme`, `:version`, etc.
#
# Example:
#
# type_of('README.md') # => :readme
# type_of('VERSION') # => :version
def self.type_of(path)
name = File.basename(path)
PATTERNS.each do |type, search|
did_match = if search.is_a?(Regexp)
name =~ search
else
name.casecmp(search) == 0
end
return type if did_match
end
nil
end
end
end
require 'spec_helper'
describe Gitlab::FileDetector do
describe '.types_in_paths' do
it 'returns the file types for the given paths' do
expect(described_class.types_in_paths(%w(README.md CHANGELOG VERSION VERSION))).
to eq(%i{readme changelog version})
end
it 'does not include unrecognized file paths' do
expect(described_class.types_in_paths(%w(README.md foo.txt))).
to eq(%i{readme})
end
end
describe '.type_of' do
it 'returns the type of a README file' do
expect(described_class.type_of('README.md')).to eq(:readme)
end
it 'returns the type of a changelog file' do
%w(CHANGELOG HISTORY CHANGES NEWS).each do |file|
expect(described_class.type_of(file)).to eq(:changelog)
end
end
it 'returns the type of a license file' do
%w(LICENSE LICENCE COPYING).each do |file|
expect(described_class.type_of(file)).to eq(:license)
end
end
it 'returns the type of a version file' do
expect(described_class.type_of('VERSION')).to eq(:version)
end
it 'returns the type of a .gitignore file' do
expect(described_class.type_of('.gitignore')).to eq(:gitignore)
end
it 'returns the type of a Koding config file' do
expect(described_class.type_of('.koding.yml')).to eq(:koding)
end
it 'returns the type of a GitLab CI config file' do
expect(described_class.type_of('.gitlab-ci.yml')).to eq(:gitlab_ci)
end
it 'returns the type of an avatar' do
%w(logo.gif logo.png logo.jpg).each do |file|
expect(described_class.type_of(file)).to eq(:avatar)
end
end
it 'returns nil for an unknown file' do
expect(described_class.type_of('foo.txt')).to be_nil
end
end
end
...@@ -1572,7 +1572,7 @@ describe Project, models: true do ...@@ -1572,7 +1572,7 @@ describe Project, models: true do
end end
it 'expires the avatar cache' do it 'expires the avatar cache' do
expect(project.repository).to receive(:expire_avatar_cache).with(project.default_branch) expect(project.repository).to receive(:expire_avatar_cache)
project.change_head(project.default_branch) project.change_head(project.default_branch)
end end
......
This diff is collapsed.
...@@ -14,7 +14,7 @@ describe API::API, api: true do ...@@ -14,7 +14,7 @@ describe API::API, api: true do
describe "GET /projects/:id/repository/branches" do describe "GET /projects/:id/repository/branches" do
it "returns an array of project branches" do it "returns an array of project branches" do
project.repository.expire_cache project.repository.expire_all_method_caches
get api("/projects/#{project.id}/repository/branches", user) get api("/projects/#{project.id}/repository/branches", user)
expect(response).to have_http_status(200) expect(response).to have_http_status(200)
......
...@@ -27,27 +27,14 @@ describe GitPushService, services: true do ...@@ -27,27 +27,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes general cached data' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_push_commit).with('master')
with('master', newrev)
subject subject
end end
it 'flushes the visible content cache' do it 'calls the after_create_branch hook' do
expect(project.repository).to receive(:expire_has_visible_content_cache) expect(project.repository).to receive(:after_create_branch)
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
subject subject
end end
...@@ -56,21 +43,8 @@ describe GitPushService, services: true do ...@@ -56,21 +43,8 @@ describe GitPushService, services: true do
context 'existing branch' do context 'existing branch' do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes general cached data' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_push_commit).with('master')
with('master', newrev)
subject
end
it 'does not flush the branches cache' do
expect(project.repository).not_to receive(:expire_branches_cache)
subject
end
it 'does not flush the branch count cache' do
expect(project.repository).not_to receive(:expire_branch_count_cache)
subject subject
end end
...@@ -81,27 +55,14 @@ describe GitPushService, services: true do ...@@ -81,27 +55,14 @@ describe GitPushService, services: true do
it { is_expected.to be_truthy } it { is_expected.to be_truthy }
it 'flushes the visible content cache' do it 'calls the after_push_commit hook' do
expect(project.repository).to receive(:expire_has_visible_content_cache) expect(project.repository).to receive(:after_push_commit).with('master')
subject
end
it 'flushes the branches cache' do
expect(project.repository).to receive(:expire_branches_cache)
subject
end
it 'flushes the branch count cache' do
expect(project.repository).to receive(:expire_branch_count_cache)
subject subject
end end
it 'flushes general cached data' do it 'calls the after_remove_branch hook' do
expect(project.repository).to receive(:expire_cache). expect(project.repository).to receive(:after_remove_branch)
with('master', newrev)
subject subject
end end
...@@ -588,6 +549,51 @@ describe GitPushService, services: true do ...@@ -588,6 +549,51 @@ describe GitPushService, services: true do
end end
end end
describe '#update_caches' do
let(:service) do
described_class.new(project,
user,
oldrev: sample_commit.parent_id,
newrev: sample_commit.id,
ref: 'refs/heads/master')
end
context 'on the default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(true)
end
it 'flushes the caches of any special files that have been changed' do
commit = double(:commit)
diff = double(:diff, new_path: 'README.md')
expect(commit).to receive(:raw_diffs).with(deltas_only: true).
and_return([diff])
service.push_commits = [commit]
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, %i(readme))
service.update_caches
end
end
context 'on a non-default branch' do
before do
allow(service).to receive(:is_default_branch?).and_return(false)
end
it 'does not flush any conditional caches' do
expect(ProjectCacheWorker).to receive(:perform_async).
with(project.id, []).
and_call_original
service.update_caches
end
end
end
def execute_service(project, user, oldrev, newrev, ref) def execute_service(project, user, oldrev, newrev, ref)
service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref ) service = described_class.new(project, user, oldrev: oldrev, newrev: newrev, ref: ref )
service.execute service.execute
......
...@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do ...@@ -18,7 +18,7 @@ describe GitTagPushService, services: true do
end end
it 'flushes general cached data' do it 'flushes general cached data' do
expect(project.repository).to receive(:expire_cache) expect(project.repository).to receive(:before_push_tag)
subject subject
end end
...@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do ...@@ -28,12 +28,6 @@ describe GitTagPushService, services: true do
subject subject
end end
it 'flushes the tag count cache' do
expect(project.repository).to receive(:expire_tag_count_cache)
subject
end
end end
describe "Git Tag Push Data" do describe "Git Tag Push Data" do
......
...@@ -2,62 +2,78 @@ require 'spec_helper' ...@@ -2,62 +2,78 @@ require 'spec_helper'
describe ProjectCacheWorker do describe ProjectCacheWorker do
let(:project) { create(:project) } let(:project) { create(:project) }
let(:worker) { described_class.new }
subject { described_class.new } describe '#perform' do
before do
describe '.perform_async' do allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain).
it 'schedules the job when no lease exists' do and_return(true)
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). end
and_return(false)
expect_any_instance_of(described_class).to receive(:perform) context 'with a non-existing project' do
it 'does nothing' do
expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id) worker.perform(-1)
end
end end
it 'does not schedule the job when a lease exists' do context 'with an existing project without a repository' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:exists?). it 'does nothing' do
and_return(true) allow_any_instance_of(Repository).to receive(:exists?).and_return(false)
expect_any_instance_of(described_class).not_to receive(:perform) expect(worker).not_to receive(:update_repository_size)
described_class.perform_async(project.id) worker.perform(project.id)
end end
end end
describe '#perform' do context 'with an existing project' do
context 'when an exclusive lease can be obtained' do it 'updates the repository size' do
before do expect(worker).to receive(:update_repository_size).and_call_original
allow(subject).to receive(:try_obtain_lease_for).with(project.id).
and_return(true)
end
it 'updates project cache data' do worker.perform(project.id)
expect_any_instance_of(Repository).to receive(:size) end
expect_any_instance_of(Repository).to receive(:commit_count)
expect_any_instance_of(Project).to receive(:update_repository_size) it 'updates the commit count' do
expect_any_instance_of(Project).to receive(:update_commit_count) expect_any_instance_of(Project).to receive(:update_commit_count).
and_call_original
subject.perform(project.id) worker.perform(project.id)
end end
it 'handles missing repository data' do it 'refreshes the method caches' do
expect_any_instance_of(Repository).to receive(:exists?).and_return(false) expect_any_instance_of(Repository).to receive(:refresh_method_caches).
expect_any_instance_of(Repository).not_to receive(:size) with(%i(readme)).
and_call_original
subject.perform(project.id) worker.perform(project.id, %i(readme))
end
end end
end end
context 'when an exclusive lease can not be obtained' do describe '#update_repository_size' do
it 'does nothing' do context 'when a lease could not be obtained' do
allow(subject).to receive(:try_obtain_lease_for).with(project.id). it 'does not update the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(false) and_return(false)
expect(subject).not_to receive(:update_caches) expect(project).not_to receive(:update_repository_size)
worker.update_repository_size(project)
end
end
context 'when a lease could be obtained' do
it 'updates the repository size' do
allow(worker).to receive(:try_obtain_lease_for).
with(project.id, :update_repository_size).
and_return(true)
expect(project).to receive(:update_repository_size).and_call_original
subject.perform(project.id) worker.update_repository_size(project)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment