Commit 0ad80cab authored by Toon Claes's avatar Toon Claes

Use worker to destroy namespaceless projects in post-deploy

Destroying projects can be very time consuming. So instead of destroying them in
the post-deploy, just schedule them and make Sidekiq do the hard work.

They are scheduled in batches of 5000 records. This way the number of database
requests is limited while also the amount data read to memory is limited.
parent 4f446dd4
# See http://doc.gitlab.com/ce/development/migration_style_guide.html # Worker to destroy projects that do not have a namespace
# for more information on how to write migrations for GitLab. #
# It destroys everything it can without having the info about the namespace it
class CleanUpPendingDeleteProjects < ActiveRecord::Migration # used to belong to. Projects in this state should be rare.
include Gitlab::Database::MigrationHelpers # The worker will reject doing anything for projects that *do* have a
# namespace. For those use ProjectDestroyWorker instead.
DOWNTIME = false class NamespacelessProjectDestroyWorker
include Sidekiq::Worker
disable_ddl_transaction! include DedicatedSidekiqQueue
def up def self.bulk_perform_async(args_list)
Project.unscoped.where(pending_delete: true).each { |project| delete_project(project, admin) } Sidekiq::Client.push_bulk('class' => self, 'queue' => sidekiq_options['queue'], 'args' => args_list)
end end
def down def perform(project_id, user_id, params)
# noop begin
project = Project.unscoped.find(project_id)
rescue ActiveRecord::RecordNotFound
return
end end
return unless project.namespace_id.nil? # Reject doing anything for projects that *do* have a namespace
private user = User.find(user_id)
return unless user.can?(:remove_project, project)
def delete_project(project)
project.team.truncate project.team.truncate
unlink_fork(project) if project.forked? unlink_fork(project) if project.forked?
[:events, :issues, :merge_requests, :labels, :milestones, :notes, :snippets].each do |thing|
project.send(thing).delete_all
end
# Override Project#remove_pages for this instance so it doesn't do anything # Override Project#remove_pages for this instance so it doesn't do anything
def project.remove_pages def project.remove_pages
end end
...@@ -34,6 +34,8 @@ class CleanUpPendingDeleteProjects < ActiveRecord::Migration ...@@ -34,6 +34,8 @@ class CleanUpPendingDeleteProjects < ActiveRecord::Migration
project.destroy! project.destroy!
end end
private
def unlink_fork(project) def unlink_fork(project)
merge_requests = project.forked_from_project.merge_requests.opened.from_project(project) merge_requests = project.forked_from_project.merge_requests.opened.from_project(project)
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
- [expire_build_instance_artifacts, 1] - [expire_build_instance_artifacts, 1]
- [group_destroy, 1] - [group_destroy, 1]
- [irker, 1] - [irker, 1]
- [namespaceless_project_destroy, 1]
- [project_cache, 1] - [project_cache, 1]
- [project_destroy, 1] - [project_destroy, 1]
- [project_export, 1] - [project_export, 1]
......
# This is the counterpart of RequeuePendingDeleteProjects and cleans all
# projects with `pending_delete = true` and that do not have a namespace.
class CleanupNamespacelessPendingDeleteProjects < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
admin = User.find_by(admin: true)
return unless admin
@offset = 0
loop do
ids = pending_delete_batch
break if ids.rows.count.zero?
args = ids.map { |id| [id['id'], admin.id, {}] }
NamespacelessProjectDestroyWorker.bulk_perform_async(args)
@offset += 1
end
end
def down
# noop
end
private
def pending_delete_batch
connection.exec_query(find_batch)
end
BATCH_SIZE = 5000
def find_batch
projects = Arel::Table.new(:projects)
projects.project(projects[:id]).
where(projects[:pending_delete].eq(true)).
where(projects[:namespace_id].eq(nil)).
skip(@offset * BATCH_SIZE).
take(BATCH_SIZE).
to_sql
end
end
# encoding: utf-8
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170502101023_clean_up_pending_delete_projects.rb')
describe CleanUpPendingDeleteProjects do
let(:migration) { described_class.new }
let!(:admin) { create(:admin) }
let!(:project) { create(:empty_project, pending_delete: true) }
describe '#up' do
it 'only cleans up pending delete projects' do
create(:empty_project)
expect do
migration.up
end.to change { Project.unscoped.count }.by(-1)
end
it "truncates the project's team" do
project.add_master(admin)
expect_any_instance_of(ProjectTeam).to receive(:truncate)
migration.up
end
it 'calls Project#destroy!' do
expect_any_instance_of(Project).to receive(:destroy!)
migration.up
end
it 'does not do anything in Project#remove_pages method' do
expect(Gitlab::PagesTransfer).not_to receive(:new)
migration.up
end
context 'project not a fork of another project' do
it "doesn't call unlink_fork" do
expect(migration).not_to receive(:unlink_fork)
migration.up
end
end
context 'project forked from another' do
let!(:parent_project) { create(:empty_project) }
before do
create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project)
end
it 'closes open merge requests' do
project.update_attribute(:pending_delete, false) # needed to create the MR
merge_request = create(:merge_request, source_project: project, target_project: parent_project)
project.update_attribute(:pending_delete, true)
migration.up
expect(merge_request.reload).to be_closed
end
it 'destroys the link' do
migration.up
expect(parent_project.forked_project_links).to be_empty
end
end
end
end
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20170502101023_cleanup_namespaceless_pending_delete_projects.rb')
describe CleanupNamespacelessPendingDeleteProjects do
before do
# Stub after_save callbacks that will fail when Project has no namespace
allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil)
allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil)
end
describe '#up' do
it 'only cleans up pending delete projects' do
admin = create(:admin)
create(:empty_project)
create(:empty_project, pending_delete: true)
project = build(:empty_project, pending_delete: true, namespace_id: nil)
project.save(validate: false)
expect(NamespacelessProjectDestroyWorker).to receive(:bulk_perform_async).with([[project.id.to_s, admin.id, {}]])
described_class.new.up
end
end
end
require 'spec_helper'
describe NamespacelessProjectDestroyWorker do
subject { described_class.new }
before do
# Stub after_save callbacks that will fail when Project has no namespace
allow_any_instance_of(Project).to receive(:ensure_dir_exist).and_return(nil)
allow_any_instance_of(Project).to receive(:update_project_statistics).and_return(nil)
end
describe '#perform' do
context 'project has namespace' do
it 'does not do anything' do
project = create(:empty_project)
subject.perform(project.id, project.owner.id, {})
expect(Project.unscoped.all).to include(project)
end
end
context 'project has no namespace' do
let(:admin) { create(:admin) }
let!(:project) do
project = build(:empty_project, namespace_id: nil)
project.save(validate: false)
project
end
context 'project not a fork of another project' do
it "truncates the project's team" do
expect_any_instance_of(ProjectTeam).to receive(:truncate)
subject.perform(project.id, admin.id, {})
end
it 'deletes the project' do
subject.perform(project.id, admin.id, {})
expect(Project.unscoped.all).not_to include(project)
end
it 'does not call unlink_fork' do
is_expected.not_to receive(:unlink_fork)
subject.perform(project.id, admin.id, {})
end
it 'does not do anything in Project#remove_pages method' do
expect(Gitlab::PagesTransfer).not_to receive(:new)
subject.perform(project.id, admin.id, {})
end
end
context 'project forked from another' do
let!(:parent_project) { create(:empty_project) }
before do
create(:forked_project_link, forked_to_project: project, forked_from_project: parent_project)
end
it 'closes open merge requests' do
merge_request = create(:merge_request, source_project: project, target_project: parent_project)
subject.perform(project.id, admin.id, {})
expect(merge_request.reload).to be_closed
end
it 'destroys the link' do
subject.perform(project.id, admin.id, {})
expect(parent_project.forked_project_links).to be_empty
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment