Commit 0956c860 authored by Douwe Maan

Merge branch 'brodock-refactor-hashed-storage-task' into 'master'

Extracted auxiliary methods from storage.rake into specific RakeHelper

See merge request gitlab-org/gitlab-ce!18962
parents 62d96d88 8c5ce1b0
...@@ -24,7 +24,6 @@ gitlab-rake gitlab:storage:migrate_to_hashed ...@@ -24,7 +24,6 @@ gitlab-rake gitlab:storage:migrate_to_hashed
```bash ```bash
rake gitlab:storage:migrate_to_hashed rake gitlab:storage:migrate_to_hashed
``` ```
You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen. You can monitor the progress in the _Admin > Monitoring > Background jobs_ screen.
...@@ -52,7 +51,6 @@ gitlab-rake gitlab:storage:legacy_projects ...@@ -52,7 +51,6 @@ gitlab-rake gitlab:storage:legacy_projects
```bash ```bash
rake gitlab:storage:legacy_projects rake gitlab:storage:legacy_projects
``` ```
------ ------
...@@ -86,7 +84,6 @@ gitlab-rake gitlab:storage:hashed_projects ...@@ -86,7 +84,6 @@ gitlab-rake gitlab:storage:hashed_projects
```bash ```bash
rake gitlab:storage:hashed_projects rake gitlab:storage:hashed_projects
``` ```
------ ------
...@@ -120,7 +117,6 @@ gitlab-rake gitlab:storage:legacy_attachments ...@@ -120,7 +117,6 @@ gitlab-rake gitlab:storage:legacy_attachments
```bash ```bash
rake gitlab:storage:legacy_attachments rake gitlab:storage:legacy_attachments
``` ```
------ ------
...@@ -137,7 +133,6 @@ gitlab-rake gitlab:storage:list_legacy_attachments ...@@ -137,7 +133,6 @@ gitlab-rake gitlab:storage:list_legacy_attachments
```bash ```bash
rake gitlab:storage:list_legacy_attachments rake gitlab:storage:list_legacy_attachments
``` ```
## List attachments on Hashed storage ## List attachments on Hashed storage
...@@ -154,7 +149,6 @@ gitlab-rake gitlab:storage:hashed_attachments ...@@ -154,7 +149,6 @@ gitlab-rake gitlab:storage:hashed_attachments
```bash ```bash
rake gitlab:storage:hashed_attachments rake gitlab:storage:hashed_attachments
``` ```
------ ------
...@@ -171,7 +165,6 @@ gitlab-rake gitlab:storage:list_hashed_attachments ...@@ -171,7 +165,6 @@ gitlab-rake gitlab:storage:list_hashed_attachments
```bash ```bash
rake gitlab:storage:list_hashed_attachments rake gitlab:storage:list_hashed_attachments
``` ```
[storage-types]: ../repository_storage_types.md [storage-types]: ../repository_storage_types.md
......
module Gitlab
  module HashedStorage
    # Helper methods backing the `gitlab:storage:*` rake tasks.
    #
    # Options are read from the BATCH, LIMIT, ID_FROM and ID_TO environment
    # variables; all output is written to $stdout.
    module RakeHelper
      # @return [Integer] value of the BATCH variable, 200 when it is unset
      def self.batch_size
        ENV.fetch('BATCH', 200).to_i
      end

      # @return [Integer] value of the LIMIT variable, 500 when it is unset
      def self.listing_limit
        ENV.fetch('LIMIT', 500).to_i
      end

      # Walks the projects with unmigrated storage in batches of +batch_size+,
      # bounded by the ID_FROM / ID_TO variables when they are set.
      #
      # @yield [start, finish] lowest and highest project id of each batch
      def self.project_id_batches(&block)
        Project.with_unmigrated_storage.in_batches(of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO']) do |relation| # rubocop: disable Cop/InBatches
          ids = relation.pluck(:id)

          yield ids.min, ids.max
        end
      end

      # Uploads joined to their project where the project's storage_version is
      # NULL or lower than the version registered for the :attachments feature.
      def self.legacy_attachments_relation
        Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
          JOIN projects
          ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
        SQL
      end

      # Uploads joined to their project where the project's storage_version is
      # at least the version registered for the :attachments feature.
      def self.hashed_attachments_relation
        Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
          JOIN projects
          ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
        SQL
      end

      # Prints how many records +relation+ contains.
      #
      # @param relation_name [String] label used in the message
      # @param relation [#count] collection to count
      # @return [Integer] the number of records found
      def self.relation_summary(relation_name, relation)
        relation_count = relation.count
        $stdout.puts "* Found #{relation_count} #{relation_name}".color(:green)

        relation_count
      end

      # Prints the summary followed by one line (full path and id) per project.
      # +with_route+ is applied to the relation before iterating.
      def self.projects_list(relation_name, relation)
        listing(relation_name, relation.with_route) do |project|
          $stdout.puts " - #{project.full_path} (id: #{project.id})".color(:red)
        end
      end

      # Prints the summary followed by one line (path and id) per upload.
      def self.attachments_list(relation_name, relation)
        listing(relation_name, relation) do |upload|
          $stdout.puts " - #{upload.path} (id: #{upload.id})".color(:red)
        end
      end

      # Prints the summary for +relation+ and yields at most +listing_limit+
      # of its records, loading them in batches of +batch_size+.
      #
      # @param relation_name [String] label used in the messages
      # @param relation [#count, #find_each] collection to list
      # @yield [element] each record to display
      def self.listing(relation_name, relation)
        relation_count = relation_summary(relation_name, relation)
        return unless relation_count > 0

        # A negative LIMIT is treated as zero so the notice stays meaningful.
        limit = [listing_limit, 0].max

        if relation_count > limit
          $stdout.puts " ! Displaying first #{limit} #{relation_name}..."
        end

        # The loop below only checks the limit after yielding, so without this
        # guard a limit of zero would still print the first record.
        return if limit.zero?

        relation.find_each(batch_size: batch_size).with_index do |element, index|
          yield element

          break if index + 1 >= limit
        end
      end
    end
  end
end
...@@ -3,6 +3,7 @@ namespace :gitlab do ...@@ -3,6 +3,7 @@ namespace :gitlab do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage' desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
task migrate_to_hashed: :environment do task migrate_to_hashed: :environment do
legacy_projects_count = Project.with_unmigrated_storage.count legacy_projects_count = Project.with_unmigrated_storage.count
helper = Gitlab::HashedStorage::RakeHelper
if legacy_projects_count == 0 if legacy_projects_count == 0
puts 'There are no projects requiring storage migration. Nothing to do!' puts 'There are no projects requiring storage migration. Nothing to do!'
...@@ -10,9 +11,9 @@ namespace :gitlab do ...@@ -10,9 +11,9 @@ namespace :gitlab do
next next
end end
print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{batch_size}" print "Enqueuing migration of #{legacy_projects_count} projects in batches of #{helper.batch_size}"
project_id_batches do |start, finish| helper.project_id_batches do |start, finish|
StorageMigratorWorker.perform_async(start, finish) StorageMigratorWorker.perform_async(start, finish)
print '.' print '.'
...@@ -23,118 +24,50 @@ namespace :gitlab do ...@@ -23,118 +24,50 @@ namespace :gitlab do
desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage' desc 'Gitlab | Storage | Summary of existing projects using Legacy Storage'
task legacy_projects: :environment do task legacy_projects: :environment do
relation_summary('projects', Project.without_storage_feature(:repository)) helper = Gitlab::HashedStorage::RakeHelper
helper.relation_summary('projects using Legacy Storage', Project.without_storage_feature(:repository))
end end
desc 'Gitlab | Storage | List existing projects using Legacy Storage' desc 'Gitlab | Storage | List existing projects using Legacy Storage'
task list_legacy_projects: :environment do task list_legacy_projects: :environment do
projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository)) helper = Gitlab::HashedStorage::RakeHelper
helper.projects_list('projects using Legacy Storage', Project.without_storage_feature(:repository))
end end
desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage' desc 'Gitlab | Storage | Summary of existing projects using Hashed Storage'
task hashed_projects: :environment do task hashed_projects: :environment do
relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository)) helper = Gitlab::HashedStorage::RakeHelper
helper.relation_summary('projects using Hashed Storage', Project.with_storage_feature(:repository))
end end
desc 'Gitlab | Storage | List existing projects using Hashed Storage' desc 'Gitlab | Storage | List existing projects using Hashed Storage'
task list_hashed_projects: :environment do task list_hashed_projects: :environment do
projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository)) helper = Gitlab::HashedStorage::RakeHelper
helper.projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository))
end end
desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage' desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
task legacy_attachments: :environment do task legacy_attachments: :environment do
relation_summary('attachments using Legacy Storage', legacy_attachments_relation) helper = Gitlab::HashedStorage::RakeHelper
helper.relation_summary('attachments using Legacy Storage', helper.legacy_attachments_relation)
end end
desc 'Gitlab | Storage | List existing project attachments using Legacy Storage' desc 'Gitlab | Storage | List existing project attachments using Legacy Storage'
task list_legacy_attachments: :environment do task list_legacy_attachments: :environment do
attachments_list('attachments using Legacy Storage', legacy_attachments_relation) helper = Gitlab::HashedStorage::RakeHelper
helper.attachments_list('attachments using Legacy Storage', helper.legacy_attachments_relation)
end end
desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage' desc 'Gitlab | Storage | Summary of project attachments using Hashed Storage'
task hashed_attachments: :environment do task hashed_attachments: :environment do
relation_summary('attachments using Hashed Storage', hashed_attachments_relation) helper = Gitlab::HashedStorage::RakeHelper
helper.relation_summary('attachments using Hashed Storage', helper.hashed_attachments_relation)
end end
desc 'Gitlab | Storage | List existing project attachments using Hashed Storage' desc 'Gitlab | Storage | List existing project attachments using Hashed Storage'
task list_hashed_attachments: :environment do task list_hashed_attachments: :environment do
attachments_list('attachments using Hashed Storage', hashed_attachments_relation) helper = Gitlab::HashedStorage::RakeHelper
end helper.attachments_list('attachments using Hashed Storage', helper.hashed_attachments_relation)
# Batch size used by the storage tasks: the BATCH environment variable
# converted with to_i, or 200 when the variable is not set.
def batch_size
  configured = ENV.fetch('BATCH') { 200 }
  configured.to_i
end
# Iterates over the projects with unmigrated storage in batches of
# +batch_size+ (bounded by ID_FROM / ID_TO when set) and yields the lowest
# and highest project id of every batch.
def project_id_batches(&block)
  scope = Project.with_unmigrated_storage
  options = { of: batch_size, start: ENV['ID_FROM'], finish: ENV['ID_TO'] }

  scope.in_batches(**options) do |batch| # rubocop: disable Cop/InBatches
    lowest_id, highest_id = batch.pluck(:id).minmax

    yield lowest_id, highest_id
  end
end
# Uploads joined to their project, restricted to projects whose
# storage_version is NULL or below the version registered for :attachments.
def legacy_attachments_relation
  projects_join = <<~SQL
    JOIN projects
    ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
  SQL

  uploads_with_project = Upload.joins(projects_join)
  uploads_with_project.where(
    'projects.storage_version < :version OR projects.storage_version IS NULL',
    version: Project::HASHED_STORAGE_FEATURES[:attachments]
  )
end
# Uploads joined to their project, restricted to projects whose
# storage_version is at least the version registered for :attachments.
def hashed_attachments_relation
  projects_join = <<~SQL
    JOIN projects
    ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
  SQL

  uploads_with_project = Upload.joins(projects_join)
  uploads_with_project.where(
    'projects.storage_version >= :version',
    version: Project::HASHED_STORAGE_FEATURES[:attachments]
  )
end
# Prints "* Found <count> <relation_name>" in green and returns the count
# obtained from +relation+.
def relation_summary(relation_name, relation)
  relation.count.tap do |total|
    puts "* Found #{total} #{relation_name}".color(:green)
  end
end
# Prints the summary for +relation+ and then one line (full path and id) per
# project, loading projects in batches of +batch_size+ and stopping once
# LIMIT entries (500 when unset) have been printed.
def projects_list(relation_name, relation)
  total = relation_summary(relation_name, relation)
  routed_projects = relation.with_route
  max_entries = ENV.fetch('LIMIT', 500).to_i
  return unless total.positive?

  if total > max_entries
    puts " ! Displaying first #{max_entries} #{relation_name}..."
  end

  shown = 0
  routed_projects.find_in_batches(batch_size: batch_size) do |projects|
    projects.each do |project|
      puts " - #{project.full_path} (id: #{project.id})".color(:red)
      shown += 1

      # Leave the whole method, not just the inner block, once the cap is hit.
      return if shown >= max_entries # rubocop:disable Lint/NonLocalExitFromIterator, Cop/AvoidReturnFromBlocks
    end
  end
end
def attachments_list(relation_name, relation)
relation_count = relation_summary(relation_name, relation)
limit = ENV.fetch('LIMIT', 500).to_i
return unless relation_count > 0
puts " ! Displaying first #{limit} #{relation_name}..." if relation_count > limit
counter = 0
relation.find_in_batches(batch_size: batch_size) do |batch|
batch.each do |upload|
counter += 1
puts " - #{upload.path} (id: #{upload.id})".color(:red)
return if counter >= limit # rubocop:disable Lint/NonLocalExitFromIterator, Cop/AvoidReturnFromBlocks
end
end
end end
end end
end end
require 'rake_helper' require 'rake_helper'
describe 'gitlab:storage rake tasks' do describe 'gitlab:storage:*' do
before do before do
Rake.application.rake_require 'tasks/gitlab/storage' Rake.application.rake_require 'tasks/gitlab/storage'
stub_warn_user_is_not_gitlab stub_warn_user_is_not_gitlab
end end
describe 'migrate_to_hashed rake task' do shared_examples "rake listing entities" do |entity_name, storage_type|
context 'limiting to 2' do
before do
stub_env('LIMIT' => 2)
end
it "lists 2 out of 3 #{storage_type.downcase} #{entity_name}" do
create_collection
expect { run_rake_task(task) }.to output(/Found 3 #{entity_name} using #{storage_type} Storage.*Displaying first 2 #{entity_name}/m).to_stdout
end
end
context "without any #{storage_type.downcase} #{entity_name.singularize}" do
it 'displays message for empty results' do
expect { run_rake_task(task) }.to output(/Found 0 #{entity_name} using #{storage_type} Storage/).to_stdout
end
end
end
# Shared examples for the summary tasks. The including block must define
# `task` (the rake task name) and `create_collection` (creates 3 records);
# `entity_name` and `storage_type` are interpolated into the expected output.
shared_examples "rake entities summary" do |entity_name, storage_type|
context "with existing 3 #{storage_type.downcase} #{entity_name}" do
it "reports 3 #{storage_type.downcase} #{entity_name}" do
# Records must exist before the task runs so the count is 3.
create_collection
expect { run_rake_task(task) }.to output(/Found 3 #{entity_name} using #{storage_type} Storage/).to_stdout
end
end
context "without any #{storage_type.downcase} #{entity_name.singularize}" do
it 'displays message for empty results' do
# create_collection is never called here, so nothing matches.
expect { run_rake_task(task) }.to output(/Found 0 #{entity_name} using #{storage_type} Storage/).to_stdout
end
end
end
describe 'gitlab:storage:migrate_to_hashed' do
context '0 legacy projects' do context '0 legacy projects' do
it 'does nothing' do it 'does nothing' do
expect(StorageMigratorWorker).not_to receive(:perform_async) expect(StorageMigratorWorker).not_to receive(:perform_async)
...@@ -16,8 +52,8 @@ describe 'gitlab:storage rake tasks' do ...@@ -16,8 +52,8 @@ describe 'gitlab:storage rake tasks' do
end end
end end
context '5 legacy projects' do context '3 legacy projects' do
let(:projects) { create_list(:project, 5, storage_version: 0) } let(:projects) { create_list(:project, 3, storage_version: 0) }
context 'in batches of 1' do context 'in batches of 1' do
before do before do
...@@ -49,4 +85,64 @@ describe 'gitlab:storage rake tasks' do ...@@ -49,4 +85,64 @@ describe 'gitlab:storage rake tasks' do
end end
end end
end end
# Project tasks: each block supplies the `task` name and a `create_collection`
# that builds 3 projects. Legacy collections use storage_version 0, hashed
# collections use storage_version 1.

# Summary of projects on Legacy storage.
describe 'gitlab:storage:legacy_projects' do
it_behaves_like 'rake entities summary', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:legacy_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 0) }
end
end
# Listing of projects on Legacy storage.
describe 'gitlab:storage:list_legacy_projects' do
it_behaves_like 'rake listing entities', 'projects', 'Legacy' do
let(:task) { 'gitlab:storage:list_legacy_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 0) }
end
end
# Summary of projects on Hashed storage.
describe 'gitlab:storage:hashed_projects' do
it_behaves_like 'rake entities summary', 'projects', 'Hashed' do
let(:task) { 'gitlab:storage:hashed_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 1) }
end
end
# Listing of projects on Hashed storage.
describe 'gitlab:storage:list_hashed_projects' do
it_behaves_like 'rake listing entities', 'projects', 'Hashed' do
let(:task) { 'gitlab:storage:list_hashed_projects' }
let(:create_collection) { create_list(:project, 3, storage_version: 1) }
end
end
# Attachment tasks: each block supplies the `task` name and a
# `create_collection` that builds 3 uploads belonging to a single project.
# The owning project uses storage_version 1 for the Legacy cases and 2 for the
# Hashed cases.
# NOTE(review): this presumes the :attachments entry of
# Project::HASHED_STORAGE_FEATURES is 2 — confirm against the model.

# Summary of attachments on Legacy storage.
describe 'gitlab:storage:legacy_attachments' do
it_behaves_like 'rake entities summary', 'attachments', 'Legacy' do
let(:task) { 'gitlab:storage:legacy_attachments' }
let(:project) { create(:project, storage_version: 1) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
# Listing of attachments on Legacy storage.
describe 'gitlab:storage:list_legacy_attachments' do
it_behaves_like 'rake listing entities', 'attachments', 'Legacy' do
let(:task) { 'gitlab:storage:list_legacy_attachments' }
let(:project) { create(:project, storage_version: 1) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
# Summary of attachments on Hashed storage.
describe 'gitlab:storage:hashed_attachments' do
it_behaves_like 'rake entities summary', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:hashed_attachments' }
let(:project) { create(:project, storage_version: 2) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
# Listing of attachments on Hashed storage.
describe 'gitlab:storage:list_hashed_attachments' do
it_behaves_like 'rake listing entities', 'attachments', 'Hashed' do
let(:task) { 'gitlab:storage:list_hashed_attachments' }
let(:project) { create(:project, storage_version: 2) }
let(:create_collection) { create_list(:upload, 3, model: project) }
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment