Commit 69fbeb10 authored by Mayra Cabrera's avatar Mayra Cabrera

Merge branch '35789-hashed-storage-attachments-migration-object-storage' into 'master'

Hashed Storage attachments migration: exclude files in object storage as they are all hashed already

See merge request gitlab-org/gitlab!20338
parents 405046cb 98d1d503
......@@ -374,9 +374,17 @@ class Project < ApplicationRecord
scope :pending_delete, -> { where(pending_delete: true) }
scope :without_deleted, -> { where(pending_delete: false) }
scope :with_storage_feature, ->(feature) { where('storage_version >= :version', version: HASHED_STORAGE_FEATURES[feature]) }
scope :without_storage_feature, ->(feature) { where('storage_version < :version OR storage_version IS NULL', version: HASHED_STORAGE_FEATURES[feature]) }
scope :with_unmigrated_storage, -> { where('storage_version < :version OR storage_version IS NULL', version: LATEST_STORAGE_VERSION) }
scope :with_storage_feature, ->(feature) do
where(arel_table[:storage_version].gteq(HASHED_STORAGE_FEATURES[feature]))
end
scope :without_storage_feature, ->(feature) do
where(arel_table[:storage_version].lt(HASHED_STORAGE_FEATURES[feature])
.or(arel_table[:storage_version].eq(nil)))
end
scope :with_unmigrated_storage, -> do
where(arel_table[:storage_version].lt(LATEST_STORAGE_VERSION)
.or(arel_table[:storage_version].eq(nil)))
end
# last_activity_at is throttled every minute, but last_repository_updated_at is updated with every push
scope :sorted_by_activity, -> { reorder(Arel.sql("GREATEST(COALESCE(last_activity_at, '1970-01-01'), COALESCE(last_repository_updated_at, '1970-01-01')) DESC")) }
......
......@@ -23,6 +23,21 @@ class Upload < ApplicationRecord
after_destroy :delete_file!, if: -> { uploader_class <= FileUploader }
class << self
def inner_join_local_uploads_projects
upload_table = Upload.arel_table
project_table = Project.arel_table
join_statement = upload_table.project(upload_table[Arel.star])
.join(project_table)
.on(
upload_table[:model_type].eq('Project')
.and(upload_table[:model_id].eq(project_table[:id]))
.and(upload_table[:store].eq(ObjectStorage::Store::LOCAL))
)
joins(join_statement.join_sources)
end
##
# FastDestroyAll concerns
def begin_fast_destroy
......
......@@ -14,7 +14,7 @@ module HashedStorage
try_obtain_lease do
project = Project.without_deleted.find_by(id: project_id)
break unless project
break unless project && project.storage_upgradable?
old_disk_path ||= Storage::LegacyProject.new(project).disk_path
......
---
title: 'Hashed Storage attachments migration: exclude files in object storage as they
are all hashed already'
merge_request: 20338
author:
type: changed
......@@ -47,23 +47,13 @@ module Gitlab
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.legacy_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version < :version OR projects.storage_version IS NULL', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
Upload.inner_join_local_uploads_projects.merge(Project.without_storage_feature(:attachments))
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def self.hashed_attachments_relation
Upload.joins(<<~SQL).where('projects.storage_version >= :version', version: Project::HASHED_STORAGE_FEATURES[:attachments])
JOIN projects
ON (uploads.model_type='Project' AND uploads.model_id=projects.id)
SQL
Upload.inner_join_local_uploads_projects.merge(Project.with_storage_feature(:attachments))
end
# rubocop: enable CodeReuse/ActiveRecord
def self.relation_summary(relation_name, relation)
relation_count = relation.count
......
......@@ -5,13 +5,13 @@ require 'spec_helper'
describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state do
include ExclusiveLeaseHelpers
describe '#perform' do
let(:project) { create(:project, :empty_repo, :legacy_storage) }
let(:lease_key) { "project_migrate_hashed_storage_worker:#{project.id}" }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:migration_service) { ::Projects::HashedStorage::MigrationService }
let(:lease_timeout) { described_class::LEASE_TIMEOUT }
describe '#perform' do
it 'skips when project no longer exists' do
stub_exclusive_lease(lease_key(-1), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(-1)
......@@ -19,32 +19,67 @@ describe HashedStorage::ProjectMigrateWorker, :clean_gitlab_redis_shared_state d
it 'skips when project is pending delete' do
pending_delete_project = create(:project, :empty_repo, pending_delete: true)
stub_exclusive_lease(lease_key(pending_delete_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(pending_delete_project.id)
end
it 'delegates migration to service class when we have exclusive lease' do
stub_exclusive_lease(lease_key, 'uuid', timeout: lease_timeout)
it 'skips when project is already migrated' do
migrated_project = create(:project, :empty_repo)
stub_exclusive_lease(lease_key(migrated_project.id), 'uuid', timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
subject.perform(migrated_project.id)
end
context 'with exclusive lease available' do
it 'delegates migration to service class' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
expect(service_spy).to have_received(:execute)
end
it 'delegates migration to service class with correct path in a partially migrated project' do
project = create(:project, :empty_repo, storage_version: 1)
stub_exclusive_lease(lease_key(project.id), 'uuid', timeout: lease_timeout)
service_spy = spy
allow(migration_service)
.to receive(:new).with(project, project.full_path, logger: subject.logger)
.and_return(service_spy)
subject.perform(project.id)
expect(service_spy).to have_received(:execute)
end
end
it 'skips when it cant acquire the exclusive lease' do
stub_exclusive_lease_taken(lease_key, timeout: lease_timeout)
context 'with exclusive lease taken' do
it 'skips when it cant acquire the exclusive lease' do
project = create(:project, :empty_repo, :legacy_storage)
stub_exclusive_lease_taken(lease_key(project.id), timeout: lease_timeout)
expect(migration_service).not_to receive(:new)
expect(migration_service).not_to receive(:new)
subject.perform(project.id)
subject.perform(project.id)
end
end
end
def lease_key(key)
"project_migrate_hashed_storage_worker:#{key}"
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment