Commit 8b58db35 authored by Adam Hegyi's avatar Adam Hegyi

Merge branch '55487-backfill-lfs-objects-projects' into 'master'

Backfill LfsObjectsProject records of forks

See merge request gitlab-org/gitlab!26964
parents 56661a3e 9839cca5
---
title: Backfill LfsObjectsProject records of forks
merge_request: 26964
author:
type: other
# frozen_string_literal: true
class ScheduleLinkLfsObjectsProjects < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION = 'LinkLfsObjectsProjects'
BATCH_SIZE = 1000
disable_ddl_transaction!
def up
lfs_objects_projects = Gitlab::BackgroundMigration::LinkLfsObjectsProjects::LfsObjectsProject.linkable
queue_background_migration_jobs_by_range_at_intervals(
lfs_objects_projects,
MIGRATION,
BackgroundMigrationWorker.minimum_interval,
batch_size: BATCH_SIZE
)
end
def down
# No-op. No need to make this reversible. In case the jobs enqueued runs and
# fails at some point, some records will be created. When rescheduled, those
# records won't be re-created. It's also hard to track which records to clean
# up if ever.
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Create missing LfsObjectsProject records for forks
class LinkLfsObjectsProjects
# Model specifically used for migration.
class LfsObjectsProject < ActiveRecord::Base
include EachBatch
self.table_name = 'lfs_objects_projects'
def self.linkable
where(
<<~SQL
lfs_objects_projects.project_id IN (
SELECT fork_network_members.forked_from_project_id
FROM fork_network_members
WHERE fork_network_members.forked_from_project_id IS NOT NULL
)
SQL
)
end
end
# Model specifically used for migration.
class ForkNetworkMember < ActiveRecord::Base
include EachBatch
self.table_name = 'fork_network_members'
def self.without_lfs_object(lfs_object_id)
where(
<<~SQL
fork_network_members.project_id NOT IN (
SELECT lop.project_id
FROM lfs_objects_projects lop
WHERE lop.lfs_object_id = #{lfs_object_id}
)
SQL
)
end
end
BATCH_SIZE = 1000
def perform(start_id, end_id)
lfs_objects_projects =
Gitlab::BackgroundMigration::LinkLfsObjectsProjects::LfsObjectsProject
.linkable
.where(id: start_id..end_id)
return if lfs_objects_projects.empty?
lfs_objects_projects.find_each do |lop|
ForkNetworkMember
.select("#{lop.lfs_object_id}, fork_network_members.project_id, NOW(), NOW()")
.without_lfs_object(lop.lfs_object_id)
.where(forked_from_project_id: lop.project_id)
.each_batch(of: BATCH_SIZE) do |batch, index|
execute <<~SQL
INSERT INTO lfs_objects_projects (lfs_object_id, project_id, created_at, updated_at)
#{batch.to_sql}
SQL
logger.info(message: "LinkLfsObjectsProjects: created missing LfsObjectsProject records for LfsObject #{lop.lfs_object_id}")
end
end
end
private
def execute(sql)
::ActiveRecord::Base.connection.execute(sql)
end
def logger
@logger ||= Gitlab::BackgroundMigration::Logger.build
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::LinkLfsObjectsProjects, :migration, schema: 2020_03_10_075115 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
let(:partially_linked_project) { projects.create(namespace_id: namespace.id) }
let(:fully_linked_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
let!(:source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: source_project.id
)
end
let!(:source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: source_project.id
)
end
let!(:another_source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: another_source_project.id
)
end
let!(:another_source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: another_source_project.id
)
end
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, partially_linked_project, fully_linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_project.id, forked_from_project_id: another_fork_network.root_project_id)
# Links LFS objects to some projects
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: fully_linked_project.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: fully_linked_project.id)
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: partially_linked_project.id)
end
context 'when there are LFS objects to be linked' do
it 'creates LfsObjectsProject records for forks based on the specified range of LfsObjectProject id' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
expect(logger).to receive(:info).exactly(4).times
end
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }.to change { lfs_objects_projects.count }.by(5)
expect(lfs_object_ids_for(project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(another_project)).to match_array(lfs_object_ids_for(another_source_project))
expect(lfs_object_ids_for(partially_linked_project)).to match_array(lfs_object_ids_for(source_project))
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }.not_to change { lfs_objects_projects.count }
end
end
context 'when there are no LFS objects to be linked' do
before do
# Links LFS objects to all projects
projects.all.each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'does not create LfsObjectProject records' do
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }
.not_to change { lfs_objects_projects.count }
end
end
def lfs_object_ids_for(project)
lfs_objects_projects.where(project_id: project.id).pluck(:lfs_object_id)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200310075115_schedule_link_lfs_objects_projects.rb')
describe ScheduleLinkLfsObjectsProjects, :migration, :sidekiq do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
let!(:source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: source_project.id
)
end
let!(:source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: source_project.id
)
end
let!(:another_source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: another_source_project.id
)
end
let!(:another_source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: another_source_project.id
)
end
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: fork_network.id, project_id: project.id, forked_from_project_id: source_project.id)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_project.id, forked_from_project_id: another_fork_network.root_project_id)
end
it 'schedules background migration to link LFS objects' do
Sidekiq::Testing.fake! do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, source_project_lop_1.id, source_project_lop_2.id)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(4.minutes, another_source_project_lop_1.id, another_source_project_lop_2.id)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment