Commit 9839cca5 authored by Patrick Bajao's avatar Patrick Bajao Committed by Adam Hegyi

Backfill LfsObjectsProject records of forks

To fix the behavior of existing forks, LfsObjectsProject records
need to be backfilled. This is so we can remove the need to depend
on the source when looking for LFS objects from forks.

This is a variation of https://gitlab.com/gitlab-org/gitlab/-/merge_requests/25343
that was reverted. The difference is that we are now enqueueing
background migration jobs based on lfs_objects_project records.
parent 55d0427e
---
title: Backfill LfsObjectsProject records of forks
merge_request: 26964
author:
type: other
# frozen_string_literal: true
class ScheduleLinkLfsObjectsProjects < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION = 'LinkLfsObjectsProjects'
BATCH_SIZE = 1000
disable_ddl_transaction!
def up
lfs_objects_projects = Gitlab::BackgroundMigration::LinkLfsObjectsProjects::LfsObjectsProject.linkable
queue_background_migration_jobs_by_range_at_intervals(
lfs_objects_projects,
MIGRATION,
BackgroundMigrationWorker.minimum_interval,
batch_size: BATCH_SIZE
)
end
def down
# No-op. No need to make this reversible. In case the jobs enqueued runs and
# fails at some point, some records will be created. When rescheduled, those
# records won't be re-created. It's also hard to track which records to clean
# up if ever.
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Create missing LfsObjectsProject records for forks
class LinkLfsObjectsProjects
# Model specifically used for migration.
class LfsObjectsProject < ActiveRecord::Base
include EachBatch
self.table_name = 'lfs_objects_projects'
def self.linkable
where(
<<~SQL
lfs_objects_projects.project_id IN (
SELECT fork_network_members.forked_from_project_id
FROM fork_network_members
WHERE fork_network_members.forked_from_project_id IS NOT NULL
)
SQL
)
end
end
# Model specifically used for migration.
class ForkNetworkMember < ActiveRecord::Base
include EachBatch
self.table_name = 'fork_network_members'
def self.without_lfs_object(lfs_object_id)
where(
<<~SQL
fork_network_members.project_id NOT IN (
SELECT lop.project_id
FROM lfs_objects_projects lop
WHERE lop.lfs_object_id = #{lfs_object_id}
)
SQL
)
end
end
BATCH_SIZE = 1000
def perform(start_id, end_id)
lfs_objects_projects =
Gitlab::BackgroundMigration::LinkLfsObjectsProjects::LfsObjectsProject
.linkable
.where(id: start_id..end_id)
return if lfs_objects_projects.empty?
lfs_objects_projects.find_each do |lop|
ForkNetworkMember
.select("#{lop.lfs_object_id}, fork_network_members.project_id, NOW(), NOW()")
.without_lfs_object(lop.lfs_object_id)
.where(forked_from_project_id: lop.project_id)
.each_batch(of: BATCH_SIZE) do |batch, index|
execute <<~SQL
INSERT INTO lfs_objects_projects (lfs_object_id, project_id, created_at, updated_at)
#{batch.to_sql}
SQL
logger.info(message: "LinkLfsObjectsProjects: created missing LfsObjectsProject records for LfsObject #{lop.lfs_object_id}")
end
end
end
private
def execute(sql)
::ActiveRecord::Base.connection.execute(sql)
end
def logger
@logger ||= Gitlab::BackgroundMigration::Logger.build
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::LinkLfsObjectsProjects, :migration, schema: 2020_03_10_075115 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
let(:partially_linked_project) { projects.create(namespace_id: namespace.id) }
let(:fully_linked_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
let!(:source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: source_project.id
)
end
let!(:source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: source_project.id
)
end
let!(:another_source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: another_source_project.id
)
end
let!(:another_source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: another_source_project.id
)
end
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, partially_linked_project, fully_linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_project.id, forked_from_project_id: another_fork_network.root_project_id)
# Links LFS objects to some projects
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: fully_linked_project.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: fully_linked_project.id)
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: partially_linked_project.id)
end
context 'when there are LFS objects to be linked' do
it 'creates LfsObjectsProject records for forks based on the specified range of LfsObjectProject id' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
expect(logger).to receive(:info).exactly(4).times
end
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }.to change { lfs_objects_projects.count }.by(5)
expect(lfs_object_ids_for(project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(another_project)).to match_array(lfs_object_ids_for(another_source_project))
expect(lfs_object_ids_for(partially_linked_project)).to match_array(lfs_object_ids_for(source_project))
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }.not_to change { lfs_objects_projects.count }
end
end
context 'when there are no LFS objects to be linked' do
before do
# Links LFS objects to all projects
projects.all.each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'does not create LfsObjectProject records' do
expect { subject.perform(source_project_lop_1.id, another_source_project_lop_2.id) }
.not_to change { lfs_objects_projects.count }
end
end
def lfs_object_ids_for(project)
lfs_objects_projects.where(project_id: project.id).pluck(:lfs_object_id)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200310075115_schedule_link_lfs_objects_projects.rb')
describe ScheduleLinkLfsObjectsProjects, :migration, :sidekiq do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:source_project) { projects.create(namespace_id: namespace.id) }
let(:another_source_project) { projects.create(namespace_id: namespace.id) }
let(:project) { projects.create(namespace_id: namespace.id) }
let(:another_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
let!(:source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: source_project.id
)
end
let!(:source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: source_project.id
)
end
let!(:another_source_project_lop_1) do
lfs_objects_projects.create(
lfs_object_id: lfs_object.id,
project_id: another_source_project.id
)
end
let!(:another_source_project_lop_2) do
lfs_objects_projects.create(
lfs_object_id: another_lfs_object.id,
project_id: another_source_project.id
)
end
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: fork_network.id, project_id: project.id, forked_from_project_id: source_project.id)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_project.id, forked_from_project_id: another_fork_network.root_project_id)
end
it 'schedules background migration to link LFS objects' do
Sidekiq::Testing.fake! do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, source_project_lop_1.id, source_project_lop_2.id)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(4.minutes, another_source_project_lop_1.id, another_source_project_lop_2.id)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment