Commit 4cf4e15a authored by Ash McKenzie's avatar Ash McKenzie

Merge branch '55487-backfill-lfs-objects-v2' into 'master'

Backfill LfsObjectsProject records of forks

See merge request gitlab-org/gitlab!25343
parents 11cfef67 fc44fcc5
---
title: Backfill LfsObjectsProject records of forks
merge_request: 25343
author:
type: other
# frozen_string_literal: true
class RescheduleLinkLfsObjects < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
MIGRATION = 'LinkLfsObjects'
BATCH_SIZE = 1_000
disable_ddl_transaction!
def up
forks = Gitlab::BackgroundMigration::LinkLfsObjects::Project.with_non_existing_lfs_objects
queue_background_migration_jobs_by_range_at_intervals(
forks,
MIGRATION,
BackgroundMigrationWorker.minimum_interval,
batch_size: BATCH_SIZE
)
end
def down
# No-op. No need to make this reversible. In case the jobs enqueued runs and
# fails at some point, some records will be created. When rescheduled, those
# records won't be re-created. It's also hard to track which records to clean
# up if ever.
end
end
...@@ -6,6 +6,8 @@ module Gitlab ...@@ -6,6 +6,8 @@ module Gitlab
class LinkLfsObjects class LinkLfsObjects
# Model definition used for migration # Model definition used for migration
class ForkNetworkMember < ActiveRecord::Base class ForkNetworkMember < ActiveRecord::Base
include EachBatch
self.table_name = 'fork_network_members' self.table_name = 'fork_network_members'
def self.with_non_existing_lfs_objects def self.with_non_existing_lfs_objects
...@@ -23,8 +25,62 @@ module Gitlab ...@@ -23,8 +25,62 @@ module Gitlab
end end
end end
# Model definition used for migration
class Project < ActiveRecord::Base
include EachBatch
self.table_name = 'projects'
has_one :fork_network_member, class_name: 'LinkLfsObjects::ForkNetworkMember'
def self.with_non_existing_lfs_objects
fork_network_members =
ForkNetworkMember.with_non_existing_lfs_objects
.select(1)
.where('fork_network_members.project_id = projects.id')
where('EXISTS (?)', fork_network_members)
end
end
# Model definition used for migration
class LfsObjectsProject < ActiveRecord::Base
include EachBatch
self.table_name = 'lfs_objects_projects'
end
BATCH_SIZE = 1000
def perform(start_id, end_id) def perform(start_id, end_id)
# no-op as some queries times out forks =
Project
.with_non_existing_lfs_objects
.where(id: start_id..end_id)
forks.includes(:fork_network_member).find_each do |project|
LfsObjectsProject
.select("lfs_objects_projects.lfs_object_id, #{project.id}, NOW(), NOW()")
.where(project_id: project.fork_network_member.forked_from_project_id)
.each_batch(of: BATCH_SIZE) do |batch|
execute <<~SQL
INSERT INTO lfs_objects_projects (lfs_object_id, project_id, created_at, updated_at)
#{batch.to_sql}
SQL
end
end
logger.info(message: "LinkLfsObjects: created missing LfsObjectsProject for Projects #{forks.map(&:id).join(', ')}")
end
private
def execute(sql)
::ActiveRecord::Base.connection.execute(sql)
end
def logger
@logger ||= Gitlab::BackgroundMigration::Logger.build
end end
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::LinkLfsObjects, :migration, schema: 2020_02_10_062432 do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let!(:source_project) { projects.create(namespace_id: namespace.id) }
let!(:another_source_project) { projects.create(namespace_id: namespace.id) }
let!(:project) { projects.create(namespace_id: namespace.id) }
let!(:another_project) { projects.create(namespace_id: namespace.id) }
let!(:other_project) { projects.create(namespace_id: namespace.id) }
let!(:linked_project) { projects.create(namespace_id: namespace.id) }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
before do
stub_const("#{described_class}::BATCH_SIZE", 2)
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, another_project, linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: other_project.id, forked_from_project_id: another_fork_network.root_project_id)
# Links LFS objects to some projects
[source_project, another_source_project, linked_project].each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'creates LfsObjectsProject records for forks within the specified range of project IDs' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |logger|
expect(logger).to receive(:info).twice
end
expect { subject.perform(project.id, other_project.id) }.to change { lfs_objects_projects.count }.by(6)
expect(lfs_object_ids_for(project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(another_project)).to match_array(lfs_object_ids_for(source_project))
expect(lfs_object_ids_for(other_project)).to match_array(lfs_object_ids_for(another_source_project))
expect { subject.perform(project.id, other_project.id) }.not_to change { lfs_objects_projects.count }
end
context 'when it is not necessary to create LfsObjectProject records' do
it 'does not create LfsObjectProject records' do
expect { subject.perform(linked_project.id, linked_project.id) }
.not_to change { lfs_objects_projects.count }
end
end
def lfs_object_ids_for(project)
lfs_objects_projects.where(project_id: project.id).pluck(:lfs_object_id)
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200217091401_reschedule_link_lfs_objects.rb')
describe RescheduleLinkLfsObjects, :migration, :sidekiq do
let(:namespaces) { table(:namespaces) }
let(:projects) { table(:projects) }
let(:fork_networks) { table(:fork_networks) }
let(:fork_network_members) { table(:fork_network_members) }
let(:lfs_objects) { table(:lfs_objects) }
let(:lfs_objects_projects) { table(:lfs_objects_projects) }
let(:namespace) { namespaces.create(name: 'GitLab', path: 'gitlab') }
let(:fork_network) { fork_networks.create(root_project_id: source_project.id) }
let(:another_fork_network) { fork_networks.create(root_project_id: another_source_project.id) }
let!(:source_project) { projects.create(namespace_id: namespace.id) }
let!(:another_source_project) { projects.create(namespace_id: namespace.id) }
let!(:project) { projects.create(namespace_id: namespace.id) }
let!(:another_project) { projects.create(namespace_id: namespace.id) }
let!(:other_project) { projects.create(namespace_id: namespace.id) }
let!(:linked_project) { projects.create(namespace_id: namespace.id) }
let(:lfs_object) { lfs_objects.create(oid: 'abc123', size: 100) }
let(:another_lfs_object) { lfs_objects.create(oid: 'def456', size: 200) }
before do
# Create links between projects
fork_network_members.create(fork_network_id: fork_network.id, project_id: source_project.id, forked_from_project_id: nil)
[project, another_project, linked_project].each do |p|
fork_network_members.create(
fork_network_id: fork_network.id,
project_id: p.id,
forked_from_project_id: fork_network.root_project_id
)
end
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: another_source_project.id, forked_from_project_id: nil)
fork_network_members.create(fork_network_id: another_fork_network.id, project_id: other_project.id, forked_from_project_id: another_fork_network.root_project_id)
end
context 'when there are forks to be backfilled' do
before do
stub_const("#{described_class.name}::BATCH_SIZE", 2)
# Links LFS objects to some projects
[source_project, another_source_project, linked_project].each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'schedules background migration to link LFS objects' do
Sidekiq::Testing.fake! do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(2)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(2.minutes, project.id, another_project.id)
expect(described_class::MIGRATION)
.to be_scheduled_delayed_migration(4.minutes, other_project.id, other_project.id)
end
end
end
context 'when there are no forks to be backfilled' do
before do
# Links LFS objects to all projects
projects.all.each do |p|
lfs_objects_projects.create(lfs_object_id: lfs_object.id, project_id: p.id)
lfs_objects_projects.create(lfs_object_id: another_lfs_object.id, project_id: p.id)
end
end
it 'does not schedule any job' do
Sidekiq::Testing.fake! do
migrate!
expect(BackgroundMigrationWorker.jobs.size).to eq(0)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment