Add post migration to backfill projects updated at after repository move

In https://gitlab.com/gitlab-data/analytics/-/issues/7868 we discovered
that, after a repository was moved to a different storage, the
updated_at column of that repository's container wasn't updated.

This caused some scripts that sync data to Sisense to fail because
they couldn't detect that the containers had been updated.

In this commit, we add a post migration that backfills the updated_at
column of the projects whose repository was moved without that column
being updated.
parent a3a4846b
---
title: Add post migration to backfill projects updated at after repository move
merge_request: 53845
author:
type: fixed
# frozen_string_literal: true
# Post-deployment migration that schedules background jobs to backfill
# `projects.updated_at` for every project referenced by a row in
# `project_repository_storage_moves`.
#
# The actual update is done by the background migration named in
# MIGRATION_CLASS; this class only enqueues the work in batches.
class BackfillUpdatedAtAfterRepositoryStorageMove < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  DOWNTIME = false
  # Number of distinct project ids handed to a single background job.
  BATCH_SIZE = 10_000
  # Delay between consecutive background jobs.
  INTERVAL = 2.minutes
  MIGRATION_CLASS = 'BackfillProjectUpdatedAtAfterRepositoryStorageMove'

  # Migration-local model bound directly to the storage moves table.
  class ProjectRepositoryStorageMove < ActiveRecord::Base
    include EachBatch

    self.table_name = 'project_repository_storage_moves'
  end

  # Walks the distinct project ids that have storage moves and schedules one
  # delayed background job per batch. The delay is INTERVAL multiplied by the
  # batch number yielded by `each_batch`.
  def up
    ProjectRepositoryStorageMove.reset_column_information

    moved_projects = ProjectRepositoryStorageMove.select(:project_id).distinct

    moved_projects.each_batch(of: BATCH_SIZE, column: :project_id) do |relation, batch_number|
      delay = INTERVAL * batch_number
      project_ids = relation.pluck(:project_id)

      migrate_in(delay, MIGRATION_CLASS, project_ids)
    end
  end

  # Intentionally does nothing.
  def down
    # No-op
  end
end
961c147e9c8e35eac5b8dd33f879582e173b7f6e31659b2d00989bc38afc6f5a
\ No newline at end of file
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # Bumps the `updated_at` column of existing projects whose repository
    # storage was moved without the project timestamp being refreshed.
    class BackfillProjectUpdatedAtAfterRepositoryStorageMove
      # For each given project, takes the latest `updated_at` among its
      # repository storage moves. When the project's own `updated_at` is not
      # later than that value, it is set to one second after it. Projects
      # without storage moves, or already updated later, are left untouched.
      #
      # @param project_ids [Array<Integer>] ids of the projects to process
      def perform(*project_ids)
        latest_moves =
          ProjectRepositoryStorageMove
            .select("project_id, MAX(updated_at) as updated_at")
            .where(project_id: project_ids)
            .group(:project_id)

        # The squiggly heredoc strips the common indentation, so the SQL sent
        # to the database starts every line at column zero.
        Project.connection.execute(<<~SQL)
          WITH repository_storage_cte as (
          #{latest_moves.to_sql}
          )
          UPDATE projects
          SET updated_at = (repository_storage_cte.updated_at + interval '1 second')
          FROM repository_storage_cte
          WHERE projects.id = repository_storage_cte.project_id AND projects.updated_at <= repository_storage_cte.updated_at
        SQL
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BackfillProjectUpdatedAtAfterRepositoryStorageMove, :migration, schema: 20210210093901 do
  let(:projects) { table(:projects) }
  let(:project_repository_storage_moves) { table(:project_repository_storage_moves) }
  let(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }

  subject { described_class.new }

  describe '#perform' do
    it 'updates project updated_at column if they were moved to a different repository storage' do
      freeze_time do
        storage_names = { source_storage_name: 'default', destination_storage_name: 'default' }

        # Project older than its storage move: expected to be bumped.
        stale_project = projects.create!(id: 1, namespace_id: namespace.id, updated_at: 1.day.ago)
        # Project with the same timestamp as its storage move: expected to be bumped.
        same_time_project = projects.create!(id: 2, namespace_id: namespace.id, updated_at: Time.current)
        # Project newer than its storage move: expected to stay as is.
        newer_project_updated_at = 2.minutes.from_now
        newer_project = projects.create!(id: 3, namespace_id: namespace.id, updated_at: newer_project_updated_at)
        # Project without any storage move: expected to stay as is.
        unmoved_project_updated_at = 10.days.ago
        unmoved_project = projects.create!(id: 4, namespace_id: namespace.id, updated_at: unmoved_project_updated_at)

        stale_project_move = project_repository_storage_moves.create!(project_id: stale_project.id, updated_at: 2.hours.ago, **storage_names)
        same_time_project_move = project_repository_storage_moves.create!(project_id: same_time_project.id, updated_at: Time.current, **storage_names)
        project_repository_storage_moves.create!(project_id: newer_project.id, updated_at: Time.current, **storage_names)

        subject.perform([1, 2, 3, 4, non_existing_record_id])

        expect(stale_project.reload.updated_at).to eq(stale_project_move.updated_at + 1.second)
        expect(same_time_project.reload.updated_at).to eq(same_time_project_move.updated_at + 1.second)
        expect(newer_project.reload.updated_at).to eq(newer_project_updated_at)
        expect(unmoved_project.reload.updated_at).to eq(unmoved_project_updated_at)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20210210093901_backfill_updated_at_after_repository_storage_move.rb')
RSpec.describe BackfillUpdatedAtAfterRepositoryStorageMove, :sidekiq do
  let_it_be(:projects) { table(:projects) }
  let_it_be(:project_repository_storage_moves) { table(:project_repository_storage_moves) }
  let_it_be(:namespace) { table(:namespaces).create!(name: 'user', path: 'user') }

  describe '#up' do
    it 'schedules background jobs for all distinct projects in batches' do
      stub_const("#{described_class}::BATCH_SIZE", 3)

      # Projects 1-7 get storage moves below; project 8 has none and must not
      # show up in any scheduled job.
      (1..8).each do |project_id|
        projects.create!(id: project_id, namespace_id: namespace.id)
      end

      # One entry per storage move, in move id order (1..10). Some projects
      # were moved twice so the migration has to de-duplicate project ids.
      moved_project_ids = [1, 1, 2, 3, 3, 4, 4, 5, 6, 7]

      moved_project_ids.each.with_index(1) do |project_id, move_id|
        project_repository_storage_moves.create!(id: move_id, project_id: project_id, source_storage_name: 'default', destination_storage_name: 'default')
      end

      Sidekiq::Testing.fake! do
        freeze_time do
          migrate!

          background_migration = described_class::MIGRATION_CLASS

          expect(BackgroundMigrationWorker.jobs.size).to eq(3)
          expect(background_migration).to be_scheduled_delayed_migration(2.minutes, 1, 2, 3)
          expect(background_migration).to be_scheduled_delayed_migration(4.minutes, 4, 5, 6)
          expect(background_migration).to be_scheduled_delayed_migration(6.minutes, 7)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment