Commit 9a1c3830 authored by charlieablett's avatar charlieablett

Add migration AddLabelRestoreForeignKeys

- Move foreign key duplication to new
migration
- update structure.sql
parent 098bc7cf
......@@ -7,12 +7,6 @@ class AddLabelRestoreTable < ActiveRecord::Migration[6.0]
# copy table
execute "CREATE TABLE #{backup_labels_table_name} (LIKE #{labels_table_name} INCLUDING ALL);"
# create foreign keys
connection.foreign_keys(labels_table_name).each do |fk|
fk_options = fk.options
execute "ALTER TABLE #{backup_labels_table_name} ADD CONSTRAINT #{fk.name} FOREIGN KEY (#{fk_options[:column]}) REFERENCES #{fk.to_table}(#{fk_options[:primary_key]});"
end
# make the primary key a real functioning one rather than incremental
execute "ALTER TABLE #{backup_labels_table_name} ALTER COLUMN ID DROP DEFAULT;"
......
# frozen_string_literal: true
class AddLabelRestoreForeignKeys < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
# create foreign keys
connection.foreign_keys(labels_table_name).each do |fk|
fk_options = fk.options
add_concurrent_foreign_key(backup_labels_table_name, fk.to_table, name: fk.name, column: fk_options[:column])
end
end
def down
connection.foreign_keys(backup_labels_table_name).each do |fk|
with_lock_retries do
remove_foreign_key backup_labels_table_name, name: fk.name
end
end
end
private
def labels_table_name
:labels
end
def backup_labels_table_name
:backup_labels
end
end
......@@ -6,6 +6,8 @@ class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
CREATE = 1
RENAME = 2
disable_ddl_transaction!
class BackupLabel < Label
include BulkInsertSafe
include EachBatch
......@@ -23,14 +25,13 @@ class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
self.table_name = 'projects'
end
DEDUPLICATE_BATCH_SIZE = 100_000
RESTORE_BATCH_SIZE = 100
BATCH_SIZE = 100_000
def up
# Split to smaller chunks
# Loop rather than background job, every 100,000
# there are 45,000,000 projects in total
Project.each_batch(of: DEDUPLICATE_BATCH_SIZE) do |batch|
Project.each_batch(of: BATCH_SIZE) do |batch|
range = batch.pluck('MIN(id)', 'MAX(id)').first
remove_full_duplicates(*range)
......@@ -41,27 +42,11 @@ class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
def down
# we could probably make this more efficient by getting them in bulk by restore action
# and then applying them all at the same time...
BackupLabel.each_batch(of: RESTORE_BATCH_SIZE) do |backup_label|
action = backup_label.restore_action
target = Label.find(backup_label.id)
next unless target
next unless action
if action == RENAME
if target.title == backup_label.new_title
say "Restoring label title '#{target.title}' to backup value '#{backup_label.title}'"
target.update_attribute(:title, backup_label.title)
end
elsif action == CREATE
restored_label = Label.new(backup_label.attributes)
say "Restoring label from deletion with backup attributes #{backup_label.attributes}"
Project.each_batch(of: BATCH_SIZE) do |batch|
range = batch.pluck('MIN(id)', 'MAX(id)').first
if restored_label.valid?
restored_label.save
backup_label.destroy
end
end
restore_renamed_labels(*range)
restore_deleted_labels(*range)
end
end
......@@ -105,7 +90,7 @@ WITH data AS (
row_number() OVER (PARTITION BY project_id, title ORDER BY id) AS row_number
FROM labels
WHERE project_id BETWEEN #{start_id} AND #{stop_id}
) SELECT * from data where row_number > 1;
) SELECT * FROM data WHERE row_number > 1;
SQL
if soft_duplicates.any?
......@@ -118,4 +103,27 @@ WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
SQL
end
end
def restore_renamed_labels(start_id, stop_id)
# the backup label IDs are not incremental, they are copied directly from the Labels table
ApplicationRecord.connection.execute(<<-SQL.squish)
WITH backups AS (
SELECT id, title
FROM backup_labels
WHERE project_id BETWEEN #{start_id} AND #{stop_id} AND
restore_action = #{RENAME}
) UPDATE labels SET title = backups.title
FROM backups
WHERE labels.id = backups.id;
SQL
end
def restore_deleted_labels(start_id, stop_id)
ActiveRecord::Base.connection.execute(<<-SQL.squish)
INSERT INTO labels
SELECT id, title, color, project_id, created_at, updated_at, template, description, description_html, type, group_id, cached_markdown_version FROM backup_labels
WHERE backup_labels.project_id BETWEEN #{start_id} AND #{stop_id}
AND backup_labels.restore_action = #{CREATE}
SQL
end
end
......@@ -18384,7 +18384,7 @@ CREATE INDEX backup_labels_group_id_title_idx ON public.backup_labels USING btre
CREATE INDEX backup_labels_project_id_idx ON public.backup_labels USING btree (project_id);
CREATE INDEX backup_labels_project_id_title_idx ON public.backup_labels USING btree (project_id, title) WHERE (group_id = NULL::integer);
CREATE UNIQUE INDEX backup_labels_project_id_title_idx ON public.backup_labels USING btree (project_id, title) WHERE (group_id = NULL::integer);
CREATE INDEX backup_labels_template_idx ON public.backup_labels USING btree (template) WHERE template;
......@@ -21037,7 +21037,7 @@ ALTER TABLE ONLY public.labels
ADD CONSTRAINT fk_7de4989a69 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.backup_labels
ADD CONSTRAINT fk_7de4989a69 FOREIGN KEY (project_id) REFERENCES public.projects(id);
ADD CONSTRAINT fk_7de4989a69 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.merge_requests
ADD CONSTRAINT fk_7e85395a64 FOREIGN KEY (sprint_id) REFERENCES public.sprints(id) ON DELETE CASCADE;
......@@ -22246,7 +22246,7 @@ ALTER TABLE ONLY public.labels
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES public.namespaces(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.backup_labels
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES public.namespaces(id);
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES public.namespaces(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.project_feature_usages
ADD CONSTRAINT fk_rails_c22a50024b FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
......@@ -23236,6 +23236,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200304160823
20200304211738
20200305020458
20200305020459
20200305082754
20200305082858
20200305121159
......
......@@ -48,11 +48,6 @@ describe RemoveDuplicateLabelsFromProject do
let(:migration) { described_class.new }
before do
stub_const('CREATE', 1)
stub_const('RENAME', 2)
end
describe 'removing full duplicates' do
context 'when there are no duplicate labels' do
let!(:first_label) { labels_table.create(project_label_attributes.merge(id: 1, title: "a different label")) }
......@@ -62,7 +57,7 @@ describe RemoveDuplicateLabelsFromProject do
expect { migration.up }.not_to change { backup_labels_table.count }
end
it 'restores removed records - no change' do
it 'restores removed records when rolling back - no change' do
migration.up
expect { migration.down }.not_to change { labels_table.count }
......@@ -70,6 +65,9 @@ describe RemoveDuplicateLabelsFromProject do
end
context 'with duplicates with no relationships' do
# can't use the activerecord class because the `type` makes it think it has polymorphism and should be/have a ProjectLabel subclass
let(:backup_labels) { ApplicationRecord.connection.execute('SELECT * from backup_labels') }
let!(:first_label) { labels_table.create(project_label_attributes.merge(id: 1)) }
let!(:second_label) { labels_table.create(project_label_attributes.merge(id: 2)) }
let!(:third_label) { labels_table.create(project_label_attributes.merge(id: 3, title: other_title)) }
......@@ -82,11 +80,8 @@ describe RemoveDuplicateLabelsFromProject do
it 'creates the correct backup records with `create` restore_action' do
migration.up
# can't use the activerecord class because the `type` makes it think it has polymorphism and should be/have a ProjectLabel subclass
backup_labels = ApplicationRecord.connection.execute('SELECT * from backup_labels')
expect(backup_labels.find { |bl| bl["id"] == 2 }).to include(second_label.attributes.merge("restore_action" => CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 4 }).to include(fourth_label.attributes.merge("restore_action" => CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 2 }).to include(second_label.attributes.merge("restore_action" => described_class::CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 4 }).to include(fourth_label.attributes.merge("restore_action" => described_class::CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
end
it 'deletes all but one' do
......@@ -96,7 +91,7 @@ describe RemoveDuplicateLabelsFromProject do
expect { fourth_label.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
it 'restores removed records' do
it 'restores removed records on rollback' do
second_label_attributes = second_label.attributes
fourth_label_attributes = fourth_label.attributes
......@@ -104,8 +99,8 @@ describe RemoveDuplicateLabelsFromProject do
migration.down
expect(second_label.attributes).to eq(second_label_attributes)
expect(fourth_label.attributes).to eq(fourth_label_attributes)
expect(second_label.attributes).to match(second_label_attributes)
expect(fourth_label.attributes).to match(fourth_label_attributes)
end
end
......@@ -119,10 +114,10 @@ describe RemoveDuplicateLabelsFromProject do
end
it 'does not create a backup record with `create` restore_action' do
expect { migration.up }.not_to change { backup_labels_table.where(restore_action: CREATE).count }
expect { migration.up }.not_to change { backup_labels_table.where(restore_action: described_class::CREATE).count }
end
it 'restores removed records' do
it 'restores removed records when rolling back - no change' do
migration.up
expect { migration.down }.not_to change { labels_table.count }
......@@ -138,7 +133,7 @@ describe RemoveDuplicateLabelsFromProject do
let!(:label_priority_for_fourth_label) { label_priorities_table.create(label_id: fourth_label.id, project_id: project_id, priority: 2) }
it 'creates a backup record with `create` restore_action for each removed record' do
expect { migration.up }.to change { backup_labels_table.where(restore_action: CREATE).count }.from(0).to(1)
expect { migration.up }.to change { backup_labels_table.where(restore_action: described_class::CREATE).count }.from(0).to(1)
end
it 'creates the correct backup records' do
......@@ -147,10 +142,10 @@ describe RemoveDuplicateLabelsFromProject do
# can't use the activerecord class because the `type` column makes it think it has polymorphism and should be/have a ProjectLabel subclass
backup_labels = ApplicationRecord.connection.execute('SELECT * from backup_labels')
expect(backup_labels.find { |bl| bl["id"] == 3 }).to include(third_label.attributes.merge("restore_action" => CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 3 }).to include(third_label.attributes.merge("restore_action" => described_class::CREATE, "new_title" => nil, "created_at" => anything, "updated_at" => anything))
end
it 'deletes the third record' do
it 'deletes the duplicate record' do
migration.up
expect { first_label.reload }.not_to raise_error
......@@ -158,14 +153,13 @@ describe RemoveDuplicateLabelsFromProject do
expect { third_label.reload }.to raise_error(ActiveRecord::RecordNotFound)
end
it 'restores removed records' do
it 'restores removed records on rollback' do
third_label_attributes = third_label.attributes
migration.up
migration.down
expect(third_label.attributes).to eq(third_label_attributes)
expect(third_label.attributes).to match(third_label_attributes)
end
end
end
......@@ -197,26 +191,18 @@ describe RemoveDuplicateLabelsFromProject do
# can't use the activerecord class because the `type` makes it think it has polymorphism and should be/have a ProjectLabel subclass
backup_labels = ApplicationRecord.connection.execute('SELECT * from backup_labels')
expect(backup_labels.find { |bl| bl["id"] == 2 }).to include(second_label.attributes.merge("restore_action" => RENAME, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 4 }).to include(fourth_label.attributes.merge("restore_action" => RENAME, "created_at" => anything, "updated_at" => anything))
end
it 'modifies the title of the second label' do
expect { migration.up }.to change { second_label.reload.title }.from(label_title).to(a_string_matching(/#{label_title}_duplicate/))
end
it 'modifies the title of the fourth label' do
expect { migration.up }.to change { fourth_label.reload.title }.from(other_title).to(a_string_matching(/#{other_title}_duplicate/))
expect(backup_labels.find { |bl| bl["id"] == 2 }).to include(second_label.attributes.merge("restore_action" => described_class::RENAME, "created_at" => anything, "updated_at" => anything))
expect(backup_labels.find { |bl| bl["id"] == 4 }).to include(fourth_label.attributes.merge("restore_action" => described_class::RENAME, "created_at" => anything, "updated_at" => anything))
end
it 'renames all but one' do
it 'modifies the titles of the partial duplicates' do
migration.up
expect(second_label.reload.title).to match(/#{label_title}_duplicate/)
expect(fourth_label.reload.title).to match(/#{other_title}_duplicate/)
end
it 'restores renamed records' do
it 'restores renamed records on rollback' do
second_label_attributes = second_label.attributes
fourth_label_attributes = fourth_label.attributes
......@@ -224,8 +210,8 @@ describe RemoveDuplicateLabelsFromProject do
migration.down
expect(second_label.attributes).to eq(second_label_attributes)
expect(fourth_label.attributes).to eq(fourth_label_attributes)
expect(second_label.reload.attributes).to match(second_label_attributes)
expect(fourth_label.reload.attributes).to match(fourth_label_attributes)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment