Commit 46c775f1 authored by Tiger Watson's avatar Tiger Watson

Merge branch...

Merge branch '30390-race-condition-that-makes-it-possible-to-create-duplicated-labels' into 'master'

Resolve labels with duplicate title and project IDs

See merge request gitlab-org/gitlab!21384
parents 2a715242 bd861fe9
---
title: Deduplicate labels with identical title and project
merge_request: 21384
author:
type: changed
# frozen_string_literal: true
class AddLabelRestoreTable < ActiveRecord::Migration[6.0]
DOWNTIME = false
def up
# copy table
execute "CREATE TABLE #{backup_labels_table_name} (LIKE #{labels_table_name} INCLUDING ALL);"
# make the primary key a real functioning one rather than incremental
execute "ALTER TABLE #{backup_labels_table_name} ALTER COLUMN ID DROP DEFAULT;"
# add some fields that make changes trackable
execute "ALTER TABLE #{backup_labels_table_name} ADD COLUMN restore_action INTEGER;"
execute "ALTER TABLE #{backup_labels_table_name} ADD COLUMN new_title VARCHAR;"
end
def down
drop_table backup_labels_table_name
end
private
def labels_table_name
:labels
end
def backup_labels_table_name
:backup_labels
end
end
# frozen_string_literal: true
class AddLabelRestoreForeignKeys < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
# create foreign keys
connection.foreign_keys(labels_table_name).each do |fk|
fk_options = fk.options
add_concurrent_foreign_key(backup_labels_table_name, fk.to_table, name: fk.name, column: fk_options[:column])
end
end
def down
connection.foreign_keys(backup_labels_table_name).each do |fk|
with_lock_retries do
remove_foreign_key backup_labels_table_name, name: fk.name
end
end
end
private
def labels_table_name
:labels
end
def backup_labels_table_name
:backup_labels
end
end
# frozen_string_literal: true
class RemoveDuplicateLabelsFromProject < ActiveRecord::Migration[6.0]
DOWNTIME = false
CREATE = 1
RENAME = 2
disable_ddl_transaction!
class BackupLabel < Label
self.table_name = 'backup_labels'
end
class Label < ApplicationRecord
self.table_name = 'labels'
end
class Project < ApplicationRecord
include EachBatch
self.table_name = 'projects'
end
BATCH_SIZE = 100_000
def up
# Split to smaller chunks
# Loop rather than background job, every 100,000
# there are 45,000,000 projects in total
Project.each_batch(of: BATCH_SIZE) do |batch|
range = batch.pluck('MIN(id)', 'MAX(id)').first
transaction do
remove_full_duplicates(*range)
end
transaction do
rename_partial_duplicates(*range)
end
end
end
def down
Project.each_batch(of: BATCH_SIZE) do |batch|
range = batch.pluck('MIN(id)', 'MAX(id)').first
restore_renamed_labels(*range)
restore_deleted_labels(*range)
end
end
def remove_full_duplicates(start_id, stop_id)
# Fields that are considered duplicate:
# project_id title template description type color
duplicate_labels = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
SELECT labels.*,
row_number() OVER (PARTITION BY labels.project_id, labels.title, labels.template, labels.description, labels.type, labels.color ORDER BY labels.id) AS row_number,
#{CREATE} AS restore_action
FROM labels
WHERE labels.project_id BETWEEN #{start_id} AND #{stop_id}
AND NOT EXISTS (SELECT * FROM board_labels WHERE board_labels.label_id = labels.id)
AND NOT EXISTS (SELECT * FROM label_links WHERE label_links.label_id = labels.id)
AND NOT EXISTS (SELECT * FROM label_priorities WHERE label_priorities.label_id = labels.id)
AND NOT EXISTS (SELECT * FROM lists WHERE lists.label_id = labels.id)
AND NOT EXISTS (SELECT * FROM resource_label_events WHERE resource_label_events.label_id = labels.id)
) SELECT * FROM data WHERE row_number > 1;
SQL
if duplicate_labels.any?
# create backup records
BackupLabel.insert_all!(duplicate_labels.map { |label| label.except("row_number") })
Label.where(id: duplicate_labels.pluck("id")).delete_all
end
end
def rename_partial_duplicates(start_id, stop_id)
# We need to ensure that the new title (with `_duplicate#{ID}`) doesn't exceed the limit.
# Truncate the original title (if needed) to 245 characters minus the length of the ID
# then add `_duplicate#{ID}`
soft_duplicates = ApplicationRecord.connection.execute(<<-SQL.squish)
WITH data AS (
SELECT
*,
substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text as new_title,
#{RENAME} AS restore_action,
row_number() OVER (PARTITION BY project_id, title ORDER BY id) AS row_number
FROM labels
WHERE project_id BETWEEN #{start_id} AND #{stop_id}
) SELECT * FROM data WHERE row_number > 1;
SQL
if soft_duplicates.any?
# create backup records
BackupLabel.insert_all!(soft_duplicates.map { |label| label.except("row_number") })
ApplicationRecord.connection.execute(<<-SQL.squish)
UPDATE labels SET title = substring(title from 1 for 245 - length(id::text)) || '_duplicate' || id::text
WHERE labels.id IN (#{soft_duplicates.map { |dup| dup["id"] }.join(", ")});
SQL
end
end
def restore_renamed_labels(start_id, stop_id)
# the backup label IDs are not incremental, they are copied directly from the Labels table
ApplicationRecord.connection.execute(<<-SQL.squish)
WITH backups AS (
SELECT id, title
FROM backup_labels
WHERE project_id BETWEEN #{start_id} AND #{stop_id} AND
restore_action = #{RENAME}
) UPDATE labels SET title = backups.title
FROM backups
WHERE labels.id = backups.id;
SQL
end
def restore_deleted_labels(start_id, stop_id)
ActiveRecord::Base.connection.execute(<<-SQL.squish)
INSERT INTO labels
SELECT id, title, color, project_id, created_at, updated_at, template, description, description_html, type, group_id, cached_markdown_version FROM backup_labels
WHERE backup_labels.project_id BETWEEN #{start_id} AND #{stop_id}
AND backup_labels.restore_action = #{CREATE}
SQL
end
end
# frozen_string_literal: true
class AddUniquenessIndexToLabelTitleAndProject < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
PROJECT_AND_TITLE = [:project_id, :title]
def up
add_concurrent_index :labels, PROJECT_AND_TITLE, where: "labels.group_id IS NULL", unique: true, name: "index_labels_on_project_id_and_title_unique"
remove_concurrent_index :labels, PROJECT_AND_TITLE, name: "index_labels_on_project_id_and_title"
end
def down
add_concurrent_index :labels, PROJECT_AND_TITLE, where: "labels.group_id IS NULL", unique: false, name: "index_labels_on_project_id_and_title"
remove_concurrent_index :labels, PROJECT_AND_TITLE, name: "index_labels_on_project_id_and_title_unique"
end
end
......@@ -9413,6 +9413,23 @@ CREATE TABLE public.aws_roles (
role_external_id character varying(64) NOT NULL
);
CREATE TABLE public.backup_labels (
id integer NOT NULL,
title character varying,
color character varying,
project_id integer,
created_at timestamp without time zone,
updated_at timestamp without time zone,
template boolean DEFAULT false,
description character varying,
description_html text,
type character varying,
group_id integer,
cached_markdown_version integer,
restore_action integer,
new_title character varying
);
CREATE TABLE public.badges (
id integer NOT NULL,
link_url character varying NOT NULL,
......@@ -17234,6 +17251,9 @@ ALTER TABLE ONLY public.award_emoji
ALTER TABLE ONLY public.aws_roles
ADD CONSTRAINT aws_roles_pkey PRIMARY KEY (user_id);
ALTER TABLE ONLY public.backup_labels
ADD CONSTRAINT backup_labels_pkey PRIMARY KEY (id);
ALTER TABLE ONLY public.badges
ADD CONSTRAINT badges_pkey PRIMARY KEY (id);
......@@ -18368,6 +18388,20 @@ CREATE UNIQUE INDEX any_approver_project_rule_type_unique_index ON public.approv
CREATE UNIQUE INDEX approval_rule_name_index_for_code_owners ON public.approval_merge_request_rules USING btree (merge_request_id, code_owner, name) WHERE ((code_owner = true) AND (section IS NULL));
CREATE UNIQUE INDEX backup_labels_group_id_project_id_title_idx ON public.backup_labels USING btree (group_id, project_id, title);
CREATE INDEX backup_labels_group_id_title_idx ON public.backup_labels USING btree (group_id, title) WHERE (project_id = NULL::integer);
CREATE INDEX backup_labels_project_id_idx ON public.backup_labels USING btree (project_id);
CREATE UNIQUE INDEX backup_labels_project_id_title_idx ON public.backup_labels USING btree (project_id, title) WHERE (group_id = NULL::integer);
CREATE INDEX backup_labels_template_idx ON public.backup_labels USING btree (template) WHERE template;
CREATE INDEX backup_labels_title_idx ON public.backup_labels USING btree (title);
CREATE INDEX backup_labels_type_project_id_idx ON public.backup_labels USING btree (type, project_id);
CREATE INDEX ci_builds_gitlab_monitor_metrics ON public.ci_builds USING btree (status, created_at, project_id) WHERE ((type)::text = 'Ci::Build'::text);
CREATE INDEX code_owner_approval_required ON public.protected_branches USING btree (project_id, code_owner_approval_required) WHERE (code_owner_approval_required = true);
......@@ -19374,7 +19408,7 @@ CREATE INDEX index_labels_on_group_id_and_title ON public.labels USING btree (gr
CREATE INDEX index_labels_on_project_id ON public.labels USING btree (project_id);
CREATE INDEX index_labels_on_project_id_and_title ON public.labels USING btree (project_id, title) WHERE (group_id = NULL::integer);
CREATE UNIQUE INDEX index_labels_on_project_id_and_title_unique ON public.labels USING btree (project_id, title) WHERE (group_id IS NULL);
CREATE INDEX index_labels_on_template ON public.labels USING btree (template) WHERE template;
......@@ -21016,6 +21050,9 @@ ALTER TABLE ONLY public.vulnerabilities
ALTER TABLE ONLY public.labels
ADD CONSTRAINT fk_7de4989a69 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.backup_labels
ADD CONSTRAINT fk_7de4989a69 FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.merge_requests
ADD CONSTRAINT fk_7e85395a64 FOREIGN KEY (sprint_id) REFERENCES public.sprints(id) ON DELETE CASCADE;
......@@ -22222,6 +22259,9 @@ ALTER TABLE ONLY public.serverless_domain_cluster
ALTER TABLE ONLY public.labels
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES public.namespaces(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.backup_labels
ADD CONSTRAINT fk_rails_c1ac5161d8 FOREIGN KEY (group_id) REFERENCES public.namespaces(id) ON DELETE CASCADE;
ALTER TABLE ONLY public.project_feature_usages
ADD CONSTRAINT fk_rails_c22a50024b FOREIGN KEY (project_id) REFERENCES public.projects(id) ON DELETE CASCADE;
......@@ -23208,6 +23248,10 @@ COPY "schema_migrations" (version) FROM STDIN;
20200304160801
20200304160823
20200304211738
20200305020458
20200305020459
20200305082754
20200305082858
20200305121159
20200305151736
20200305200641
......
......@@ -251,3 +251,16 @@ If you sort by `Priority`, GitLab uses this sort comparison order:
Ties are broken arbitrarily.
![Labels sort priority](img/labels_sort_priority.png)
## Troubleshooting
### Some label titles end with `_duplicate<number>`
In specific circumstances it was possible to create labels with duplicate titles in the same
namespace.
To resolve the duplication, [in GitLab 13.2](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/21384)
and later, some duplicate labels have `_duplicate<number>` appended to their titles.
You can safely change these labels' titles if you prefer.
For details of the original problem, see [issue 30390](https://gitlab.com/gitlab-org/gitlab/issues/30390).
This diff is collapsed.
......@@ -54,29 +54,5 @@ RSpec.describe IncidentManagement::CreateIncidentLabelService do
context 'without label' do
it_behaves_like 'new label'
end
context 'with duplicate labels', issue: 'https://gitlab.com/gitlab-org/gitlab-foss/issues/65042' do
before do
# Replicate race condition to create duplicates
build(:label, project: project, title: title).save!(validate: false)
build(:label, project: project, title: title).save!(validate: false)
end
it 'create an issue without labels' do
# Verify we have duplicates
expect(project.labels.size).to eq(2)
expect(project.labels.map(&:title)).to all(eq(title))
message = <<~MESSAGE.chomp
Cannot create incident label "incident" \
for "#{project.full_name}": Title has already been taken.
MESSAGE
expect(service).to receive(:log_info).with(message)
expect(execute).to be_error
expect(execute.payload[:label]).to be_kind_of(Label)
expect(execute.message).to eq('Title has already been taken')
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment