Commit c3e928f8 authored by Yannis Roussos's avatar Yannis Roussos

Add Foreign Key on projects.namespace_id

- Add foreign key from projects.namespace_id to namespaces.id
  with validate: false
- Add a lost-and-found group owned by the Ghost User
- Find orphaned projects with a namespace_id not in namespaces
  and move them under lost-and-found
- Validate the Foreign Key on projects.namespace_id
parent 1aba995b
---
title: Add Foreign Key on projects.namespace_id
merge_request: 31675
author:
type: other
# frozen_string_literal: true
# Adds a foreign key from projects.namespace_id to namespaces.id.
#
# The constraint is created with `validate: false`; it is validated by the
# separate ValidateProjectsForeignKeyToNamespaces migration once orphaned
# projects have been cleaned up.
class AddProjectsForeignKeyToNamespaces < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Explicit constraint name, shared by `up` and `down`.
  FK_NAME = 'fk_projects_namespace_id'

  # Creates the (not yet validated) constraint inside a lock-retry block.
  def up
    with_lock_retries do
      add_foreign_key(:projects, :namespaces,
                      column: :namespace_id, on_delete: :restrict, validate: false, name: FK_NAME)
    end
  end

  # Drops the constraint if it is present, again inside a lock-retry block.
  def down
    with_lock_retries { remove_foreign_key_if_exists(:projects, column: :namespace_id, name: FK_NAME) }
  end
end
# frozen_string_literal: true
# rubocop:disable Migration/PreventStrings
# This migration cleans up Projects that were orphaned when their namespace was deleted
# Instead of deleting them, we:
# - Find (or create) the Ghost User
# - Create (if not already exists) a `lost-and-found` group owned by the Ghost User
# - Find orphaned projects --> namespace_id can not be found in namespaces
# - Move the orphaned projects to the `lost-and-found` group
# (while making them private and setting `archived=true`)
#
# On GitLab.com (2020-05-11) this migration will update 66 orphaned projects
class CleanupProjectsWithMissingNamespace < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
VISIBILITY_PRIVATE = 0
ACCESS_LEVEL_OWNER = 50
# The batch size of projects to check in each iteration
# We expect the selectivity for orphaned projects to be very low:
# (66 orphaned projects out of a total 13.6M)
# so 10K should be a safe choice
BATCH_SIZE = 10000
disable_ddl_transaction!
# Migration-local model over the `user_details` table.
# Needed because User#ensure_bio_is_assigned_to_user_details writes the bio here.
class UserDetail < ActiveRecord::Base
  belongs_to :user, class_name: 'CleanupProjectsWithMissingNamespace::User'

  self.table_name = 'user_details'
end
class User < ActiveRecord::Base
self.table_name = 'users'
LOST_AND_FOUND_GROUP = 'lost-and-found'
USER_TYPE_GHOST = 5
DEFAULT_PROJECTS_LIMIT = 100000
default_value_for :admin, false
default_value_for :can_create_group, true # we need this to create the group
default_value_for :can_create_team, false
default_value_for :project_view, :files
default_value_for :notified_of_own_activity, false
default_value_for :preferred_language, I18n.default_locale
has_one :user_detail, class_name: 'CleanupProjectsWithMissingNamespace::UserDetail'
has_one :namespace, -> { where(type: nil) },
foreign_key: :owner_id, inverse_of: :owner, autosave: true,
class_name: 'CleanupProjectsWithMissingNamespace::Namespace'
before_save :ensure_namespace_correct
before_save :ensure_bio_is_assigned_to_user_details, if: :bio_changed?
enum project_view: { readme: 0, activity: 1, files: 2 }
# before_save hook keeping the user's namespace in step with the user.
#
# Without a namespace, one is built from the current username and name.
# With a namespace, only the attributes that changed on the user are copied.
def ensure_namespace_correct
  return build_namespace(path: username, name: name) unless namespace

  namespace.path = username if username_changed?
  namespace.name = name if name_changed?
end
# before_save hook (run when the bio changed): copies the bio, cut to its
# first 255 characters, onto the user_detail record.
# Does nothing when the :migrate_bio_to_user_details feature is disabled.
def ensure_bio_is_assigned_to_user_details
  unless Feature.disabled?(:migrate_bio_to_user_details, default_enabled: true)
    user_detail.bio = bio.to_s[0...255]
  end
end
# Returns the associated user_detail record, building a new (unsaved) one
# when the association yields nothing present.
def user_detail
  existing_detail = super.presence
  existing_detail || build_user_detail
end
# Returns the `lost-and-found` group of this user, creating a uniquely named
# one through Group.create_unique_group when none exists yet.
def lost_and_found_group
  group = existing_lost_and_found_group
  return group if group

  Group.create_unique_group(self, LOST_AND_FOUND_GROUP)
end
# Looks up a Group whose name starts with LOST_AND_FOUND_GROUP and in which
# this user holds an accepted (requested_at IS NULL) owner membership.
# Returns whatever `find_by` yields for that query.
def existing_lost_and_found_group
  # There should only be one Group for User Ghost starting with LOST_AND_FOUND_GROUP
  groups_owned_by_user =
    Group
      .joins('INNER JOIN members ON namespaces.id = members.source_id')
      .where('namespaces.type = ?', 'Group')
      .where('members.type = ?', 'GroupMember')
      .where('members.source_type = ?', 'Namespace')
      .where('members.user_id = ?', id)
      .where('members.requested_at IS NULL')
      .where('members.access_level = ?', ACCESS_LEVEL_OWNER)

  name_has_prefix = Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%")

  groups_owned_by_user.find_by(name_has_prefix)
end
class << self
# Returns the ghost user, creating it through unique_internal when no user
# with user_type USER_TYPE_GHOST exists.
# On creation the block assigns the explanatory bio and the 'Ghost User' name.
def ghost
  ghost_scope = where(user_type: USER_TYPE_GHOST)

  unique_internal(ghost_scope, 'ghost', 'ghost%s@example.com') do |ghost_user|
    ghost_user.bio = _('This is a "Ghost User", created to hold all issues authored by users that have since been deleted. This user cannot be removed.')
    ghost_user.name = 'Ghost User'
  end
end
# Returns the first record of `scope`; when there is none, delegates to
# create_unique_internal, forwarding all arguments and the block.
def unique_internal(scope, username, email_pattern, &block)
  existing = scope.first
  return existing if existing

  create_unique_internal(scope, username, email_pattern, &block)
end
# Creates the internal user for `username` while holding an exclusive lease
# keyed on that username, re-checking `scope` once the lease is held.
#
# scope          - relation queried (uncached) to detect an already existing user
# username       - base username; made unique with Uniquify before creation
# email_pattern  - sprintf pattern from which a unique email is derived
# creation_block - forwarded to User.create! to assign further attributes
#
# Returns the user found through `scope` after the lease was obtained, or
# otherwise the newly created user.
# NOTE: the record is always created with user_type USER_TYPE_GHOST,
# whatever `scope` selects.
def create_unique_internal(scope, username, email_pattern, &creation_block)
# Since we only want a single one of these in an instance, we use an
# exclusive lease to ensure that this block is never run concurrently.
lease_key = "user:unique_internal:#{username}"
lease = Gitlab::ExclusiveLease.new(lease_key, timeout: 1.minute.to_i)
# NOTE: this loop has no upper bound; it retries until the lease is obtained.
until uuid = lease.try_obtain
# Keep trying until we obtain the lease. To prevent hammering Redis too
# much we'll wait for a bit between retries.
sleep(1)
end
# Recheck if the user is already present. One might have been
# added between the time we last checked (first line of this method)
# and the time we acquired the lock.
existing_user = uncached { scope.first }
return existing_user if existing_user.present?
# Derive a username and an email that no existing user already has.
uniquify = Uniquify.new
username = uniquify.string(username) { |s| User.find_by_username(s) }
email = uniquify.string(-> (n) { Kernel.sprintf(email_pattern, n) }) do |s|
User.find_by_email(s)
end
User.create!(
username: username,
email: email,
user_type: USER_TYPE_GHOST,
projects_limit: DEFAULT_PROJECTS_LIMIT,
state: :active,
&creation_block
)
ensure
# Always release the lease, including on the early return and on errors.
Gitlab::ExclusiveLease.cancel(lease_key, uuid)
end
end
end
# Migration-local model over the `namespaces` table; parent class of Group.
class Namespace < ActiveRecord::Base
  belongs_to :owner, class_name: 'CleanupProjectsWithMissingNamespace::User'

  self.table_name = 'namespaces'
end
# Migration-local model for namespaces rows with type 'Group'.
class Group < Namespace
  # Disable STI to allow us to manually set "type = 'Group'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::Group"
  self.inheritance_column = :_type_disabled

  # Creates a private top-level group and makes `user` its owner.
  #
  # user       - the user who receives the owner membership
  # group_name - desired name; Uniquify supplies an alternative when a
  #              top-level group with that name already exists
  #
  # Returns the created group.
  def self.create_unique_group(user, group_name)
    # 'lost-and-found' may be already defined, find a unique one
    unique_name = Uniquify.new.string(group_name) do |candidate|
      Group.where(parent_id: nil, name: candidate).exists?
    end

    Group.create!(
      name: unique_name,
      path: unique_name,
      type: 'Group',
      description: 'Group to store orphaned projects',
      visibility_level: VISIBILITY_PRIVATE
    ).tap do |group|
      # No need to create a route for the lost-and-found group
      GroupMember.add_user(group, user, ACCESS_LEVEL_OWNER)
    end
  end
end
# Migration-local model over the `members` table; parent class of GroupMember.
class Member < ActiveRecord::Base
self.table_name = 'members'
end
# Migration-local model for members rows with type 'GroupMember'.
class GroupMember < Member
  # Value written to members.notification_level for new memberships.
  NOTIFICATION_SETTING_GLOBAL = 3

  # Disable STI to allow us to manually set "type = 'GroupMember'"
  # Otherwise rails forces "type = CleanupProjectsWithMissingNamespace::GroupMember"
  self.inheritance_column = :_type_disabled

  # Inserts a membership row linking `user` to the namespace `source`.
  #
  # source       - record whose id is stored as source_id
  # user         - record whose id is stored as user_id
  # access_level - integer access level for the membership
  #
  # Returns the created GroupMember.
  def self.add_user(source, user, access_level)
    membership_attributes = {
      type: 'GroupMember',
      source_id: source.id,
      user_id: user.id,
      source_type: 'Namespace',
      access_level: access_level,
      notification_level: NOTIFICATION_SETTING_GLOBAL
    }

    GroupMember.create!(membership_attributes)
  end
end
# Migration-local model over the `projects` table, iterated with EachBatch.
class Project < ActiveRecord::Base
  self.table_name = 'projects'

  include ::EachBatch

  # Scope selecting projects whose namespace_id matches no row in namespaces.
  def self.without_namespace
    # Assembled line by line; the joined text is the exact SQL fragment.
    orphaned_condition = [
      'NOT EXISTS (',
      'SELECT 1',
      'FROM namespaces',
      'WHERE projects.namespace_id = namespaces.id',
      ')'
    ].join("\n")

    where(orphaned_condition)
  end
end
# Moves every orphaned project (namespace_id not present in namespaces)
# under the ghost user's `lost-and-found` group, archiving it, making it
# private and suffixing its name and path with its id.
def up
  # Reset the column information of all the models that update the database
  # to ensure the Active Record's knowledge of the table structure is current
  [User, Namespace, Member, Project].each(&:reset_column_information)

  # Find or create the ghost user, then find or create its `lost-and-found` group
  lost_and_found = User.ghost.lost_and_found_group

  # The SET clause is the same for every batch, so it is built once.
  orphaned_project_updates = <<~SQL
    namespace_id = #{lost_and_found.id},
    archived = TRUE,
    visibility_level = #{VISIBILITY_PRIVATE},
    -- Names are expected to be unique inside their namespace
    -- (uniqueness validation on namespace_id, name)
    -- Attach the id to the name and path to make sure that they are unique
    name = name || '_' || id,
    path = path || '_' || id
  SQL

  # With BATCH_SIZE=10K and projects.count=13.6M
  # ~1360 iterations will be run:
  # - each requires on average ~160ms for relation.without_namespace
  # - worst case scenario is that 66 of those batches will trigger an update (~200ms each)
  # In general, we expect less than 5% (=66/13.6M x 10K) to trigger an update
  # Expected total run time: ~235 seconds (== 220 seconds + 14 seconds)
  Project.each_batch(of: BATCH_SIZE) do |batch|
    batch.without_namespace.update_all(orphaned_project_updates)
  end
end
# Rollback is intentionally empty: this data migration is not reversed.
def down
# no-op: the original state for those projects was inconsistent
# Also, the original namespace_id for each project is lost during the update
end
end
# rubocop:enable Migration/PreventStrings
# frozen_string_literal: true
# Validates the projects.namespace_id foreign key that was added earlier
# with `validate: false`.
class ValidateProjectsForeignKeyToNamespaces < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Must match the name used when the constraint was created.
  FK_NAME = 'fk_projects_namespace_id'

  def up
    # Validate the FK added with 20200511080113_add_projects_foreign_key_to_namespaces.rb
    validate_foreign_key(:projects, :namespace_id, name: FK_NAME)
  end

  def down
    # no-op: No need to invalidate the foreign key
    # The inconsistent data are permanently fixed with the data migration
    # `20200511083541_cleanup_projects_with_missing_namespace.rb`
    # even if it is rolled back.
    # If there is an issue with the FK, we'll roll back the migration that adds the FK
  end
end
......@@ -11548,6 +11548,9 @@ ALTER TABLE ONLY public.personal_access_tokens
ALTER TABLE ONLY public.project_settings
ADD CONSTRAINT fk_project_settings_push_rule_id FOREIGN KEY (push_rule_id) REFERENCES public.push_rules(id) ON DELETE SET NULL;
ALTER TABLE ONLY public.projects
ADD CONSTRAINT fk_projects_namespace_id FOREIGN KEY (namespace_id) REFERENCES public.namespaces(id) ON DELETE RESTRICT;
ALTER TABLE ONLY public.protected_branch_merge_access_levels
ADD CONSTRAINT fk_protected_branch_merge_access_levels_user_id FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE;
......@@ -13815,6 +13818,8 @@ COPY "schema_migrations" (version) FROM STDIN;
20200506154421
20200507221434
20200508091106
20200511080113
20200511083541
20200511092246
20200511092505
20200511092714
......@@ -13828,6 +13833,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200511145545
20200511162057
20200511162115
20200511220023
20200512085150
20200512164334
20200513160930
......
......@@ -8,10 +8,12 @@ describe UpdateUndefinedConfidenceFromVulnerabilities, :migration do
let(:vulnerabilities) { table(:vulnerabilities) }
let(:identifiers) { table(:vulnerability_identifiers) }
let(:projects) { table(:projects) }
let(:namespaces) { table(:namespaces) }
let(:users) { table(:users) }
before do
projects.create!(id: 123, namespace_id: 12, name: 'gitlab', path: 'gitlab')
namespace = namespaces.create!(name: 'namespace1', path: 'namespace1')
projects.create!(id: 123, namespace_id: namespace.id, name: 'gitlab', path: 'gitlab')
users.create!(id: 13, email: 'author@example.com', notification_email: 'author@example.com', name: 'author', username: 'author', projects_limit: 10, state: 'active')
stub_const("#{described_class}::BATCH_SIZE", 2)
end
......
......@@ -64,7 +64,7 @@ describe 'Database schema' do
open_project_tracker_data: %w[closed_status_id],
project_group_links: %w[group_id],
project_statistics: %w[namespace_id],
projects: %w[creator_id namespace_id ci_id mirror_user_id],
projects: %w[creator_id ci_id mirror_user_id],
redirect_routes: %w[source_id],
repository_languages: %w[programming_language_id],
routes: %w[source_id],
......
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200511080113_add_projects_foreign_key_to_namespaces.rb')
require Rails.root.join('db', 'post_migrate', '20200511083541_cleanup_projects_with_missing_namespace.rb')
LOST_AND_FOUND_GROUP = 'lost-and-found'
USER_TYPE_GHOST = 5
ACCESS_LEVEL_OWNER = 50
# In order to test the CleanupProjectsWithMissingNamespace migration, we need
# to first create an orphaned project (one with an invalid namespace_id)
# and then run the migration to check that the project was properly cleaned up
#
# The problem is that the CleanupProjectsWithMissingNamespace migration comes
# after the FK has been added with a previous migration (AddProjectsForeignKeyToNamespaces)
# That means that while testing the current class we can not insert projects with an
# invalid namespace_id as the existing FK is correctly blocking us from doing so
#
# The approach that solves that problem is to:
# - Set the schema of this test to the one prior to AddProjectsForeignKeyToNamespaces
# - We could hardcode it to `20200508091106` (which currently is the previous
# migration before adding the FK) but that would mean that this test depends
# on migration 20200508091106 not being reverted or deleted
# - So, we use SchemaVersionFinder that finds the previous migration and returns
# its schema, which we then use in the describe
#
# That means that we lock the schema version to the one returned by
# SchemaVersionFinder.previous_migration and only test the cleanup migration
# *without* the migration that adds the Foreign Key ever running
# That's acceptable as the cleanup script should not be affected in any way
# by the migration that adds the Foreign Key
# Locates the schema version that immediately precedes the
# AddProjectsForeignKeyToNamespaces migration, so the spec can pin its schema
# to a state in which the foreign key does not exist yet.
class SchemaVersionFinder
  class << self
    # Paths Active Record searches for migration files.
    def migrations_paths
      ActiveRecord::Migrator.migrations_paths
    end

    # Migration context built over those paths.
    def migration_context
      ActiveRecord::MigrationContext.new(migrations_paths, ActiveRecord::SchemaMigration)
    end

    # All migrations known to the context, in the order the context lists them.
    def migrations
      migration_context.migrations
    end

    # Returns the version of the migration listed directly before
    # AddProjectsForeignKeyToNamespaces, or nil when that migration is not
    # found (or has no predecessor).
    #
    # `find` is used rather than `break` inside `each_cons`: without a match,
    # `each_cons` with a block returns its own value, which is the receiver
    # array on Ruby >= 3.1 and would be mistaken for a version.
    def previous_migration
      target_name = AddProjectsForeignKeyToNamespaces.name

      pair = migrations.each_cons(2).find { |_previous, migration| migration.name == target_name }
      pair&.first&.version
    end
  end
end
# Runs the cleanup migration against a schema without the projects ->
# namespaces foreign key, so that an orphaned project can be inserted first.
describe CleanupProjectsWithMissingNamespace, :migration, schema: SchemaVersionFinder.previous_migration do
  let(:projects) { table(:projects) }
  let(:namespaces) { table(:namespaces) }
  let(:users) { table(:users) }

  # Builds a new relation on every call, so each count issues its own query.
  def lost_and_found_groups
    Group.where(Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%"))
  end

  # Inserts a public, unarchived project whose name and path are both `name`.
  def create_test_project(name, namespace_id)
    projects.create!(
      name: name,
      path: name,
      visibility_level: 20,
      archived: false,
      namespace_id: namespace_id
    )
  end

  def run_cleanup_migration
    disable_migrations_output { migrate! }
  end

  before do
    existing_namespace = namespaces.create!(name: 'existing_namespace', path: 'existing_namespace')

    create_test_project('project_with_existing_namespace', existing_namespace.id)
    create_test_project('project_with_non_existing_namespace', non_existing_record_id)
  end

  it 'creates the ghost user' do
    expect { run_cleanup_migration }
      .to change { users.where(user_type: USER_TYPE_GHOST).count }.from(0).to(1)
  end

  it 'creates the lost-and-found group, owned by the ghost user' do
    expect(lost_and_found_groups.count).to eq(0)

    run_cleanup_migration

    ghost_user = users.find_by(user_type: USER_TYPE_GHOST)
    groups_owned_by_ghost =
      Group
        .joins('INNER JOIN members ON namespaces.id = members.source_id')
        .where('namespaces.type = ?', 'Group')
        .where('members.type = ?', 'GroupMember')
        .where('members.source_type = ?', 'Namespace')
        .where('members.user_id = ?', ghost_user.id)
        .where('members.requested_at IS NULL')
        .where('members.access_level = ?', ACCESS_LEVEL_OWNER)
        .where(Group.arel_table[:name].matches("#{LOST_AND_FOUND_GROUP}%"))

    expect(groups_owned_by_ghost.count).to eq(1)
  end

  it 'moves the orphaned project to the lost-and-found group' do
    orphan_before = projects.find_by(name: 'project_with_non_existing_namespace')
    expect(orphan_before).to have_attributes(
      visibility_level: 20,
      archived: false,
      namespace_id: non_existing_record_id
    )

    run_cleanup_migration

    lost_and_found_group = lost_and_found_groups.take
    orphan_after = projects.find_by(id: orphan_before.id)
    # The migration appends "_<id>" to both name and path.
    suffixed_name = "project_with_non_existing_namespace_#{orphan_after.id}"
    expect(orphan_after).to have_attributes(
      visibility_level: 0,
      namespace_id: lost_and_found_group.id,
      name: suffixed_name,
      path: suffixed_name,
      archived: true
    )

    # The project with a valid namespace must be left untouched.
    valid_project = projects.find_by(name: 'project_with_existing_namespace')
    existing_namespace = namespaces.find_by(name: 'existing_namespace')
    expect(valid_project).to have_attributes(
      visibility_level: 20,
      namespace_id: existing_namespace.id,
      path: 'project_with_existing_namespace',
      archived: false
    )
  end
end
......@@ -4012,16 +4012,6 @@ describe Project do
expect { project.remove_pages }.to change { pages_metadatum.reload.deployed }.from(true).to(false)
end
it 'is a no-op when there is no namespace' do
project.namespace.delete
project.reload
expect_any_instance_of(Projects::UpdatePagesConfigurationService).not_to receive(:execute)
expect_any_instance_of(Gitlab::PagesTransfer).not_to receive(:rename_project)
expect { project.remove_pages }.not_to change { pages_metadatum.reload.deployed }
end
it 'is run when the project is destroyed' do
expect(project).to receive(:remove_pages).and_call_original
......
......@@ -79,19 +79,5 @@ describe NamespacelessProjectDestroyWorker do
end
end
end
context 'project has non-existing namespace' do
let!(:project) do
project = build(:project, namespace_id: non_existing_record_id)
project.save(validate: false)
project
end
it 'deletes the project' do
subject.perform(project.id)
expect(Project.unscoped.all).not_to include(project)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment