Commit 902a68cc authored by Adam Hegyi's avatar Adam Hegyi

Merge branch '338535-loose-fk-db-structure-and-helpers-switch-table' into 'master'

Implement loose foreign key cleanup worker

See merge request gitlab-org/gitlab!69165
parents 2065b14d 1a1e4bdf
...@@ -7,20 +7,18 @@ module LooseForeignKey ...@@ -7,20 +7,18 @@ module LooseForeignKey
# Loose foreign keys allow delayed processing of associated database records # Loose foreign keys allow delayed processing of associated database records
# with similar guarantees than a database foreign key. # with similar guarantees than a database foreign key.
# #
# TODO: finalize this later once the async job is in place
#
# Prerequisites: # Prerequisites:
# #
# To start using the concern, you'll need to install a database trigger to the parent # To start using the concern, you'll need to install a database trigger to the parent
# table in a standard DB migration (not post-migration). # table in a standard DB migration (not post-migration).
# #
# > add_loose_foreign_key_support(:projects, :gitlab_main) # > track_record_deletions(:projects)
# #
# Usage: # Usage:
# #
# > class Ci::Build < ApplicationRecord # > class Ci::Build < ApplicationRecord
# > # >
# > loose_foreign_key :security_scans, :build_id, on_delete: :async_delete, gitlab_schema: :gitlab_main # > loose_foreign_key :security_scans, :build_id, on_delete: :async_delete
# > # >
# > # associations can be still defined, the dependent options is no longer necessary: # > # associations can be still defined, the dependent options is no longer necessary:
# > has_many :security_scans, class_name: 'Security::Scan' # > has_many :security_scans, class_name: 'Security::Scan'
...@@ -32,14 +30,6 @@ module LooseForeignKey ...@@ -32,14 +30,6 @@ module LooseForeignKey
# - :async_delete - deletes the children rows via an asynchronous process. # - :async_delete - deletes the children rows via an asynchronous process.
# - :async_nullify - sets the foreign key column to null via an asynchronous process. # - :async_nullify - sets the foreign key column to null via an asynchronous process.
# #
# Options for gitlab_schema:
#
# - :gitlab_ci
# - :gitlab_main
#
# The value can be determined by calling `Model.gitlab_schema` where the Model represents
# the model for the child table.
#
# How it works: # How it works:
# #
# When adding loose foreign key support to the table, a DELETE trigger is installed # When adding loose foreign key support to the table, a DELETE trigger is installed
...@@ -69,23 +59,17 @@ module LooseForeignKey ...@@ -69,23 +59,17 @@ module LooseForeignKey
end end
on_delete_options = %i[async_delete async_nullify] on_delete_options = %i[async_delete async_nullify]
gitlab_schema_options = %i(gitlab_main gitlab_ci)
unless on_delete_options.include?(symbolized_options[:on_delete]&.to_sym) unless on_delete_options.include?(symbolized_options[:on_delete]&.to_sym)
raise "Invalid on_delete option given: #{symbolized_options[:on_delete]}. Valid options: #{on_delete_options.join(', ')}" raise "Invalid on_delete option given: #{symbolized_options[:on_delete]}. Valid options: #{on_delete_options.join(', ')}"
end end
unless gitlab_schema_options.include?(symbolized_options[:gitlab_schema]&.to_sym)
raise "Invalid gitlab_schema option given: #{symbolized_options[:gitlab_schema]}. Valid options: #{gitlab_schema_options.join(', ')}"
end
definition = ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new( definition = ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new(
table_name.to_s, table_name.to_s,
to_table.to_s, to_table.to_s,
{ {
column: column.to_s, column: column.to_s,
on_delete: symbolized_options[:on_delete].to_sym, on_delete: symbolized_options[:on_delete].to_sym
gitlab_schema: symbolized_options[:gitlab_schema].to_sym
} }
) )
......
# frozen_string_literal: true # frozen_string_literal: true
class LooseForeignKeys::DeletedRecord < ApplicationRecord class LooseForeignKeys::DeletedRecord < ApplicationRecord
extend SuppressCompositePrimaryKeyWarning self.primary_key = :id
scope :for_table, -> (table) { where(fully_qualified_table_name: table) }
scope :ordered_by_id, -> { order(:id, :primary_key_value) }
# This needs to be parameterized once we start adding partitions
scope :for_partition, -> { where(partition: 1) }
enum status: { pending: 1, processed: 2 }, _prefix: :status
def self.load_batch_for_table(table, batch_size)
for_table(table)
.for_partition
.status_pending
.ordered_by_id
.limit(batch_size)
.to_a
end
def self.mark_records_processed_for_table_between(table, from_record, to_record)
from = from_record.id
to = to_record.id
for_table(table)
.for_partition
.status_pending
.where(id: from..to)
.update_all(status: :processed)
end
end end
# frozen_string_literal: true
module LooseForeignKeys
class ModificationTracker
MAX_DELETES = 100_000
MAX_UPDATES = 50_000
MAX_RUNTIME = 3.minutes
delegate :monotonic_time, to: :'Gitlab::Metrics::System'
def initialize
@delete_count_by_table = Hash.new { |h, k| h[k] = 0 }
@update_count_by_table = Hash.new { |h, k| h[k] = 0 }
@start_time = monotonic_time
end
def add_deletions(table, count)
@delete_count_by_table[table] += count
end
def add_updates(table, count)
@update_count_by_table[table] += count
end
def over_limit?
@delete_count_by_table.values.sum >= MAX_DELETES ||
@update_count_by_table.values.sum >= MAX_UPDATES ||
monotonic_time - @start_time >= MAX_RUNTIME
end
def stats
{
over_limit: over_limit?,
delete_count_by_table: @delete_count_by_table,
update_count_by_table: @update_count_by_table,
delete_count: @delete_count_by_table.values.sum,
update_count: @update_count_by_table.values.sum
}
end
end
end
# frozen_string_literal: true
module LooseForeignKeys
class BatchCleanerService
def initialize(parent_klass:, deleted_parent_records:, modification_tracker: LooseForeignKeys::ModificationTracker.new, models_by_table_name:)
@parent_klass = parent_klass
@deleted_parent_records = deleted_parent_records
@modification_tracker = modification_tracker
@models_by_table_name = models_by_table_name
end
def execute
parent_klass.loose_foreign_key_definitions.each do |foreign_key_definition|
run_cleaner_service(foreign_key_definition, with_skip_locked: true)
break if modification_tracker.over_limit?
run_cleaner_service(foreign_key_definition, with_skip_locked: false)
break if modification_tracker.over_limit?
end
return if modification_tracker.over_limit?
# At this point, all associations are cleaned up, we can update the status of the parent records
LooseForeignKeys::DeletedRecord
.mark_records_processed_for_table_between(deleted_parent_records.first.fully_qualified_table_name, deleted_parent_records.first, deleted_parent_records.last)
end
private
attr_reader :parent_klass, :deleted_parent_records, :modification_tracker, :models_by_table_name
def record_result(cleaner, result)
if cleaner.async_delete?
modification_tracker.add_deletions(result[:table], result[:affected_rows])
elsif cleaner.async_nullify?
modification_tracker.add_updates(result[:table], result[:affected_rows])
end
end
def run_cleaner_service(foreign_key_definition, with_skip_locked:)
cleaner = CleanerService.new(
model: models_by_table_name.fetch(foreign_key_definition.to_table),
foreign_key_definition: foreign_key_definition,
deleted_parent_records: deleted_parent_records,
with_skip_locked: with_skip_locked
)
loop do
result = cleaner.execute
record_result(cleaner, result)
break if modification_tracker.over_limit? || result[:affected_rows] == 0
end
end
end
end
# frozen_string_literal: true
module LooseForeignKeys
# rubocop: disable CodeReuse/ActiveRecord
class CleanerService
DELETE_LIMIT = 1000
UPDATE_LIMIT = 500
delegate :connection, to: :model
def initialize(model:, foreign_key_definition:, deleted_parent_records:, with_skip_locked: false)
@model = model
@foreign_key_definition = foreign_key_definition
@deleted_parent_records = deleted_parent_records
@with_skip_locked = with_skip_locked
end
def execute
result = connection.execute(build_query)
{ affected_rows: result.cmd_tuples, table: foreign_key_definition.to_table }
end
def async_delete?
foreign_key_definition.on_delete == :async_delete
end
def async_nullify?
foreign_key_definition.on_delete == :async_nullify
end
private
attr_reader :model, :foreign_key_definition, :deleted_parent_records, :with_skip_locked
def build_query
query = if async_delete?
delete_query
elsif async_nullify?
update_query
else
raise "Invalid on_delete argument: #{foreign_key_definition.on_delete}"
end
unless query.include?(%{"#{foreign_key_definition.column}" IN (})
raise("FATAL: foreign key condition is missing from the generated query: #{query}")
end
query
end
def arel_table
@arel_table ||= model.arel_table
end
def primary_keys
@primary_keys ||= connection.primary_keys(model.table_name).map { |key| arel_table[key] }
end
def quoted_table_name
@quoted_table_name ||= Arel.sql(connection.quote_table_name(model.table_name))
end
def delete_query
query = Arel::DeleteManager.new
query.from(quoted_table_name)
add_in_query_with_limit(query, DELETE_LIMIT)
end
def update_query
query = Arel::UpdateManager.new
query.table(quoted_table_name)
query.set([[arel_table[foreign_key_definition.column], nil]])
add_in_query_with_limit(query, UPDATE_LIMIT)
end
# IN query with one or composite primary key
# WHERE (primary_key1, primary_key2) IN (subselect)
def add_in_query_with_limit(query, limit)
columns = Arel::Nodes::Grouping.new(primary_keys)
query.where(columns.in(in_query_with_limit(limit))).to_sql
end
# Builds the following sub-query
# SELECT primary_keys FROM table WHERE foreign_key IN (1, 2, 3) LIMIT N
def in_query_with_limit(limit)
in_query = Arel::SelectManager.new
in_query.from(quoted_table_name)
in_query.where(arel_table[foreign_key_definition.column].in(deleted_parent_records.map(&:primary_key_value)))
in_query.projections = primary_keys
in_query.take(limit)
in_query.lock(Arel.sql('FOR UPDATE SKIP LOCKED')) if with_skip_locked
in_query
end
end
# rubocop: enable CodeReuse/ActiveRecord
end
# frozen_string_literal: true
module LooseForeignKeys
class ProcessDeletedRecordsService
BATCH_SIZE = 1000
def initialize(connection:)
@connection = connection
end
def execute
modification_tracker = ModificationTracker.new
tracked_tables.cycle do |table|
records = load_batch_for_table(table)
if records.empty?
tracked_tables.delete(table)
next
end
break if modification_tracker.over_limit?
model = find_parent_model!(table)
LooseForeignKeys::BatchCleanerService
.new(parent_klass: model,
deleted_parent_records: records,
modification_tracker: modification_tracker,
models_by_table_name: models_by_table_name)
.execute
break if modification_tracker.over_limit?
end
modification_tracker.stats
end
private
attr_reader :connection
def load_batch_for_table(table)
fully_qualified_table_name = "#{current_schema}.#{table}"
LooseForeignKeys::DeletedRecord.load_batch_for_table(fully_qualified_table_name, BATCH_SIZE)
end
def find_parent_model!(table)
models_by_table_name.fetch(table)
end
def current_schema
@current_schema = connection.current_schema
end
def tracked_tables
@tracked_tables ||= models_by_table_name
.select { |table_name, model| model.respond_to?(:loose_foreign_key_definitions) }
.keys
end
def models_by_table_name
@models_by_table_name ||= begin
all_models
.select(&:base_class?)
.index_by(&:table_name)
end
end
def all_models
ApplicationRecord.descendants
end
end
end
...@@ -363,6 +363,15 @@ ...@@ -363,6 +363,15 @@
:weight: 1 :weight: 1
:idempotent: :idempotent:
:tags: [] :tags: []
- :name: cronjob:loose_foreign_keys_cleanup
:worker_name: LooseForeignKeys::CleanupWorker
:feature_category: :sharding
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:member_invitation_reminder_emails - :name: cronjob:member_invitation_reminder_emails
:worker_name: MemberInvitationReminderEmailsWorker :worker_name: MemberInvitationReminderEmailsWorker
:feature_category: :subgroups :feature_category: :subgroups
......
# frozen_string_literal: true
module LooseForeignKeys
class CleanupWorker
include ApplicationWorker
include Gitlab::ExclusiveLeaseHelpers
include CronjobQueue # rubocop: disable Scalability/CronWorkerContext
feature_category :sharding
data_consistency :always
idempotent!
def perform
return if Feature.disabled?(:loose_foreign_key_cleanup, default_enabled: :yaml)
in_lock(self.class.name.underscore, ttl: 1.hour, retries: 0) do
# TODO: Iterate over the connections
# https://gitlab.com/gitlab-org/gitlab/-/issues/341513
stats = ProcessDeletedRecordsService.new(connection: ApplicationRecord.connection).execute
log_extra_metadata_on_done(:stats, stats)
end
end
end
end
---
name: loose_foreign_key_cleanup
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/69165
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/343545
milestone: '14.4'
type: development
group: group::sharding
default_enabled: false
...@@ -714,6 +714,9 @@ Gitlab.ee do ...@@ -714,6 +714,9 @@ Gitlab.ee do
Settings.cron_jobs['app_sec_dast_profile_schedule_worker'] ||= Settingslogic.new({}) Settings.cron_jobs['app_sec_dast_profile_schedule_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['cron'] ||= '7-59/15 * * * *' Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['cron'] ||= '7-59/15 * * * *'
Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['job_class'] = 'AppSec::Dast::ProfileScheduleWorker' Settings.cron_jobs['app_sec_dast_profile_schedule_worker']['job_class'] = 'AppSec::Dast::ProfileScheduleWorker'
Settings.cron_jobs['loose_foreign_keys_cleanup_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['cron'] ||= '*/5 * * * *'
Settings.cron_jobs['loose_foreign_keys_cleanup_worker']['job_class'] = 'LooseForeignKeys::CleanupWorker'
end end
# #
......
# frozen_string_literal: true
class AddNewLooseFkIndex < Gitlab::Database::Migration[1.0]
include Gitlab::Database::PartitioningMigrationHelpers
disable_ddl_transaction!
INDEX_NAME = 'index_loose_foreign_keys_deleted_records_for_loading_records'
def up
add_concurrent_partitioned_index :loose_foreign_keys_deleted_records,
%I[fully_qualified_table_name id primary_key_value partition],
where: 'status = 1',
name: INDEX_NAME
end
def down
remove_concurrent_partitioned_index_by_name :loose_foreign_keys_deleted_records, INDEX_NAME
end
end
e2812344b16cd51c544235bae8a365713ab7e34652c2c05511a7fe9d84c05fb1
\ No newline at end of file
...@@ -23947,6 +23947,10 @@ ALTER TABLE ONLY zentao_tracker_data ...@@ -23947,6 +23947,10 @@ ALTER TABLE ONLY zentao_tracker_data
ALTER TABLE ONLY zoom_meetings ALTER TABLE ONLY zoom_meetings
ADD CONSTRAINT zoom_meetings_pkey PRIMARY KEY (id); ADD CONSTRAINT zoom_meetings_pkey PRIMARY KEY (id);
CREATE INDEX index_loose_foreign_keys_deleted_records_for_loading_records ON ONLY loose_foreign_keys_deleted_records USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1);
CREATE INDEX index_8be8640437 ON gitlab_partitions_static.loose_foreign_keys_deleted_records_1 USING btree (fully_qualified_table_name, id, primary_key_value, partition) WHERE (status = 1);
CREATE INDEX index_product_analytics_events_experimental_project_and_time ON ONLY product_analytics_events_experimental USING btree (project_id, collector_tstamp); CREATE INDEX index_product_analytics_events_experimental_project_and_time ON ONLY product_analytics_events_experimental USING btree (project_id, collector_tstamp);
CREATE INDEX product_analytics_events_expe_project_id_collector_tstamp_idx10 ON gitlab_partitions_static.product_analytics_events_experimental_10 USING btree (project_id, collector_tstamp); CREATE INDEX product_analytics_events_expe_project_id_collector_tstamp_idx10 ON gitlab_partitions_static.product_analytics_events_experimental_10 USING btree (project_id, collector_tstamp);
...@@ -27249,6 +27253,8 @@ ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PAR ...@@ -27249,6 +27253,8 @@ ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PAR
ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PARTITION gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_31_pkey; ALTER INDEX analytics_cycle_analytics_merge_request_stage_events_pkey ATTACH PARTITION gitlab_partitions_static.analytics_cycle_analytics_merge_request_stage_events_31_pkey;
ALTER INDEX index_loose_foreign_keys_deleted_records_for_loading_records ATTACH PARTITION gitlab_partitions_static.index_8be8640437;
ALTER INDEX loose_foreign_keys_deleted_records_pkey ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1_pkey; ALTER INDEX loose_foreign_keys_deleted_records_pkey ATTACH PARTITION gitlab_partitions_static.loose_foreign_keys_deleted_records_1_pkey;
ALTER INDEX index_product_analytics_events_experimental_project_and_time ATTACH PARTITION gitlab_partitions_static.product_analytics_events_expe_project_id_collector_tstamp_idx10; ALTER INDEX index_product_analytics_events_experimental_project_and_time ATTACH PARTITION gitlab_partitions_static.product_analytics_events_expe_project_id_collector_tstamp_idx10;
...@@ -9,8 +9,8 @@ RSpec.describe LooseForeignKey do ...@@ -9,8 +9,8 @@ RSpec.describe LooseForeignKey do
self.table_name = 'projects' self.table_name = 'projects'
loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main loose_foreign_key :issues, :project_id, on_delete: :async_delete
loose_foreign_key 'merge_requests', 'project_id', 'on_delete' => 'async_nullify', 'gitlab_schema' => :gitlab_main loose_foreign_key 'merge_requests', 'project_id', 'on_delete' => 'async_nullify'
end end
end end
...@@ -28,7 +28,6 @@ RSpec.describe LooseForeignKey do ...@@ -28,7 +28,6 @@ RSpec.describe LooseForeignKey do
expect(definition.to_table).to eq('merge_requests') expect(definition.to_table).to eq('merge_requests')
expect(definition.column).to eq('project_id') expect(definition.column).to eq('project_id')
expect(definition.on_delete).to eq(:async_nullify) expect(definition.on_delete).to eq(:async_nullify)
expect(definition.options[:gitlab_schema]).to eq(:gitlab_main)
end end
context 'validation' do context 'validation' do
...@@ -39,9 +38,9 @@ RSpec.describe LooseForeignKey do ...@@ -39,9 +38,9 @@ RSpec.describe LooseForeignKey do
self.table_name = 'projects' self.table_name = 'projects'
loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main loose_foreign_key :issues, :project_id, on_delete: :async_delete
loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify, gitlab_schema: :gitlab_main loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify
loose_foreign_key :merge_requests, :project_id, on_delete: :destroy, gitlab_schema: :gitlab_main loose_foreign_key :merge_requests, :project_id, on_delete: :destroy
end end
end end
...@@ -50,28 +49,12 @@ RSpec.describe LooseForeignKey do ...@@ -50,28 +49,12 @@ RSpec.describe LooseForeignKey do
end end
end end
context 'gitlab_schema validation' do
let(:invalid_class) do
Class.new(ApplicationRecord) do
include LooseForeignKey
self.table_name = 'projects'
loose_foreign_key :merge_requests, :project_id, on_delete: :async_nullify, gitlab_schema: :unknown
end
end
it 'raises error when invalid `gitlab_schema` option was given' do
expect { invalid_class }.to raise_error /Invalid gitlab_schema option given: unknown/
end
end
context 'inheritance validation' do context 'inheritance validation' do
let(:inherited_project_class) do let(:inherited_project_class) do
Class.new(Project) do Class.new(Project) do
include LooseForeignKey include LooseForeignKey
loose_foreign_key :issues, :project_id, on_delete: :async_delete, gitlab_schema: :gitlab_main loose_foreign_key :issues, :project_id, on_delete: :async_delete
end end
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe LooseForeignKeys::DeletedRecord, type: :model do
let_it_be(:table) { 'public.projects' }
let_it_be(:deleted_record_1) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 5) }
let_it_be(:deleted_record_2) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) }
let_it_be(:deleted_record_3) { described_class.create!(partition: 1, fully_qualified_table_name: 'public.other_table', primary_key_value: 3) }
let_it_be(:deleted_record_4) { described_class.create!(partition: 1, fully_qualified_table_name: table, primary_key_value: 1) } # duplicate
describe '.load_batch_for_table' do
it 'loads records and orders them by creation date' do
records = described_class.load_batch_for_table(table, 10)
expect(records).to eq([deleted_record_1, deleted_record_2, deleted_record_4])
end
it 'supports configurable batch size' do
records = described_class.load_batch_for_table(table, 2)
expect(records).to eq([deleted_record_1, deleted_record_2])
end
end
describe '.mark_records_processed_for_table_between' do
it 'marks processed exactly one record' do
described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_2)
expect(described_class.status_pending.count).to eq(3)
expect(described_class.status_processed.count).to eq(1)
processed_record = described_class.status_processed.first
expect(processed_record).to eq(deleted_record_2)
end
it 'deletes two records' do
described_class.mark_records_processed_for_table_between(table, deleted_record_2, deleted_record_4)
expect(described_class.status_pending.count).to eq(2)
expect(described_class.status_processed.count).to eq(2)
end
it 'deletes all records' do
described_class.mark_records_processed_for_table_between(table, deleted_record_1, deleted_record_4)
expect(described_class.status_pending.count).to eq(1)
expect(described_class.status_processed.count).to eq(3)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe LooseForeignKeys::ModificationTracker do
subject(:tracker) { described_class.new }
describe '#over_limit?' do
it 'is true when deletion MAX_DELETES is exceeded' do
stub_const('LooseForeignKeys::ModificationTracker::MAX_DELETES', 5)
tracker.add_deletions('issues', 10)
expect(tracker).to be_over_limit
end
it 'is false when MAX_DELETES is not exceeded' do
tracker.add_deletions('issues', 3)
expect(tracker).not_to be_over_limit
end
it 'is true when deletion MAX_UPDATES is exceeded' do
stub_const('LooseForeignKeys::ModificationTracker::MAX_UPDATES', 5)
tracker.add_updates('issues', 3)
tracker.add_updates('issues', 4)
expect(tracker).to be_over_limit
end
it 'is false when MAX_UPDATES is not exceeded' do
tracker.add_updates('projects', 3)
expect(tracker).not_to be_over_limit
end
it 'is true when max runtime is exceeded' do
monotonic_time_before = 1 # this will be the start time
monotonic_time_after = described_class::MAX_RUNTIME.to_i + 1 # this will be returned when over_limit? is called
allow(Gitlab::Metrics::System).to receive(:monotonic_time).and_return(monotonic_time_before, monotonic_time_after)
tracker
expect(tracker).to be_over_limit
end
it 'is false when max runtime is not exceeded' do
expect(tracker).not_to be_over_limit
end
end
describe '#stats' do
it 'exposes stats' do
freeze_time do
tracker
tracker.add_deletions('issues', 5)
tracker.add_deletions('issues', 2)
tracker.add_deletions('projects', 2)
tracker.add_updates('projects', 3)
expect(tracker.stats).to eq({
over_limit: false,
delete_count_by_table: { 'issues' => 7, 'projects' => 2 },
update_count_by_table: { 'projects' => 3 },
delete_count: 9,
update_count: 3
})
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe LooseForeignKeys::BatchCleanerService do
include MigrationsHelpers
def create_table_structure
migration = ActiveRecord::Migration.new.extend(Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers)
migration.create_table :loose_fk_parent_table
migration.create_table :loose_fk_child_table_1 do |t|
t.bigint :parent_id
end
migration.create_table :loose_fk_child_table_2 do |t|
t.bigint :parent_id_with_different_column
end
migration.track_record_deletions(:loose_fk_parent_table)
end
let(:parent_model) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_parent_table'
include LooseForeignKey
loose_foreign_key :loose_fk_child_table_1, :parent_id, on_delete: :async_delete
loose_foreign_key :loose_fk_child_table_2, :parent_id_with_different_column, on_delete: :async_nullify
end
end
let(:child_model_1) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_child_table_1'
end
end
let(:child_model_2) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_child_table_2'
end
end
let(:loose_fk_child_table_1) { table(:loose_fk_child_table_1) }
let(:loose_fk_child_table_2) { table(:loose_fk_child_table_2) }
let(:parent_record_1) { parent_model.create! }
let(:other_parent_record) { parent_model.create! }
before(:all) do
create_table_structure
end
before do
parent_record_1
loose_fk_child_table_1.create!(parent_id: parent_record_1.id)
loose_fk_child_table_1.create!(parent_id: parent_record_1.id)
# these will not be deleted
loose_fk_child_table_1.create!(parent_id: other_parent_record.id)
loose_fk_child_table_1.create!(parent_id: other_parent_record.id)
loose_fk_child_table_2.create!(parent_id_with_different_column: parent_record_1.id)
loose_fk_child_table_2.create!(parent_id_with_different_column: parent_record_1.id)
# these will not be deleted
loose_fk_child_table_2.create!(parent_id_with_different_column: other_parent_record.id)
loose_fk_child_table_2.create!(parent_id_with_different_column: other_parent_record.id)
end
after(:all) do
migration = ActiveRecord::Migration.new
migration.drop_table :loose_fk_parent_table
migration.drop_table :loose_fk_child_table_1
migration.drop_table :loose_fk_child_table_2
end
context 'when parent records are deleted' do
before do
parent_record_1.delete
expect(loose_fk_child_table_1.count).to eq(4)
expect(loose_fk_child_table_2.count).to eq(4)
described_class.new(parent_klass: parent_model,
deleted_parent_records: LooseForeignKeys::DeletedRecord.status_pending.all,
models_by_table_name: {
'loose_fk_child_table_1' => child_model_1,
'loose_fk_child_table_2' => child_model_2
}).execute
end
it 'cleans up the child records' do
expect(loose_fk_child_table_1.where(parent_id: parent_record_1.id)).to be_empty
expect(loose_fk_child_table_2.where(parent_id_with_different_column: nil).count).to eq(2)
end
it 'cleans up the parent DeletedRecord' do
expect(LooseForeignKeys::DeletedRecord.status_pending.count).to eq(0)
end
it 'does not delete unrelated records' do
expect(loose_fk_child_table_1.where(parent_id: other_parent_record.id).count).to eq(2)
expect(loose_fk_child_table_2.where(parent_id_with_different_column: other_parent_record.id).count).to eq(2)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe LooseForeignKeys::CleanerService do
let(:schema) { ApplicationRecord.connection.current_schema }
let(:deleted_records) do
[
LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: non_existing_record_id),
LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: non_existing_record_id)
]
end
let(:loose_fk_definition) do
ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new(
'projects',
'issues',
{
column: 'project_id',
on_delete: :async_nullify
}
)
end
subject(:cleaner_service) do
described_class.new(
model: Issue,
foreign_key_definition: loose_fk_definition,
deleted_parent_records: deleted_records
)
end
context 'when invalid foreign key definition is passed' do
context 'when invalid on_delete argument was given' do
before do
loose_fk_definition.options[:on_delete] = :invalid
end
it 'raises KeyError' do
expect { cleaner_service.execute }.to raise_error(StandardError, /Invalid on_delete argument/)
end
end
end
describe 'query generation' do
context 'when single primary key is used' do
let(:issue) { create(:issue) }
let(:deleted_records) do
[
LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.projects", primary_key_value: issue.project_id)
]
end
it 'generates an IN query for nullifying the rows' do
expected_query = %{UPDATE "issues" SET "project_id" = NULL WHERE ("issues"."id") IN (SELECT "issues"."id" FROM "issues" WHERE "issues"."project_id" IN (#{issue.project_id}) LIMIT 500)}
expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original
cleaner_service.execute
issue.reload
expect(issue.project_id).to be_nil
end
it 'generates an IN query for deleting the rows' do
loose_fk_definition.options[:on_delete] = :async_delete
expected_query = %{DELETE FROM "issues" WHERE ("issues"."id") IN (SELECT "issues"."id" FROM "issues" WHERE "issues"."project_id" IN (#{issue.project_id}) LIMIT 1000)}
expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original
cleaner_service.execute
expect(Issue.exists?(id: issue.id)).to eq(false)
end
end
context 'when composite primary key is used' do
let!(:user) { create(:user) }
let!(:project) { create(:project) }
let(:loose_fk_definition) do
ActiveRecord::ConnectionAdapters::ForeignKeyDefinition.new(
'users',
'project_authorizations',
{
column: 'user_id',
on_delete: :async_delete
}
)
end
let(:deleted_records) do
[
LooseForeignKeys::DeletedRecord.new(fully_qualified_table_name: "#{schema}.users", primary_key_value: user.id)
]
end
subject(:cleaner_service) do
described_class.new(
model: ProjectAuthorization,
foreign_key_definition: loose_fk_definition,
deleted_parent_records: deleted_records
)
end
before do
project.add_developer(user)
end
it 'generates an IN query for deleting the rows' do
expected_query = %{DELETE FROM "project_authorizations" WHERE ("project_authorizations"."user_id", "project_authorizations"."project_id", "project_authorizations"."access_level") IN (SELECT "project_authorizations"."user_id", "project_authorizations"."project_id", "project_authorizations"."access_level" FROM "project_authorizations" WHERE "project_authorizations"."user_id" IN (#{user.id}) LIMIT 1000)}
expect(ApplicationRecord.connection).to receive(:execute).with(expected_query).and_call_original
cleaner_service.execute
expect(ProjectAuthorization.exists?(user_id: user.id)).to eq(false)
end
context 'when the query generation is incorrect (paranoid check)' do
it 'raises error if the foreign key condition is missing' do
expect_next_instance_of(LooseForeignKeys::CleanerService) do |instance|
expect(instance).to receive(:delete_query).and_return('wrong query')
end
expect { cleaner_service.execute }.to raise_error /FATAL: foreign key condition is missing from the generated query/
end
end
end
context 'when with_skip_locked parameter is true' do
subject(:cleaner_service) do
described_class.new(
model: Issue,
foreign_key_definition: loose_fk_definition,
deleted_parent_records: deleted_records,
with_skip_locked: true
)
end
it 'generates a query with the SKIP LOCKED clause' do
expect(ApplicationRecord.connection).to receive(:execute).with(/FOR UPDATE SKIP LOCKED/).and_call_original
cleaner_service.execute
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe LooseForeignKeys::CleanupWorker do
include MigrationsHelpers
def create_table_structure
migration = ActiveRecord::Migration.new.extend(Gitlab::Database::MigrationHelpers::LooseForeignKeyHelpers)
migration.create_table :loose_fk_parent_table_1
migration.create_table :loose_fk_parent_table_2
migration.create_table :loose_fk_child_table_1_1 do |t|
t.bigint :parent_id
end
migration.create_table :loose_fk_child_table_1_2 do |t|
t.bigint :parent_id_with_different_column
end
migration.create_table :loose_fk_child_table_2_1 do |t|
t.bigint :parent_id
end
migration.track_record_deletions(:loose_fk_parent_table_1)
migration.track_record_deletions(:loose_fk_parent_table_2)
end
let!(:parent_model_1) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_parent_table_1'
include LooseForeignKey
loose_foreign_key :loose_fk_child_table_1_1, :parent_id, on_delete: :async_delete
loose_foreign_key :loose_fk_child_table_1_2, :parent_id_with_different_column, on_delete: :async_nullify
end
end
let!(:parent_model_2) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_parent_table_2'
include LooseForeignKey
loose_foreign_key :loose_fk_child_table_2_1, :parent_id, on_delete: :async_delete
end
end
let!(:child_model_1) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_child_table_1_1'
end
end
let!(:child_model_2) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_child_table_1_2'
end
end
let!(:child_model_3) do
Class.new(ApplicationRecord) do
self.table_name = 'loose_fk_child_table_2_1'
end
end
let(:loose_fk_parent_table_1) { table(:loose_fk_parent_table_1) }
let(:loose_fk_parent_table_2) { table(:loose_fk_parent_table_2) }
let(:loose_fk_child_table_1_1) { table(:loose_fk_child_table_1_1) }
let(:loose_fk_child_table_1_2) { table(:loose_fk_child_table_1_2) }
let(:loose_fk_child_table_2_1) { table(:loose_fk_child_table_2_1) }
before(:all) do
create_table_structure
end
after(:all) do
migration = ActiveRecord::Migration.new
migration.drop_table :loose_fk_parent_table_1
migration.drop_table :loose_fk_parent_table_2
migration.drop_table :loose_fk_child_table_1_1
migration.drop_table :loose_fk_child_table_1_2
migration.drop_table :loose_fk_child_table_2_1
end
before do
parent_record_1 = loose_fk_parent_table_1.create!
loose_fk_child_table_1_1.create!(parent_id: parent_record_1.id)
loose_fk_child_table_1_2.create!(parent_id_with_different_column: parent_record_1.id)
parent_record_2 = loose_fk_parent_table_1.create!
2.times { loose_fk_child_table_1_1.create!(parent_id: parent_record_2.id) }
3.times { loose_fk_child_table_1_2.create!(parent_id_with_different_column: parent_record_2.id) }
parent_record_3 = loose_fk_parent_table_2.create!
5.times { loose_fk_child_table_2_1.create!(parent_id: parent_record_3.id) }
parent_model_1.delete_all
parent_model_2.delete_all
end
it 'cleans up all rows' do
described_class.new.perform
expect(loose_fk_child_table_1_1.count).to eq(0)
expect(loose_fk_child_table_1_2.where(parent_id_with_different_column: nil).count).to eq(4)
expect(loose_fk_child_table_2_1.count).to eq(0)
end
context 'when deleting in batches' do
before do
stub_const('LooseForeignKeys::CleanupWorker::BATCH_SIZE', 2)
end
it 'cleans up all rows' do
expect(LooseForeignKeys::BatchCleanerService).to receive(:new).exactly(:twice).and_call_original
described_class.new.perform
expect(loose_fk_child_table_1_1.count).to eq(0)
expect(loose_fk_child_table_1_2.where(parent_id_with_different_column: nil).count).to eq(4)
expect(loose_fk_child_table_2_1.count).to eq(0)
end
end
context 'when the deleted rows count limit have been reached' do
def count_deletable_rows
loose_fk_child_table_1_1.count + loose_fk_child_table_2_1.count
end
before do
stub_const('LooseForeignKeys::ModificationTracker::MAX_DELETES', 2)
stub_const('LooseForeignKeys::CleanerService::DELETE_LIMIT', 1)
end
it 'cleans up 2 rows' do
expect { described_class.new.perform }.to change { count_deletable_rows }.by(-2)
end
end
context 'when the loose_foreign_key_cleanup feature flag is off' do
before do
stub_feature_flags(loose_foreign_key_cleanup: false)
end
it 'does nothing' do
expect { described_class.new.perform }.not_to change { LooseForeignKeys::DeletedRecord.status_processed.count }
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment