Commit a5dd3b7e authored by Jan Provaznik's avatar Jan Provaznik

Address review feedback

* revert changes in default primary key strategy
* only change is to pass migration job_arguments to strategies so do
  don't have to hardcode specific group id in the migration strategy
* move traversal ids update inside transaction
* fixed locking of project records
* added clean of namespace gin indexes
parent b8cab3e8
......@@ -8,7 +8,7 @@ class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
disable_ddl_transaction!
def up
return unless Gitlab.com?
return unless Gitlab.com? || Gitlab.staging?
projects_table = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project.arel_table
hierarchy_cte_sql = Arel.sql(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(GROUP_ID))
......@@ -34,7 +34,7 @@ class BackfillProjectNamespacesForGroup < Gitlab::Database::Migration[1.0]
end
def down
return unless Gitlab.com?
return unless Gitlab.com? || Gitlab.staging?
Gitlab::Database::BackgroundMigration::BatchedMigration
.for_configuration(MIGRATION, :projects, :id, [GROUP_ID, 'up']).delete_all
......
......@@ -11,17 +11,20 @@ module Gitlab
class BackfillProjectNamespacePerGroupBatchingStrategy < PrimaryKeyBatchingStrategy
# Finds and returns the next batch in the table.
#
# table_name - The table to batch over
# column_name - The column to batch over
# batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch
def next_batch(batch_min_value:, batch_size:)
# job_arguments - The migration job arguments
def next_batch(table_name, column_name, batch_min_value:, batch_size:, job_arguments:)
next_batch_bounds = nil
model_class = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models::Project
quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
quoted_column_name = model_class.connection.quote_column_name(column_name)
projects_table = model_class.arel_table
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(batched_migration.job_arguments.first))
hierarchy_cte_sql = Arel::Nodes::SqlLiteral.new(::Gitlab::BackgroundMigration::ProjectNamespaces::BackfillProjectNamespaces.hierarchy_cte(job_arguments.first))
relation = model_class.where(projects_table[:namespace_id].in(hierarchy_cte_sql)).where("#{quoted_column_name} >= ?", batch_min_value)
relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
......
......@@ -11,22 +11,21 @@ module Gitlab
class PrimaryKeyBatchingStrategy
include Gitlab::Database::DynamicModelHelpers
def initialize(batched_migration)
@batched_migration = batched_migration
end
# Finds and returns the next batch in the table.
#
# table_name - The table to batch over
# column_name - The column to batch over
# batch_min_value - The minimum value which the next batch will start at
# batch_size - The size of the next batch
def next_batch(batch_min_value:, batch_size:)
model_class = define_batchable_model(batched_migration.table_name)
# job_arguments - The migration job arguments
def next_batch(table_name, column_name, batch_min_value:, batch_size:, job_arguments:)
model_class = define_batchable_model(table_name)
quoted_column_name = model_class.connection.quote_column_name(batched_migration.column_name)
quoted_column_name = model_class.connection.quote_column_name(column_name)
relation = model_class.where("#{quoted_column_name} >= ?", batch_min_value)
next_batch_bounds = nil
relation.each_batch(of: batch_size, column: batched_migration.column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
relation.each_batch(of: batch_size, column: column_name) do |batch| # rubocop:disable Lint/UnreachableLoop
next_batch_bounds = batch.pluck(Arel.sql("MIN(#{quoted_column_name}), MAX(#{quoted_column_name})")).first
break
......@@ -34,10 +33,6 @@ module Gitlab
next_batch_bounds
end
private
attr_reader :batched_migration
end
end
end
......
......@@ -7,7 +7,7 @@ module Gitlab
#
# rubocop: disable Metrics/ClassLength
class BackfillProjectNamespaces
SUB_BATCH_SIZE = 100
SUB_BATCH_SIZE = 25
PROJECT_NAMESPACE_STI_NAME = 'Project'
IsolatedModels = ::Gitlab::BackgroundMigration::ProjectNamespaces::Models
......@@ -34,6 +34,9 @@ module Gitlab
def backfill_project_namespaces(namespace_id)
project_ids.each_slice(sub_batch_size) do |project_ids|
ActiveRecord::Base.connection.execute("select gin_clean_pending_list('index_namespaces_on_name_trigram')")
ActiveRecord::Base.connection.execute("select gin_clean_pending_list('index_namespaces_on_path_trigram')")
# We need to lock these project records for the period when we create project namespaces
# and link them to projects so that if a project is modified in the time between creating
# project namespaces `batch_insert_namespaces` and linking them to projects `batch_update_projects`
......@@ -41,13 +44,12 @@ module Gitlab
#
# see https://gitlab.com/gitlab-org/gitlab/-/merge_requests/72527#note_730679469
Project.transaction do
Project.where(id: project_ids).select(:id).lock!('FOR UPDATE')
Project.where(id: project_ids).select(:id).lock!('FOR UPDATE').load
batch_insert_namespaces(project_ids)
batch_update_projects(project_ids)
batch_update_project_namespaces_traversal_ids(project_ids)
end
batch_update_project_namespaces_traversal_ids(project_ids)
end
end
......
......@@ -51,7 +51,10 @@ module Gitlab
raise 'Job cannot be split further' if new_batch_size < 1
next_batch_bounds = batched_migration.batching_strategy.next_batch(
batching_strategy = batched_migration.batch_class.new
next_batch_bounds = batching_strategy.next_batch(
batched_migration.table_name,
batched_migration.column_name,
batch_min_value: min_value,
batch_size: new_batch_size
)
......
......@@ -93,10 +93,6 @@ module Gitlab
"#{BATCH_CLASS_MODULE}::#{batch_class_name}".constantize
end
def batching_strategy
@batching_strategy ||= batch_class.new(self)
end
def job_class_name=(class_name)
write_attribute(:job_class_name, class_name.delete_prefix("::"))
end
......@@ -137,7 +133,7 @@ module Gitlab
end
def optimize!
::Gitlab::Database::BackgroundMigration::BatchOptimizer.new(self).optimize!
BatchOptimizer.new(self).optimize!
end
private
......
......@@ -88,12 +88,15 @@ module Gitlab
end
def find_next_batch_range(active_migration)
batching_strategy = active_migration.batch_class.new
batch_min_value = active_migration.next_min_value
next_batch_bounds = active_migration.batching_strategy.next_batch(
next_batch_bounds = batching_strategy.next_batch(
active_migration.table_name,
active_migration.column_name,
batch_min_value: batch_min_value,
batch_size: active_migration.batch_size
)
batch_size: active_migration.batch_size,
job_arguments: active_migration.job_arguments)
return if next_batch_bounds.nil?
......
......@@ -56,7 +56,7 @@ RSpec.describe "Admin > Admin sees background migrations" do
context 'when there are failed migrations' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
......@@ -15,30 +15,13 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectN
let!(:project2) { projects.create!(name: 'project2', path: 'project2', namespace_id: namespace2.id, visibility_level: 20) }
let!(:project3) { projects.create!(name: 'project3', path: 'project3', namespace_id: namespace3.id, visibility_level: 20) }
let!(:project4) { projects.create!(name: 'project4', path: 'project4', namespace_id: namespace3.id, visibility_level: 20) }
let!(:batching_strategy) { described_class.new }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: Project.maximum(:id),
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'ProjectNamespaces::BackfillProjectNamespaces',
table_name: :projects,
column_name: :id,
total_tuple_count: nil,
job_arguments: [namespace1.id, 'up']
)
end
let!(:batching_strategy) { described_class.new(batched_migration) }
let(:job_arguments) { [namespace1.id, 'up'] }
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project1.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:projects, :id, batch_min_value: project1.id, batch_size: 3, job_arguments: job_arguments)
expect(batch_bounds).to match_array([project1.id, project3.id])
end
......@@ -46,7 +29,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectN
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project2.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:projects, :id, batch_min_value: project2.id, batch_size: 3, job_arguments: job_arguments)
expect(batch_bounds).to match_array([project2.id, project4.id])
end
......@@ -54,7 +37,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectN
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:projects, :id, batch_min_value: project4.id, batch_size: 3, job_arguments: job_arguments)
expect(batch_bounds).to match_array([project4.id, project4.id])
end
......@@ -62,7 +45,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::BackfillProjectN
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(batch_min_value: project4.id + 1, batch_size: 1)
batch_bounds = batching_strategy.next_batch(:projects, :id, batch_min_value: project4.id + 1, batch_size: 1, job_arguments: job_arguments)
expect(batch_bounds).to be_nil
end
......
......@@ -3,35 +3,17 @@
require 'spec_helper'
RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy, '#next_batch' do
let(:batching_strategy) { described_class.new }
let(:namespaces) { table(:namespaces) }
let(:background_migrations) { table(:batched_background_migrations) }
let(:batching_strategy) { described_class.new(batched_migration) }
let!(:namespace1) { namespaces.create!(name: 'batchtest1', path: 'batch-test1') }
let!(:namespace2) { namespaces.create!(name: 'batchtest2', path: 'batch-test2') }
let!(:namespace3) { namespaces.create!(name: 'batchtest3', path: 'batch-test3') }
let!(:namespace4) { namespaces.create!(name: 'batchtest4', path: 'batch-test4') }
let!(:batched_migration) do
background_migrations.create!(
created_at: Time.current,
updated_at: Time.current,
min_value: 1,
max_value: 10_000,
batch_size: 1_000,
sub_batch_size: 100,
interval: 120,
status: 0,
job_class_name: 'Foo',
table_name: :namespaces,
column_name: :id,
total_tuple_count: nil
)
end
context 'when starting on the first batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace1.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace1.id, batch_size: 3, job_arguments: nil)
expect(batch_bounds).to eq([namespace1.id, namespace3.id])
end
......@@ -39,7 +21,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when additional batches remain' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace2.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace2.id, batch_size: 3, job_arguments: nil)
expect(batch_bounds).to eq([namespace2.id, namespace4.id])
end
......@@ -47,7 +29,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when on the final batch' do
it 'returns the bounds of the next batch' do
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id, batch_size: 3)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id, batch_size: 3, job_arguments: nil)
expect(batch_bounds).to eq([namespace4.id, namespace4.id])
end
......@@ -55,7 +37,7 @@ RSpec.describe Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchi
context 'when no additional batches remain' do
it 'returns nil' do
batch_bounds = batching_strategy.next_batch(batch_min_value: namespace4.id + 1, batch_size: 1)
batch_bounds = batching_strategy.next_batch(:namespaces, :id, batch_min_value: namespace4.id + 1, batch_size: 1, job_arguments: nil)
expect(batch_bounds).to be_nil
end
......
......@@ -135,7 +135,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when job can be split' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......@@ -195,7 +195,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedJob, type: :model d
context 'when computed midpoint is larger than the max value of the batch' do
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 16])
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 16])
end
end
......
......@@ -272,7 +272,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
before do
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
......@@ -16,7 +16,7 @@ RSpec.describe Admin::BackgroundMigrationsController, :enable_admin_mode do
create(:batched_background_migration_job, batched_migration: migration, batch_size: 10, min_value: 6, max_value: 15, status: :failed, attempts: 3)
allow_next_instance_of(Gitlab::BackgroundMigration::BatchingStrategies::PrimaryKeyBatchingStrategy) do |batch_class|
allow(batch_class).to receive(:next_batch).with(batch_min_value: 6, batch_size: 5).and_return([6, 10])
allow(batch_class).to receive(:next_batch).with(anything, anything, batch_min_value: 6, batch_size: 5).and_return([6, 10])
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment