Commit 1bbc79f6 authored by dfrazao-gitlab

Support multiple databases for batched background migrations

- gitlab:background_migrations:finalize - supports multiple databases
- gitlab:background_migrations:status - supports multiple databases

Changelog: added

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/351585
parent 2b6c52ea
# frozen_string_literal: true # frozen_string_literal: true
# Value returned by DatabaseTasks.setup_initial_database_yaml; it is handed to
# DatabaseTasks.for_each below to define one rake task per database name.
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
namespace :gitlab do namespace :gitlab do
namespace :background_migrations do namespace :background_migrations do
desc 'Synchronously finish executing a batched background migration' desc 'Synchronously finish executing a batched background migration'
task :finalize, [:job_class_name, :table_name, :column_name, :job_arguments] => :environment do |_, args| task :finalize, [:job_class_name, :table_name, :column_name, :job_arguments] => :environment do |_, args|
[:job_class_name, :table_name, :column_name, :job_arguments].each do |argument| if Gitlab::Database.db_config_names.size > 1
unless args[argument] puts "Please specify the database".color(:red)
puts "Must specify #{argument} as an argument".color(:red) exit 1
exit 1
end
end end
Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.finalize( validate_finalization_arguments!(args)
main_model = Gitlab::Database.database_base_models[:main]
finalize_migration(
args[:job_class_name], args[:job_class_name],
args[:table_name], args[:table_name],
args[:column_name], args[:column_name],
Gitlab::Json.parse(args[:job_arguments]) Gitlab::Json.parse(args[:job_arguments]),
connection: main_model.connection
) )
end
puts "Done.".color(:green) namespace :finalize do
# Defines one finalize task for every database name yielded by for_each,
# except geo. Each task validates its arguments and then finalizes the
# migration through the connection of that database's base model.
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
  unless name.to_s == 'geo'
    desc "Gitlab | DB | Synchronously finish executing a batched background migration on #{name} database"
    task name, %i[job_class_name table_name column_name job_arguments] => :environment do |_, args|
      validate_finalization_arguments!(args)

      parsed_job_arguments = Gitlab::Json.parse(args[:job_arguments])
      base_model = Gitlab::Database.database_base_models[name]

      finalize_migration(
        args[:job_class_name], args[:table_name], args[:column_name],
        parsed_job_arguments,
        connection: base_model.connection
      )
    end
  end
end
end end
desc 'Display the status of batched background migrations' desc 'Display the status of batched background migrations'
task status: :environment do task status: :environment do |_, args|
statuses = Gitlab::Database::BackgroundMigration::BatchedMigration.statuses Gitlab::Database.database_base_models.each do |name, model|
max_status_length = statuses.keys.map(&:length).max display_migration_status(name, model.connection)
format_string = "%-#{max_status_length}s | %s\n" end
end
Gitlab::Database::BackgroundMigration::BatchedMigration.find_each(batch_size: 100) do |migration|
identification_fields = [ namespace :status do
migration.job_class_name, ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
migration.table_name, next if name.to_s == 'geo'
migration.column_name,
migration.job_arguments.to_json desc "Gitlab | DB | Display the status of batched background migrations on #{name} database"
].join(',') task name => :environment do |_, args|
model = Gitlab::Database.database_base_models[name]
printf(format_string, migration.status, identification_fields) display_migration_status(name, model.connection)
end
end
end
private
# Synchronously finalizes the batched background migration identified by the
# given job class, table, column and job arguments, then prints "Done.".
#
# class_name    - job class name identifying the migration
# table_name    - table name identifying the migration
# column_name   - column name identifying the migration
# job_arguments - job arguments, either already decoded (as the tasks calling
#                 this helper pass them) or still a raw JSON String
# connection    - database connection forwarded to the runner
def finalize_migration(class_name, table_name, column_name, job_arguments, connection:)
  # The calling tasks already run Gitlab::Json.parse on the rake argument.
  # Decoding again would hand a non-String to the parser, so only values that
  # are still raw JSON Strings are parsed here.
  job_arguments = Gitlab::Json.parse(job_arguments) if job_arguments.is_a?(String)

  Gitlab::Database::BackgroundMigration::BatchedMigrationRunner.finalize(
    class_name,
    table_name,
    column_name,
    job_arguments,
    connection: connection
  )

  puts "Done.".color(:green)
end
# Prints a "Database: <name>" header followed by one "<status> | <fields>"
# line per batched background migration, all inside
# SharedModel.using_connection for the given connection.
#
# database_name - name shown in the header line
# connection    - connection passed to SharedModel.using_connection
def display_migration_status(database_name, connection)
  Gitlab::Database::SharedModel.using_connection(connection) do
    batched_migrations = Gitlab::Database::BackgroundMigration::BatchedMigration

    # Pad the status column to the longest known status name so rows align.
    max_status_length = batched_migrations.statuses.keys.map(&:length).max
    row_template = "%-#{max_status_length}s | %s\n"

    puts "Database: #{database_name}\n"

    batched_migrations.find_each(batch_size: 100) do |record|
      fields = [record.job_class_name, record.table_name, record.column_name]
      fields << record.job_arguments.to_json

      $stdout.write(format(row_template, record.status, fields.join(',')))
    end
  end
end
# Checks that every argument needed to finalize a migration was supplied.
# For the first of job_class_name, table_name, column_name or job_arguments
# that is missing from args, prints an error message and exits with status 1.
def validate_finalization_arguments!(args)
[:job_class_name, :table_name, :column_name, :job_arguments].each do |argument|
unless args[argument]
puts "Must specify #{argument} as an argument".color(:red)
exit 1
end
end end
end end
end end
......
...@@ -10,6 +10,16 @@ RSpec.describe 'gitlab:background_migrations namespace rake tasks' do ...@@ -10,6 +10,16 @@ RSpec.describe 'gitlab:background_migrations namespace rake tasks' do
describe 'finalize' do describe 'finalize' do
subject(:finalize_task) { run_rake_task('gitlab:background_migrations:finalize', *arguments) } subject(:finalize_task) { run_rake_task('gitlab:background_migrations:finalize', *arguments) }
let(:connection) { double(:connection) }
let(:main_model) { double(:model, connection: connection) }
let(:base_models) { { main: main_model } }
let(:databases) { [Gitlab::Database::MAIN_DATABASE_NAME] }
before do
allow(Gitlab::Database).to receive(:database_base_models).and_return(base_models)
allow(Gitlab::Database).to receive(:db_config_names).and_return(databases)
end
context 'without the proper arguments' do context 'without the proper arguments' do
let(:arguments) { %w[CopyColumnUsingBackgroundMigrationJob events id] } let(:arguments) { %w[CopyColumnUsingBackgroundMigrationJob events id] }
...@@ -26,24 +36,135 @@ RSpec.describe 'gitlab:background_migrations namespace rake tasks' do ...@@ -26,24 +36,135 @@ RSpec.describe 'gitlab:background_migrations namespace rake tasks' do
it 'finalizes the matching migration' do it 'finalizes the matching migration' do
expect(Gitlab::Database::BackgroundMigration::BatchedMigrationRunner).to receive(:finalize) expect(Gitlab::Database::BackgroundMigration::BatchedMigrationRunner).to receive(:finalize)
.with('CopyColumnUsingBackgroundMigrationJob', 'events', 'id', [%w[id1 id2]]) .with('CopyColumnUsingBackgroundMigrationJob', 'events', 'id', [%w[id1 id2]], connection: connection)
expect { finalize_task }.to output(/Done/).to_stdout expect { finalize_task }.to output(/Done/).to_stdout
end end
end end
context 'when multiple database feature is enabled' do
subject(:finalize_task) { run_rake_task("gitlab:background_migrations:finalize:#{ci_database_name}", *arguments) }
let(:ci_database_name) { Gitlab::Database::CI_DATABASE_NAME }
let(:ci_model) { double(:model, connection: connection) }
let(:base_models) { { 'main' => main_model, 'ci' => ci_model } }
let(:databases) { [Gitlab::Database::MAIN_DATABASE_NAME, ci_database_name] }
before do
skip_if_multiple_databases_not_setup
allow(Gitlab::Database).to receive(:database_base_models).and_return(base_models)
end
# No finalize task may be defined for the geo database, so invoking it must
# fail with Rake's unknown-task error. The task name has to be spelled exactly:
# with a stray character the lookup would fail even if a geo task existed.
it 'ignores geo' do
  expect { run_rake_task('gitlab:background_migrations:finalize:geo') }
    .to raise_error(RuntimeError).with_message(/Don't know how to build task/)
end
context 'without the proper arguments' do
let(:arguments) { %w[CopyColumnUsingBackgroundMigrationJob events id] }
it 'exits without finalizing the migration' do
expect(Gitlab::Database::BackgroundMigration::BatchedMigrationRunner).not_to receive(:finalize)
expect { finalize_task }.to output(/Must specify job_arguments as an argument/).to_stdout
.and raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
end
end
context 'with the proper arguments' do
let(:arguments) { %w[CopyColumnUsingBackgroundMigrationJob events id [["id1"\,"id2"]]] }
it 'finalizes the matching migration' do
expect(Gitlab::Database::BackgroundMigration::BatchedMigrationRunner).to receive(:finalize)
.with('CopyColumnUsingBackgroundMigrationJob', 'events', 'id', [%w[id1 id2]], connection: connection)
expect { finalize_task }.to output(/Done/).to_stdout
end
end
context 'when database name is not passed' do
it 'aborts the rake task' do
expect { run_rake_task('gitlab:background_migrations:finalize') }.to output(/Please specify the database/).to_stdout
.and raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
end
end
end
end end
describe 'status' do describe 'status' do
subject(:status_task) { run_rake_task('gitlab:background_migrations:status') } subject(:status_task) { run_rake_task('gitlab:background_migrations:status') }
let(:migration1) { create(:batched_background_migration, :finished, job_arguments: [%w[id1 id2]]) }
let(:migration2) { create(:batched_background_migration, :failed, job_arguments: []) }
let(:main_database_name) { Gitlab::Database::MAIN_DATABASE_NAME }
let(:model) { Gitlab::Database.database_base_models[main_database_name] }
let(:connection) { double(:connection) }
let(:base_models) { { 'main' => model } }
around do |example|
Gitlab::Database::SharedModel.using_connection(model.connection) do
example.run
end
end
it 'outputs the status of background migrations' do it 'outputs the status of background migrations' do
migration1 = create(:batched_background_migration, :finished, job_arguments: [%w[id1 id2]]) allow(Gitlab::Database).to receive(:database_base_models).and_return(base_models)
migration2 = create(:batched_background_migration, :failed, job_arguments: [])
expect { status_task }.to output(<<~OUTPUT).to_stdout expect { status_task }.to output(<<~OUTPUT).to_stdout
Database: #{main_database_name}
finished | #{migration1.job_class_name},#{migration1.table_name},#{migration1.column_name},[["id1","id2"]] finished | #{migration1.job_class_name},#{migration1.table_name},#{migration1.column_name},[["id1","id2"]]
failed | #{migration2.job_class_name},#{migration2.table_name},#{migration2.column_name},[] failed | #{migration2.job_class_name},#{migration2.table_name},#{migration2.column_name},[]
OUTPUT OUTPUT
end end
context 'when multiple database feature is enabled' do
before do
skip_if_multiple_databases_not_setup
end
context 'with a single database' do
subject(:status_task) { run_rake_task("gitlab:background_migrations:status:#{main_database_name}") }
it 'outputs the status of background migrations' do
expect { status_task }.to output(<<~OUTPUT).to_stdout
Database: #{main_database_name}
finished | #{migration1.job_class_name},#{migration1.table_name},#{migration1.column_name},[["id1","id2"]]
failed | #{migration2.job_class_name},#{migration2.table_name},#{migration2.column_name},[]
OUTPUT
end
it 'ignores geo' do
expect { run_rake_task('gitlab:background_migrations:status:geo') }
.to raise_error(RuntimeError).with_message(/Don't know how to build task/)
end
end
context 'with multiple databases' do
subject(:status_task) { run_rake_task('gitlab:background_migrations:status') }
let(:base_models) { { 'main' => main_model, 'ci' => ci_model } }
let(:main_model) { double(:model, connection: connection) }
let(:ci_model) { double(:model, connection: connection) }
it 'outputs the status for each database' do
allow(Gitlab::Database).to receive(:database_base_models).and_return(base_models)
expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(main_model.connection).and_yield
expect(Gitlab::Database::BackgroundMigration::BatchedMigration).to receive(:find_each).and_yield(migration1)
expect(Gitlab::Database::SharedModel).to receive(:using_connection).with(ci_model.connection).and_yield
expect(Gitlab::Database::BackgroundMigration::BatchedMigration).to receive(:find_each).and_yield(migration2)
expect { status_task }.to output(<<~OUTPUT).to_stdout
Database: main
finished | #{migration1.job_class_name},#{migration1.table_name},#{migration1.column_name},[["id1","id2"]]
Database: ci
failed | #{migration2.job_class_name},#{migration2.table_name},#{migration2.column_name},[]
OUTPUT
end
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment