Commit 732492be authored by Patrick Bair's avatar Patrick Bair Committed by Yannis Roussos

Make migration pause time a configurable value

Make the number of seconds to sleep in a batched_background_migration be
a configurable value. The pause_ms column is added to the database
to store this information, and it is now passed through to each job.
parent 00ec9b69
---
title: Add pause_ms column to batched_background_migrations and batched_background_migration_jobs
merge_request: 58583
author:
type: changed
# frozen_string_literal: true
class AddPauseSecondsToBatchedBackgroundMigrations < ActiveRecord::Migration[6.0]
def change
add_column :batched_background_migrations, :pause_ms, :integer, null: false, default: 100
end
end
# frozen_string_literal: true
class AddPauseSecondsToBatchedBackgroundMigrationJobs < ActiveRecord::Migration[6.0]
def change
add_column :batched_background_migration_jobs, :pause_ms, :integer, null: false, default: 100
end
end
7bb8be1616a61b12392bc5ff4d716123bc605d9753744c04a23f9258bab25af6
\ No newline at end of file
197930adaf08e3d22d54309d1cc0605bc4d6843409a38f8e0cc9ce9842ec1816
\ No newline at end of file
......@@ -9824,7 +9824,8 @@ CREATE TABLE batched_background_migration_jobs (
sub_batch_size integer NOT NULL,
status smallint DEFAULT 0 NOT NULL,
attempts smallint DEFAULT 0 NOT NULL,
metrics jsonb DEFAULT '{}'::jsonb NOT NULL
metrics jsonb DEFAULT '{}'::jsonb NOT NULL,
pause_ms integer DEFAULT 100 NOT NULL
);
CREATE SEQUENCE batched_background_migration_jobs_id_seq
......@@ -9852,6 +9853,7 @@ CREATE TABLE batched_background_migrations (
column_name text NOT NULL,
job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL,
total_tuple_count bigint,
pause_ms integer DEFAULT 100 NOT NULL,
CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)),
CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)),
CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)),
......@@ -16,8 +16,6 @@ module Gitlab
class CopyColumnUsingBackgroundMigrationJob
include Gitlab::Database::DynamicModelHelpers
PAUSE_SECONDS = 0.1
# start_id - The start ID of the range of rows to update.
# end_id - The end ID of the range of rows to update.
# batch_table - The name of the table that contains the columns.
......@@ -25,9 +23,10 @@ module Gitlab
# sub_batch_size - We don't want updates to take more than ~100ms
# This allows us to run multiple smaller batches during
# the minimum 2.minute interval that we can schedule jobs
# pause_ms - The number of milliseconds to sleep between each subbatch execution.
# copy_from - List of columns containing the data to copy.
# copy_to - List of columns to copy the data to. Order must match the order in `copy_from`.
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, copy_from, copy_to)
def perform(start_id, end_id, batch_table, batch_column, sub_batch_size, pause_ms, copy_from, copy_to)
copy_from = Array.wrap(copy_from)
copy_to = Array.wrap(copy_to)
......@@ -42,7 +41,8 @@ module Gitlab
sub_batch.update_all(assignment_clauses)
end
sleep(PAUSE_SECONDS)
pause_ms = 0 if pause_ms < 0
sleep(pause_ms * 0.001)
end
end
......
......@@ -35,7 +35,13 @@ module Gitlab
end
def create_batched_job!(min, max)
batched_jobs.create!(min_value: min, max_value: max, batch_size: batch_size, sub_batch_size: sub_batch_size)
batched_jobs.create!(
min_value: min,
max_value: max,
batch_size: batch_size,
sub_batch_size: sub_batch_size,
pause_ms: pause_ms
)
end
def next_min_value
......
......@@ -43,6 +43,7 @@ module Gitlab
tracking_record.migration_table_name,
tracking_record.migration_column_name,
tracking_record.sub_batch_size,
tracking_record.pause_ms,
*tracking_record.migration_job_arguments)
if job_instance.respond_to?(:batch_metrics)
......
......@@ -8,5 +8,6 @@ FactoryBot.define do
max_value { 10 }
batch_size { 5 }
sub_batch_size { 1 }
pause_ms { 100 }
end
end
......@@ -10,5 +10,6 @@ FactoryBot.define do
table_name { :events }
column_name { :id }
total_tuple_count { 10_000 }
pause_ms { 100 }
end
end
......@@ -6,6 +6,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
let(:table_name) { :copy_primary_key_test }
let(:test_table) { table(table_name) }
let(:sub_batch_size) { 1000 }
let(:pause_ms) { 0 }
before do
ActiveRecord::Base.connection.execute(<<~SQL)
......@@ -34,13 +35,13 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
SQL
end
subject { described_class.new }
subject(:copy_columns) { described_class.new }
describe '#perform' do
let(:migration_class) { described_class.name }
it 'copies all primary keys in range' do
subject.perform(12, 15, table_name, 'id', sub_batch_size, 'id', 'id_convert_to_bigint')
copy_columns.perform(12, 15, table_name, 'id', sub_batch_size, pause_ms, 'id', 'id_convert_to_bigint')
expect(test_table.where('id = id_convert_to_bigint').pluck(:id)).to contain_exactly(12, 15)
expect(test_table.where(id_convert_to_bigint: 0).pluck(:id)).to contain_exactly(11, 19)
......@@ -48,7 +49,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
end
it 'copies all foreign keys in range' do
subject.perform(10, 14, table_name, 'id', sub_batch_size, 'fk', 'fk_convert_to_bigint')
copy_columns.perform(10, 14, table_name, 'id', sub_batch_size, pause_ms, 'fk', 'fk_convert_to_bigint')
expect(test_table.where('fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12)
expect(test_table.where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(15, 19)
......@@ -58,7 +59,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
it 'copies columns with NULLs' do
expect(test_table.where("name_convert_to_text = 'no name'").count).to eq(4)
subject.perform(10, 20, table_name, 'id', sub_batch_size, 'name', 'name_convert_to_text')
copy_columns.perform(10, 20, table_name, 'id', sub_batch_size, pause_ms, 'name', 'name_convert_to_text')
expect(test_table.where('name = name_convert_to_text').pluck(:id)).to contain_exactly(11, 12, 19)
expect(test_table.where('name is NULL and name_convert_to_text is NULL').pluck(:id)).to contain_exactly(15)
......@@ -69,7 +70,7 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
columns_to_copy_from = %w[id fk]
columns_to_copy_to = %w[id_convert_to_bigint fk_convert_to_bigint]
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
subject.perform(10, 15, table_name, 'id', sub_batch_size, pause_ms, columns_to_copy_from, columns_to_copy_to)
expect(test_table.where('id = id_convert_to_bigint AND fk = fk_convert_to_bigint').pluck(:id)).to contain_exactly(11, 12, 15)
expect(test_table.where(id_convert_to_bigint: 0).where(fk_convert_to_bigint: 0).pluck(:id)).to contain_exactly(19)
......@@ -81,16 +82,34 @@ RSpec.describe Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJo
columns_to_copy_to = %w[id_convert_to_bigint]
expect do
subject.perform(10, 15, table_name, 'id', sub_batch_size, columns_to_copy_from, columns_to_copy_to)
subject.perform(10, 15, table_name, 'id', sub_batch_size, pause_ms, columns_to_copy_from, columns_to_copy_to)
end.to raise_error(ArgumentError, 'number of source and destination columns must match')
end
it 'tracks timings of queries' do
expect(subject.batch_metrics.timings).to be_empty
expect(copy_columns.batch_metrics.timings).to be_empty
subject.perform(10, 20, table_name, 'id', sub_batch_size, 'name', 'name_convert_to_text')
copy_columns.perform(10, 20, table_name, 'id', sub_batch_size, pause_ms, 'name', 'name_convert_to_text')
expect(subject.batch_metrics.timings[:update_all]).not_to be_empty
expect(copy_columns.batch_metrics.timings[:update_all]).not_to be_empty
end
context 'pause interval between sub-batches' do
it 'sleeps for the specified time between sub-batches' do
sub_batch_size = 2
expect(copy_columns).to receive(:sleep).with(0.005)
copy_columns.perform(10, 12, table_name, 'id', sub_batch_size, 5, 'name', 'name_convert_to_text')
end
it 'treats negative values as 0' do
sub_batch_size = 2
expect(copy_columns).to receive(:sleep).with(0)
copy_columns.perform(10, 12, table_name, 'id', sub_batch_size, -5, 'name', 'name_convert_to_text')
end
end
end
end
......@@ -119,7 +119,13 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
end
describe '#create_batched_job!' do
let(:batched_migration) { create(:batched_background_migration) }
let(:batched_migration) do
create(:batched_background_migration,
batch_size: 999,
sub_batch_size: 99,
pause_ms: 250
)
end
it 'creates a batched_job with the correct batch configuration' do
batched_job = batched_migration.create_batched_job!(1, 5)
......@@ -128,7 +134,9 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigration, type: :m
min_value: 1,
max_value: 5,
batch_size: batched_migration.batch_size,
sub_batch_size: batched_migration.sub_batch_size)
sub_batch_size: batched_migration.sub_batch_size,
pause_ms: 250
)
end
end
......
......@@ -7,9 +7,16 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
let(:job_class) { Gitlab::BackgroundMigration::CopyColumnUsingBackgroundMigrationJob }
let_it_be(:pause_ms) { 250 }
let_it_be(:active_migration) { create(:batched_background_migration, :active, job_arguments: [:id, :other_id]) }
let!(:job_record) { create(:batched_background_migration_job, batched_migration: active_migration) }
let!(:job_record) do
create(:batched_background_migration_job,
batched_migration: active_migration,
pause_ms: pause_ms
)
end
let(:job_instance) { double('job instance', batch_metrics: {}) }
before do
......@@ -17,7 +24,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
end
it 'runs the migration job' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id')
subject
end
......@@ -98,7 +105,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
context 'when the migration job does not raise an error' do
it 'marks the tracking record as succeeded' do
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, 'id', 'other_id')
expect(job_instance).to receive(:perform).with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id')
freeze_time do
subject
......@@ -115,7 +122,7 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper, '
shared_examples 'an error is raised' do |error_class|
it 'marks the tracking record as failed' do
expect(job_instance).to receive(:perform)
.with(1, 10, 'events', 'id', 1, 'id', 'other_id')
.with(1, 10, 'events', 'id', 1, pause_ms, 'id', 'other_id')
.and_raise(error_class)
freeze_time do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment