Commit c0c501f9 authored by Imre Farkas's avatar Imre Farkas

Merge branch 'jp-batch-optimizer' into 'master'

Add max_batch_size to batched migrations table

See merge request gitlab-org/gitlab!79848
parents 3c1cf62c d40b94a4
# frozen_string_literal: true
class AddBatchedMigrationMaxBatch < Gitlab::Database::Migration[1.0]
def change
add_column :batched_background_migrations, :max_batch_size, :integer
end
end
87cccb30bb6f27a1acb0dd0cb905040e2a0291cefc5f26bb9a08c8be18034ca3
\ No newline at end of file
...@@ -11075,6 +11075,7 @@ CREATE TABLE batched_background_migrations ( ...@@ -11075,6 +11075,7 @@ CREATE TABLE batched_background_migrations (
job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL, job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL,
total_tuple_count bigint, total_tuple_count bigint,
pause_ms integer DEFAULT 100 NOT NULL, pause_ms integer DEFAULT 100 NOT NULL,
max_batch_size integer,
CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)), CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)),
CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)), CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)),
CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)), CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)),
...@@ -20,7 +20,8 @@ module Gitlab ...@@ -20,7 +20,8 @@ module Gitlab
TARGET_EFFICIENCY = (0.9..0.95).freeze TARGET_EFFICIENCY = (0.9..0.95).freeze
# Lower and upper bound for the batch size # Lower and upper bound for the batch size
ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze MIN_BATCH_SIZE = 1_000
MAX_BATCH_SIZE = 2_000_000
# Limit for the multiplier of the batch size # Limit for the multiplier of the batch size
MAX_MULTIPLIER = 1.2 MAX_MULTIPLIER = 1.2
...@@ -43,7 +44,8 @@ module Gitlab ...@@ -43,7 +44,8 @@ module Gitlab
return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml) return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml)
if multiplier = batch_size_multiplier if multiplier = batch_size_multiplier
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE) max_batch = migration.max_batch_size || MAX_BATCH_SIZE
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(MIN_BATCH_SIZE, max_batch)
migration.save! migration.save!
end end
end end
......
...@@ -66,6 +66,7 @@ module Gitlab ...@@ -66,6 +66,7 @@ module Gitlab
batch_max_value: nil, batch_max_value: nil,
batch_class_name: BATCH_CLASS_NAME, batch_class_name: BATCH_CLASS_NAME,
batch_size: BATCH_SIZE, batch_size: BATCH_SIZE,
max_batch_size: nil,
sub_batch_size: SUB_BATCH_SIZE sub_batch_size: SUB_BATCH_SIZE
) )
...@@ -86,7 +87,7 @@ module Gitlab ...@@ -86,7 +87,7 @@ module Gitlab
migration_status = batch_max_value.nil? ? :finished : :active migration_status = batch_max_value.nil? ? :finished : :active
batch_max_value ||= batch_min_value batch_max_value ||= batch_min_value
migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!( migration = Gitlab::Database::BackgroundMigration::BatchedMigration.new(
job_class_name: job_class_name, job_class_name: job_class_name,
table_name: batch_table_name, table_name: batch_table_name,
column_name: batch_column_name, column_name: batch_column_name,
...@@ -97,19 +98,28 @@ module Gitlab ...@@ -97,19 +98,28 @@ module Gitlab
batch_class_name: batch_class_name, batch_class_name: batch_class_name,
batch_size: batch_size, batch_size: batch_size,
sub_batch_size: sub_batch_size, sub_batch_size: sub_batch_size,
status: migration_status) status: migration_status
)
# This guard is necessary since #total_tuple_count was only introduced schema-wise, # Below `BatchedMigration` attributes were introduced after the
# after this migration helper had been used for the first time. # initial `batched_background_migrations` table was created, so any
return migration unless migration.respond_to?(:total_tuple_count) # migrations that ran relying on initial table schema would not know
# about columns introduced later on because this model is not
# isolated in migrations, which is why we need to check for existence
# of these columns first.
if migration.respond_to?(:max_batch_size)
migration.max_batch_size = max_batch_size
end
if migration.respond_to?(:total_tuple_count)
# We keep track of the estimated number of tuples to reason later # We keep track of the estimated number of tuples to reason later
# about the overall progress of a migration. # about the overall progress of a migration.
migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do
Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
end end
migration.save! end
migration.save!
migration migration
end end
end end
......
...@@ -6,7 +6,11 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do ...@@ -6,7 +6,11 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
describe '#optimize' do describe '#optimize' do
subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! } subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! }
let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) } let(:migration_params) { {} }
let(:migration) do
params = { batch_size: batch_size, sub_batch_size: 100, interval: 120 }.merge(migration_params)
create(:batched_background_migration, params)
end
let(:batch_size) { 10_000 } let(:batch_size) { 10_000 }
...@@ -87,6 +91,17 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do ...@@ -87,6 +91,17 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
expect { subject }.to change { migration.reload.batch_size }.to(2_000_000) expect { subject }.to change { migration.reload.batch_size }.to(2_000_000)
end end
context 'when max_batch_size is set' do
let(:max_batch_size) { 10000 }
let(:migration_params) { { max_batch_size: max_batch_size } }
it 'caps the batch size at max_batch_size' do
mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.to(max_batch_size)
end
end
end end
context 'reaching the lower limit for the batch size' do context 'reaching the lower limit for the batch size' do
......
...@@ -59,6 +59,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d ...@@ -59,6 +59,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d
batch_max_value: 1000, batch_max_value: 1000,
batch_class_name: 'MyBatchClass', batch_class_name: 'MyBatchClass',
batch_size: 100, batch_size: 100,
max_batch_size: 10000,
sub_batch_size: 10) sub_batch_size: 10)
end.to change { Gitlab::Database::BackgroundMigration::BatchedMigration.count }.by(1) end.to change { Gitlab::Database::BackgroundMigration::BatchedMigration.count }.by(1)
...@@ -71,6 +72,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d ...@@ -71,6 +72,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d
max_value: 1000, max_value: 1000,
batch_class_name: 'MyBatchClass', batch_class_name: 'MyBatchClass',
batch_size: 100, batch_size: 100,
max_batch_size: 10000,
sub_batch_size: 10, sub_batch_size: 10,
job_arguments: %w[], job_arguments: %w[],
status: 'active', status: 'active',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment