Commit c0c501f9 authored by Imre Farkas's avatar Imre Farkas

Merge branch 'jp-batch-optimizer' into 'master'

Add max_batch_size to batched migrations table

See merge request gitlab-org/gitlab!79848
parents 3c1cf62c d40b94a4
# frozen_string_literal: true
class AddBatchedMigrationMaxBatch < Gitlab::Database::Migration[1.0]
def change
add_column :batched_background_migrations, :max_batch_size, :integer
end
end
87cccb30bb6f27a1acb0dd0cb905040e2a0291cefc5f26bb9a08c8be18034ca3
\ No newline at end of file
......@@ -11075,6 +11075,7 @@ CREATE TABLE batched_background_migrations (
job_arguments jsonb DEFAULT '"[]"'::jsonb NOT NULL,
total_tuple_count bigint,
pause_ms integer DEFAULT 100 NOT NULL,
max_batch_size integer,
CONSTRAINT check_5bb0382d6f CHECK ((char_length(column_name) <= 63)),
CONSTRAINT check_6b6a06254a CHECK ((char_length(table_name) <= 63)),
CONSTRAINT check_batch_size_in_range CHECK ((batch_size >= sub_batch_size)),
......@@ -20,7 +20,8 @@ module Gitlab
TARGET_EFFICIENCY = (0.9..0.95).freeze
# Lower and upper bound for the batch size
ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
MIN_BATCH_SIZE = 1_000
MAX_BATCH_SIZE = 2_000_000
# Limit for the multiplier of the batch size
MAX_MULTIPLIER = 1.2
......@@ -43,7 +44,8 @@ module Gitlab
return unless Feature.enabled?(:optimize_batched_migrations, type: :ops, default_enabled: :yaml)
if multiplier = batch_size_multiplier
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(ALLOWED_BATCH_SIZE)
max_batch = migration.max_batch_size || MAX_BATCH_SIZE
migration.batch_size = (migration.batch_size * multiplier).to_i.clamp(MIN_BATCH_SIZE, max_batch)
migration.save!
end
end
......
......@@ -66,6 +66,7 @@ module Gitlab
batch_max_value: nil,
batch_class_name: BATCH_CLASS_NAME,
batch_size: BATCH_SIZE,
max_batch_size: nil,
sub_batch_size: SUB_BATCH_SIZE
)
......@@ -86,7 +87,7 @@ module Gitlab
migration_status = batch_max_value.nil? ? :finished : :active
batch_max_value ||= batch_min_value
migration = Gitlab::Database::BackgroundMigration::BatchedMigration.create!(
migration = Gitlab::Database::BackgroundMigration::BatchedMigration.new(
job_class_name: job_class_name,
table_name: batch_table_name,
column_name: batch_column_name,
......@@ -97,19 +98,28 @@ module Gitlab
batch_class_name: batch_class_name,
batch_size: batch_size,
sub_batch_size: sub_batch_size,
status: migration_status)
status: migration_status
)
# This guard is necessary since #total_tuple_count was only introduced schema-wise,
# after this migration helper had been used for the first time.
return migration unless migration.respond_to?(:total_tuple_count)
# Below `BatchedMigration` attributes were introduced after the
# initial `batched_background_migrations` table was created, so any
# migrations that ran relying on initial table schema would not know
# about columns introduced later on because this model is not
# isolated in migrations, which is why we need to check for existence
# of these columns first.
if migration.respond_to?(:max_batch_size)
migration.max_batch_size = max_batch_size
end
# We keep track of the estimated number of tuples to reason later
# about the overall progress of a migration.
migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do
Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
if migration.respond_to?(:total_tuple_count)
# We keep track of the estimated number of tuples to reason later
# about the overall progress of a migration.
migration.total_tuple_count = Gitlab::Database::SharedModel.using_connection(connection) do
Gitlab::Database::PgClass.for_table(batch_table_name)&.cardinality_estimate
end
end
migration.save!
migration.save!
migration
end
end
......
......@@ -6,7 +6,11 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
describe '#optimize' do
subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! }
let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) }
let(:migration_params) { {} }
let(:migration) do
params = { batch_size: batch_size, sub_batch_size: 100, interval: 120 }.merge(migration_params)
create(:batched_background_migration, params)
end
let(:batch_size) { 10_000 }
......@@ -87,6 +91,17 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
expect { subject }.to change { migration.reload.batch_size }.to(2_000_000)
end
context 'when max_batch_size is set' do
let(:max_batch_size) { 10000 }
let(:migration_params) { { max_batch_size: max_batch_size } }
it 'caps the batch size at max_batch_size' do
mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.to(max_batch_size)
end
end
end
context 'reaching the lower limit for the batch size' do
......
......@@ -59,6 +59,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d
batch_max_value: 1000,
batch_class_name: 'MyBatchClass',
batch_size: 100,
max_batch_size: 10000,
sub_batch_size: 10)
end.to change { Gitlab::Database::BackgroundMigration::BatchedMigration.count }.by(1)
......@@ -71,6 +72,7 @@ RSpec.describe Gitlab::Database::Migrations::BatchedBackgroundMigrationHelpers d
max_value: 1000,
batch_class_name: 'MyBatchClass',
batch_size: 100,
max_batch_size: 10000,
sub_batch_size: 10,
job_arguments: %w[],
status: 'active',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment