Commit 61e8fc2c authored by Andreas Brandl

Change batch size depending on how far off we are

Assumption: Time efficiency directly correlates with batch size.

Tune batch size down/up so that, under this assumption, the new batch
size theoretically yields the target time efficiency.
parent 0052257c
......@@ -17,22 +17,26 @@ module Gitlab
class BatchOptimizer
# Target time efficiency for a job
# Time efficiency is defined as: job duration / interval
TARGET_EFFICIENCY = (0.8..0.98).freeze
TARGET_EFFICIENCY = (0.9..0.95).freeze
# Lower and upper bound for the batch size
ALLOWED_BATCH_SIZE = (1_000..1_000_000).freeze
ALLOWED_BATCH_SIZE = (1_000..2_000_000).freeze
# Use this batch_size multiplier to increase batch size
INCREASE_MULTIPLIER = 1.1
# Limit for the multiplier of the batch size
MAX_MULTIPLIER = 1.2
# Use this batch_size multiplier to decrease batch size
DECREASE_MULTIPLIER = 0.8
# When smoothing time efficiency, use this many jobs
NUMBER_OF_JOBS = 20
attr_reader :migration, :number_of_jobs
# Smoothing factor for exponential moving average
EMA_ALPHA = 0.4
def initialize(migration, number_of_jobs: 10)
attr_reader :migration, :number_of_jobs, :ema_alpha
def initialize(migration, number_of_jobs: NUMBER_OF_JOBS, ema_alpha: EMA_ALPHA)
@migration = migration
@number_of_jobs = number_of_jobs
@ema_alpha = ema_alpha
end
def optimize!
......@@ -47,20 +51,15 @@ module Gitlab
private
def batch_size_multiplier
efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs)
efficiency = migration.smoothed_time_efficiency(number_of_jobs: number_of_jobs, alpha: ema_alpha)
return unless efficiency
return if efficiency.nil? || efficiency == 0
if TARGET_EFFICIENCY.include?(efficiency)
# We hit the range - no change
nil
elsif efficiency > TARGET_EFFICIENCY.max
# We're above the range - decrease by 20%
DECREASE_MULTIPLIER
else
# We're below the range - increase by 10%
INCREASE_MULTIPLIER
end
return if TARGET_EFFICIENCY.include?(efficiency)
# Assumption: time efficiency is linear in the batch size
[TARGET_EFFICIENCY.max / efficiency, MAX_MULTIPLIER].min
end
end
end
......
......@@ -4,16 +4,19 @@ require 'spec_helper'
RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
describe '#optimize' do
subject { described_class.new(migration, number_of_jobs: number_of_jobs).optimize! }
subject { described_class.new(migration, number_of_jobs: number_of_jobs, ema_alpha: ema_alpha).optimize! }
let(:migration) { create(:batched_background_migration, batch_size: batch_size, sub_batch_size: 100, interval: 120) }
let(:batch_size) { 10_000 }
let_it_be(:number_of_jobs) { 5 }
let_it_be(:ema_alpha) { 0.4 }
let_it_be(:target_efficiency) { described_class::TARGET_EFFICIENCY.max }
def mock_efficiency(eff)
expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs).and_return(eff)
expect(migration).to receive(:smoothed_time_efficiency).with(number_of_jobs: number_of_jobs, alpha: ema_alpha).and_return(eff)
end
it 'with unknown time efficiency, it keeps the batch size' do
......@@ -34,25 +37,55 @@ RSpec.describe Gitlab::Database::BackgroundMigration::BatchOptimizer do
expect { subject }.not_to change { migration.reload.batch_size }
end
it 'with a time efficiency of 70%, it increases the batch size by 10%' do
mock_efficiency(0.7)
it 'with a time efficiency of 85%, it increases the batch size' do
time_efficiency = 0.85
mock_efficiency(time_efficiency)
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(11_000)
new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
it 'with a time efficiency of 110%, it decreases the batch size by 20%' do
mock_efficiency(1.1)
it 'with a time efficiency of 110%, it decreases the batch size' do
time_efficiency = 1.1
mock_efficiency(time_efficiency)
new_batch_size = ((target_efficiency / time_efficiency) * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
expect { subject }.to change { migration.reload.batch_size }.from(10_000).to(8_000)
context 'reaching the upper limit for an increase' do
it 'caps the batch size multiplier at 20% when increasing' do
time_efficiency = 0.1 # this would result in a factor of 10 if not limited
mock_efficiency(time_efficiency)
new_batch_size = (1.2 * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
it 'does not limit the decrease multiplier' do
time_efficiency = 10
mock_efficiency(time_efficiency)
new_batch_size = (0.1 * batch_size).to_i
expect { subject }.to change { migration.reload.batch_size }.from(batch_size).to(new_batch_size)
end
end
context 'reaching the upper limit for the batch size' do
let(:batch_size) { 950_000 }
let(:batch_size) { 1_950_000 }
it 'caps the batch size at 10M' do
mock_efficiency(0.7)
expect { subject }.to change { migration.reload.batch_size }.to(1_000_000)
expect { subject }.to change { migration.reload.batch_size }.to(2_000_000)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment