Commit 2430c28f authored by Erick Bajao

Split backfill query to avoid cross joins

parent e3a99a1e
...@@ -6,6 +6,7 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1] ...@@ -6,6 +6,7 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1]
MIGRATION = 'BackfillProjectsWithCoverage' MIGRATION = 'BackfillProjectsWithCoverage'
DELAY_INTERVAL = 2.minutes DELAY_INTERVAL = 2.minutes
BATCH_SIZE = 10_000 BATCH_SIZE = 10_000
SUB_BATCH_SIZE = 100
disable_ddl_transaction! disable_ddl_transaction!
...@@ -20,7 +21,8 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1] ...@@ -20,7 +21,8 @@ class BackfillProjectsWithCoverage < ActiveRecord::Migration[6.1]
CiDailyBuildGroupReportResult, CiDailyBuildGroupReportResult,
MIGRATION, MIGRATION,
DELAY_INTERVAL, DELAY_INTERVAL,
batch_size: BATCH_SIZE batch_size: BATCH_SIZE,
other_job_arguments: [SUB_BATCH_SIZE]
) )
end end
......
# frozen_string_literal: true # frozen_string_literal: true
# Backfill project_ci_feature_usages for a range of projects with coverage module Gitlab
class Gitlab::BackgroundMigration::BackfillProjectsWithCoverage module BackgroundMigration
COVERAGE_ENUM_VALUE = 1 # Backfill project_ci_feature_usages for a range of projects with coverage
class BackfillProjectsWithCoverage
COVERAGE_ENUM_VALUE = 1
INSERT_DELAY_SECONDS = 0.1
def perform(start_id, end_id) def perform(start_id, end_id, sub_batch_size)
ActiveRecord::Base.connection.execute <<~SQL report_results = ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_ci_feature_usages (project_id, feature, default_branch) SELECT DISTINCT project_id, default_branch
SELECT DISTINCT project_id, #{COVERAGE_ENUM_VALUE} as feature, default_branch FROM ci_daily_build_group_report_results
FROM ci_daily_build_group_report_results WHERE id BETWEEN #{start_id} AND #{end_id}
WHERE id BETWEEN #{start_id} AND #{end_id} SQL
ON CONFLICT (project_id, feature, default_branch) DO NOTHING;
SQL report_results.to_a.in_groups_of(sub_batch_size, false) do |batch|
ActiveRecord::Base.connection.execute <<~SQL
INSERT INTO project_ci_feature_usages (project_id, feature, default_branch) VALUES
#{build_values(batch)}
ON CONFLICT (project_id, feature, default_branch) DO NOTHING;
SQL
sleep INSERT_DELAY_SECONDS
end
end
private
# Renders a sub-batch of result rows as the comma-separated tuple list that
# follows VALUES in the INSERT statement issued by #perform.
#
# Each row is read through its 'project_id' and 'default_branch' keys; the
# feature column is always COVERAGE_ENUM_VALUE. Values are interpolated as-is
# (no quoting is applied here).
#
# batch - Array of rows responding to #[] with String keys.
#
# Returns a String such as "(1, 1, true), (2, 1, false)"; empty for an empty batch.
def build_values(batch)
  tuples = batch.map { |row| "(#{row['project_id']}, #{COVERAGE_ENUM_VALUE}, #{row['default_branch']})" }
  tuples.join(', ')
end
end
end end
end end
...@@ -66,10 +66,12 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema ...@@ -66,10 +66,12 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema
default_branch: false, default_branch: false,
group_id: group.id group_id: group.id
) )
stub_const("#{described_class}::INSERT_DELAY_SECONDS", 0)
end end
it 'creates entries per project and default_branch combination in the given range', :aggregate_failures do it 'creates entries per project and default_branch combination in the given range', :aggregate_failures do
subject.perform(1, 4) subject.perform(1, 4, 2)
entries = project_ci_feature_usages.order('project_id ASC, default_branch DESC') entries = project_ci_feature_usages.order('project_id ASC, default_branch DESC')
...@@ -81,11 +83,11 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema ...@@ -81,11 +83,11 @@ RSpec.describe Gitlab::BackgroundMigration::BackfillProjectsWithCoverage, schema
context 'when an entry for the project and default branch combination already exists' do context 'when an entry for the project and default branch combination already exists' do
before do before do
subject.perform(1, 4) subject.perform(1, 4, 2)
end end
it 'does not create a new entry' do it 'does not create a new entry' do
expect { subject.perform(1, 4) }.not_to change { project_ci_feature_usages.count } expect { subject.perform(1, 4, 2) }.not_to change { project_ci_feature_usages.count }
end end
end end
end end
......
...@@ -17,6 +17,7 @@ RSpec.describe BackfillProjectsWithCoverage do ...@@ -17,6 +17,7 @@ RSpec.describe BackfillProjectsWithCoverage do
describe '#up' do describe '#up' do
before do before do
stub_const("#{described_class}::BATCH_SIZE", 2) stub_const("#{described_class}::BATCH_SIZE", 2)
stub_const("#{described_class}::SUB_BATCH_SIZE", 1)
ci_daily_build_group_report_results.create!( ci_daily_build_group_report_results.create!(
id: 1, id: 1,
...@@ -60,8 +61,8 @@ RSpec.describe BackfillProjectsWithCoverage do ...@@ -60,8 +61,8 @@ RSpec.describe BackfillProjectsWithCoverage do
freeze_time do freeze_time do
migrate! migrate!
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, 1, 2) expect(described_class::MIGRATION).to be_scheduled_delayed_migration(2.minutes, 1, 2, 1)
expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, 3, 3) expect(described_class::MIGRATION).to be_scheduled_delayed_migration(4.minutes, 3, 3, 1)
expect(BackgroundMigrationWorker.jobs.size).to eq(2) expect(BackgroundMigrationWorker.jobs.size).to eq(2)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment