Commit 9abb6fb5 authored by Dylan Griffith's avatar Dylan Griffith

Improve performance of Elasticsearch notes permissions migration

We are currently running this migration in production to reindex all the
notes in GitLab.com. There happen to be over 30M of them so it will take
quite a while to run. We have found in
https://gitlab.com/gitlab-org/gitlab/-/issues/324745 that the migration
is not performing as well as we'd hoped. Given that we want to keep each
job taking less than 1 minute to run we want to reduce the batch size to
make that easier.

We have found that 9000 per 3 minutes is too much and we are regularly
overlapping with the previous batch and thus we end up often processing
the same batch twice.

In addition it will be more efficient to push the work into the "initial
indexing queue" instead of our main "incremental indexing queue" as the
initial indexing queue has far less traffic (it only covers newly
created projects while incremental is any record update in any project).
parent babc2fe7
---
title: Improve performance of Elasticsearch notes permissions migration
merge_request: 56823
author:
type: performance
......@@ -4,7 +4,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration
batched!
throttle_delay 3.minutes
QUERY_BATCH_SIZE = 9_000
QUERY_BATCH_SIZE = 6_000
UPDATE_BATCH_SIZE = 100
def migrate
......@@ -30,7 +30,7 @@ class AddPermissionsDataToNotesDocuments < Elastic::Migration
end
document_references.each_slice(UPDATE_BATCH_SIZE) do |refs|
Elastic::ProcessBookkeepingService.track!(*refs)
Elastic::ProcessInitialBookkeepingService.track!(*refs)
end
log "Adding permission data to notes documents is completed for batch of #{document_references.size} documents"
......
......@@ -32,7 +32,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
context 'when migration is completed' do
it 'does not queue documents for indexing' do
expect(migration.completed?).to be_truthy
expect(::Elastic::ProcessBookkeepingService).not_to receive(:track!)
expect(::Elastic::ProcessInitialBookkeepingService).not_to receive(:track!)
migration.migrate
end
......@@ -44,7 +44,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
end
it 'queues documents for indexing' do
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).once do |*tracked_refs|
expect(tracked_refs.count).to eq(4)
end
......@@ -55,7 +55,7 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
add_permission_data_for_notes([note_on_issue, note_on_snippet, note_on_merge_request])
expected = [Gitlab::Elastic::DocumentReference.new(Note, note_on_commit.id, note_on_commit.es_id, note_on_commit.es_parent)]
expect(::Elastic::ProcessBookkeepingService).to receive(:track!).with(*expected).once
expect(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).with(*expected).once
migration.migrate
end
......@@ -64,22 +64,22 @@ RSpec.describe AddPermissionsDataToNotesDocuments, :elastic, :sidekiq_inline do
stub_const("#{described_class}::QUERY_BATCH_SIZE", 2)
stub_const("#{described_class}::UPDATE_BATCH_SIZE", 1)
allow(::Elastic::ProcessBookkeepingService).to receive(:track!).and_call_original
allow(::Elastic::ProcessInitialBookkeepingService).to receive(:track!).and_call_original
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(2).times
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(2).times
ensure_elasticsearch_index!
migration.migrate
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
ensure_elasticsearch_index!
migration.migrate
# The migration should have already finished so there are no more items to process
expect(::Elastic::ProcessBookkeepingService).to have_received(:track!).exactly(4).times
expect(::Elastic::ProcessInitialBookkeepingService).to have_received(:track!).exactly(4).times
expect(migration).to be_completed
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment