Commit a2ac5f57 authored by Nick Thomas

Make elasticsearch bulk parameters configurable

These variables strongly affect the amount of load that indexing
operations place on the Elasticsearch server. They are useful values
to be able to twiddle in both performance and availability contexts.

At present, these only affect `gitlab-elasticsearch-indexer` behaviour,
but we can roll them out to the sidekiq context over time.
parent f503bfec
# frozen_string_literal: true
# Adds the two Elasticsearch bulk-indexing settings to `application_settings`.
class AddEsBulkConfig < ActiveRecord::Migration[6.0]
  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

  # Both columns share the same definition: a non-null smallint defaulting to 10.
  def change
    %i[elasticsearch_max_bulk_size_mb elasticsearch_max_bulk_concurrency].each do |column_name|
      add_column :application_settings, column_name, :smallint, null: false, default: 10
    end
  end
end
......@@ -344,6 +344,8 @@ ActiveRecord::Schema.define(version: 2020_02_07_151640) do
t.boolean "updating_name_disabled_for_users", default: false, null: false
t.integer "instance_administrators_group_id"
t.integer "elasticsearch_indexed_field_length_limit", default: 0, null: false
t.integer "elasticsearch_max_bulk_size_mb", limit: 2, default: 10, null: false
t.integer "elasticsearch_max_bulk_concurrency", limit: 2, default: 10, null: false
t.index ["custom_project_templates_group_id"], name: "index_application_settings_on_custom_project_templates_group_id"
t.index ["file_template_project_id"], name: "index_application_settings_on_file_template_project_id"
t.index ["instance_administration_project_id"], name: "index_applicationsettings_on_instance_administration_project_id"
......
......@@ -151,6 +151,8 @@ The following Elasticsearch settings are available:
| `AWS Access Key` | The AWS access key. |
| `AWS Secret Access Key` | The AWS secret access key. |
| `Maximum field length` | See [the explanation in instance limits.](../administration/instance_limits.md#maximum-field-length). |
| `Maximum bulk request size (MiB)` | Repository indexing uses the Elasticsearch bulk request API. This setting determines the maximum size of an individual bulk request during these operations. |
| `Bulk request concurrency` | Each repository indexing operation may submit bulk requests in parallel. This increases indexing performance, but fills the Elasticsearch bulk requests queue faster. |
### Limiting namespaces and projects
......
......@@ -26,6 +26,8 @@ module EE
:elasticsearch_aws_region,
:elasticsearch_aws_secret_access_key,
:elasticsearch_indexing,
:elasticsearch_max_bulk_concurrency,
:elasticsearch_max_bulk_size_mb,
:elasticsearch_replicas,
:elasticsearch_indexed_field_length_limit,
:elasticsearch_search,
......
......@@ -48,6 +48,14 @@ module EE
presence: true,
numericality: { only_integer: true, greater_than: 0 }
# Both bulk-indexing settings are required and must be integers greater than zero.
# One `validates` call per attribute keeps the validators registered in the same order.
%i[elasticsearch_max_bulk_size_mb elasticsearch_max_bulk_concurrency].each do |bulk_setting|
  validates bulk_setting,
            presence: true,
            numericality: { only_integer: true, greater_than: 0 }
end
validates :elasticsearch_url,
presence: { message: "can't be blank when indexing is enabled" },
if: ->(setting) { setting.elasticsearch_indexing? }
......@@ -90,6 +98,8 @@ module EE
elasticsearch_replicas: 1,
elasticsearch_shards: 5,
elasticsearch_indexed_field_length_limit: 0,
elasticsearch_max_bulk_size_bytes: 10.megabytes,
elasticsearch_max_bulk_concurrency: 10,
elasticsearch_url: ENV['ELASTIC_URL'] || 'http://localhost:9200',
email_additional_text: nil,
lock_memberships_to_ldap: false,
......@@ -209,7 +219,9 @@ module EE
aws: elasticsearch_aws,
aws_access_key: elasticsearch_aws_access_key,
aws_secret_access_key: elasticsearch_aws_secret_access_key,
aws_region: elasticsearch_aws_region
aws_region: elasticsearch_aws_region,
max_bulk_size_bytes: elasticsearch_max_bulk_size_mb.megabytes,
max_bulk_concurrency: elasticsearch_max_bulk_concurrency
}
end
......
......@@ -63,6 +63,20 @@
.form-text.text-muted
= _('If any indexed field exceeds this limit it will be truncated to this number of characters and the rest will not be indexed or searchable. This does not apply to repository and wiki indexing. Setting this to 0 means it is unlimited.')
.form-group
= f.label :elasticsearch_max_bulk_size_mb, _('Maximum bulk request size (MiB)'), class: 'label-bold'
= f.number_field :elasticsearch_max_bulk_size_mb, value: @application_setting.elasticsearch_max_bulk_size_mb, class: 'form-control'
.form-text.text-muted
= _('Maximum size of Elasticsearch bulk indexing requests.')
= _('This only applies to repository indexing operations.')
.form-group
= f.label :elasticsearch_max_bulk_concurrency, _('Bulk request concurrency'), class: 'label-bold'
= f.number_field :elasticsearch_max_bulk_concurrency, value: @application_setting.elasticsearch_max_bulk_concurrency, class: 'form-control'
.form-text.text-muted
= _('Maximum concurrency of Elasticsearch bulk requests per indexing operation.')
= _('This only applies to repository indexing operations.')
.sub-section
%h4= _('Elasticsearch indexing restrictions')
.form-group
......
---
title: Make elasticsearch bulk parameters configurable
merge_request: 24688
author:
type: added
......@@ -69,6 +69,8 @@ describe 'Admin updates EE-only settings' do
fill_in 'Number of Elasticsearch shards', with: '120'
fill_in 'Number of Elasticsearch replicas', with: '2'
fill_in 'Maximum field length', with: '100000'
fill_in 'Maximum bulk request size (MiB)', with: '17'
fill_in 'Bulk request concurrency', with: '23'
click_button 'Save changes'
end
......@@ -79,6 +81,8 @@ describe 'Admin updates EE-only settings' do
expect(current_settings.elasticsearch_shards).to eq(120)
expect(current_settings.elasticsearch_replicas).to eq(2)
expect(current_settings.elasticsearch_indexed_field_length_limit).to eq(100000)
expect(current_settings.elasticsearch_max_bulk_size_mb).to eq(17)
expect(current_settings.elasticsearch_max_bulk_concurrency).to eq(23)
expect(page).to have_content 'Application settings saved successfully'
end
end
......
......@@ -47,6 +47,18 @@ describe ApplicationSetting do
it { is_expected.not_to allow_value(1.1).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.not_to allow_value(-1).for(:elasticsearch_indexed_field_length_limit) }
it { is_expected.to allow_value(25).for(:elasticsearch_max_bulk_size_mb) }
it { is_expected.not_to allow_value(nil).for(:elasticsearch_max_bulk_size_mb) }
it { is_expected.not_to allow_value(0).for(:elasticsearch_max_bulk_size_mb) }
it { is_expected.not_to allow_value(1.1).for(:elasticsearch_max_bulk_size_mb) }
it { is_expected.not_to allow_value(-1).for(:elasticsearch_max_bulk_size_mb) }
it { is_expected.to allow_value(2).for(:elasticsearch_max_bulk_concurrency) }
it { is_expected.not_to allow_value(nil).for(:elasticsearch_max_bulk_concurrency) }
it { is_expected.not_to allow_value(0).for(:elasticsearch_max_bulk_concurrency) }
it { is_expected.not_to allow_value(1.1).for(:elasticsearch_max_bulk_concurrency) }
it { is_expected.not_to allow_value(-1).for(:elasticsearch_max_bulk_concurrency) }
it { is_expected.to allow_value(nil).for(:required_instance_ci_template) }
it { is_expected.not_to allow_value("").for(:required_instance_ci_template) }
it { is_expected.not_to allow_value(" ").for(:required_instance_ci_template) }
......@@ -208,7 +220,9 @@ describe ApplicationSetting do
elasticsearch_aws: false,
elasticsearch_aws_region: 'test-region',
elasticsearch_aws_access_key: 'test-access-key',
elasticsearch_aws_secret_access_key: 'test-secret-access-key'
elasticsearch_aws_secret_access_key: 'test-secret-access-key',
elasticsearch_max_bulk_size_mb: 67,
elasticsearch_max_bulk_concurrency: 8
)
expect(setting.elasticsearch_config).to eq(
......@@ -216,7 +230,9 @@ describe ApplicationSetting do
aws: false,
aws_region: 'test-region',
aws_access_key: 'test-access-key',
aws_secret_access_key: 'test-secret-access-key'
aws_secret_access_key: 'test-secret-access-key',
max_bulk_size_bytes: 67.megabytes,
max_bulk_concurrency: 8
)
end
......
......@@ -2996,6 +2996,9 @@ msgstr ""
msgid "Built-in"
msgstr ""
msgid "Bulk request concurrency"
msgstr ""
msgid "Burndown chart"
msgstr ""
......@@ -11722,9 +11725,15 @@ msgstr ""
msgid "Maximum attachment size (MB)"
msgstr ""
msgid "Maximum bulk request size (MiB)"
msgstr ""
msgid "Maximum capacity"
msgstr ""
msgid "Maximum concurrency of Elasticsearch bulk requests per indexing operation."
msgstr ""
msgid "Maximum delay (Minutes)"
msgstr ""
......@@ -11767,6 +11776,9 @@ msgstr ""
msgid "Maximum size limit for each repository."
msgstr ""
msgid "Maximum size of Elasticsearch bulk indexing requests."
msgstr ""
msgid "Maximum size of individual attachments in comments."
msgstr ""
......@@ -19557,6 +19569,9 @@ msgstr ""
msgid "This namespace has already been taken! Please choose another one."
msgstr ""
msgid "This only applies to repository indexing operations."
msgstr ""
msgid "This option is only available on GitLab.com"
msgstr ""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment