Commit 01d6f2a1 authored by Krasimir Angelov

Support reindexing multiple databases independently and concurrently

Update the gitlab:db:reindex task and add database-specific tasks,
which are generated dynamically for each configured database.

gitlab:db:reindex      # reindex all defined databases
gitlab:db:reindex:ci   # reindex ci database
gitlab:db:reindex:main # reindex main database

Update exclusive lease keys to include connection name so that multiple
databases can be reindexed concurrently.

https://gitlab.com/gitlab-org/gitlab/-/issues/344235
parent 60cd1b1f
......@@ -47,6 +47,10 @@ module Gitlab
TIMEOUT_PER_ACTION
end
# Builds the exclusive lease key for this index creator by appending the
# name of the async index's database config to the inherited key, separated
# by '/'. Including the connection name gives each database its own lease,
# so multiple databases can be processed concurrently.
def lease_key
  inherited_key = super
  database_name = async_index.connection_db_config.name

  [inherited_key, database_name].join('/')
end
def set_statement_timeout
connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
......
......@@ -15,6 +15,26 @@ module Gitlab
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# Reports whether the :database_reindexing ops feature flag is enabled.
# The flag is looked up with `default_enabled: :yaml`.
def self.enabled?
  flag_options = { type: :ops, default_enabled: :yaml }

  Feature.enabled?(:database_reindexing, **flag_options)
end
# Runs the reindexing routine once per configured database connection.
#
# database - optional database name; when given, every connection whose name
#            (compared as a string) differs from it is skipped.
#
# For each selected connection this optionally routes SharedModel query logs
# to stdout (LOG_QUERIES_TO_CONSOLE), creates pending async indexes when the
# :database_async_index_creation flag is on, and then calls
# automatic_reindexing.
#
# Any StandardError is logged through Gitlab::AppLogger and re-raised.
def self.invoke(database = nil)
  Gitlab::Database::EachDatabase.each_database_connection do |_connection, connection_name|
    selected = !database || database.to_s == connection_name.to_s
    next unless selected

    log_to_console = Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
    Gitlab::Database::SharedModel.logger = Logger.new($stdout) if log_to_console

    # Hack: Before we do actual reindexing work, create async indexes
    if Feature.enabled?(:database_async_index_creation, type: :ops)
      Gitlab::Database::AsyncIndexes.create_pending_indexes!
    end

    automatic_reindexing
  end
rescue StandardError => error
  Gitlab::AppLogger.error(error)
  raise
end
# Performs automatic reindexing for a limited number of indexes per call
# 1. Consume from the explicit reindexing queue
# 2. Apply bloat heuristic to find most bloated indexes and reindex those
......
......@@ -53,6 +53,10 @@ module Gitlab
# Returns the TIMEOUT_PER_ACTION constant.
# NOTE(review): presumably consumed as the expiry of the exclusive lease taken
# by this class - confirm against the lease helper that calls it.
def lease_timeout
TIMEOUT_PER_ACTION
end
# Builds the exclusive lease key for the coordinator by appending the name of
# the index's database config to the inherited key, separated by '/'.
# Including the connection name gives each database its own lease, so
# multiple databases can be reindexed concurrently.
def lease_key
  inherited_key = super
  database_name = index.connection_db_config.name

  [inherited_key, database_name].join('/')
end
end
end
end
......
......@@ -39,6 +39,10 @@ module Gitlab
Thread.current[:overriding_connection] = connection
end
end
# Instance-level accessor that forwards to the class-level
# connection_db_config of the receiver's own class.
def connection_db_config
  model_class = self.class

  model_class.connection_db_config
end
end
end
end
......@@ -175,24 +175,30 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end
desc 'execute reindexing without downtime to eliminate bloat'
desc "Reindex database without downtime to eliminate bloat"
task reindex: :environment do
unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
unless Gitlab::Database::Reindexing.enabled?
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit
end
Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name|
Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
Gitlab::Database::Reindexing.invoke
end
# Hack: Before we do actual reindexing work, create async indexes
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops)
namespace :reindex do
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
Gitlab::Database::Reindexing.automatic_reindexing
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |database_name|
desc "Reindex #{database_name} database without downtime to eliminate bloat"
task database_name => :environment do
unless Gitlab::Database::Reindexing.enabled?
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit
end
Gitlab::Database::Reindexing.invoke(database_name)
end
end
rescue StandardError => e
Gitlab::AppLogger.error(e)
raise
end
desc 'Enqueue an index for reindexing'
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
include ExclusiveLeaseHelpers
describe '#perform' do
subject { described_class.new(async_index) }
......@@ -10,7 +12,18 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
let(:index_model) { Gitlab::Database::AsyncIndexes::PostgresAsyncIndex }
let(:connection) { ApplicationRecord.connection }
let(:model) { Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME] }
let(:connection) { model.connection }
let!(:lease) { stub_exclusive_lease(lease_key, :uuid, timeout: lease_timeout) }
let(:lease_key) { "gitlab/database/async_indexes/index_creator/#{Gitlab::Database::PRIMARY_DATABASE_NAME}" }
let(:lease_timeout) { described_class::TIMEOUT_PER_ACTION }
around do |example|
Gitlab::Database::SharedModel.using_connection(connection) do
example.run
end
end
context 'when the index already exists' do
before do
......@@ -40,7 +53,7 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
end
it 'skips logic if not able to acquire exclusive lease' do
expect(subject).to receive(:try_obtain_lease).and_return(false)
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(connection).not_to receive(:execute).with(/CREATE INDEX/)
expect(async_index).not_to receive(:destroy)
......
......@@ -15,10 +15,18 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:action) { create(:reindex_action, index: index) }
let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) }
let(:lease_key) { 'gitlab/database/reindexing/coordinator' }
let(:lease_key) { "gitlab/database/reindexing/coordinator/#{Gitlab::Database::PRIMARY_DATABASE_NAME}" }
let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' }
around do |example|
model = Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME]
Gitlab::Database::SharedModel.using_connection(model.connection) do
example.run
end
end
before do
swapout_view_for_table(:postgres_indexes)
......
......@@ -6,6 +6,63 @@ RSpec.describe Gitlab::Database::Reindexing do
include ExclusiveLeaseHelpers
include Database::DatabaseHelpers
describe '.invoke' do
  # Invocation without an explicit database name, i.e. for every database.
  subject(:invoke_all_databases) { described_class.invoke }

  # Number of configured databases; per-database work is expected this often.
  let(:databases_count) { Gitlab::Database.database_base_models.count }

  it 'cleans up any leftover indexes' do
    expect(described_class).to receive(:cleanup_leftovers!).exactly(databases_count).times

    invoke_all_databases
  end

  context 'when there is an error raised' do
    it 'logs and re-raise' do
      expect(described_class).to receive(:automatic_reindexing).and_raise('Unexpected!')
      expect(Gitlab::AppLogger).to receive(:error)

      expect { invoke_all_databases }.to raise_error('Unexpected!')
    end
  end

  context 'when async index creation is enabled' do
    it 'executes async index creation prior to any reindexing actions' do
      stub_feature_flags(database_async_index_creation: true)

      expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
      expect(described_class).to receive(:automatic_reindexing).ordered.exactly(databases_count).times

      invoke_all_databases
    end
  end

  context 'when async index creation is disabled' do
    it 'does not execute async index creation' do
      stub_feature_flags(database_async_index_creation: false)

      expect(Gitlab::Database::AsyncIndexes).not_to receive(:create_pending_indexes!)

      invoke_all_databases
    end
  end

  context 'calls automatic reindexing' do
    it 'uses all candidate indexes' do
      expect(described_class).to receive(:automatic_reindexing).exactly(databases_count).times

      invoke_all_databases
    end

    context 'when explicit database is given' do
      it 'skips other databases' do
        expect(described_class).to receive(:automatic_reindexing).once

        described_class.invoke(Gitlab::Database::PRIMARY_DATABASE_NAME)
      end
    end
  end
end
describe '.automatic_reindexing' do
subject { described_class.automatic_reindexing(maximum_records: limit) }
......@@ -133,10 +190,19 @@ RSpec.describe Gitlab::Database::Reindexing do
end
describe '.cleanup_leftovers!' do
subject { described_class.cleanup_leftovers! }
subject(:cleanup_leftovers) { described_class.cleanup_leftovers! }
let(:model) { Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME] }
let(:connection) { model.connection }
around do |example|
Gitlab::Database::SharedModel.using_connection(connection) do
example.run
end
end
before do
ApplicationRecord.connection.execute(<<~SQL)
connection.execute(<<~SQL)
CREATE INDEX foobar_ccnew ON users (id);
CREATE INDEX foobar_ccnew1 ON users (id);
SQL
......@@ -150,11 +216,11 @@ RSpec.describe Gitlab::Database::Reindexing do
expect_query("DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"foobar_ccnew1\"")
expect_query("RESET idle_in_transaction_session_timeout; RESET lock_timeout")
subject
cleanup_leftovers
end
def expect_query(sql)
expect(ApplicationRecord.connection).to receive(:execute).ordered.with(sql).and_wrap_original do |method, sql|
expect(connection).to receive(:execute).ordered.with(sql).and_wrap_original do |method, sql|
method.call(sql.sub(/CONCURRENTLY/, ''))
end
end
......
......@@ -84,4 +84,16 @@ RSpec.describe Gitlab::Database::SharedModel do
expect(described_class.connection).to be(original_connection)
end
end
describe '#connection_db_config' do
  it 'returns the class connection_db_config' do
    # Anonymous subclass bound to an existing table so it can be instantiated.
    model_class = Class.new(described_class) do
      self.table_name = 'postgres_async_indexes'
    end

    instance = model_class.new

    expect(instance.connection_db_config).to eq(described_class.connection_db_config)
  end
end
end
......@@ -203,43 +203,38 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
end
describe 'reindex' do
let(:reindex) { double('reindex') }
let(:indexes) { double('indexes') }
let(:databases) { Gitlab::Database.database_base_models }
let(:databases_count) { databases.count }
it 'cleans up any leftover indexes' do
expect(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!).exactly(databases_count).times
it 'delegates to Gitlab::Database::Reindexing' do
expect(Gitlab::Database::Reindexing).to receive(:invoke)
run_rake_task('gitlab:db:reindex')
end
context 'when async index creation is enabled' do
it 'executes async index creation prior to any reindexing actions' do
stub_feature_flags(database_async_index_creation: true)
expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).ordered.exactly(databases_count).times
context 'when reindexing is not enabled' do
it 'is a no-op' do
expect(Gitlab::Database::Reindexing).to receive(:enabled?).and_return(false)
expect(Gitlab::Database::Reindexing).not_to receive(:invoke)
run_rake_task('gitlab:db:reindex')
end
end
end
context 'when async index creation is disabled' do
it 'does not execute async index creation' do
stub_feature_flags(database_async_index_creation: false)
expect(Gitlab::Database::AsyncIndexes).not_to receive(:create_pending_indexes!)
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |database_name|
describe "reindex:#{database_name}" do
it 'delegates to Gitlab::Database::Reindexing' do
expect(Gitlab::Database::Reindexing).to receive(:invoke).with(database_name)
run_rake_task('gitlab:db:reindex')
end
run_rake_task("gitlab:db:reindex:#{database_name}")
end
context 'calls automatic reindexing' do
it 'uses all candidate indexes' do
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).exactly(databases_count).times
context 'when reindexing is not enabled' do
it 'is a no-op' do
expect(Gitlab::Database::Reindexing).to receive(:enabled?).and_return(false)
expect(Gitlab::Database::Reindexing).not_to receive(:invoke).with(database_name)
run_rake_task('gitlab:db:reindex')
run_rake_task("gitlab:db:reindex:#{database_name}")
end
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment