Commit 82451cf3 authored by Thong Kuah

Merge branch '344235-reindex-multiple-databases-independently' into 'master'

Support reindexing multiple databases independently and concurrently

See merge request gitlab-org/gitlab!75064
parents a5237380 01d6f2a1
...@@ -47,6 +47,10 @@ module Gitlab ...@@ -47,6 +47,10 @@ module Gitlab
TIMEOUT_PER_ACTION TIMEOUT_PER_ACTION
end end
# Builds the lease key for this creator by appending the name of the
# async index's connection_db_config to the inherited key, separated by '/'.
# Keys produced for differently named database configs therefore differ.
def lease_key
  inherited_key = super
  database_name = async_index.connection_db_config.name

  [inherited_key, database_name].join('/')
end
def set_statement_timeout def set_statement_timeout
connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT) connection.execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield yield
......
...@@ -15,6 +15,26 @@ module Gitlab ...@@ -15,6 +15,26 @@ module Gitlab
# on e.g. vacuum. # on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30 REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
# Reports whether the :database_reindexing ops feature flag is enabled.
# Returns whatever Feature.enabled? returns for that flag.
def self.enabled?
Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml)
end
# Runs the reindexing routine once for each database connection yielded by
# Gitlab::Database::EachDatabase.each_database_connection.
#
# database - Optional connection name. When truthy, only the connection whose
#            name matches (compared via #to_s) is processed; all others are
#            skipped. Defaults to nil, which processes every connection.
#
# Any StandardError raised along the way is reported to Gitlab::AppLogger
# and then re-raised to the caller.
def self.invoke(database = nil)
  Gitlab::Database::EachDatabase.each_database_connection do |_connection, connection_name|
    # Process this connection when no database was requested or the names match.
    wanted = !database || database.to_s == connection_name.to_s
    next unless wanted

    # Opt-in query logging to stdout, controlled by an environment variable.
    if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false)
      Gitlab::Database::SharedModel.logger = Logger.new($stdout)
    end

    # Hack: Before we do actual reindexing work, create async indexes
    if Feature.enabled?(:database_async_index_creation, type: :ops)
      Gitlab::Database::AsyncIndexes.create_pending_indexes!
    end

    automatic_reindexing
  end
rescue StandardError => error
  Gitlab::AppLogger.error(error)
  raise
end
# Performs automatic reindexing for a limited number of indexes per call # Performs automatic reindexing for a limited number of indexes per call
# 1. Consume from the explicit reindexing queue # 1. Consume from the explicit reindexing queue
# 2. Apply bloat heuristic to find most bloated indexes and reindex those # 2. Apply bloat heuristic to find most bloated indexes and reindex those
......
...@@ -53,6 +53,10 @@ module Gitlab ...@@ -53,6 +53,10 @@ module Gitlab
def lease_timeout def lease_timeout
TIMEOUT_PER_ACTION TIMEOUT_PER_ACTION
end end
# Builds the lease key for this coordinator by appending the name of the
# index's connection_db_config to the inherited key, separated by '/'.
# Keys produced for differently named database configs therefore differ.
def lease_key
  inherited_key = super
  database_name = index.connection_db_config.name

  [inherited_key, database_name].join('/')
end
end end
end end
end end
......
...@@ -39,6 +39,10 @@ module Gitlab ...@@ -39,6 +39,10 @@ module Gitlab
Thread.current[:overriding_connection] = connection Thread.current[:overriding_connection] = connection
end end
end end
# Instance-level reader that delegates to the class-level
# connection_db_config, so callers holding a record can ask it directly.
def connection_db_config
self.class.connection_db_config
end
end end
end end
end end
...@@ -175,24 +175,30 @@ namespace :gitlab do ...@@ -175,24 +175,30 @@ namespace :gitlab do
Rake::Task['gitlab:db:create_dynamic_partitions'].invoke Rake::Task['gitlab:db:create_dynamic_partitions'].invoke
end end
desc 'execute reindexing without downtime to eliminate bloat' desc "Reindex database without downtime to eliminate bloat"
task reindex: :environment do task reindex: :environment do
unless Feature.enabled?(:database_reindexing, type: :ops, default_enabled: :yaml) unless Gitlab::Database::Reindexing.enabled?
puts "This feature (database_reindexing) is currently disabled.".color(:yellow) puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit exit
end end
Gitlab::Database::EachDatabase.each_database_connection do |connection, connection_name| Gitlab::Database::Reindexing.invoke
Gitlab::Database::SharedModel.logger = Logger.new($stdout) if Gitlab::Utils.to_boolean(ENV['LOG_QUERIES_TO_CONSOLE'], default: false) end
namespace :reindex do
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
# Hack: Before we do actual reindexing work, create async indexes ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |database_name|
Gitlab::Database::AsyncIndexes.create_pending_indexes! if Feature.enabled?(:database_async_index_creation, type: :ops) desc "Reindex #{database_name} database without downtime to eliminate bloat"
task database_name => :environment do
unless Gitlab::Database::Reindexing.enabled?
puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit
end
Gitlab::Database::Reindexing.automatic_reindexing Gitlab::Database::Reindexing.invoke(database_name)
end
end end
rescue StandardError => e
Gitlab::AppLogger.error(e)
raise
end end
desc 'Enqueue an index for reindexing' desc 'Enqueue an index for reindexing'
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
include ExclusiveLeaseHelpers
describe '#perform' do describe '#perform' do
subject { described_class.new(async_index) } subject { described_class.new(async_index) }
...@@ -10,7 +12,18 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do ...@@ -10,7 +12,18 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
let(:index_model) { Gitlab::Database::AsyncIndexes::PostgresAsyncIndex } let(:index_model) { Gitlab::Database::AsyncIndexes::PostgresAsyncIndex }
let(:connection) { ApplicationRecord.connection } let(:model) { Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME] }
let(:connection) { model.connection }
let!(:lease) { stub_exclusive_lease(lease_key, :uuid, timeout: lease_timeout) }
let(:lease_key) { "gitlab/database/async_indexes/index_creator/#{Gitlab::Database::PRIMARY_DATABASE_NAME}" }
let(:lease_timeout) { described_class::TIMEOUT_PER_ACTION }
around do |example|
Gitlab::Database::SharedModel.using_connection(connection) do
example.run
end
end
context 'when the index already exists' do context 'when the index already exists' do
before do before do
...@@ -40,7 +53,7 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do ...@@ -40,7 +53,7 @@ RSpec.describe Gitlab::Database::AsyncIndexes::IndexCreator do
end end
it 'skips logic if not able to acquire exclusive lease' do it 'skips logic if not able to acquire exclusive lease' do
expect(subject).to receive(:try_obtain_lease).and_return(false) expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(connection).not_to receive(:execute).with(/CREATE INDEX/) expect(connection).not_to receive(:execute).with(/CREATE INDEX/)
expect(async_index).not_to receive(:destroy) expect(async_index).not_to receive(:destroy)
......
...@@ -15,10 +15,18 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do ...@@ -15,10 +15,18 @@ RSpec.describe Gitlab::Database::Reindexing::Coordinator do
let(:action) { create(:reindex_action, index: index) } let(:action) { create(:reindex_action, index: index) }
let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) } let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) }
let(:lease_key) { 'gitlab/database/reindexing/coordinator' } let(:lease_key) { "gitlab/database/reindexing/coordinator/#{Gitlab::Database::PRIMARY_DATABASE_NAME}" }
let(:lease_timeout) { 1.day } let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' } let(:uuid) { 'uuid' }
around do |example|
model = Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME]
Gitlab::Database::SharedModel.using_connection(model.connection) do
example.run
end
end
before do before do
swapout_view_for_table(:postgres_indexes) swapout_view_for_table(:postgres_indexes)
......
...@@ -6,6 +6,63 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -6,6 +6,63 @@ RSpec.describe Gitlab::Database::Reindexing do
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
include Database::DatabaseHelpers include Database::DatabaseHelpers
# Specs for .invoke, the entry point that runs reindexing per database connection.
describe '.invoke' do
# Count of configured base models. The expectations below assume .invoke
# visits one connection per entry - TODO confirm against EachDatabase.
let(:databases) { Gitlab::Database.database_base_models }
let(:databases_count) { databases.count }
# NOTE(review): .invoke does not call cleanup_leftovers! itself in the code
# visible here; presumably it is reached through automatic_reindexing.
it 'cleans up any leftover indexes' do
expect(described_class).to receive(:cleanup_leftovers!).exactly(databases_count).times
described_class.invoke
end
context 'when there is an error raised' do
# A failure inside the run must be both reported to Gitlab::AppLogger and
# propagated to the caller.
it 'logs and re-raise' do
expect(described_class).to receive(:automatic_reindexing).and_raise('Unexpected!')
expect(Gitlab::AppLogger).to receive(:error)
expect { described_class.invoke }.to raise_error('Unexpected!')
end
end
context 'when async index creation is enabled' do
# `.ordered` pins the sequence: pending async indexes are created before
# automatic reindexing is invoked.
it 'executes async index creation prior to any reindexing actions' do
stub_feature_flags(database_async_index_creation: true)
expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
expect(described_class).to receive(:automatic_reindexing).ordered.exactly(databases_count).times
described_class.invoke
end
end
context 'when async index creation is disabled' do
# With the feature flag off, the async index step must be skipped entirely.
it 'does not execute async index creation' do
stub_feature_flags(database_async_index_creation: false)
expect(Gitlab::Database::AsyncIndexes).not_to receive(:create_pending_indexes!)
described_class.invoke
end
end
context 'calls automatic reindexing' do
# Without an argument, automatic_reindexing runs once per database.
it 'uses all candidate indexes' do
expect(described_class).to receive(:automatic_reindexing).exactly(databases_count).times
described_class.invoke
end
context 'when explicit database is given' do
# Passing a database name restricts the run to the single matching connection.
it 'skips other databases' do
expect(described_class).to receive(:automatic_reindexing).once
described_class.invoke(Gitlab::Database::PRIMARY_DATABASE_NAME)
end
end
end
end
describe '.automatic_reindexing' do describe '.automatic_reindexing' do
subject { described_class.automatic_reindexing(maximum_records: limit) } subject { described_class.automatic_reindexing(maximum_records: limit) }
...@@ -133,10 +190,19 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -133,10 +190,19 @@ RSpec.describe Gitlab::Database::Reindexing do
end end
describe '.cleanup_leftovers!' do describe '.cleanup_leftovers!' do
subject { described_class.cleanup_leftovers! } subject(:cleanup_leftovers) { described_class.cleanup_leftovers! }
let(:model) { Gitlab::Database.database_base_models[Gitlab::Database::PRIMARY_DATABASE_NAME] }
let(:connection) { model.connection }
around do |example|
Gitlab::Database::SharedModel.using_connection(connection) do
example.run
end
end
before do before do
ApplicationRecord.connection.execute(<<~SQL) connection.execute(<<~SQL)
CREATE INDEX foobar_ccnew ON users (id); CREATE INDEX foobar_ccnew ON users (id);
CREATE INDEX foobar_ccnew1 ON users (id); CREATE INDEX foobar_ccnew1 ON users (id);
SQL SQL
...@@ -150,11 +216,11 @@ RSpec.describe Gitlab::Database::Reindexing do ...@@ -150,11 +216,11 @@ RSpec.describe Gitlab::Database::Reindexing do
expect_query("DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"foobar_ccnew1\"") expect_query("DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"foobar_ccnew1\"")
expect_query("RESET idle_in_transaction_session_timeout; RESET lock_timeout") expect_query("RESET idle_in_transaction_session_timeout; RESET lock_timeout")
subject cleanup_leftovers
end end
def expect_query(sql) def expect_query(sql)
expect(ApplicationRecord.connection).to receive(:execute).ordered.with(sql).and_wrap_original do |method, sql| expect(connection).to receive(:execute).ordered.with(sql).and_wrap_original do |method, sql|
method.call(sql.sub(/CONCURRENTLY/, '')) method.call(sql.sub(/CONCURRENTLY/, ''))
end end
end end
......
...@@ -84,4 +84,16 @@ RSpec.describe Gitlab::Database::SharedModel do ...@@ -84,4 +84,16 @@ RSpec.describe Gitlab::Database::SharedModel do
expect(described_class.connection).to be(original_connection) expect(described_class.connection).to be(original_connection)
end end
end end
# Verifies that an instance of a SharedModel subclass reports the same
# connection_db_config as the shared model class itself.
describe '#connection_db_config' do
  it 'returns the class connection_db_config' do
    # Anonymous subclass bound to an existing table so it can be instantiated.
    subclass = Class.new(described_class) { self.table_name = 'postgres_async_indexes' }
    instance = subclass.new

    expect(instance.connection_db_config).to eq(described_class.connection_db_config)
  end
end
end end
...@@ -203,43 +203,38 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do ...@@ -203,43 +203,38 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
end end
describe 'reindex' do describe 'reindex' do
let(:reindex) { double('reindex') } it 'delegates to Gitlab::Database::Reindexing' do
let(:indexes) { double('indexes') } expect(Gitlab::Database::Reindexing).to receive(:invoke)
let(:databases) { Gitlab::Database.database_base_models }
let(:databases_count) { databases.count }
it 'cleans up any leftover indexes' do
expect(Gitlab::Database::Reindexing).to receive(:cleanup_leftovers!).exactly(databases_count).times
run_rake_task('gitlab:db:reindex') run_rake_task('gitlab:db:reindex')
end end
context 'when async index creation is enabled' do context 'when reindexing is not enabled' do
it 'executes async index creation prior to any reindexing actions' do it 'is a no-op' do
stub_feature_flags(database_async_index_creation: true) expect(Gitlab::Database::Reindexing).to receive(:enabled?).and_return(false)
expect(Gitlab::Database::Reindexing).not_to receive(:invoke)
expect(Gitlab::Database::AsyncIndexes).to receive(:create_pending_indexes!).ordered.exactly(databases_count).times
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).ordered.exactly(databases_count).times
run_rake_task('gitlab:db:reindex') run_rake_task('gitlab:db:reindex')
end end
end end
end
context 'when async index creation is disabled' do databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
it 'does not execute async index creation' do ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |database_name|
stub_feature_flags(database_async_index_creation: false) describe "reindex:#{database_name}" do
it 'delegates to Gitlab::Database::Reindexing' do
expect(Gitlab::Database::AsyncIndexes).not_to receive(:create_pending_indexes!) expect(Gitlab::Database::Reindexing).to receive(:invoke).with(database_name)
run_rake_task('gitlab:db:reindex') run_rake_task("gitlab:db:reindex:#{database_name}")
end end
end
context 'calls automatic reindexing' do context 'when reindexing is not enabled' do
it 'uses all candidate indexes' do it 'is a no-op' do
expect(Gitlab::Database::Reindexing).to receive(:automatic_reindexing).exactly(databases_count).times expect(Gitlab::Database::Reindexing).to receive(:enabled?).and_return(false)
expect(Gitlab::Database::Reindexing).not_to receive(:invoke).with(database_name)
run_rake_task('gitlab:db:reindex') run_rake_task("gitlab:db:reindex:#{database_name}")
end
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment