Commit 1f89ccc1 authored by Bob Van Landuyt's avatar Bob Van Landuyt

Merge branch 'ab/reindexing-concurrency' into 'master'

Limit reindexing concurrency

See merge request gitlab-org/gitlab!44036
parents fcc225e7 8d8cb077
...@@ -4,11 +4,7 @@ module Gitlab ...@@ -4,11 +4,7 @@ module Gitlab
module Database module Database
module Reindexing module Reindexing
def self.perform(index_selector) def self.perform(index_selector)
Array.wrap(index_selector).each do |index| Coordinator.new(index_selector).perform
ReindexAction.keep_track_of(index) do
ConcurrentReindex.new(index).perform
end
end
end end
def self.candidate_indexes def self.candidate_indexes
......
...@@ -5,13 +5,13 @@ module Gitlab ...@@ -5,13 +5,13 @@ module Gitlab
module Reindexing module Reindexing
class ConcurrentReindex class ConcurrentReindex
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
include MigrationHelpers
ReindexError = Class.new(StandardError) ReindexError = Class.new(StandardError)
PG_IDENTIFIER_LENGTH = 63 PG_IDENTIFIER_LENGTH = 63
TEMPORARY_INDEX_PREFIX = 'tmp_reindex_' TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
REPLACED_INDEX_PREFIX = 'old_reindex_' REPLACED_INDEX_PREFIX = 'old_reindex_'
STATEMENT_TIMEOUT = 6.hours
attr_reader :index, :logger attr_reader :index, :logger
...@@ -47,7 +47,7 @@ module Gitlab ...@@ -47,7 +47,7 @@ module Gitlab
logger.info("creating replacement index #{replacement_index_name}") logger.info("creating replacement index #{replacement_index_name}")
logger.debug("replacement index definition: #{create_replacement_index_statement}") logger.debug("replacement index definition: #{create_replacement_index_statement}")
disable_statement_timeout do set_statement_timeout do
connection.execute(create_replacement_index_statement) connection.execute(create_replacement_index_statement)
end end
...@@ -88,7 +88,7 @@ module Gitlab ...@@ -88,7 +88,7 @@ module Gitlab
def remove_index(schema, name) def remove_index(schema, name)
logger.info("Removing index #{schema}.#{name}") logger.info("Removing index #{schema}.#{name}")
disable_statement_timeout do set_statement_timeout do
connection.execute(<<~SQL) connection.execute(<<~SQL)
DROP INDEX CONCURRENTLY DROP INDEX CONCURRENTLY
IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)} IF EXISTS #{quote_table_name(schema)}.#{quote_table_name(name)}
...@@ -110,6 +110,13 @@ module Gitlab ...@@ -110,6 +110,13 @@ module Gitlab
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block) Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end end
def set_statement_timeout
execute("SET statement_timeout TO #{STATEMENT_TIMEOUT}")
yield
ensure
execute('RESET statement_timeout')
end
delegate :execute, :quote_table_name, to: :connection delegate :execute, :quote_table_name, to: :connection
def connection def connection
@connection ||= ActiveRecord::Base.connection @connection ||= ActiveRecord::Base.connection
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class Coordinator
include ExclusiveLeaseGuard
# Maximum lease time for the global Redis lease
# This should be higher than the maximum time for any
# long running step in the reindexing process (compare with
# statement timeouts).
TIMEOUT_PER_ACTION = 1.day
attr_reader :indexes
def initialize(indexes)
@indexes = indexes
end
def perform
indexes.each do |index|
# This obtains a global lease such that there's
# only one live reindexing process at a time.
try_obtain_lease do
ReindexAction.keep_track_of(index) do
ConcurrentReindex.new(index).perform
end
end
end
end
private
def lease_timeout
TIMEOUT_PER_ACTION
end
end
end
end
end
...@@ -193,7 +193,7 @@ namespace :gitlab do ...@@ -193,7 +193,7 @@ namespace :gitlab do
end end
indexes = if args[:index_name] indexes = if args[:index_name]
Gitlab::Database::PostgresIndex.by_identifier(args[:index_name]) [Gitlab::Database::PostgresIndex.by_identifier(args[:index_name])]
else else
Gitlab::Database::Reindexing.candidate_indexes.random_few(2) Gitlab::Database::Reindexing.candidate_indexes.random_few(2)
end end
......
...@@ -107,11 +107,11 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do ...@@ -107,11 +107,11 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do
context 'mocked specs' do context 'mocked specs' do
before do before do
allow(subject).to receive(:connection).and_return(connection) allow(subject).to receive(:connection).and_return(connection)
allow(subject).to receive(:disable_statement_timeout).and_yield allow(connection).to receive(:execute).and_call_original
end end
it 'replaces the existing index with an identical index' do it 'replaces the existing index with an identical index' do
expect(subject).to receive(:disable_statement_timeout).twice.and_yield expect(connection).to receive(:execute).with('SET statement_timeout TO 21600').twice
expect_to_execute_concurrently_in_order(create_index) expect_to_execute_concurrently_in_order(create_index)
...@@ -136,7 +136,7 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do ...@@ -136,7 +136,7 @@ RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do
end end
it 'replaces the existing index with an identical index' do it 'replaces the existing index with an identical index' do
expect(subject).to receive(:disable_statement_timeout).exactly(3).times.and_yield expect(connection).to receive(:execute).with('SET statement_timeout TO 21600').exactly(3).times
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
expect_to_execute_concurrently_in_order(create_index) expect_to_execute_concurrently_in_order(create_index)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing::Coordinator do
include ExclusiveLeaseHelpers
describe '.perform' do
subject { described_class.new(indexes).perform }
let(:indexes) { [instance_double(Gitlab::Database::PostgresIndex), instance_double(Gitlab::Database::PostgresIndex)] }
let(:reindexers) { [instance_double(Gitlab::Database::Reindexing::ConcurrentReindex), instance_double(Gitlab::Database::Reindexing::ConcurrentReindex)] }
let!(:lease) { stub_exclusive_lease(lease_key, uuid, timeout: lease_timeout) }
let(:lease_key) { 'gitlab/database/reindexing/coordinator' }
let(:lease_timeout) { 1.day }
let(:uuid) { 'uuid' }
before do
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).and_yield
indexes.zip(reindexers).each do |index, reindexer|
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(reindexer).to receive(:perform)
end
end
it 'performs concurrent reindexing for each index' do
indexes.zip(reindexers).each do |index, reindexer|
expect(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).ordered.and_return(reindexer)
expect(reindexer).to receive(:perform)
end
subject
end
it 'keeps track of actions and creates ReindexAction records' do
indexes.each do |index|
expect(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).with(index).and_yield
end
subject
end
context 'locking' do
it 'acquires a lock while reindexing' do
indexes.each do |index|
expect(lease).to receive(:try_obtain).ordered.and_return(uuid)
action = instance_double(Gitlab::Database::Reindexing::ConcurrentReindex)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).ordered.with(index).and_return(action)
expect(action).to receive(:perform).ordered
expect(Gitlab::ExclusiveLease).to receive(:cancel).ordered.with(lease_key, uuid)
end
subject
end
it 'does does not perform reindexing actions if lease is not granted' do
indexes.each do |index|
expect(lease).to receive(:try_obtain).ordered.and_return(false)
expect(Gitlab::Database::Reindexing::ConcurrentReindex).not_to receive(:new)
end
subject
end
end
end
end
...@@ -3,53 +3,19 @@ ...@@ -3,53 +3,19 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing do RSpec.describe Gitlab::Database::Reindexing do
describe '.perform' do include ExclusiveLeaseHelpers
before do
allow(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).and_yield
end
shared_examples_for 'reindexing' do
before do
indexes.zip(reindexers).each do |index, reindexer|
allow(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).and_return(reindexer)
allow(reindexer).to receive(:perform)
end
end
it 'performs concurrent reindexing for each index' do describe '.perform' do
indexes.zip(reindexers).each do |index, reindexer|
expect(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index).ordered.and_return(reindexer)
expect(reindexer).to receive(:perform)
end
subject
end
it 'keeps track of actions and creates ReindexAction records' do
indexes.each do |index|
expect(Gitlab::Database::Reindexing::ReindexAction).to receive(:keep_track_of).with(index).and_yield
end
subject
end
end
context 'with multiple indexes' do
subject { described_class.perform(indexes) } subject { described_class.perform(indexes) }
let(:indexes) { [instance_double('Gitlab::Database::PostgresIndex'), instance_double('Gitlab::Database::PostgresIndex')] } let(:coordinator) { instance_double(Gitlab::Database::Reindexing::Coordinator) }
let(:reindexers) { [instance_double('Gitlab::Database::Reindexing::ConcurrentReindex'), instance_double('Gitlab::Database::Reindexing::ConcurrentReindex')] } let(:indexes) { double }
it_behaves_like 'reindexing'
end
context 'single index' do it 'delegates to Coordinator' do
subject { described_class.perform(indexes.first) } expect(Gitlab::Database::Reindexing::Coordinator).to receive(:new).with(indexes).and_return(coordinator)
expect(coordinator).to receive(:perform)
let(:indexes) { [instance_double('Gitlab::Database::PostgresIndex')] } subject
let(:reindexers) { [instance_double('Gitlab::Database::Reindexing::ConcurrentReindex')] }
it_behaves_like 'reindexing'
end end
end end
......
...@@ -248,7 +248,7 @@ RSpec.describe 'gitlab:db namespace rake task' do ...@@ -248,7 +248,7 @@ RSpec.describe 'gitlab:db namespace rake task' do
it 'calls the index rebuilder with the proper arguments' do it 'calls the index rebuilder with the proper arguments' do
expect(Gitlab::Database::PostgresIndex).to receive(:by_identifier).with('public.foo_idx').and_return(index) expect(Gitlab::Database::PostgresIndex).to receive(:by_identifier).with('public.foo_idx').and_return(index)
expect(Gitlab::Database::Reindexing).to receive(:perform).with(index) expect(Gitlab::Database::Reindexing).to receive(:perform).with([index])
run_rake_task('gitlab:db:reindex', '[public.foo_idx]') run_rake_task('gitlab:db:reindex', '[public.foo_idx]')
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment