Commit 31bf56d7 authored by Andreas Brandl's avatar Andreas Brandl

Reindexing strategy for PG12

This uses REINDEX CONCURRENTLY which has been introduced with PG12

This is in preparation for
https://gitlab.com/gitlab-org/gitlab/-/issues/329717.
parent 86da56a2
......@@ -24,6 +24,8 @@ module Gitlab
scope :not_match, ->(regex) { where("name !~ ?", regex)}
scope :match, ->(regex) { where("name ~* ?", regex)}
scope :not_recently_reindexed, -> do
recent_actions = Reindexing::ReindexAction.recent.where('index_identifier = identifier')
......
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class ReindexConcurrently
include Gitlab::Utils::StrongMemoize
ReindexError = Class.new(StandardError)
TEMPORARY_INDEX_PATTERN = '\_ccnew[0-9]*'
STATEMENT_TIMEOUT = 9.hours
# When dropping an index, we acquire a SHARE UPDATE EXCLUSIVE lock,
# which only conflicts with DDL and vacuum. We therefore execute this with a rather
# high lock timeout and a long pause in between retries. This is an alternative to
# setting a high statement timeout, which would lead to a long running query with effects
# on e.g. vacuum.
REMOVE_INDEX_RETRY_CONFIG = [[1.minute, 9.minutes]] * 30
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
@index = index
@logger = logger
end
def perform
raise ReindexError, 'indexes serving an exclusion constraint are currently not supported' if index.exclusion?
raise ReindexError, 'index is a left-over temporary index from a previous reindexing run' if index.name =~ /#{TEMPORARY_INDEX_PATTERN}/
# Expression indexes require additional statistics in `pg_statistic`:
# select * from pg_statistic where starelid = (select oid from pg_class where relname = 'some_index');
#
# In PG12, this has been fixed in https://gitlab.com/postgres/postgres/-/commit/b17ff07aa3eb142d2cde2ea00e4a4e8f63686f96.
# Discussion happened in https://www.postgresql.org/message-id/flat/CAFcNs%2BqpFPmiHd1oTXvcPdvAHicJDA9qBUSujgAhUMJyUMb%2BSA%40mail.gmail.com
# following a GitLab.com incident that surfaced this (https://gitlab.com/gitlab-com/gl-infra/production/-/issues/2885).
#
# While this has been backpatched, we continue to disable expression indexes until further review.
raise ReindexError, 'expression indexes are currently not supported' if index.expression?
logger.info "Starting reindex of #{index}"
set_statement_timeout do
execute("REINDEX INDEX CONCURRENTLY #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
end
ensure
cleanup_dangling_indexes
end
private
def cleanup_dangling_indexes
Gitlab::Database::PostgresIndex.match("#{Regexp.escape(index.name)}#{TEMPORARY_INDEX_PATTERN}").each do |lingering_index|
remove_index(lingering_index)
end
end
def remove_index(index)
logger.info("Removing dangling index #{index.identifier}")
retries = Gitlab::Database::WithLockRetriesOutsideTransaction.new(
timing_configuration: REMOVE_INDEX_RETRY_CONFIG,
klass: self.class,
logger: logger
)
retries.run(raise_on_exhaustion: false) do
execute("DROP INDEX CONCURRENTLY IF EXISTS #{quote_table_name(index.schema)}.#{quote_table_name(index.name)}")
end
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(**arguments).run(raise_on_exhaustion: true, &block)
end
def set_statement_timeout
execute("SET statement_timeout TO '%ds'" % STATEMENT_TIMEOUT)
yield
ensure
execute('RESET statement_timeout')
end
delegate :execute, :quote_table_name, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing::ReindexConcurrently, '#perform' do
subject { described_class.new(index, logger: logger).perform }
let(:table_name) { '_test_reindex_table' }
let(:column_name) { '_test_column' }
let(:index_name) { '_test_reindex_index' }
let(:index) { instance_double(Gitlab::Database::PostgresIndex, indexrelid: 42, name: index_name, schema: 'public', tablename: table_name, partitioned?: false, unique?: false, exclusion?: false, expression?: false, definition: 'CREATE INDEX _test_reindex_index ON public._test_reindex_table USING btree (_test_column)') }
let(:logger) { double('logger', debug: nil, info: nil, error: nil ) }
let(:connection) { ActiveRecord::Base.connection }
before do
connection.execute(<<~SQL)
CREATE TABLE #{table_name} (
id serial NOT NULL PRIMARY KEY,
#{column_name} integer NOT NULL);
CREATE INDEX #{index.name} ON #{table_name} (#{column_name});
SQL
end
context 'when the index serves an exclusion constraint' do
before do
allow(index).to receive(:exclusion?).and_return(true)
end
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /indexes serving an exclusion constraint are currently not supported/)
end
end
context 'when attempting to reindex an expression index' do
before do
allow(index).to receive(:expression?).and_return(true)
end
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /expression indexes are currently not supported/)
end
end
context 'when the index is a dangling temporary index from a previous reindexing run' do
context 'with the temporary index prefix' do
let(:index_name) { '_test_reindex_index_ccnew' }
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /left-over temporary index/)
end
end
context 'with the temporary index prefix with a counter' do
let(:index_name) { '_test_reindex_index_ccnew1' }
it 'raises an error' do
expect { subject }.to raise_error(described_class::ReindexError, /left-over temporary index/)
end
end
end
it 'recreates the index using REINDEX with a long statement timeout' do
expect_to_execute_in_order(
"SET statement_timeout TO '32400s'",
"REINDEX INDEX CONCURRENTLY \"public\".\"#{index.name}\"",
"RESET statement_timeout"
)
subject
end
context 'with dangling indexes matching TEMPORARY_INDEX_PATTERN, i.e. /some\_index\_ccnew(\d)*/' do
before do
# dangling indexes
connection.execute("CREATE INDEX #{index_name}_ccnew ON #{table_name} (#{column_name})")
connection.execute("CREATE INDEX #{index_name}_ccnew2 ON #{table_name} (#{column_name})")
# Unrelated index - don't drop
connection.execute("CREATE INDEX some_other_index_ccnew ON #{table_name} (#{column_name})")
end
it 'drops the dangling indexes while controlling lock_timeout' do
expect_to_execute_in_order(
# Regular index rebuild
"SET statement_timeout TO '32400s'",
"REINDEX INDEX CONCURRENTLY \"public\".\"#{index.name}\"",
"RESET statement_timeout",
# Drop _ccnew index
"SET lock_timeout TO '60000ms'",
"DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"#{index.name}_ccnew\"",
"RESET idle_in_transaction_session_timeout; RESET lock_timeout",
# Drop _ccnew2 index
"SET lock_timeout TO '60000ms'",
"DROP INDEX CONCURRENTLY IF EXISTS \"public\".\"#{index.name}_ccnew2\"",
"RESET idle_in_transaction_session_timeout; RESET lock_timeout"
)
subject
end
end
def expect_to_execute_in_order(*queries)
# Indexes cannot be created CONCURRENTLY in a transaction. Since the tests are wrapped in transactions,
# verify the original call but pass through the non-concurrent form.
queries.each do |query|
expect(connection).to receive(:execute).with(query).ordered.and_wrap_original do |method, sql|
method.call(sql.sub(/CONCURRENTLY/, ''))
end
end
end
def expect_index_rename(from, to)
expect(connection).to receive(:execute).with(<<~SQL).ordered
ALTER INDEX "public"."#{from}"
RENAME TO "#{to}"
SQL
end
def expect_index_drop(drop_index)
expect_next_instance_of(::Gitlab::Database::WithLockRetries) do |instance|
expect(instance).to receive(:run).with(raise_on_exhaustion: false).and_yield
end
expect_to_execute_concurrently_in_order(drop_index)
end
def find_index_create_statement
ActiveRecord::Base.connection.select_value(<<~SQL)
SELECT indexdef
FROM pg_indexes
WHERE schemaname = 'public'
AND indexname = #{ActiveRecord::Base.connection.quote(index.name)}
SQL
end
def check_index_exists
expect(find_index_create_statement).to eq(original_index)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment