Commit 993de3f7 authored by Andreas Brandl's avatar Andreas Brandl Committed by Bob Van Landuyt

Create module for reindexing

This introduces a namespace for the reindexing code
parent c1fc2b37
# frozen_string_literal: true
module Gitlab
module Database
class ConcurrentReindex
include Gitlab::Utils::StrongMemoize
include MigrationHelpers
ReindexError = Class.new(StandardError)
PG_IDENTIFIER_LENGTH = 63
TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
REPLACED_INDEX_PREFIX = 'old_reindex_'
attr_reader :index_name, :logger
def initialize(index_name, logger:)
@index_name = index_name
@logger = logger
end
def perform
raise ReindexError, "index #{index_name} does not exist" unless index_exists?
raise ReindexError, 'UNIQUE indexes are currently not supported' if index_unique?
logger.debug("dropping dangling index from previous run: #{replacement_index_name}")
remove_replacement_index
begin
create_replacement_index
unless replacement_index_valid?
message = 'replacement index was created as INVALID'
logger.error("#{message}, cleaning up")
raise ReindexError, "failed to reindex #{index_name}: #{message}"
end
swap_replacement_index
rescue Gitlab::Database::WithLockRetries::AttemptsExhaustedError => e
logger.error('failed to obtain the required database locks to swap the indexes, cleaning up')
raise ReindexError, e.message
rescue ActiveRecord::ActiveRecordError, PG::Error => e
logger.error("database error while attempting reindex of #{index_name}: #{e.message}")
raise ReindexError, e.message
ensure
logger.info("dropping unneeded replacement index: #{replacement_index_name}")
remove_replacement_index
end
end
private
delegate :execute, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
end
def replacement_index_name
@replacement_index_name ||= constrained_index_name(TEMPORARY_INDEX_PREFIX)
end
def index
strong_memoize(:index) do
find_index(index_name)
end
end
def index_exists?
!index.nil?
end
def index_unique?
index.indisunique
end
def constrained_index_name(prefix)
"#{prefix}#{index_name}".slice(0, PG_IDENTIFIER_LENGTH)
end
def create_replacement_index
create_replacement_index_statement = index.indexdef
.sub(/CREATE INDEX/, 'CREATE INDEX CONCURRENTLY')
.sub(/#{index_name}/, replacement_index_name)
logger.info("creating replacement index #{replacement_index_name}")
logger.debug("replacement index definition: #{create_replacement_index_statement}")
disable_statement_timeout do
connection.execute(create_replacement_index_statement)
end
end
def replacement_index_valid?
find_index(replacement_index_name).indisvalid
end
def find_index(index_name)
record = connection.select_one(<<~SQL)
SELECT
pg_index.indisunique,
pg_index.indisvalid,
pg_indexes.indexdef
FROM pg_index
INNER JOIN pg_class ON pg_class.oid = pg_index.indexrelid
INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
INNER JOIN pg_indexes ON pg_class.relname = pg_indexes.indexname
WHERE pg_namespace.nspname = 'public'
AND pg_class.relname = #{connection.quote(index_name)}
SQL
OpenStruct.new(record) if record
end
def swap_replacement_index
replaced_index_name = constrained_index_name(REPLACED_INDEX_PREFIX)
logger.info("swapping replacement index #{replacement_index_name} with #{index_name}")
with_lock_retries do
rename_index(index_name, replaced_index_name)
rename_index(replacement_index_name, index_name)
rename_index(replaced_index_name, replacement_index_name)
end
end
def rename_index(old_index_name, new_index_name)
connection.execute("ALTER INDEX #{old_index_name} RENAME TO #{new_index_name}")
end
def remove_replacement_index
disable_statement_timeout do
connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{replacement_index_name}")
end
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(arguments).run(raise_on_exhaustion: true, &block)
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class ConcurrentReindex
include Gitlab::Utils::StrongMemoize
include MigrationHelpers
ReindexError = Class.new(StandardError)
PG_IDENTIFIER_LENGTH = 63
TEMPORARY_INDEX_PREFIX = 'tmp_reindex_'
REPLACED_INDEX_PREFIX = 'old_reindex_'
attr_reader :index, :logger
def initialize(index, logger: Gitlab::AppLogger)
@index = index
@logger = logger
end
def perform
raise ReindexError, 'UNIQUE indexes are currently not supported' if index.unique?
with_rebuilt_index do |replacement_index|
swap_index(replacement_index)
end
end
private
def with_rebuilt_index
logger.debug("dropping dangling index from previous run (if it exists): #{replacement_index_name}")
remove_replacement_index
create_replacement_index_statement = index.definition
.sub(/CREATE INDEX/, 'CREATE INDEX CONCURRENTLY')
.sub(/#{index.name}/, replacement_index_name)
logger.info("creating replacement index #{replacement_index_name}")
logger.debug("replacement index definition: #{create_replacement_index_statement}")
disable_statement_timeout do
connection.execute(create_replacement_index_statement)
end
replacement_index = Index.find_with_schema("#{index.schema}.#{replacement_index_name}")
unless replacement_index.valid?
message = 'replacement index was created as INVALID'
logger.error("#{message}, cleaning up")
raise ReindexError, "failed to reindex #{index}: #{message}"
end
yield replacement_index
rescue Gitlab::Database::WithLockRetries::AttemptsExhaustedError => e
logger.error('failed to obtain the required database locks to swap the indexes, cleaning up')
raise ReindexError, e.message
rescue ActiveRecord::ActiveRecordError, PG::Error => e
logger.error("database error while attempting reindex of #{index}: #{e.message}")
raise ReindexError, e.message
ensure
logger.info("dropping unneeded replacement index: #{replacement_index_name}")
remove_replacement_index
end
def swap_index(replacement_index)
replaced_index_name = constrained_index_name(REPLACED_INDEX_PREFIX)
logger.info("swapping replacement index #{replacement_index} with #{index}")
with_lock_retries do
rename_index(index.name, replaced_index_name)
rename_index(replacement_index.name, index.name)
rename_index(replaced_index_name, replacement_index.name)
end
end
def rename_index(old_index_name, new_index_name)
connection.execute("ALTER INDEX #{old_index_name} RENAME TO #{new_index_name}")
end
def remove_replacement_index
disable_statement_timeout do
connection.execute("DROP INDEX CONCURRENTLY IF EXISTS #{replacement_index_name}")
end
end
def replacement_index_name
@replacement_index_name ||= constrained_index_name(TEMPORARY_INDEX_PREFIX)
end
def constrained_index_name(prefix)
"#{prefix}#{index.name}".slice(0, PG_IDENTIFIER_LENGTH)
end
def with_lock_retries(&block)
arguments = { klass: self.class, logger: logger }
Gitlab::Database::WithLockRetries.new(arguments).run(raise_on_exhaustion: true, &block)
end
delegate :execute, to: :connection
def connection
@connection ||= ActiveRecord::Base.connection
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
module Database
module Reindexing
class Index
def self.find_with_schema(full_name)
raise ArgumentError, "Index name is not fully qualified with a schema: #{full_name}" unless full_name =~ /^\w+\.\w+$/
schema, index = full_name.split('.')
record = ActiveRecord::Base.connection.select_one(<<~SQL)
SELECT
pg_index.indisunique as is_unique,
pg_index.indisvalid as is_valid,
pg_indexes.indexdef as definition,
pg_namespace.nspname as schema,
pg_class.relname as name
FROM pg_index
INNER JOIN pg_class ON pg_class.oid = pg_index.indexrelid
INNER JOIN pg_namespace ON pg_class.relnamespace = pg_namespace.oid
INNER JOIN pg_indexes ON pg_class.relname = pg_indexes.indexname
WHERE pg_namespace.nspname = #{ActiveRecord::Base.connection.quote(schema)}
AND pg_class.relname = #{ActiveRecord::Base.connection.quote(index)}
SQL
return unless record
new(OpenStruct.new(record))
end
delegate :definition, :schema, :name, to: :@attrs
def initialize(attrs)
@attrs = attrs
end
def unique?
@attrs.is_unique
end
def valid?
@attrs.is_valid
end
def to_s
name
end
end
end
end
end
...@@ -170,13 +170,18 @@ namespace :gitlab do ...@@ -170,13 +170,18 @@ namespace :gitlab do
desc 'reindex a regular (non-unique) index without downtime to eliminate bloat' desc 'reindex a regular (non-unique) index without downtime to eliminate bloat'
task :reindex, [:index_name] => :environment do |_, args| task :reindex, [:index_name] => :environment do |_, args|
unless Feature.enabled?(:database_reindexing, type: :ops) unless Feature.enabled?(:database_reindexing, type: :ops)
puts "This feature (database_reindexing) is currently disabled.".yellow puts "This feature (database_reindexing) is currently disabled.".color(:yellow)
exit exit
end end
raise ArgumentError, 'must give the index name to reindex' unless args[:index_name] raise ArgumentError, 'must give the index name to reindex' unless args[:index_name]
Gitlab::Database::ConcurrentReindex.new(args[:index_name], logger: Logger.new(STDOUT)).perform index = Gitlab::Database::Reindexing::Index.find_with_schema(args[:index_name])
raise ArgumentError, "Given index does not exist: #{args[:index_name]}" unless index
puts "Rebuilding index #{index}".color(:green)
Gitlab::Database::Reindexing::ConcurrentReindex.new(index).perform
end end
end end
end end
...@@ -2,12 +2,13 @@ ...@@ -2,12 +2,13 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do RSpec.describe Gitlab::Database::Reindexing::ConcurrentReindex, '#perform' do
subject { described_class.new(index_name, logger: logger) } subject { described_class.new(index, logger: logger) }
let(:table_name) { '_test_reindex_table' } let(:table_name) { '_test_reindex_table' }
let(:column_name) { '_test_column' } let(:column_name) { '_test_column' }
let(:index_name) { '_test_reindex_index' } let(:index_name) { '_test_reindex_index' }
let(:index) { double('index', name: index_name, schema: 'public', unique?: false, definition: 'CREATE INDEX _test_reindex_index ON public._test_reindex_table USING btree (_test_column)') }
let(:logger) { double('logger', debug: nil, info: nil, error: nil ) } let(:logger) { double('logger', debug: nil, info: nil, error: nil ) }
let(:connection) { ActiveRecord::Base.connection } let(:connection) { ActiveRecord::Base.connection }
...@@ -17,29 +18,12 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -17,29 +18,12 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
id serial NOT NULL PRIMARY KEY, id serial NOT NULL PRIMARY KEY,
#{column_name} integer NOT NULL); #{column_name} integer NOT NULL);
CREATE INDEX #{index_name} ON #{table_name} (#{column_name}); CREATE INDEX #{index.name} ON #{table_name} (#{column_name});
SQL SQL
end end
context 'when the index does not exist' do
before do
connection.execute(<<~SQL)
DROP INDEX #{index_name}
SQL
end
it 'raises an error' do
expect { subject.perform }.to raise_error(described_class::ReindexError, /does not exist/)
end
end
context 'when the index is unique' do context 'when the index is unique' do
before do let(:index) { double('index', name: index_name, unique?: true, definition: 'CREATE INDEX _test_reindex_index ON public._test_reindex_table USING btree (_test_column)') }
connection.execute(<<~SQL)
DROP INDEX #{index_name};
CREATE UNIQUE INDEX #{index_name} ON #{table_name} (#{column_name})
SQL
end
it 'raises an error' do it 'raises an error' do
expect do expect do
...@@ -83,8 +67,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -83,8 +67,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
expect(instance).to receive(:run).with(raise_on_exhaustion: true).and_yield expect(instance).to receive(:run).with(raise_on_exhaustion: true).and_yield
end end
expect_to_execute_in_order("ALTER INDEX #{index_name} RENAME TO #{replaced_name}") expect_to_execute_in_order("ALTER INDEX #{index.name} RENAME TO #{replaced_name}")
expect_to_execute_in_order("ALTER INDEX #{replacement_name} RENAME TO #{index_name}") expect_to_execute_in_order("ALTER INDEX #{replacement_name} RENAME TO #{index.name}")
expect_to_execute_in_order("ALTER INDEX #{replaced_name} RENAME TO #{replacement_name}") expect_to_execute_in_order("ALTER INDEX #{replaced_name} RENAME TO #{replacement_name}")
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
...@@ -109,8 +93,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -109,8 +93,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
expect(instance).to receive(:run).with(raise_on_exhaustion: true).and_yield expect(instance).to receive(:run).with(raise_on_exhaustion: true).and_yield
end end
expect_to_execute_in_order("ALTER INDEX #{index_name} RENAME TO #{replaced_name}") expect_to_execute_in_order("ALTER INDEX #{index.name} RENAME TO #{replaced_name}")
expect_to_execute_in_order("ALTER INDEX #{replacement_name} RENAME TO #{index_name}") expect_to_execute_in_order("ALTER INDEX #{replacement_name} RENAME TO #{index.name}")
expect_to_execute_in_order("ALTER INDEX #{replaced_name} RENAME TO #{replacement_name}") expect_to_execute_in_order("ALTER INDEX #{replaced_name} RENAME TO #{replacement_name}")
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
...@@ -141,7 +125,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -141,7 +125,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
expect_to_execute_concurrently_in_order(create_index) expect_to_execute_concurrently_in_order(create_index)
expect(subject).to receive(:replacement_index_valid?).and_return(false) replacement_index = double('replacement index', valid?: false)
allow(Gitlab::Database::Reindexing::Index).to receive(:find_with_schema).with("public.#{replacement_name}").and_return(replacement_index)
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
...@@ -161,8 +146,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -161,8 +146,8 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
end end
expect(connection).to receive(:execute).ordered expect(connection).to receive(:execute).ordered
.with("ALTER INDEX #{index_name} RENAME TO #{replaced_name}") .with("ALTER INDEX #{index.name} RENAME TO #{replaced_name}")
.and_raise(ActiveRecord::ConnectionTimeoutError, 'connect timeout') .and_raise(ActiveRecord::ConnectionTimeoutError, 'connect timeout')
expect_to_execute_concurrently_in_order(drop_index) expect_to_execute_concurrently_in_order(drop_index)
...@@ -209,7 +194,7 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do ...@@ -209,7 +194,7 @@ RSpec.describe Gitlab::Database::ConcurrentReindex, '#perform' do
SELECT indexdef SELECT indexdef
FROM pg_indexes FROM pg_indexes
WHERE schemaname = 'public' WHERE schemaname = 'public'
AND indexname = #{ActiveRecord::Base.connection.quote(index_name)} AND indexname = #{ActiveRecord::Base.connection.quote(index.name)}
SQL SQL
end end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Database::Reindexing::Index do
before do
ActiveRecord::Base.connection.execute(<<~SQL)
CREATE INDEX foo_idx ON public.users (name);
CREATE UNIQUE INDEX bar_key ON public.users (id);
CREATE TABLE example_table (id serial primary key);
SQL
end
def find(name)
described_class.find_with_schema(name)
end
describe '.find_with_schema' do
it 'returns an instance of Gitlab::Database::Reindexing::Index when the index is present' do
expect(find('public.foo_idx')).to be_a(Gitlab::Database::Reindexing::Index)
end
it 'returns nil if the index is not present' do
expect(find('public.idontexist')).to be_nil
end
it 'raises ArgumentError if given a non-fully qualified index name' do
expect { find('foo') }.to raise_error(ArgumentError, /not fully qualified/)
end
end
describe '#unique?' do
it 'returns true for a unique index' do
expect(find('public.bar_key')).to be_unique
end
it 'returns false for a regular, non-unique index' do
expect(find('public.foo_idx')).not_to be_unique
end
it 'returns true for a primary key index' do
expect(find('public.example_table_pkey')).to be_unique
end
end
describe '#valid?' do
it 'returns true if the index is valid' do
expect(find('public.foo_idx')).to be_valid
end
it 'returns false if the index is marked as invalid' do
ActiveRecord::Base.connection.execute(<<~SQL)
UPDATE pg_index SET indisvalid=false
FROM pg_class
WHERE pg_class.relname = 'foo_idx' AND pg_index.indexrelid = pg_class.oid
SQL
expect(find('public.foo_idx')).not_to be_valid
end
end
describe '#to_s' do
it 'returns the index name' do
expect(find('public.foo_idx').to_s).to eq('foo_idx')
end
end
describe '#name' do
it 'returns the name' do
expect(find('public.foo_idx').name).to eq('foo_idx')
end
end
describe '#schema' do
it 'returns the index schema' do
expect(find('public.foo_idx').schema).to eq('public')
end
end
describe '#definition' do
it 'returns the index definition' do
expect(find('public.foo_idx').definition).to eq('CREATE INDEX foo_idx ON public.users USING btree (name)')
end
end
end
...@@ -173,16 +173,23 @@ RSpec.describe 'gitlab:db namespace rake task' do ...@@ -173,16 +173,23 @@ RSpec.describe 'gitlab:db namespace rake task' do
end end
end end
it 'calls the index rebuilder with the proper arguments' do context 'with index name given' do
reindex = double('rebuilder') let(:index) { double('index') }
let(:reindex) { double('reindex') }
expect(Gitlab::Database::ConcurrentReindex).to receive(:new) it 'calls the index rebuilder with the proper arguments' do
.with('some_index_name', logger: instance_of(Logger)) expect(Gitlab::Database::Reindexing::Index).to receive(:find_with_schema).with('public.foo_idx').and_return(index)
.and_return(reindex) expect(Gitlab::Database::Reindexing::ConcurrentReindex).to receive(:new).with(index, any_args).and_return(reindex)
expect(reindex).to receive(:perform)
expect(reindex).to receive(:perform) run_rake_task('gitlab:db:reindex', '[public.foo_idx]')
end
it 'raises an error if the index does not exist' do
expect(Gitlab::Database::Reindexing::Index).to receive(:find_with_schema).with('public.absent_index').and_return(nil)
run_rake_task('gitlab:db:reindex', '[some_index_name]') expect { run_rake_task('gitlab:db:reindex', '[public.absent_index]') }.to raise_error(ArgumentError, /index does not exist/)
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment