Commit a97dcc07 authored by Yorick Peterse's avatar Yorick Peterse

Add method for creating foreign keys concurrently

This method allows one to create foreign keys without blocking access to
the source table, but only on PostgreSQL.

When creating a regular foreign key the "ALTER TABLE" statement used for
this won't return until all data has been validated. This statement in
turn will acquire a lock on the source table. As a result this lock can
be held for quite a long amount of time, depending on the number of rows
and system load.

By breaking up the foreign key creation process in two steps (creation,
and validation) we can reduce the amount of locking to a minimum.
Locking is still necessary for the "ALTER TABLE" statement that adds the
constraint, but this is a fast process and so will only block access for
a few milliseconds.
parent 58131ac9
......@@ -26,11 +26,59 @@ module Gitlab
add_index(table_name, column_name, options)
end
# Adds a foreign key with only minimal locking on the tables involved.
#
# This method only requires minimal locking when using PostgreSQL. When
# using MySQL this method will use Rails' default `add_foreign_key`.
#
# source - The source table containing the foreign key.
# target - The target table the key points to.
# column - The name of the column to create the foreign key on.
# on_delete - The action to perform when associated data is removed,
# defaults to "CASCADE".
def add_concurrent_foreign_key(source, target, column:, on_delete: :cascade)
# Transactions would result in ALTER TABLE locks being held for the
# duration of the transaction, defeating the purpose of this method.
if transaction_open?
raise 'add_concurrent_foreign_key can not be run inside a transaction'
end
# While MySQL does allow disabling of foreign keys it has no equivalent
# of PostgreSQL's "VALIDATE CONSTRAINT". As a result we'll just fall
# back to the normal foreign key procedure.
if Database.mysql?
return add_foreign_key(source, target,
column: column,
on_delete: on_delete)
end
disable_statement_timeout
key_name = "fk_#{source}_#{target}_#{column}"
# Using NOT VALID allows us to create a key without immediately
# validating it. This means we keep the ALTER TABLE lock only for a
# short period of time. The key _is_ enforced for any newly created
# data.
execute <<-EOF.strip_heredoc
ALTER TABLE #{source}
ADD CONSTRAINT #{key_name}
FOREIGN KEY (#{column})
REFERENCES #{target} (id)
ON DELETE #{on_delete} NOT VALID;
EOF
# Validate the existing constraint. This can potentially take a very
# long time to complete, but fortunately does not lock the source table
# while running.
execute("ALTER TABLE #{source} VALIDATE CONSTRAINT #{key_name};")
end
# Long-running migrations may take more than the timeout allowed by
# the database. Disable the session's statement timeout to ensure
# migrations don't get killed prematurely. (PostgreSQL only)
def disable_statement_timeout
ActiveRecord::Base.connection.execute('SET statement_timeout TO 0') if Database.postgresql?
execute('SET statement_timeout TO 0') if Database.postgresql?
end
# Updates the value of a column in batches.
......
......@@ -12,15 +12,14 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
describe '#add_concurrent_index' do
context 'outside a transaction' do
before do
expect(model).to receive(:transaction_open?).and_return(false)
unless Gitlab::Database.postgresql?
allow_any_instance_of(Gitlab::Database::MigrationHelpers).to receive(:disable_statement_timeout)
end
allow(model).to receive(:transaction_open?).and_return(false)
end
context 'using PostgreSQL' do
before { expect(Gitlab::Database).to receive(:postgresql?).and_return(true) }
before do
allow(Gitlab::Database).to receive(:postgresql?).and_return(true)
allow(model).to receive(:disable_statement_timeout)
end
it 'creates the index concurrently' do
expect(model).to receive(:add_index).
......@@ -59,6 +58,71 @@ describe Gitlab::Database::MigrationHelpers, lib: true do
end
end
describe '#add_concurrent_foreign_key' do
context 'inside a transaction' do
it 'raises an error' do
expect(model).to receive(:transaction_open?).and_return(true)
expect do
model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
end.to raise_error(RuntimeError)
end
end
context 'outside a transaction' do
before do
allow(model).to receive(:transaction_open?).and_return(false)
end
context 'using MySQL' do
it 'creates a regular foreign key' do
allow(Gitlab::Database).to receive(:mysql?).and_return(true)
expect(model).to receive(:add_foreign_key).
with(:projects, :users, column: :user_id, on_delete: :cascade)
model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
end
end
context 'using PostgreSQL' do
before do
allow(Gitlab::Database).to receive(:mysql?).and_return(false)
end
it 'creates a concurrent foreign key' do
expect(model).to receive(:disable_statement_timeout)
expect(model).to receive(:execute).ordered.with(/NOT VALID/)
expect(model).to receive(:execute).ordered.with(/VALIDATE CONSTRAINT/)
model.add_concurrent_foreign_key(:projects, :users, column: :user_id)
end
end
end
end
describe '#disable_statement_timeout' do
context 'using PostgreSQL' do
it 'disables statement timeouts' do
expect(Gitlab::Database).to receive(:postgresql?).and_return(true)
expect(model).to receive(:execute).with('SET statement_timeout TO 0')
model.disable_statement_timeout
end
end
context 'using MySQL' do
it 'does nothing' do
expect(Gitlab::Database).to receive(:postgresql?).and_return(false)
expect(model).not_to receive(:execute)
model.disable_statement_timeout
end
end
end
describe '#update_column_in_batches' do
before do
create_list(:empty_project, 5)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment