Commit b62f5796 authored by Kamil Trzciński

Fix `DbCleaner` migration helper to recreate DBs

The current `DatabaseTasks.migrate` call is not aware of multiple
databases. This changes the implementation to:

- check each attached DB in a migration context
- recreate and migrate all databases in a consistent way
parent f54f18c0
...@@ -12,6 +12,13 @@ module Ci ...@@ -12,6 +12,13 @@ module Ci
if Gitlab::Database.has_config?(:ci) if Gitlab::Database.has_config?(:ci)
connects_to database: { writing: :ci, reading: :ci } connects_to database: { writing: :ci, reading: :ci }
# Class-level `connection` override that returns `retrieve_connection` directly.
#
# TODO: Load Balancing messes with `CiDatabaseRecord`, making it return
# the wrong connection. This override is to be removed once the following
# is merged:
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/67773
def self.connection
retrieve_connection
end
end end
end end
end end
...@@ -24,16 +24,6 @@ RSpec.describe Gitlab::Database::SchemaMigrations::Context do ...@@ -24,16 +24,6 @@ RSpec.describe Gitlab::Database::SchemaMigrations::Context do
end end
context 'multiple databases', :reestablished_active_record_base do context 'multiple databases', :reestablished_active_record_base do
let(:connection_class) do
Class.new(::ApplicationRecord) do
self.abstract_class = true
def self.name
'Gitlab::Database::SchemaMigrations::Context::TestConnection'
end
end
end
before do before do
connection_class.establish_connection( connection_class.establish_connection(
ActiveRecord::Base ActiveRecord::Base
...@@ -44,10 +34,6 @@ RSpec.describe Gitlab::Database::SchemaMigrations::Context do ...@@ -44,10 +34,6 @@ RSpec.describe Gitlab::Database::SchemaMigrations::Context do
) )
end end
after do
connection_class.remove_connection
end
context 'when `schema_migrations_path` is configured as string' do context 'when `schema_migrations_path` is configured as string' do
let(:configuration_overrides) do let(:configuration_overrides) do
{ "schema_migrations_path" => "db/ci_schema_migrations" } { "schema_migrations_path" => "db/ci_schema_migrations" }
......
...@@ -17,32 +17,9 @@ RSpec.configure do |config| ...@@ -17,32 +17,9 @@ RSpec.configure do |config|
delete_from_all_tables!(except: ['work_item_types']) delete_from_all_tables!(except: ['work_item_types'])
# Postgres maximum number of columns in a table is 1600 (https://github.com/postgres/postgres/blob/de41869b64d57160f58852eab20a27f248188135/src/include/access/htup_details.h#L23-L47). # Postgres maximum number of columns in a table is 1600 (https://github.com/postgres/postgres/blob/de41869b64d57160f58852eab20a27f248188135/src/include/access/htup_details.h#L23-L47).
# And since:
# "The DROP COLUMN form does not physically remove the column, but simply makes
# it invisible to SQL operations. Subsequent insert and update operations in the
# table will store a null value for the column. Thus, dropping a column is quick
# but it will not immediately reduce the on-disk size of your table, as the space
# occupied by the dropped column is not reclaimed.
# The space will be reclaimed over time as existing rows are updated."
# according to https://www.postgresql.org/docs/current/sql-altertable.html.
# We drop and recreate the database if any table has more than 1200 columns, just to be safe. # We drop and recreate the database if any table has more than 1200 columns, just to be safe.
max_allowed_columns = 1200 if any_connection_class_with_more_than_allowed_columns?
tables_with_more_than_allowed_columns = recreate_all_databases!
ApplicationRecord.connection.execute("SELECT attrelid::regclass::text AS table, COUNT(*) AS column_count FROM pg_attribute GROUP BY attrelid HAVING COUNT(*) > #{max_allowed_columns}")
if tables_with_more_than_allowed_columns.any?
tables_with_more_than_allowed_columns.each do |result|
puts "The #{result['table']} table has #{result['column_count']} columns."
end
puts "Recreating the database"
start = Gitlab::Metrics::System.monotonic_time
ActiveRecord::Tasks::DatabaseTasks.drop_current
ActiveRecord::Tasks::DatabaseTasks.create_current
ActiveRecord::Tasks::DatabaseTasks.load_schema_current
ActiveRecord::Tasks::DatabaseTasks.migrate
puts "Database re-creation done in #{Gitlab::Metrics::System.monotonic_time - start}"
end end
end end
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
module DbCleaner module DbCleaner
def all_connection_classes def all_connection_classes
::ActiveRecord::Base.connection_handler.connection_pool_names.map(&:constantize) ::BeforeAllAdapter.all_connection_classes
end end
def delete_from_all_tables!(except: []) def delete_from_all_tables!(except: [])
...@@ -20,6 +20,79 @@ module DbCleaner ...@@ -20,6 +20,79 @@ module DbCleaner
DatabaseCleaner[:active_record, { connection: connection_class }] DatabaseCleaner[:active_record, { connection: connection_class }]
end end
end end
# Tells whether at least one of `all_connection_classes` is reported by
# `more_than_allowed_columns?` as exceeding the column limit.
#
# Evaluation stops at the first connection class that exceeds the limit.
def any_connection_class_with_more_than_allowed_columns?
  all_connection_classes.any? { |klass| more_than_allowed_columns?(klass) }
end
# Reports whether the database behind `connection_class` contains a table
# whose `pg_attribute` row count exceeds the allowed number of columns.
# Every offending table is printed together with the database config name.
#
# Background: Postgres caps a table at 1600 columns
# (https://github.com/postgres/postgres/blob/de41869b64d57160f58852eab20a27f248188135/src/include/access/htup_details.h#L23-L47).
# Per https://www.postgresql.org/docs/current/sql-altertable.html, DROP COLUMN
# does not physically remove a column; it only hides it from SQL operations,
# and the space is reclaimed over time as existing rows are updated.
# A threshold of 1200 is used to stay safely below the hard limit; callers
# drop and recreate the database when it is crossed.
def more_than_allowed_columns?(connection_class)
  column_limit = 1200

  query = <<~SQL
    SELECT attrelid::regclass::text AS table, COUNT(*) AS column_count
    FROM pg_attribute
    GROUP BY attrelid
    HAVING COUNT(*) > #{column_limit}
  SQL

  oversized_tables = connection_class.connection.execute(query)

  oversized_tables.each do |row|
    puts "The #{row['table']} (#{connection_class.connection_db_config.name}) table has #{row['column_count']} columns."
  end

  oversized_tables.any?
end
# Rebuilds the databases from scratch and prints how long it took.
#
# Steps, in order: force-disconnect every connection, run the
# `drop_current`, `create_current` and `load_schema_current` database tasks,
# then run `migrate` once per connection class.
def recreate_all_databases!
  started_at = Gitlab::Metrics::System.monotonic_time

  puts "Recreating the database"

  force_disconnect_all_connections!

  tasks = ActiveRecord::Tasks::DatabaseTasks
  tasks.drop_current
  tasks.create_current
  tasks.load_schema_current

  # `migrate` is run separately for each connection class, with
  # `ActiveRecord::Base` pointed at that class's database config first.
  with_reestablished_active_record_base do
    all_connection_classes.each do |klass|
      db_config = klass.connection_db_config
      ActiveRecord::Base.establish_connection(db_config)
      tasks.migrate
    end
  end

  elapsed = Gitlab::Metrics::System.monotonic_time - started_at
  puts "Databases re-creation done in #{elapsed}"
end
# For every class in `all_connection_classes`: terminates all other backends
# attached to that class's database, then disconnects the class's own
# connection pool.
def force_disconnect_all_connections!
  all_connection_classes.each do |connection_class|
    # Work through `connection_pool` rather than the Load Balancer,
    # because the Load Balancer retries operations.
    db_pool = connection_class.connection_pool

    # Terminate every other session on this database, see
    # https://www.cybertec-postgresql.com/en/terminating-database-connections-in-postgresql/
    terminate_sql = <<~SQL
      SELECT pg_terminate_backend(pid)
      FROM pg_stat_activity
      WHERE datname = #{db_pool.connection.quote(db_pool.db_config.database)}
      AND pid != pg_backend_pid();
    SQL
    db_pool.connection.execute(terminate_sql)

    connection_class.connection_pool.disconnect!
  end
end
end end
DbCleaner.prepend_mod_with('DbCleaner') DbCleaner.prepend_mod_with('DbCleaner')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment