Commit 8c0d6171 authored by Thong Kuah's avatar Thong Kuah

Merge branch 'validate-database-tasks' into 'master'

Add `gitlab:db:validate_config` to ensure the proper `database_tasks:` is set

See merge request gitlab-org/gitlab!83118
parents 85ef8e39 5d8eeefc
......@@ -271,6 +271,17 @@ module Gitlab
db_config&.name || 'unknown'
end
# Currently the database configuration can only be shared with `main:`
# If the `database_tasks: false` is being used
# This is to be refined: https://gitlab.com/gitlab-org/gitlab/-/issues/356580
def self.db_config_share_with(db_config)
if db_config.database_tasks?
nil # no sharing
else
'main' # share with `main:`
end
end
def self.read_only?
false
end
......
# frozen_string_literal: true
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
namespace :gitlab do
namespace :db do
desc 'Validates `config/database.yml` to ensure a correct behavior is configured'
task validate_config: :environment do
should_reconnect = ActiveRecord::Base.connection_pool.active_connection?
# The include_replicas: is a legacy name to fetch all hidden entries (replica: true or database_tasks: false)
# Once we upgrade to Rails 7.x this should be changed to `include_hidden: true`
# Ref.: https://github.com/rails/rails/blob/f2d9316ba965e150ad04596085ee10eea4f58d3e/activerecord/lib/active_record/database_configurations.rb#L48
db_configs = ActiveRecord::Base.configurations.configs_for(env_name: Rails.env, include_replicas: true)
db_configs = db_configs.reject(&:replica?)
# Map each database connection into unique identifier of system+database
all_connections = db_configs.map do |db_config|
identifier =
begin
ActiveRecord::Base.establish_connection(db_config) # rubocop: disable Database/EstablishConnection
ActiveRecord::Base.connection.select_one("SELECT system_identifier, current_database() FROM pg_control_system()")
rescue ActiveRecord::NoDatabaseError, PG::ConnectionBad
end
{
name: db_config.name,
config: db_config,
database_tasks?: db_config.database_tasks?,
identifier: identifier
}
end.compact
unique_connections = all_connections.group_by { |connection| connection[:identifier] }
primary_connection = all_connections.find { |connection| ActiveRecord::Base.configurations.primary?(connection[:name]) }
named_connections = all_connections.index_by { |connection| connection[:name] }
warnings = []
# The `main:` should always have `database_tasks: true`
unless primary_connection[:database_tasks?]
warnings << "- The '#{primary_connection[:name]}' is required to use 'database_tasks: true'"
end
# Each unique database should have exactly one configuration with `database_tasks: true`
unique_connections.each do |identifier, connections|
next unless identifier
connections_with_tasks = connections.select { |connection| connection[:database_tasks?] }
if connections_with_tasks.many?
names = connections_with_tasks.pluck(:name)
warnings << "- Many configurations (#{names.join(', ')}) " \
"share the same database (#{identifier}). " \
"This will result in failures provisioning or migrating this database. " \
"Ensure that additional databases are configured " \
"with 'database_tasks: false' or are pointing to a dedicated database host."
end
end
# Each configuration with `database_tasks: false` should share the database with `main:`
all_connections.each do |connection|
share_with = Gitlab::Database.db_config_share_with(connection[:config])
next unless share_with
shared_connection = named_connections[share_with]
unless shared_connection
warnings << "- The '#{connection[:name]}' is expecting to share configuration with '#{share_with}', " \
"but no such is to be found."
next
end
# Skip if databases are yet to be provisioned
next unless connection[:identifier] && shared_connection[:identifier]
unless connection[:identifier] == shared_connection[:identifier]
warnings << "- The '#{connection[:name]}' since it is using 'database_tasks: false' " \
"should share database with '#{share_with}:'."
end
end
if warnings.any?
warnings.unshift("Database config validation failure:")
# Warn (for now) by default in production environment
if Gitlab::Utils.to_boolean(ENV['GITLAB_VALIDATE_DATABASE_CONFIG'], default: Gitlab.dev_or_test_env?)
raise warnings.join("\n")
else
warn warnings.join("\n")
end
end
ensure
if should_reconnect
ActiveRecord::Base.establish_connection(ActiveRecord::Tasks::DatabaseTasks.env.to_sym) # rubocop: disable Database/EstablishConnection
end
end
Rake::Task['db:migrate'].enhance(['gitlab:db:validate_config'])
Rake::Task['db:schema:load'].enhance(['gitlab:db:validate_config'])
Rake::Task['db:schema:dump'].enhance(['gitlab:db:validate_config'])
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
Rake::Task["db:migrate:#{name}"].enhance(['gitlab:db:validate_config'])
Rake::Task["db:schema:load:#{name}"].enhance(['gitlab:db:validate_config'])
Rake::Task["db:schema:dump:#{name}"].enhance(['gitlab:db:validate_config'])
end
end
end
# frozen_string_literal: true
require 'rake_helper'
RSpec.describe 'gitlab:db:validate_config', :silence_stdout do
before :all do
Rake.application.rake_require 'active_record/railties/databases'
Rake.application.rake_require 'tasks/seed_fu'
Rake.application.rake_require 'tasks/gitlab/db/validate_config'
# empty task as env is already loaded
Rake::Task.define_task :environment
end
context "when validating config" do
let(:main_database_config) do
Rails.application.config.load_database_yaml
.dig('test', 'main')
.slice('adapter', 'encoding', 'database', 'username', 'password', 'host')
.symbolize_keys
end
let(:additional_database_config) do
# Use built-in postgres database
main_database_config.merge(database: 'postgres')
end
around do |example|
with_reestablished_active_record_base(reconnect: true) do
with_db_configs(test: test_config) do
example.run
end
end
end
shared_examples 'validates successfully' do
it 'by default' do
expect { run_rake_task('gitlab:db:validate_config') }.not_to output(/Database config validation failure/).to_stderr
expect { run_rake_task('gitlab:db:validate_config') }.not_to raise_error
end
it 'for production' do
allow(Gitlab).to receive(:dev_or_test_env?).and_return(false)
expect { run_rake_task('gitlab:db:validate_config') }.not_to output(/Database config validation failure/).to_stderr
expect { run_rake_task('gitlab:db:validate_config') }.not_to raise_error
end
it 'if GITLAB_VALIDATE_DATABASE_CONFIG is set' do
stub_env('GITLAB_VALIDATE_DATABASE_CONFIG', '1')
allow(Gitlab).to receive(:dev_or_test_env?).and_return(false)
expect { run_rake_task('gitlab:db:validate_config') }.not_to output(/Database config validation failure/).to_stderr
expect { run_rake_task('gitlab:db:validate_config') }.not_to raise_error
end
end
shared_examples 'raises an error' do |match|
it 'by default' do
expect { run_rake_task('gitlab:db:validate_config') }.to raise_error(match)
end
it 'to stderr instead of exception for production' do
allow(Gitlab).to receive(:dev_or_test_env?).and_return(false)
expect { run_rake_task('gitlab:db:validate_config') }.to output(match).to_stderr
end
it 'if GITLAB_VALIDATE_DATABASE_CONFIG is set' do
stub_env('GITLAB_VALIDATE_DATABASE_CONFIG', '1')
allow(Gitlab).to receive(:dev_or_test_env?).and_return(false)
expect { run_rake_task('gitlab:db:validate_config') }.to raise_error(match)
end
end
context 'when only main: is specified' do
let(:test_config) do
{
main: main_database_config
}
end
it_behaves_like 'validates successfully'
end
context 'when main: uses database_tasks=false' do
let(:test_config) do
{
main: main_database_config.merge(database_tasks: false)
}
end
it_behaves_like 'raises an error', /The 'main' is required to use 'database_tasks: true'/
end
context 'when many configurations share the same database' do
context 'when no database_tasks is specified, assumes true' do
let(:test_config) do
{
main: main_database_config,
ci: main_database_config
}
end
it_behaves_like 'raises an error', /Many configurations \(main, ci\) share the same database/
end
context 'when database_tasks is specified' do
let(:test_config) do
{
main: main_database_config.merge(database_tasks: true),
ci: main_database_config.merge(database_tasks: true)
}
end
it_behaves_like 'raises an error', /Many configurations \(main, ci\) share the same database/
end
context "when there's no main: but something different, as currently we only can share with main:" do
let(:test_config) do
{
archive: main_database_config,
ci: main_database_config.merge(database_tasks: false)
}
end
it_behaves_like 'raises an error', /The 'ci' is expecting to share configuration with 'main', but no such is to be found/
end
end
context 'when ci: uses different database' do
context 'and does not specify database_tasks which indicates using dedicated database' do
let(:test_config) do
{
main: main_database_config,
ci: additional_database_config
}
end
it_behaves_like 'validates successfully'
end
context 'and does specify database_tasks=false which indicates sharing with main:' do
let(:test_config) do
{
main: main_database_config,
ci: additional_database_config.merge(database_tasks: false)
}
end
it_behaves_like 'raises an error', /The 'ci' since it is using 'database_tasks: false' should share database with 'main:'/
end
end
end
%w[db:migrate db:schema:load db:schema:dump].each do |task|
context "when running #{task}" do
it "does run gitlab:db:validate_config before" do
expect(Rake::Task['gitlab:db:validate_config']).to receive(:execute).and_return(true)
expect(Rake::Task[task]).to receive(:execute).and_return(true)
Rake::Task['gitlab:db:validate_config'].reenable
run_rake_task(task)
end
end
end
def with_db_configs(test: test_config)
current_configurations = ActiveRecord::Base.configurations # rubocop:disable Database/MultipleDatabases
ActiveRecord::Base.configurations = { test: test_config }
yield
ensure
ActiveRecord::Base.configurations = current_configurations
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment