Geo - Use Rails 6 many databases support

Currently, Geo manually manages its additional DB connections.
This results in a number of overrides to provide this
kind of support.

These changes are the first step towards configuring the
Geo tracking database in database.yml, with the following goals:

1. Use Rails 6 multiple-database support to reduce the
 complexity of the current implementation;
2. Keep the Geo tracking database files (structure.sql,
 migrations, etc.) in EE.

Changelog: changed
EE: true
parent 9618f99c
......@@ -342,8 +342,8 @@ rspec fast_spec_helper minimal:
db:rollback:
extends: .db-job-base
script:
- bundle exec rake db:migrate VERSION=20181228175414
- bundle exec rake db:migrate SKIP_SCHEMA_VERSION_CHECK=true
- bundle exec rake db:migrate:main VERSION=20181228175414
- bundle exec rake db:migrate:main SKIP_SCHEMA_VERSION_CHECK=true
db:migrate:reset:
extends: .db-job-base
......@@ -368,7 +368,7 @@ db:migrate-from-previous-major-version:
- git checkout -f $CI_COMMIT_SHA
- SETUP_DB=false USE_BUNDLE_INSTALL=true bash scripts/prepare_build.sh
script:
- run_timed_command "bundle exec rake db:migrate"
- run_timed_command "bundle exec rake db:migrate:main"
db:check-schema:
extends:
......@@ -377,7 +377,7 @@ db:check-schema:
variables:
TAG_TO_CHECKOUT: "v14.4.0"
script:
- run_timed_command "bundle exec rake db:migrate"
- run_timed_command "bundle exec rake db:migrate:main"
- scripts/schema_changed.sh
- scripts/validate_migration_timestamps
......@@ -900,8 +900,8 @@ db:rollback geo:
- db:rollback
- .rails:rules:ee-only-migration
script:
- bundle exec rake geo:db:migrate VERSION=20170627195211
- bundle exec rake geo:db:migrate
- bundle exec rake db:migrate:geo VERSION=20170627195211
- bundle exec rake db:migrate:geo
# EE: default refs (MRs, default branch, schedules) jobs #
##################################################
......
# frozen_string_literal: true
# EE only: once Active Record has loaded, prepend the Geo patch onto the
# singleton class of ActiveRecord::Tasks::DatabaseTasks so that its
# module-level methods can be intercepted by Gitlab::Patch::GeoDatabaseTasks.
if Gitlab.ee?
  ActiveSupport.on_load(:active_record) do
    database_tasks = ActiveRecord::Tasks::DatabaseTasks.singleton_class
    database_tasks.prepend(Gitlab::Patch::GeoDatabaseTasks)
  end
end
......@@ -8,11 +8,11 @@ Gitlab.ee do
config.geo_database = config_for(:database_geo)
end
end
end
Gitlab.ee do
if Gitlab::Runtime.sidekiq? && Gitlab::Geo.geo_database_configured?
Rails.configuration.geo_database['pool'] = Gitlab::Database.default_pool_size
Geo::TrackingBase.establish_connection(Rails.configuration.geo_database)
# The Geo::TrackingBase model does not yet use connects_to, so
# this does not properly support geo: from the config/database.yml
# file yet. This is a known limitation of the current state and will be fixed.
Geo::TrackingBase.establish_connection(Gitlab::Database.geo_db_config_with_default_pool_size)
end
end
......@@ -16,11 +16,11 @@ if configurations = ActiveRecord::Base.configurations.configurations
"The `main:` database needs to be defined as a first configuration item instead of `#{configurations.first.name}`."
end
rejected_config_names = configurations.map(&:name).to_set - Gitlab::Database::DATABASE_NAMES
rejected_config_names = configurations.map(&:name).to_set - Gitlab::Database.all_database_names
if rejected_config_names.any?
raise "ERROR: This installation of GitLab uses unsupported database names " \
"in 'config/database.yml': #{rejected_config_names.to_a.join(", ")}. The only supported ones are " \
"#{Gitlab::Database::DATABASE_NAMES.join(", ")}."
"#{Gitlab::Database.all_database_names.join(", ")}."
end
replicas_config_names = configurations.select(&:replica?).map(&:name)
......
......@@ -5,9 +5,36 @@ module EE
module Database
extend ActiveSupport::Concern
GEO_DATABASE_NAME = 'geo'
GEO_DATABASE_DIR = 'ee/db/geo'
EE_DATABASE_NAMES = [GEO_DATABASE_NAME].freeze
class_methods do
extend ::Gitlab::Utils::Override
# Extends the database names returned by the overridden implementation
# with the EE-only names listed in EE_DATABASE_NAMES.
override :all_database_names
def all_database_names
super + EE_DATABASE_NAMES
end
# Whether +name+ designates the Geo tracking database.
#
# +name+ is converted with #to_s before the comparison, so both Strings
# and Symbols are accepted.
def geo_database?(name)
  GEO_DATABASE_NAME == name.to_s
end
# Builds a new HashConfig from the configuration Geo::TrackingBase is
# currently connected with, keeping its env name and config name but
# replacing `pool` with ::Gitlab::Database.default_pool_size.
#
# The existing configuration object is not modified; its hash is merged
# into a fresh one.
def geo_db_config_with_default_pool_size
  current = Geo::TrackingBase.connection_db_config
  resized_hash = current.configuration_hash.merge(pool: ::Gitlab::Database.default_pool_size)

  ActiveRecord::DatabaseConfigurations::HashConfig.new(current.env_name, current.name, resized_hash)
end
override :read_only?
def read_only?
::Gitlab::Geo.secondary? || ::Gitlab.maintenance_mode?
......
# frozen_string_literal: true
module Gitlab
  module Patch
    # Meant to be prepended in front of an object that already implements
    # `dump_filename` and `cache_dump_filename`. For the Geo database the
    # files are resolved under Gitlab::Database::GEO_DATABASE_DIR; for any
    # other database name the call is passed through to `super` unchanged.
    module GeoDatabaseTasks
      # Path of the structure dump for +db_config_name+.
      def dump_filename(db_config_name, format = ApplicationRecord.schema_format)
        if Gitlab::Database.geo_database?(db_config_name)
          geo_structure = Rails.root.join(Gitlab::Database::GEO_DATABASE_DIR, 'structure.sql')
          geo_structure.to_s
        else
          super
        end
      end

      # Path of the schema cache dump for +db_config_name+.
      def cache_dump_filename(db_config_name, schema_cache_path: nil)
        if Gitlab::Database.geo_database?(db_config_name)
          geo_schema_cache = Rails.root.join(Gitlab::Database::GEO_DATABASE_DIR, 'schema_cache.yml')
          geo_schema_cache.to_s
        else
          super
        end
      end
    end
  end
end
# frozen_string_literal: true
task spec: ['geo:db:test:prepare']
task spec: ['db:test:prepare:geo']
namespace :geo do
GEO_LICENSE_ERROR_TEXT = 'GitLab Geo is not supported with this license. Please contact the sales team: https://about.gitlab.com/sales.'
......
......@@ -2,7 +2,7 @@
require 'spec_helper'
RSpec.describe 'Database config initializer for GitLab EE' do
RSpec.describe 'Database config initializer for GitLab EE', :reestablished_active_record_base do
subject do
load Rails.root.join('config/initializers/database_config.rb')
end
......@@ -11,29 +11,41 @@ RSpec.describe 'Database config initializer for GitLab EE' do
before do
allow(Gitlab::Runtime).to receive(:max_threads).and_return(max_threads)
allow(ActiveRecord::Base).to receive(:establish_connection)
expect(Geo::TrackingBase).to receive(:establish_connection)
end
context "and the runtime is Sidekiq" do
before do
stub_geo_database_config(pool_size: 1)
allow(Gitlab::Runtime).to receive(:sidekiq?).and_return(true)
end
it "sets Geo DB connection pool size to the max number of worker threads" do
expect { subject }.to change { Rails.configuration.geo_database['pool'] }.from(1).to(18)
context 'when no custom headroom is specified' do
it 'sets the pool size based on the number of worker threads' do
old = Geo::TrackingBase.connection_db_config.pool
expect(old).not_to eq(18)
expect { subject }
.to change { Geo::TrackingBase.connection_db_config.pool }
.from(old)
.to(18)
end
end
end
def stub_geo_database_config(pool_size:)
config = {
'adapter' => 'postgresql',
'host' => 'db.host.com',
'pool' => pool_size
}.compact
context "when specifying headroom through an ENV variable" do
let(:headroom) { 15 }
allow(Rails.configuration).to receive(:geo_database).and_return(config)
before do
stub_env("DB_POOL_HEADROOM", headroom)
end
it "adds headroom on top of the calculated size" do
old = Geo::TrackingBase.connection_db_config.pool
expect { subject }
.to change { Geo::TrackingBase.connection_db_config.pool }
.from(old)
.to(23)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Patch::GeoDatabaseTasks do
  # Anonymous host class with the patch prepended: the two methods defined
  # here are what the patch reaches through `super` for non-Geo databases.
  subject do
    Class.new do
      prepend Gitlab::Patch::GeoDatabaseTasks

      def dump_filename(db_config_name, format = ApplicationRecord.schema_format)
        'foo.sql'
      end

      # Declared with the same keyword signature as the patched method, so
      # the `super` call forwards `schema_cache_path:` as a keyword rather
      # than as a positional hash.
      def cache_dump_filename(db_config_name, schema_cache_path: nil)
        'bar.yml'
      end
    end.new
  end

  describe '#dump_filename' do
    context 'with geo database config name' do
      it 'returns the path for the structure.sql file in the Geo database dir' do
        expect(subject.dump_filename(:geo)).to eq Rails.root.join('ee/db/geo/structure.sql').to_s
      end
    end

    context 'with other database config name' do
      it 'calls super' do
        expect(subject.dump_filename(:main)).to eq 'foo.sql'
      end
    end
  end

  describe '#cache_dump_filename' do
    context 'with geo database config name' do
      it 'returns the path for the schema_cache file in the Geo database dir' do
        expect(subject.cache_dump_filename(:geo)).to eq Rails.root.join('ee/db/geo/schema_cache.yml').to_s
      end
    end

    context 'with other database config name' do
      it 'calls super' do
        expect(subject.cache_dump_filename(:main)).to eq 'bar.yml'
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers the Geo-related behaviour of Gitlab::Patch::LegacyDatabaseConfig:
# `#load_geo_database_yaml` and `#database_configuration`, each with and
# without a config/database_geo.yml file, and with both the nested and the
# flat (legacy) YAML layouts.
RSpec.describe Gitlab::Patch::LegacyDatabaseConfig do
describe '#load_geo_database_yaml' do
let(:configuration) { Rails::Application::Configuration.new(Rails.root) }
context 'when config/database_geo.yml does not exist' do
before do
allow(File).to receive(:exist?).and_call_original
allow(File).to receive(:exist?).with(Rails.root.join("config/database_geo.yml")).and_return(false)
end
it 'returns an empty hash' do
expect(configuration.load_geo_database_yaml).to eq({})
end
end
context 'when config/database_geo.yml exists' do
# Shared by the nested and the flat fixture below: both layouts are
# expected to yield a hash keyed by `geo` for every environment.
shared_examples 'hash containing geo: connection name' do
it 'returns a hash containing geo:' do
expect(configuration.load_geo_database_yaml).to match(
"production" => { "geo" => a_hash_including("adapter") },
"development" => { "geo" => a_hash_including("adapter" => "postgresql") },
"test" => { "geo" => a_hash_including("adapter" => "postgresql") }
)
end
end
# Only the Pathname built for config/database_geo.yml is replaced by a
# double returning the fixture; other Pathname.new calls go through.
before do
allow(Pathname)
.to receive(:new)
.and_call_original
allow(Pathname)
.to receive(:new).with(Rails.root.join('config/database_geo.yml'))
.and_return(double(read: database_geo_yml))
end
context 'when config/database_geo.yml use a new syntax' do
let(:database_geo_yml) do
<<-EOS
production:
geo:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_production
username: git
password: "secure password"
host: localhost
development:
geo:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
geo:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing geo: connection name'
end
context 'when config/database_geo.yml use a legacy syntax' do
let(:database_geo_yml) do
<<-EOS
production:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_production
username: git
password: "secure password"
host: localhost
development:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing geo: connection name'
end
end
end
describe '#database_configuration' do
let(:configuration) { Rails::Application::Configuration.new(Rails.root) }
before do
# The `AS::ConfigurationFile` calls `read` in `def initialize`
# thus we cannot use `allow_next_instance_of`
# rubocop:disable RSpec/AnyInstanceOf
allow_any_instance_of(ActiveSupport::ConfigurationFile)
.to receive(:read).with(Rails.root.join('config/database.yml')).and_return(database_yml)
# rubocop:enable RSpec/AnyInstanceOf
end
context 'when config/database_geo.yml does not exist' do
shared_examples 'hash containing main: connection name' do
it 'returns a hash containing only main:' do
database_configuration = configuration.database_configuration
expect(database_configuration).to match(
"production" => { "main" => a_hash_including("adapter") },
"development" => { "main" => a_hash_including("adapter" => "postgresql") },
"test" => { "main" => a_hash_including("adapter" => "postgresql") }
)
end
end
before do
allow(File).to receive(:exist?).and_call_original
allow(File).to receive(:exist?).with(Rails.root.join("config/database_geo.yml")).and_return(false)
end
context 'when config/database.yml use a new syntax' do
let(:database_yml) do
<<-EOS
production:
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_production
username: git
password: "secure password"
host: localhost
development:
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing main: connection name'
end
context 'when config/database.yml use a legacy syntax' do
let(:database_yml) do
<<-EOS
production:
adapter: postgresql
encoding: unicode
database: gitlabhq_production
username: git
password: "secure password"
host: localhost
development:
adapter: postgresql
encoding: unicode
database: gitlabhq_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
adapter: postgresql
encoding: unicode
database: gitlabhq_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing main: connection name'
it 'configuration is legacy' do
configuration.database_configuration
expect(configuration.uses_legacy_database_config).to eq(true)
end
end
end
context 'when config/database_geo.yml exists' do
# Flat (legacy) Geo fixture. It also declares a `staging` environment that
# none of the database.yml fixtures below define; the shared expectation
# matches exactly production, development and test.
let(:database_geo_yml) do
<<-EOS
production:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_production
username: git
password: "secure password"
host: localhost
development:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_development
username: postgres
password: "secure password"
host: localhost
staging:
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_staging
username: git
password: "secure password"
host: localhost
test: &test
adapter: postgresql
encoding: unicode
database: gitlabhq_geo_test
username: postgres
password:
host: localhost
EOS
end
shared_examples 'hash containing both main: and geo: connection names' do
it 'returns a hash containing both main: and geo:' do
database_configuration = configuration.database_configuration
expect(database_configuration).to match(
"production" => { "main" => a_hash_including("adapter"), "geo" => a_hash_including("adapter") },
"development" => { "main" => a_hash_including("adapter"), "geo" => a_hash_including("adapter" => "postgresql") },
"test" => { "main" => a_hash_including("adapter"), "geo" => a_hash_including("adapter" => "postgresql") }
)
end
end
before do
# The `AS::ConfigurationFile` calls `read` in `def initialize`
# thus we cannot use `allow_next_instance_of`
# rubocop:disable RSpec/AnyInstanceOf
allow_any_instance_of(ActiveSupport::ConfigurationFile)
.to receive(:read).with(Rails.root.join('config/database_geo.yml')).and_return(database_geo_yml)
# rubocop:enable RSpec/AnyInstanceOf
end
context 'when config/database.yml use a new syntax' do
let(:database_yml) do
<<-EOS
production:
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_production
username: git
password: "secure password"
host: localhost
development:
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
main:
adapter: postgresql
encoding: unicode
database: gitlabhq_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing both main: and geo: connection names'
end
context 'when config/database.yml use a legacy syntax' do
let(:database_yml) do
<<-EOS
production:
adapter: postgresql
encoding: unicode
database: gitlabhq_production
username: git
password: "secure password"
host: localhost
development:
adapter: postgresql
encoding: unicode
database: gitlabhq_development
username: postgres
password: "secure password"
host: localhost
variables:
statement_timeout: 15s
test: &test
adapter: postgresql
encoding: unicode
database: gitlabhq_test
username: postgres
password:
host: localhost
prepared_statements: false
variables:
statement_timeout: 15s
EOS
end
include_examples 'hash containing both main: and geo: connection names'
end
end
end
end
......@@ -72,6 +72,10 @@ module Gitlab
}.with_indifferent_access.freeze
end
# Names of the databases this installation recognises. Returns
# DATABASE_NAMES unchanged; exposed as a method (rather than having
# callers read the constant) so that it can be overridden.
def self.all_database_names
DATABASE_NAMES
end
# We configure the database connection pool size automatically based on the
# configured concurrency. We also add some headroom, to make sure we don't
# run out of connections when more threads besides the 'user-facing' ones
......
......@@ -35,6 +35,40 @@ module Gitlab
attr_reader :uses_legacy_database_config
end
# Returns the database YAML produced by the original implementation.
# On EE, the result of #load_geo_database_yaml is deep-merged into it.
def load_database_yaml
  if Gitlab.ee?
    super.deep_merge(load_geo_database_yaml)
  else
    super
  end
end
# This method is taken from Rails to load a database YAML file without
# evaluating ERB. This allows us to create the rake tasks for the Geo
# tracking database without filling in the configuration values or
# loading the environment. To be removed when we start configuring the Geo
# tracking database in database.yml instead of the custom database_geo.yml.
#
# https://github.com/rails/rails/blob/v6.1.4/railties/lib/rails/application/configuration.rb#L255
# Reads config/database_geo.yml (relative to Rails.root) through DummyERB
# and returns it as a Hash keyed by environment. Returns an empty Hash when
# the file does not exist or parses to nothing.
#
# An environment written in the flat layout (connection settings directly
# under the environment name) is wrapped as { "geo" => settings }; an
# environment whose values are all hashes is returned as-is.
def load_geo_database_yaml
  geo_yaml_path = Rails.root.join("config/database_geo.yml")
  return {} unless File.exist?(geo_yaml_path)

  require "rails/application/dummy_erb_compiler"

  rendered = DummyERB.new(Pathname.new(geo_yaml_path).read).result
  parsed = YAML.load(rendered) || {} # rubocop:disable Security/YAMLLoad

  parsed.to_h do |env, configs|
    # This check is taken from Rails where the transformation
    # of a flat database.yml is done into `primary:`
    # https://github.com/rails/rails/blob/v6.1.4/activerecord/lib/active_record/database_configurations.rb#L169
    flat_layout = configs.is_a?(Hash) && configs.any? { |_, value| !value.is_a?(Hash) }

    [env, flat_layout ? { "geo" => configs } : configs]
  end
end
def database_configuration
@uses_legacy_database_config = false # rubocop:disable Gitlab/ModuleWithInstanceVariables
......@@ -48,6 +82,16 @@ module Gitlab
@uses_legacy_database_config = true # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
if Gitlab.ee? && File.exist?(Rails.root.join("config/database_geo.yml"))
migrations_paths = ["ee/db/geo/migrate"]
migrations_paths << "ee/db/geo/post_migrate" unless ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS']
configs["geo"] =
Rails.application.config_for(:database_geo)
.merge(migrations_paths: migrations_paths, schema_migrations_path: "ee/db/geo/schema_migrations")
.stringify_keys
end
[env, configs]
end
end
......
# frozen_string_literal: true
databases = ActiveRecord::Tasks::DatabaseTasks.setup_initial_database_yaml
namespace :gitlab do
namespace :db do
desc 'GitLab | DB | Manually insert schema migration version'
......@@ -83,7 +85,7 @@ namespace :gitlab do
desc 'GitLab | DB | Sets up EE specific database functionality'
if Gitlab.ee?
task setup_ee: %w[geo:db:drop geo:db:create geo:db:schema:load geo:db:migrate]
task setup_ee: %w[db:drop:geo db:create:geo db:schema:load:geo db:migrate:geo]
else
task :setup_ee
end
......@@ -116,6 +118,19 @@ namespace :gitlab do
Rake::Task['gitlab:db:clean_structure_sql'].invoke
end
ActiveRecord::Tasks::DatabaseTasks.for_each(databases) do |name|
  # Hook gitlab:db:clean_structure_sql onto both per-database dump tasks so
  # it is invoked whenever either of them runs.
  #
  # Rails 6.1 deprecates db:structure:dump in favor of db:schema:dump
  ["db:structure:dump:#{name}", "db:schema:dump:#{name}"].each do |dump_task|
    Rake::Task[dump_task].enhance do
      Rake::Task['gitlab:db:clean_structure_sql'].invoke
    end
  end
end
desc 'Create missing dynamic database partitions'
task create_dynamic_partitions: :environment do
Gitlab::Database::Partitioning.sync_partitions
......
......@@ -14,6 +14,9 @@ RSpec.describe 'validate database config' do
end
before do
allow(File).to receive(:exist?).and_call_original
allow(File).to receive(:exist?).with(Rails.root.join("config/database_geo.yml")).and_return(false)
# The `AS::ConfigurationFile` calls `read` in `def initialize`
# thus we cannot use `expect_next_instance_of`
# rubocop:disable RSpec/AnyInstanceOf
......
......@@ -11,6 +11,9 @@ RSpec.describe Gitlab::Patch::LegacyDatabaseConfig do
let(:configuration) { Rails::Application::Configuration.new(Rails.root) }
before do
allow(File).to receive(:exist?).and_call_original
allow(File).to receive(:exist?).with(Rails.root.join("config/database_geo.yml")).and_return(false)
# The `AS::ConfigurationFile` calls `read` in `def initialize`
# thus we cannot use `expect_next_instance_of`
# rubocop:disable RSpec/AnyInstanceOf
......
......@@ -138,6 +138,10 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
stub_file_read(structure_file, content: input)
allow(File).to receive(:open).with(structure_file.to_s, any_args).and_yield(output)
end
if Gitlab.ee?
allow(File).to receive(:open).with(Rails.root.join(Gitlab::Database::GEO_DATABASE_DIR, 'structure.sql').to_s, any_args).and_yield(output)
end
end
after do
......@@ -328,6 +332,32 @@ RSpec.describe 'gitlab:db namespace rake task', :silence_stdout do
end
end
context 'with multiple databases', :reestablished_active_record_base do
before do
allow(ActiveRecord::Tasks::DatabaseTasks).to receive(:setup_initial_database_yaml).and_return([:main, :geo])
end
describe 'db:structure:dump' do
it 'invokes gitlab:db:clean_structure_sql' do
skip unless Gitlab.ee?
expect(Rake::Task['gitlab:db:clean_structure_sql']).to receive(:invoke).twice.and_return(true)
expect { run_rake_task('db:structure:dump:main') }.not_to raise_error
end
end
describe 'db:schema:dump' do
it 'invokes gitlab:db:clean_structure_sql' do
skip unless Gitlab.ee?
expect(Rake::Task['gitlab:db:clean_structure_sql']).to receive(:invoke).once.and_return(true)
expect { run_rake_task('db:schema:dump:main') }.not_to raise_error
end
end
end
def run_rake_task(task_name, arguments = '')
Rake::Task[task_name].reenable
Rake.application.invoke_task("#{task_name}#{arguments}")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment