Commit 9913a3a1 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch...

Merge branch '5933-geo-inconsistent-database-replication-status-when-wal-e-streaming-is-in-use-2-tidy' into 'master'

Geo: Tidy up Gitlab::Geo::HealthCheck

See merge request gitlab-org/gitlab-ee!9990
parents 0b04be12 044a695c
......@@ -3,6 +3,6 @@ HealthCheck.setup do |config|
config.full_checks = %w(database migrations cache)
config.add_custom_check('geo') do
Gitlab::Geo::HealthCheck.perform_checks
Gitlab::Geo::HealthCheck.new.perform_checks
end
end
......@@ -209,7 +209,7 @@ class GeoNodeStatus < ActiveRecord::Base
def load_secondary_data
if Gitlab::Geo.secondary?
self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds
self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds
self.cursor_last_event_id = current_cursor_last_event_id
self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at
self.repositories_synced_count = projects_finder.count_synced_repositories
......
......@@ -3,34 +3,21 @@
module Gitlab
module Geo
class HealthCheck
def self.perform_checks
include Gitlab::Utils::StrongMemoize
def perform_checks
raise NotImplementedError.new('Geo is only compatible with PostgreSQL') unless Gitlab::Database.postgresql?
return '' unless Gitlab::Geo.secondary?
return 'The Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured?
return 'The Geo node has a database that is not configured for streaming replication with the primary node.' unless self.database_secondary?
return 'The Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !self.streaming_active?
database_version = self.get_database_version.to_i
migration_version = self.get_migration_version.to_i
if database_version != migration_version
return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\n"\
'You may have to run `gitlab-rake geo:db:migrate` as root on the secondary.'
end
return 'The Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled?
return 'Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured?
return 'Geo node has a database that is not configured for streaming replication with the primary node.' unless Gitlab::Database.db_read_only?
return 'Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !streaming_active?
return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\nYou may have to run `gitlab-rake geo:db:migrate` as root on the secondary." unless database_migration_version_match?
return 'Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled?
unless Gitlab::Geo::Fdw.foreign_tables_up_to_date?
output = "The Geo database has an outdated FDW remote schema."
foreign_schema_tables_count = Gitlab::Geo::Fdw.foreign_schema_tables_count
gitlab_schema_tables_count = Gitlab::Geo::Fdw.gitlab_schema_tables_count
unless gitlab_schema_tables_count == foreign_schema_tables_count
output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables."
end
output = "Geo database has an outdated FDW remote schema."
output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables." unless schema_tables_match?
return output
end
......@@ -39,74 +26,90 @@ module Gitlab
e.message
end
def self.db_migrate_path
def db_replication_lag_seconds
# Obtain the replication lag in seconds
ActiveRecord::Base.connection
.execute(db_replication_lag_seconds_query)
.first
.fetch('replication_lag').to_i
end
private
def db_replication_lag_seconds_query
<<-SQL.squish
SELECT CASE
WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_replay_lsn}()
THEN 0
ELSE
EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER
END
AS replication_lag
SQL
end
def db_migrate_path
# Lazy initialisation so Rails.root will be defined
@db_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'migrate')
end
def self.db_post_migrate_path
def db_post_migrate_path
# Lazy initialisation so Rails.root will be defined
@db_post_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'post_migrate')
end
def self.get_database_version
if defined?(ActiveRecord)
connection = ::Geo::BaseRegistry.connection
schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name
if connection.data_source_exists?(schema_migrations_table_name)
connection.execute("SELECT MAX(version) AS version FROM #{schema_migrations_table_name}")
.first
.fetch('version')
def database_version
strong_memoize(:database_version) do
if defined?(ActiveRecord)
connection = ::Geo::BaseRegistry.connection
schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name
if connection.data_source_exists?(schema_migrations_table_name)
connection.execute("SELECT MAX(version) AS version FROM #{schema_migrations_table_name}")
.first
.fetch('version')
end
end
end
end
def self.get_migration_version
latest_migration = nil
def migration_version
strong_memoize(:migration_version) do
latest_migration = nil
Dir[File.join(self.db_migrate_path, "[0-9]*_*.rb"), File.join(self.db_post_migrate_path, "[0-9]*_*.rb")].each do |f|
timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1
Dir[File.join(db_migrate_path, "[0-9]*_*.rb"), File.join(db_post_migrate_path, "[0-9]*_*.rb")].each do |f|
timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1
if latest_migration.nil? || timestamp.to_i > latest_migration.to_i
latest_migration = timestamp
if latest_migration.nil? || timestamp.to_i > latest_migration.to_i
latest_migration = timestamp
end
end
latest_migration
end
end
latest_migration
def database_migration_version_match?
database_version.to_i == migration_version.to_i
end
def self.database_secondary?
Gitlab::Database.db_read_only?
def gitlab_schema_tables_count
@gitlab_schema_tables_count ||= Gitlab::Geo::Fdw.gitlab_schema_tables_count
end
def self.db_replication_lag_seconds
# Obtain the replication lag in seconds
lag =
ActiveRecord::Base.connection.execute(<<-SQL.squish)
SELECT CASE
WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_receive_lsn}()
THEN 0
ELSE
EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER
END
AS replication_lag
SQL
.first
.fetch('replication_lag')
def foreign_schema_tables_count
@foreign_schema_tables_count ||= Gitlab::Geo::Fdw.foreign_schema_tables_count
end
lag.present? ? lag.to_i : lag
def schema_tables_match?
gitlab_schema_tables_count == foreign_schema_tables_count
end
def self.streaming_active?
def streaming_active?
# Get a streaming status
# This only works for Postgresql 9.6 and greater
pid =
ActiveRecord::Base.connection.select_values('
SELECT pid FROM pg_stat_wal_receiver;')
.first
pid.to_i > 0
ActiveRecord::Base.connection
.select_values('SELECT pid FROM pg_stat_wal_receiver').first.to_i > 0
end
end
end
......
......@@ -297,7 +297,7 @@ namespace :geo do
puts geo_node.namespaces.any? ? 'Selective' : 'Full'
print 'Database replication lag: '.rjust(COLUMN_WIDTH)
puts "#{Gitlab::Geo::HealthCheck.db_replication_lag_seconds} seconds"
puts "#{Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds} seconds"
print 'Last event ID seen from primary: '.rjust(COLUMN_WIDTH)
last_event = Geo::EventLog.last
......
require 'spec_helper'
describe Gitlab::Geo::HealthCheck, :geo do
set(:secondary) { create(:geo_node) }
subject { described_class }
before do
allow(Gitlab::Geo).to receive(:current_node).and_return(secondary)
end
describe '.perform_checks' do
it 'returns a string if database is not fully migrated' do
allow(described_class).to receive(:geo_database_configured?).and_return(true)
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:get_database_version).and_return('20170101')
allow(described_class).to receive(:get_migration_version).and_return('20170201')
allow(described_class).to receive(:streaming_active?).and_return(true)
message = subject.perform_checks
expect(message).to include('Current Geo database version (20170101) does not match latest migration (20170201)')
expect(message).to include('gitlab-rake geo:db:migrate')
end
include ::EE::GeoHelpers
it 'returns an empty string when not running on a secondary node' do
allow(Gitlab::Geo).to receive(:secondary?) { false }
expect(subject.perform_checks).to be_blank
end
it 'returns an error when database is not configured for streaming replication' do
allow(Gitlab::Geo).to receive(:configured?) { true }
allow(Gitlab::Database).to receive(:postgresql?) { true }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).not_to be_blank
end
it 'returns an error when streaming replication is not working' do
allow(Gitlab::Geo).to receive(:configured?) { true }
allow(Gitlab::Database).to receive(:postgresql?) { true }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).to include('not configured for streaming replication')
end
it 'returns an error when configuration file is missing for tracking DB' do
allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false }
expect(subject.perform_checks).to include('database configuration file is missing')
end
it 'returns an error when Geo database version does not match the latest migration version' do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(subject).to receive(:get_database_version) { 1 }
allow(described_class).to receive(:streaming_active?).and_return(true)
set(:secondary) { create(:geo_node) }
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/)
describe '#perform_checks' do
before do
stub_current_geo_node(secondary)
end
it 'returns an error when latest migration version does not match the Geo database version' do
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(subject).to receive(:get_migration_version) { 1 }
allow(described_class).to receive(:streaming_active?).and_return(true)
context 'when an exception is raised' do
it 'catches the exception nicely and returns the message' do
allow(Gitlab::Database).to receive(:postgresql?).and_raise('Uh oh')
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/)
expect(subject.perform_checks).to eq('Uh oh')
end
end
it 'returns an error when streaming is not active and Postgresql supports pg_stat_wal_receiver' do
allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true)
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:streaming_active?).and_return(false)
context 'without PostgreSQL' do
it 'raises an error' do
allow(Gitlab::Database).to receive(:postgresql?) { false }
expect(subject.perform_checks).to match(/The Geo node does not appear to be replicating the database from the primary node/)
expect { subject.perform_checks }.to raise_error(NotImplementedError)
end
end
it 'returns an error when streaming is not active and Postgresql does not support pg_stat_wal_receiver' do
allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(false)
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:streaming_active?).and_return(false)
expect(subject.perform_checks).to be_empty
context 'with PostgreSQL' do
before do
allow(Gitlab::Database).to receive(:postgresql?) { true }
end
context 'on the primary node' do
it 'returns an empty string' do
allow(Gitlab::Geo).to receive(:secondary?) { false }
expect(subject.perform_checks).to be_blank
end
end
context 'on the secondary node' do
let(:geo_database_configured) { true }
let(:db_read_only) { true }
before do
allow(Gitlab::Geo).to receive(:secondary?) { true }
allow(Gitlab::Geo).to receive(:geo_database_configured?) { geo_database_configured }
allow(Gitlab::Database).to receive(:db_read_only?) { db_read_only }
end
context 'when the Geo tracking DB is not configured' do
let(:geo_database_configured) { false }
it 'returns an error' do
expect(subject.perform_checks).to include('Geo database configuration file is missing')
end
end
context 'when the database is writable' do
let(:db_read_only) { false }
it 'returns an error' do
expect(subject.perform_checks).to include('Geo node has a database that is not configured for streaming replication with the primary node.')
end
end
context 'streaming replication' do
before do
allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true)
end
context 'that is supported but not working' do
it 'returns an error' do
allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return([])
expect(subject.perform_checks).to match(/Geo node does not appear to be replicating the database from the primary node/)
end
end
context 'that is supported and working' do
before do
allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return(['123'])
allow(Gitlab::Geo::Fdw).to receive(:enabled?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { true }
end
it 'returns an error if database is not fully migrated' do
allow(subject).to receive(:database_version).and_return('20170101')
allow(subject).to receive(:migration_version).and_return('20170201')
message = subject.perform_checks
expect(message).to include('Geo database version (20170101) does not match latest migration (20170201)')
expect(message).to include('gitlab-rake geo:db:migrate')
end
it 'returns an error when FDW is disabled' do
allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false }
expect(subject.perform_checks).to match(/Geo database is not configured to use Foreign Data Wrapper/)
end
context 'when foreign tables are not up-to-date' do
before do
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false }
end
it 'returns an error when FDW remote table is not in sync but has same amount of tables' do
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 }
expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\./)
end
it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 }
expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/)
end
end
it 'finally returns an empty string when everything is healthy' do
expect(subject.perform_checks).to be_blank
end
end
end
end
end
end
it 'returns an error when FDW is disabled' do
allow(described_class).to receive(:database_secondary?) { true }
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false }
expect(subject.perform_checks).to match(/The Geo database is not configured to use Foreign Data Wrapper/)
end
it 'returns an error when FDW remote table is not in sync but has same amount of tables' do
allow(described_class).to receive(:database_secondary?) { true }
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false }
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 }
expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\./)
describe '#db_replication_lag_seconds' do
before do
query = 'SELECT CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER END AS replication_lag'
allow(Gitlab::Database).to receive(:pg_last_wal_receive_lsn).and_return('pg_last_wal_receive_lsn')
allow(Gitlab::Database).to receive(:pg_last_wal_replay_lsn).and_return('pg_last_wal_replay_lsn')
allow(ActiveRecord::Base).to receive_message_chain('connection.execute').with(query).and_return([{ 'replication_lag' => lag_in_seconds }])
end
it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do
allow(described_class).to receive(:database_secondary?) { true }
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false }
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 }
context 'when there is no lag' do
let(:lag_in_seconds) { nil }
expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/)
it 'returns 0 seconds' do
expect(subject.db_replication_lag_seconds).to eq(0)
end
end
end
describe 'MySQL checks' do
it 'raises an error' do
allow(Gitlab::Database).to receive(:postgresql?) { false }
context 'when there is lag' do
let(:lag_in_seconds) { 7 }
expect { subject.perform_checks }.to raise_error(NotImplementedError)
it 'returns the number of seconds' do
expect(subject.db_replication_lag_seconds).to eq(7)
end
end
end
end
......@@ -228,14 +228,14 @@ describe GeoNodeStatus, :geo do
describe '#db_replication_lag_seconds' do
it 'returns the set replication lag if secondary' do
allow(Gitlab::Geo).to receive(:secondary?).and_return(true)
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag_seconds).and_return(1000)
geo_health_check = double('Gitlab::Geo::HealthCheck', perform_checks: '', db_replication_lag_seconds: 1000)
allow(Gitlab::Geo::HealthCheck).to receive(:new).and_return(geo_health_check)
expect(subject.db_replication_lag_seconds).to eq(1000)
end
it "doesn't attempt to set replication lag if primary" do
stub_current_geo_node(primary)
expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds)
expect(subject.db_replication_lag_seconds).to eq(nil)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment