Commit 9913a3a1 authored by Douglas Barbosa Alexandre's avatar Douglas Barbosa Alexandre

Merge branch...

Merge branch '5933-geo-inconsistent-database-replication-status-when-wal-e-streaming-is-in-use-2-tidy' into 'master'

Geo: Tidy up Gitlab::Geo::HealthCheck

See merge request gitlab-org/gitlab-ee!9990
parents 0b04be12 044a695c
...@@ -3,6 +3,6 @@ HealthCheck.setup do |config| ...@@ -3,6 +3,6 @@ HealthCheck.setup do |config|
config.full_checks = %w(database migrations cache) config.full_checks = %w(database migrations cache)
config.add_custom_check('geo') do config.add_custom_check('geo') do
Gitlab::Geo::HealthCheck.perform_checks Gitlab::Geo::HealthCheck.new.perform_checks
end end
end end
...@@ -209,7 +209,7 @@ class GeoNodeStatus < ActiveRecord::Base ...@@ -209,7 +209,7 @@ class GeoNodeStatus < ActiveRecord::Base
def load_secondary_data def load_secondary_data
if Gitlab::Geo.secondary? if Gitlab::Geo.secondary?
self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.db_replication_lag_seconds self.db_replication_lag_seconds = Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds
self.cursor_last_event_id = current_cursor_last_event_id self.cursor_last_event_id = current_cursor_last_event_id
self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at self.cursor_last_event_date = Geo::EventLog.find_by(id: self.cursor_last_event_id)&.created_at
self.repositories_synced_count = projects_finder.count_synced_repositories self.repositories_synced_count = projects_finder.count_synced_repositories
......
...@@ -3,34 +3,21 @@ ...@@ -3,34 +3,21 @@
module Gitlab module Gitlab
module Geo module Geo
class HealthCheck class HealthCheck
def self.perform_checks include Gitlab::Utils::StrongMemoize
def perform_checks
raise NotImplementedError.new('Geo is only compatible with PostgreSQL') unless Gitlab::Database.postgresql? raise NotImplementedError.new('Geo is only compatible with PostgreSQL') unless Gitlab::Database.postgresql?
return '' unless Gitlab::Geo.secondary? return '' unless Gitlab::Geo.secondary?
return 'The Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured? return 'Geo database configuration file is missing.' unless Gitlab::Geo.geo_database_configured?
return 'The Geo node has a database that is not configured for streaming replication with the primary node.' unless self.database_secondary? return 'Geo node has a database that is not configured for streaming replication with the primary node.' unless Gitlab::Database.db_read_only?
return 'The Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !self.streaming_active? return 'Geo node does not appear to be replicating the database from the primary node.' if Gitlab::Database.pg_stat_wal_receiver_supported? && !streaming_active?
return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\nYou may have to run `gitlab-rake geo:db:migrate` as root on the secondary." unless database_migration_version_match?
database_version = self.get_database_version.to_i return 'Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled?
migration_version = self.get_migration_version.to_i
if database_version != migration_version
return "Current Geo database version (#{database_version}) does not match latest migration (#{migration_version}).\n"\
'You may have to run `gitlab-rake geo:db:migrate` as root on the secondary.'
end
return 'The Geo database is not configured to use Foreign Data Wrapper.' unless Gitlab::Geo::Fdw.enabled?
unless Gitlab::Geo::Fdw.foreign_tables_up_to_date? unless Gitlab::Geo::Fdw.foreign_tables_up_to_date?
output = "The Geo database has an outdated FDW remote schema." output = "Geo database has an outdated FDW remote schema."
output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables." unless schema_tables_match?
foreign_schema_tables_count = Gitlab::Geo::Fdw.foreign_schema_tables_count
gitlab_schema_tables_count = Gitlab::Geo::Fdw.gitlab_schema_tables_count
unless gitlab_schema_tables_count == foreign_schema_tables_count
output = "#{output} It contains #{foreign_schema_tables_count} of #{gitlab_schema_tables_count} expected tables."
end
return output return output
end end
...@@ -39,17 +26,40 @@ module Gitlab ...@@ -39,17 +26,40 @@ module Gitlab
e.message e.message
end end
def self.db_migrate_path def db_replication_lag_seconds
# Obtain the replication lag in seconds
ActiveRecord::Base.connection
.execute(db_replication_lag_seconds_query)
.first
.fetch('replication_lag').to_i
end
private
def db_replication_lag_seconds_query
<<-SQL.squish
SELECT CASE
WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_replay_lsn}()
THEN 0
ELSE
EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER
END
AS replication_lag
SQL
end
def db_migrate_path
# Lazy initialisation so Rails.root will be defined # Lazy initialisation so Rails.root will be defined
@db_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'migrate') @db_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'migrate')
end end
def self.db_post_migrate_path def db_post_migrate_path
# Lazy initialisation so Rails.root will be defined # Lazy initialisation so Rails.root will be defined
@db_post_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'post_migrate') @db_post_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'post_migrate')
end end
def self.get_database_version def database_version
strong_memoize(:database_version) do
if defined?(ActiveRecord) if defined?(ActiveRecord)
connection = ::Geo::BaseRegistry.connection connection = ::Geo::BaseRegistry.connection
schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name schema_migrations_table_name = ActiveRecord::Base.schema_migrations_table_name
...@@ -61,11 +71,13 @@ module Gitlab ...@@ -61,11 +71,13 @@ module Gitlab
end end
end end
end end
end
def self.get_migration_version def migration_version
strong_memoize(:migration_version) do
latest_migration = nil latest_migration = nil
Dir[File.join(self.db_migrate_path, "[0-9]*_*.rb"), File.join(self.db_post_migrate_path, "[0-9]*_*.rb")].each do |f| Dir[File.join(db_migrate_path, "[0-9]*_*.rb"), File.join(db_post_migrate_path, "[0-9]*_*.rb")].each do |f|
timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1 timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1
if latest_migration.nil? || timestamp.to_i > latest_migration.to_i if latest_migration.nil? || timestamp.to_i > latest_migration.to_i
...@@ -75,38 +87,29 @@ module Gitlab ...@@ -75,38 +87,29 @@ module Gitlab
latest_migration latest_migration
end end
end
def self.database_secondary? def database_migration_version_match?
Gitlab::Database.db_read_only? database_version.to_i == migration_version.to_i
end end
def self.db_replication_lag_seconds def gitlab_schema_tables_count
# Obtain the replication lag in seconds @gitlab_schema_tables_count ||= Gitlab::Geo::Fdw.gitlab_schema_tables_count
lag = end
ActiveRecord::Base.connection.execute(<<-SQL.squish)
SELECT CASE
WHEN #{Gitlab::Database.pg_last_wal_receive_lsn}() = #{Gitlab::Database.pg_last_wal_receive_lsn}()
THEN 0
ELSE
EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER
END
AS replication_lag
SQL
.first
.fetch('replication_lag')
lag.present? ? lag.to_i : lag def foreign_schema_tables_count
@foreign_schema_tables_count ||= Gitlab::Geo::Fdw.foreign_schema_tables_count
end end
def self.streaming_active? def schema_tables_match?
gitlab_schema_tables_count == foreign_schema_tables_count
end
def streaming_active?
# Get a streaming status # Get a streaming status
# This only works for Postgresql 9.6 and greater # This only works for Postgresql 9.6 and greater
pid = ActiveRecord::Base.connection
ActiveRecord::Base.connection.select_values(' .select_values('SELECT pid FROM pg_stat_wal_receiver').first.to_i > 0
SELECT pid FROM pg_stat_wal_receiver;')
.first
pid.to_i > 0
end end
end end
end end
......
...@@ -297,7 +297,7 @@ namespace :geo do ...@@ -297,7 +297,7 @@ namespace :geo do
puts geo_node.namespaces.any? ? 'Selective' : 'Full' puts geo_node.namespaces.any? ? 'Selective' : 'Full'
print 'Database replication lag: '.rjust(COLUMN_WIDTH) print 'Database replication lag: '.rjust(COLUMN_WIDTH)
puts "#{Gitlab::Geo::HealthCheck.db_replication_lag_seconds} seconds" puts "#{Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds} seconds"
print 'Last event ID seen from primary: '.rjust(COLUMN_WIDTH) print 'Last event ID seen from primary: '.rjust(COLUMN_WIDTH)
last_event = Geo::EventLog.last last_event = Geo::EventLog.last
......
require 'spec_helper' require 'spec_helper'
describe Gitlab::Geo::HealthCheck, :geo do describe Gitlab::Geo::HealthCheck, :geo do
set(:secondary) { create(:geo_node) } include ::EE::GeoHelpers
subject { described_class } set(:secondary) { create(:geo_node) }
describe '#perform_checks' do
before do before do
allow(Gitlab::Geo).to receive(:current_node).and_return(secondary) stub_current_geo_node(secondary)
end end
describe '.perform_checks' do context 'when an exception is raised' do
it 'returns a string if database is not fully migrated' do it 'catches the exception nicely and returns the message' do
allow(described_class).to receive(:geo_database_configured?).and_return(true) allow(Gitlab::Database).to receive(:postgresql?).and_raise('Uh oh')
allow(described_class).to receive(:database_secondary?).and_return(true)
allow(described_class).to receive(:get_database_version).and_return('20170101')
allow(described_class).to receive(:get_migration_version).and_return('20170201')
allow(described_class).to receive(:streaming_active?).and_return(true)
message = subject.perform_checks
expect(message).to include('Current Geo database version (20170101) does not match latest migration (20170201)') expect(subject.perform_checks).to eq('Uh oh')
expect(message).to include('gitlab-rake geo:db:migrate') end
end end
it 'returns an empty string when not running on a secondary node' do context 'without PostgreSQL' do
allow(Gitlab::Geo).to receive(:secondary?) { false } it 'raises an error' do
allow(Gitlab::Database).to receive(:postgresql?) { false }
expect(subject.perform_checks).to be_blank expect { subject.perform_checks }.to raise_error(NotImplementedError)
end
end end
it 'returns an error when database is not configured for streaming replication' do context 'with PostgreSQL' do
allow(Gitlab::Geo).to receive(:configured?) { true } before do
allow(Gitlab::Database).to receive(:postgresql?) { true } allow(Gitlab::Database).to receive(:postgresql?) { true }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).not_to be_blank
end end
it 'returns an error when streaming replication is not working' do context 'on the primary node' do
allow(Gitlab::Geo).to receive(:configured?) { true } it 'returns an empty string' do
allow(Gitlab::Database).to receive(:postgresql?) { true } allow(Gitlab::Geo).to receive(:secondary?) { false }
allow(described_class).to receive(:database_secondary?) { false }
expect(subject.perform_checks).to include('not configured for streaming replication') expect(subject.perform_checks).to be_blank
end
end end
it 'returns an error when configuration file is missing for tracking DB' do context 'on the secondary node' do
allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false } let(:geo_database_configured) { true }
let(:db_read_only) { true }
expect(subject.perform_checks).to include('database configuration file is missing') before do
allow(Gitlab::Geo).to receive(:secondary?) { true }
allow(Gitlab::Geo).to receive(:geo_database_configured?) { geo_database_configured }
allow(Gitlab::Database).to receive(:db_read_only?) { db_read_only }
end end
it 'returns an error when Geo database version does not match the latest migration version' do context 'when the Geo tracking DB is not configured' do
allow(described_class).to receive(:database_secondary?).and_return(true) let(:geo_database_configured) { false }
allow(subject).to receive(:get_database_version) { 1 }
allow(described_class).to receive(:streaming_active?).and_return(true)
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/) it 'returns an error' do
expect(subject.perform_checks).to include('Geo database configuration file is missing')
end
end end
it 'returns an error when latest migration version does not match the Geo database version' do context 'when the database is writable' do
allow(described_class).to receive(:database_secondary?).and_return(true) let(:db_read_only) { false }
allow(subject).to receive(:get_migration_version) { 1 }
allow(described_class).to receive(:streaming_active?).and_return(true)
expect(subject.perform_checks).to match(/Current Geo database version \([0-9]+\) does not match latest migration \([0-9]+\)/) it 'returns an error' do
expect(subject.perform_checks).to include('Geo node has a database that is not configured for streaming replication with the primary node.')
end
end end
it 'returns an error when streaming is not active and Postgresql supports pg_stat_wal_receiver' do context 'streaming replication' do
before do
allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true) allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(true)
allow(described_class).to receive(:database_secondary?).and_return(true) end
allow(described_class).to receive(:streaming_active?).and_return(false)
expect(subject.perform_checks).to match(/The Geo node does not appear to be replicating the database from the primary node/) context 'that is supported but not working' do
it 'returns an error' do
allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return([])
expect(subject.perform_checks).to match(/Geo node does not appear to be replicating the database from the primary node/)
end
end end
it 'returns an error when streaming is not active and Postgresql does not support pg_stat_wal_receiver' do context 'that is supported and working' do
allow(Gitlab::Database).to receive(:pg_stat_wal_receiver_supported?).and_return(false) before do
allow(described_class).to receive(:database_secondary?).and_return(true) allow(ActiveRecord::Base).to receive_message_chain('connection.select_values').with(no_args).with('SELECT pid FROM pg_stat_wal_receiver').and_return(['123'])
allow(described_class).to receive(:streaming_active?).and_return(false) allow(Gitlab::Geo::Fdw).to receive(:enabled?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { true }
end
it 'returns an error if database is not fully migrated' do
allow(subject).to receive(:database_version).and_return('20170101')
allow(subject).to receive(:migration_version).and_return('20170201')
message = subject.perform_checks
expect(subject.perform_checks).to be_empty expect(message).to include('Geo database version (20170101) does not match latest migration (20170201)')
expect(message).to include('gitlab-rake geo:db:migrate')
end end
it 'returns an error when FDW is disabled' do it 'returns an error when FDW is disabled' do
allow(described_class).to receive(:database_secondary?) { true }
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false } allow(Gitlab::Geo::Fdw).to receive(:enabled?) { false }
expect(subject.perform_checks).to match(/The Geo database is not configured to use Foreign Data Wrapper/) expect(subject.perform_checks).to match(/Geo database is not configured to use Foreign Data Wrapper/)
end end
it 'returns an error when FDW remote table is not in sync but has same amount of tables' do context 'when foreign tables are not up-to-date' do
allow(described_class).to receive(:database_secondary?) { true } before do
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false } allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false }
end
it 'returns an error when FDW remote table is not in sync but has same amount of tables' do
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 } allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 1 }
expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\./) expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\./)
end end
it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do it 'returns an error when FDW remote table is not in sync and has same different amount of tables' do
allow(described_class).to receive(:database_secondary?) { true }
allow(described_class).to receive(:streaming_active?) { true }
allow(Gitlab::Geo::Fdw).to receive(:foreign_tables_up_to_date?) { false }
allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 } allow(Gitlab::Geo::Fdw).to receive(:foreign_schema_tables_count) { 1 }
allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 } allow(Gitlab::Geo::Fdw).to receive(:gitlab_schema_tables_count) { 2 }
expect(subject.perform_checks).to match(/The Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/) expect(subject.perform_checks).to match(/Geo database has an outdated FDW remote schema\. It contains [0-9]+ of [0-9]+ expected tables/)
end end
end end
describe 'MySQL checks' do it 'finally returns an empty string when everything is healthy' do
it 'raises an error' do expect(subject.perform_checks).to be_blank
allow(Gitlab::Database).to receive(:postgresql?) { false } end
end
end
end
end
end
expect { subject.perform_checks }.to raise_error(NotImplementedError) describe '#db_replication_lag_seconds' do
before do
query = 'SELECT CASE WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER END AS replication_lag'
allow(Gitlab::Database).to receive(:pg_last_wal_receive_lsn).and_return('pg_last_wal_receive_lsn')
allow(Gitlab::Database).to receive(:pg_last_wal_replay_lsn).and_return('pg_last_wal_replay_lsn')
allow(ActiveRecord::Base).to receive_message_chain('connection.execute').with(query).and_return([{ 'replication_lag' => lag_in_seconds }])
end
context 'when there is no lag' do
let(:lag_in_seconds) { nil }
it 'returns 0 seconds' do
expect(subject.db_replication_lag_seconds).to eq(0)
end
end
context 'when there is lag' do
let(:lag_in_seconds) { 7 }
it 'returns the number of seconds' do
expect(subject.db_replication_lag_seconds).to eq(7)
end
end end
end end
end end
...@@ -228,14 +228,14 @@ describe GeoNodeStatus, :geo do ...@@ -228,14 +228,14 @@ describe GeoNodeStatus, :geo do
describe '#db_replication_lag_seconds' do describe '#db_replication_lag_seconds' do
it 'returns the set replication lag if secondary' do it 'returns the set replication lag if secondary' do
allow(Gitlab::Geo).to receive(:secondary?).and_return(true) allow(Gitlab::Geo).to receive(:secondary?).and_return(true)
allow(Gitlab::Geo::HealthCheck).to receive(:db_replication_lag_seconds).and_return(1000) geo_health_check = double('Gitlab::Geo::HealthCheck', perform_checks: '', db_replication_lag_seconds: 1000)
allow(Gitlab::Geo::HealthCheck).to receive(:new).and_return(geo_health_check)
expect(subject.db_replication_lag_seconds).to eq(1000) expect(subject.db_replication_lag_seconds).to eq(1000)
end end
it "doesn't attempt to set replication lag if primary" do it "doesn't attempt to set replication lag if primary" do
stub_current_geo_node(primary) stub_current_geo_node(primary)
expect(Gitlab::Geo::HealthCheck).not_to receive(:db_replication_lag_seconds)
expect(subject.db_replication_lag_seconds).to eq(nil) expect(subject.db_replication_lag_seconds).to eq(nil)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment