Commit 66bdf47d authored by Nick Thomas

Merge branch...

Merge branch '12720-geo-rake-task-to-check-the-health-of-the-secondary-node-does-not-mention-why-it-is-unhealthy' into 'master'

Geo - Show why node is unhealthy in the rake task to check the health of the secondary node

Closes #12720

See merge request gitlab-org/gitlab-ee!14615
parents 7f774c26 a9915725
---
title: Geo - Show why node is unhealthy in the rake task to check the health of the
secondary node
merge_request: 14615
author:
type: fixed
......@@ -7,6 +7,7 @@ namespace :geo do
include ActionView::Helpers::NumberHelper
GEO_LICENSE_ERROR_TEXT = 'GitLab Geo is not supported with this license. Please contact the sales team: https://about.gitlab.com/sales.'.freeze
GEO_STATUS_COLUMN_WIDTH = 40
namespace :db do |ns|
desc 'Drops the Geo tracking database from config/database_geo.yml for the current RAILS_ENV.'
......@@ -207,7 +208,6 @@ namespace :geo do
task status: :environment do
abort GEO_LICENSE_ERROR_TEXT unless Gitlab::Geo.license_allows?
COLUMN_WIDTH = 40
current_node_status = GeoNodeStatus.current_node_status
geo_node = current_node_status.geo_node
......@@ -227,10 +227,10 @@ namespace :geo do
puts
end
print 'GitLab Version: '.rjust(COLUMN_WIDTH)
print 'GitLab Version: '.rjust(GEO_STATUS_COLUMN_WIDTH)
puts Gitlab::VERSION
print 'Geo Role: '.rjust(COLUMN_WIDTH)
print 'Geo Role: '.rjust(GEO_STATUS_COLUMN_WIDTH)
role =
if Gitlab::Geo.primary?
'Primary'
......@@ -240,7 +240,7 @@ namespace :geo do
puts role
print 'Health Status: '.rjust(COLUMN_WIDTH)
print 'Health Status: '.rjust(GEO_STATUS_COLUMN_WIDTH)
if current_node_status.healthy?
puts current_node_status.health_status
......@@ -248,65 +248,71 @@ namespace :geo do
puts current_node_status.health_status.color(:red)
end
print 'Repositories: '.rjust(COLUMN_WIDTH)
unless current_node_status.healthy?
print 'Health Status Summary: '.rjust(GEO_STATUS_COLUMN_WIDTH)
puts current_node_status.health.color(:red)
end
print 'Repositories: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.repositories_failed_count)
print "#{current_node_status.repositories_synced_count}/#{current_node_status.projects_count} "
puts using_percentage(current_node_status.repositories_synced_in_percentage)
if Gitlab::Geo.repository_verification_enabled?
print 'Verified Repositories: '.rjust(COLUMN_WIDTH)
print 'Verified Repositories: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.repositories_verification_failed_count)
print "#{current_node_status.repositories_verified_count}/#{current_node_status.projects_count} "
puts using_percentage(current_node_status.repositories_verified_in_percentage)
end
print 'Wikis: '.rjust(COLUMN_WIDTH)
print 'Wikis: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.wikis_failed_count)
print "#{current_node_status.wikis_synced_count}/#{current_node_status.projects_count} "
puts using_percentage(current_node_status.wikis_synced_in_percentage)
if Gitlab::Geo.repository_verification_enabled?
print 'Verified Wikis: '.rjust(COLUMN_WIDTH)
print 'Verified Wikis: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.wikis_verification_failed_count)
print "#{current_node_status.wikis_verified_count}/#{current_node_status.projects_count} "
puts using_percentage(current_node_status.wikis_verified_in_percentage)
end
print 'LFS Objects: '.rjust(COLUMN_WIDTH)
print 'LFS Objects: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.lfs_objects_failed_count)
print "#{current_node_status.lfs_objects_synced_count}/#{current_node_status.lfs_objects_count} "
puts using_percentage(current_node_status.lfs_objects_synced_in_percentage)
print 'Attachments: '.rjust(COLUMN_WIDTH)
print 'Attachments: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.attachments_failed_count)
print "#{current_node_status.attachments_synced_count}/#{current_node_status.attachments_count} "
puts using_percentage(current_node_status.attachments_synced_in_percentage)
print 'CI job artifacts: '.rjust(COLUMN_WIDTH)
print 'CI job artifacts: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.job_artifacts_failed_count)
print "#{current_node_status.job_artifacts_synced_count}/#{current_node_status.job_artifacts_count} "
puts using_percentage(current_node_status.job_artifacts_synced_in_percentage)
if Gitlab::CurrentSettings.repository_checks_enabled
print 'Repositories Checked: '.rjust(COLUMN_WIDTH)
print 'Repositories Checked: '.rjust(GEO_STATUS_COLUMN_WIDTH)
show_failed_value(current_node_status.repositories_checked_failed_count)
print "#{current_node_status.repositories_checked_count}/#{current_node_status.projects_count} "
puts using_percentage(current_node_status.repositories_checked_in_percentage)
end
print 'Sync Settings: '.rjust(COLUMN_WIDTH)
print 'Sync Settings: '.rjust(GEO_STATUS_COLUMN_WIDTH)
puts geo_node.namespaces.any? ? 'Selective' : 'Full'
print 'Database replication lag: '.rjust(COLUMN_WIDTH)
print 'Database replication lag: '.rjust(GEO_STATUS_COLUMN_WIDTH)
puts "#{Gitlab::Geo::HealthCheck.new.db_replication_lag_seconds} seconds"
print 'Last event ID seen from primary: '.rjust(COLUMN_WIDTH)
print 'Last event ID seen from primary: '.rjust(GEO_STATUS_COLUMN_WIDTH)
last_event = Geo::EventLog.last
if last_event
print last_event&.id
puts " (#{time_ago_in_words(last_event&.created_at)} ago)"
print 'Last event ID processed by cursor: '.rjust(COLUMN_WIDTH)
print 'Last event ID processed by cursor: '.rjust(GEO_STATUS_COLUMN_WIDTH)
cursor_last_event_id = Geo::EventLogState.last_processed&.event_id
if cursor_last_event_id
......@@ -321,7 +327,7 @@ namespace :geo do
puts 'N/A'
end
print 'Last status report was: '.rjust(COLUMN_WIDTH)
print 'Last status report was: '.rjust(GEO_STATUS_COLUMN_WIDTH)
if current_node_status.updated_at
puts "#{time_ago_in_words(current_node_status.updated_at)} ago"
......
......@@ -34,6 +34,7 @@ FactoryBot.define do
wikis_checksum_mismatch_count 10
repositories_retrying_verification_count 25
wikis_retrying_verification_count 3
repositories_checked_failed_count 1
last_event_id 2
last_event_timestamp { Time.now.to_i }
cursor_last_event_id 1
......
......@@ -62,19 +62,60 @@ describe 'geo rake tasks', :geo do
end
describe 'status task', :geo_fdw do
let!(:current_node) { create(:geo_node) }
let!(:primary_node) { create(:geo_node, :primary) }
let!(:geo_event_log) { create(:geo_event_log) }
before do
expect(Gitlab::Geo).to receive(:license_allows?).and_return(true).at_least(:once)
expect(GeoNodeStatus).to receive(:current_node_status).and_call_original
stub_current_geo_node(current_node)
context 'without a valid license' do
before do
stub_licensed_features(geo: false)
end
it 'runs with an error' do
expect { run_rake_task('geo:status') }.to raise_error("GitLab Geo is not supported with this license. Please contact the sales team: https://about.gitlab.com/sales.")
end
end
it 'runs with no error' do
expect { run_rake_task('geo:status') }.to output(/Sync Settings: Full/).to_stdout
context 'with a valid license' do
let!(:current_node) { create(:geo_node) }
let!(:primary_node) { create(:geo_node, :primary) }
let!(:geo_event_log) { create(:geo_event_log) }
let!(:geo_node_status) { build(:geo_node_status, :healthy, geo_node: current_node) }
before do
stub_licensed_features(geo: true)
stub_current_geo_node(current_node)
allow(GeoNodeStatus).to receive(:current_node_status).once.and_return(geo_node_status)
end
it 'runs with no error' do
expect { run_rake_task('geo:status') }.not_to raise_error
end
context 'with a healthy node' do
before do
geo_node_status.status_message = nil
end
it 'shows status as healthy' do
expect { run_rake_task('geo:status') }.to output(/Health Status: Healthy/).to_stdout
end
it 'does not show health status summary' do
expect { run_rake_task('geo:status') }.not_to output(/Health Status Summary/).to_stdout
end
end
context 'with an unhealthy node' do
before do
geo_node_status.status_message = 'Something went wrong'
end
it 'shows status as unhealthy' do
expect { run_rake_task('geo:status') }.to output(/Health Status: Unhealthy/).to_stdout
end
it 'shows health status summary' do
expect { run_rake_task('geo:status') }.to output(/Health Status Summary: Something went wrong/).to_stdout
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment