Commit c9f2c7df authored by Michael Kozono

Merge branch 'ag-check-replication-status' into 'master'

Add rake task to check replication status

See merge request gitlab-org/gitlab!33834
parents cce32177 28e736d9
......@@ -45,7 +45,7 @@ be found in `/var/opt/gitlab/gitlab-rails/shared/pages` if using Omnibus).
## Preflight checks
Run this command to list out all preflight checks before scheduling a planned failover to ensure the process will go smoothly:
Run this command to list all preflight checks and to automatically check whether replication and verification are complete before scheduling a planned failover, to ensure the process goes smoothly:
```shell
gitlab-ctl promotion-preflight-checks
......
---
title: 'Geo: Add rake task to check replication status'
merge_request: 33834
author:
type: changed
......@@ -44,6 +44,23 @@ module Gitlab
puts
end
# Prints the replication and verification status report by invoking each
# per-data-type printer below, in this fixed order. The individual printers
# are defined outside this excerpt.
def print_replication_verification_status
print_repositories_status
print_verified_repositories
print_wikis_status
print_verified_wikis
print_lfs_objects_status
print_attachments_status
print_ci_job_artifacts_status
print_container_repositories_status
print_design_repositories_status
print_repositories_checked_status
end
# Reports whether both replication and verification have finished.
#
# Verification is only consulted when replication is complete, because `&&`
# short-circuits.
#
# @return [Boolean] result of `replication_complete? && verification_complete?`
def replication_verification_complete?
  replication_complete? && verification_complete?
end
private
def print_current_node_info
......@@ -224,6 +241,44 @@ module Gitlab
end
end
# True when none of the failure counters returned by `replicables` is non-zero.
#
# Only failure counters are inspected here; a counter that is not equal to 0
# (including nil) makes the result false.
def replication_complete?
  replicables.none? { |count| count != 0 }
end
# True when every failure counter returned by `verifiables` equals 0.
#
# An empty list (no verification feature enabled) counts as complete.
def verification_complete?
  outstanding = verifiables.reject { |count| count == 0 }
  outstanding.empty?
end
# Collects the replication failure counters read from `current_node_status`.
#
# The container repositories counter is appended only when
# `Gitlab.config.geo.registry_replication.enabled` is truthy.
#
# @return [Array] failure counters, in a fixed order
def replicables
  failed_counts = [
    current_node_status.repositories_failed_count,
    current_node_status.wikis_failed_count,
    current_node_status.lfs_objects_failed_count,
    current_node_status.attachments_failed_count,
    current_node_status.job_artifacts_failed_count,
    current_node_status.design_repositories_failed_count
  ]

  registry_enabled = Gitlab.config.geo.registry_replication.enabled
  failed_counts << current_node_status.container_repositories_failed_count if registry_enabled

  failed_counts
end
# Collects the verification failure counters read from `current_node_status`.
#
# Repository and wiki verification counters are included only when
# `Gitlab::Geo.repository_verification_enabled?` is truthy; the repositories
# checked counter only when `Gitlab::CurrentSettings.repository_checks_enabled`
# is truthy.
#
# @return [Array] failure counters; empty when neither feature is enabled
def verifiables
  failed_counts = []

  if Gitlab::Geo.repository_verification_enabled?
    failed_counts.push(
      current_node_status.repositories_verification_failed_count,
      current_node_status.wikis_verification_failed_count
    )
  end

  checks_enabled = Gitlab::CurrentSettings.repository_checks_enabled
  failed_counts << current_node_status.repositories_checked_failed_count if checks_enabled

  failed_counts
end
# Prints `value` in red followed by a '/' separator, but only when it is
# greater than zero; prints nothing otherwise.
#
# @param value [Numeric] the failure counter to display
# @return [nil]
def show_failed_value(value)
  return unless value > 0

  highlighted = value.to_s.color(:red)
  print highlighted + '/'
end
......
......@@ -238,6 +238,38 @@ namespace :geo do
Gitlab::Geo::GeoTasks.update_primary_geo_node_url
end
# Prints the replication/verification status of the current secondary node
# and exits with status 0 when everything is up-to-date, or 1 otherwise.
# Aborts when the license does not allow Geo. On a non-secondary node it
# prints a notice and exits via a bare `exit` (success status).
desc 'GitLab | Geo | Check replication/verification status'
task check_replication_verification_status: :environment do
  abort GEO_LICENSE_ERROR_TEXT unless Gitlab::Geo.license_allows?

  current_node_status = GeoNodeStatus.current_node_status
  geo_node = current_node_status.geo_node

  unless geo_node.secondary?
    puts 'This command is only available on a secondary node'.color(:red)
    exit
  end

  puts

  status_check = Gitlab::Geo::GeoNodeStatusCheck.new(current_node_status, geo_node)
  status_check.print_replication_verification_status

  if status_check.replication_verification_complete?
    puts 'SUCCESS - Replication is up-to-date.'.color(:green)
    exit 0
  else
    puts "ERROR - Replication is not up-to-date. \n"\
      "Please see documentation to complete replication: "\
      "https://docs.gitlab.com/ee/administration/geo/disaster_recovery"\
      "/planned_failover.html#ensure-geo-replication-is-up-to-date"
      .color(:red)
    exit 1
  end
end
desc 'GitLab | Geo | Print Geo node status'
task status: :environment do
abort GEO_LICENSE_ERROR_TEXT unless Gitlab::Geo.license_allows?
......
......@@ -65,6 +65,19 @@ FactoryBot.define do
repository_verification_enabled { true }
end
# Status in which every replication and verification failure counter is zero.
trait :replicated_and_verified do
  # Replication failure counters
  repositories_failed_count { 0 }
  wikis_failed_count { 0 }
  lfs_objects_failed_count { 0 }
  attachments_failed_count { 0 }
  job_artifacts_failed_count { 0 }
  design_repositories_failed_count { 0 }
  container_repositories_failed_count { 0 }

  # Verification and repository-check failure counters
  repositories_verification_failed_count { 0 }
  wikis_verification_failed_count { 0 }
  repositories_checked_failed_count { 0 }
end
# Status carrying a connection-failure message (HTTP 401) in `status_message`.
trait :unhealthy do
  status_message do
    "Could not connect to Geo node - HTTP Status Code: 401 Unauthorized\nTest"
  end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Geo::GeoNodeStatusCheck do
  let(:current_node) { create(:geo_node) }
  let(:geo_node_status) do
    build(:geo_node_status, :replicated_and_verified, geo_node: current_node)
  end

  # Declared with `subject` rather than `let(:subject)` so RSpec's own subject
  # is defined instead of being shadowed by a memoized helper.
  subject { described_class.new(geo_node_status, current_node) }

  before do
    # Container repositories are only taken into account when registry
    # replication is enabled.
    allow(Gitlab.config.geo.registry_replication).to receive(:enabled).and_return(true)
  end

  describe '#print_replication_verification_status' do
    it 'prints messages for all verification checks' do
      [
        /Repositories/,
        /Verified Repositories/,
        /Wikis/,
        /Verified Wikis/,
        /LFS Objects/,
        /Attachments/,
        /CI job artifacts/,
        /Container repositories/,
        /Design repositories/,
        /Repositories Checked/
      ].each do |text|
        expect { subject.print_replication_verification_status }.to output(text).to_stdout
      end
    end
  end

  describe '#replication_verification_complete?' do
    context 'when replication is up-to-date' do
      it 'returns true' do
        expect(subject.replication_verification_complete?).to be_truthy
      end
    end

    context 'when replication is not up-to-date' do
      before do
        # The checked-repositories counter is only consulted when repository
        # checks are enabled, so pin the setting instead of relying on its default.
        allow(Gitlab::CurrentSettings).to receive(:repository_checks_enabled).and_return(true)
        allow(geo_node_status).to receive(:repositories_checked_failed_count).and_return(1)
      end

      it 'returns false' do
        expect(subject.replication_verification_complete?).to be_falsy
      end
    end
  end
end
......@@ -321,6 +321,47 @@ RSpec.describe 'geo rake tasks', :geo do
end
end
describe 'geo:check_replication_verification_status' do
  let(:run_task) { run_rake_task('geo:check_replication_verification_status') }
  let!(:current_node) { create(:geo_node) }
  let!(:geo_node_status) { build(:geo_node_status, :healthy, geo_node: current_node) }

  before do
    allow(GeoNodeStatus).to receive(:current_node_status).and_return(geo_node_status)
    allow(Gitlab.config.geo.registry_replication).to receive(:enabled).and_return(true)

    # `replication_verification_complete?` is an instance method, so it has to
    # be stubbed on instances; stubbing it on the class never affects the task.
    allow_any_instance_of(Gitlab::Geo::GeoNodeStatusCheck)
      .to receive(:replication_verification_complete?).and_return(complete)
  end

  # The task always terminates through `exit`, which raises SystemExit. Every
  # example therefore expects that error explicitly: if it escaped the
  # `output` matcher, the printed text would never actually be compared.
  context 'when replication is up-to-date' do
    let(:complete) { true }

    it 'prints a success message' do
      expect { run_task }
        .to raise_error(SystemExit).and output(/SUCCESS - Replication is up-to-date/).to_stdout
    end

    it 'exits with a 0' do
      expect { run_task }.to raise_error(SystemExit) do |error|
        expect(error.status).to eq(0)
      end
    end
  end

  context 'when replication is not up-to-date' do
    let(:complete) { false }

    it 'prints an error message' do
      expect { run_task }
        .to raise_error(SystemExit).and output(/ERROR - Replication is not up-to-date/).to_stdout
    end

    it 'exits with a 1' do
      expect { run_task }.to raise_error(SystemExit) do |error|
        expect(error.status).to eq(1)
      end
    end
  end
end
describe 'geo:status', :geo_fdw do
context 'without a valid license' do
before do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment