Commit 71dbfa0c authored by Alex Ives

Pause replication and PG WAL from secondary

- Add rake task to call node toggle service
- Add a service that lets a secondary ask the primary to
  disable it
- Refactor node_status_post_service to deduplicate shared
  code with new service

Relates to https://gitlab.com/gitlab-org/gitlab/issues/35913
parent 5b6fafe9
......@@ -29,7 +29,7 @@ module Geo
end
def send_status_to_primary(node, status)
if !NodeStatusPostService.new.execute(status) && prometheus_enabled?
if !NodeStatusRequestService.new.execute(status) && prometheus_enabled?
increment_failed_status_counter(node)
end
end
......
# frozen_string_literal: true
module Geo
  # Sends a Geo node status to the primary node by delegating the HTTP work
  # to RequestService#execute.
  class NodeStatusRequestService < RequestService
    include Gitlab::Geo::LogHelpers

    # Sends +status+ to the primary node's status URL.
    #
    # @param status [#attributes] status record; its attributes (minus 'id')
    #   become the request body
    # @return [Boolean] false when no primary node is present, otherwise the
    #   result of RequestService#execute
    def execute(status)
      if primary_node.present?
        target_url = primary_status_url
        request_body = payload(status)

        super(target_url, request_body)
      else
        false
      end
    end

    private

    # Status URL of the primary node, or nil when primary_node is nil.
    # primary_node is not defined in this class (presumably inherited from
    # RequestService).
    def primary_status_url
      primary_node&.status_url
    end

    # Request body: the status attributes without the 'id' key.
    def payload(status)
      attributes = status.attributes
      attributes.except('id')
    end
  end
end
# frozen_string_literal: true
module Geo
  # Sends an HTTP PUT carrying an `enabled` flag to the primary node's API
  # URL for the current node, then expires the Geo cache when that succeeds.
  class ReplicationToggleRequestService < RequestService
    include Gitlab::Geo::LogHelpers

    # Sends the toggle request to the primary.
    #
    # @param enabled [Boolean] value sent as `enabled` in the request body
    # @return [Boolean] false when no primary node is present, otherwise the
    #   result of RequestService#execute
    def execute(enabled:)
      return false unless primary_node.present?

      super(primary_node_api_url, payload(enabled), method: Net::HTTP::Put).tap do |success|
        # Only drop the cached Geo state once the primary accepted the change.
        Gitlab::Geo.expire_cache! if success
      end
    end

    # Request body for the toggle call.
    def payload(enabled)
      { enabled: enabled }
    end

    # API URL on the primary for the current node, or nil when primary_node
    # is nil.
    def primary_node_api_url
      current = Gitlab::Geo.current_node
      primary_node&.node_api_url(current)
    end
  end
end
# frozen_string_literal: true
module Geo
class NodeStatusPostService
include Gitlab::Geo::LogHelpers
class RequestService
private
def execute(url, body, method: Net::HTTP::Post)
return false if url.nil?
def execute(status)
response = Gitlab::HTTP.post(primary_status_url, body: payload(status), allow_local_requests: true, headers: headers, timeout: timeout)
response = Gitlab::HTTP.perform_request(method, url, body: body, allow_local_requests: true, headers: headers, timeout: timeout)
unless response.success?
handle_failure_for(response)
......@@ -13,23 +15,11 @@ module Geo
end
true
rescue Gitlab::Geo::GeoNodeNotFoundError => e
log_error(e.to_s)
false
rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
false
rescue Gitlab::HTTP::Error, Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError => e
log_error('Failed to post status data to primary', e)
log_error("Failed to #{method} to primary url: #{url}", e)
false
end
private
def payload(status)
status.attributes.except('id')
end
def handle_failure_for(response)
message = "Could not connect to Geo primary node - HTTP Status Code: #{response.code} #{response.message}"
payload = response.parsed_response
......@@ -44,15 +34,20 @@ module Geo
log_error([message, details].compact.join("\n"))
end
def primary_status_url
primary_node = Gitlab::Geo.primary_node
raise Gitlab::Geo::GeoNodeNotFoundError.new('Failed to look up Geo primary node in the database') unless primary_node
primary_node.status_url
def primary_node
Gitlab::Geo.primary_node
rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
nil
end
def headers
Gitlab::Geo::BaseRequest.new(scope: ::Gitlab::Geo::API_SCOPE).headers
rescue Gitlab::Geo::GeoNodeNotFoundError => e
log_error('Geo primary node could not be found', e)
rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
nil
end
def timeout
......
---
title: Add rake geo:replication:pause to pause replication from a secondary node
merge_request: 29515
author:
type: added
namespace :geo do
  namespace :replication do
    # Both tasks call Geo::ReplicationToggleRequestService, which returns a
    # falsey value when the request to the primary did not succeed. The result
    # is checked so that a failed toggle ends the rake run with a non-zero
    # exit status instead of silently looking successful.

    desc 'GitLab | Geo | Pause replication from this secondary node'
    task pause: :gitlab_environment do
      success = Geo::ReplicationToggleRequestService.new.execute(enabled: false)

      abort 'Failed to pause replication. Check the logs for details.' unless success
    end

    desc 'GitLab | Geo | Resume replication on this secondary node'
    task resume: :gitlab_environment do
      success = Geo::ReplicationToggleRequestService.new.execute(enabled: true)

      abort 'Failed to resume replication. Check the logs for details.' unless success
    end
  end
end
......@@ -82,8 +82,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
describe '#execute' do
before do
response = double(success?: true, parsed_response: data.stringify_keys, code: 200)
allow(Gitlab::HTTP).to receive(:post).and_return(response)
allow_any_instance_of(Geo::NodeStatusRequestService).to receive(:execute).and_return(true)
end
context 'when current node is nil' do
......@@ -92,7 +91,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
end
it 'skips posting the status' do
expect(Gitlab::HTTP).to receive(:post).never
expect_any_instance_of(Geo::NodeStatusRequestService).to receive(:execute).never
subject.execute
end
......@@ -195,7 +194,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
end
it 'increments a counter when metrics fail to retrieve' do
allow_next_instance_of(Geo::NodeStatusPostService) do |instance|
allow_next_instance_of(Geo::NodeStatusRequestService) do |instance|
allow(instance).to receive(:execute).and_return(false)
end
......
......@@ -2,71 +2,30 @@
require 'spec_helper'
RSpec.describe Geo::NodeStatusPostService, :geo do
RSpec.describe Geo::NodeStatusRequestService, :geo do
include ::EE::GeoHelpers
include ApiHelpers
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
subject { described_class.new }
describe '#execute' do
before do
stub_current_geo_node(primary)
end
it 'parses a 401 response' do
response = double(success?: false,
code: 401,
message: 'Unauthorized',
parsed_response: { 'message' => 'Test' } )
allow(Gitlab::HTTP).to receive(:post).and_return(response)
expect(subject).to receive(:log_error).with("Could not connect to Geo primary node - HTTP Status Code: 401 Unauthorized\nTest")
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'alerts on bad SSL certficate' do
message = 'bad certificate'
allow(Gitlab::HTTP).to receive(:post).and_raise(OpenSSL::SSL::SSLError.new(message))
expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(OpenSSL::SSL::SSLError))
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'handles connection refused' do
allow(Gitlab::HTTP).to receive(:post).and_raise(Errno::ECONNREFUSED.new('bad connection'))
expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(Errno::ECONNREFUSED))
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
it_behaves_like 'a geo RequestService' do
let(:args) { secondary.find_or_build_status }
end
it 'returns meaningful error message when primary uses incorrect db key' do
allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError)
expect(subject).to receive(:log_error).with(
"Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.",
kind_of(OpenSSL::Cipher::CipherError)
)
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'gracefully handles case when primary is deleted' do
primary.destroy!
expect(subject).to receive(:log_error).with(
'Failed to look up Geo primary node in the database'
)
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
describe '#execute' do
before do
stub_current_geo_node(primary)
end
it 'does not include id in the payload' do
expect(Gitlab::HTTP).to receive(:post)
expect(Gitlab::HTTP).to receive(:perform_request)
.with(
Net::HTTP::Post,
primary.status_url,
hash_including(body: hash_not_including('id')))
.and_return(double(success?: true))
......@@ -80,8 +39,9 @@ RSpec.describe Geo::NodeStatusPostService, :geo do
end
it 'sends geo_node_id in the request' do
expect(Gitlab::HTTP).to receive(:post)
expect(Gitlab::HTTP).to receive(:perform_request)
.with(
Net::HTTP::Post,
primary.status_url,
hash_including(body: hash_including('geo_node_id' => secondary.id)))
.and_return(double(success?: true))
......
# frozen_string_literal: true
require 'spec_helper'
# Specs for the service a secondary uses to toggle its replication via the
# primary. Uses `RSpec.describe` (as the other specs do) so the file does not
# depend on RSpec's global monkey patching.
RSpec.describe Geo::ReplicationToggleRequestService, :geo do
  include ::EE::GeoHelpers
  include ApiHelpers

  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:primary) { create(:geo_node, :primary) }

  # Keyword arguments for #execute; also read by the shared examples.
  let(:args) { { enabled: false } }

  before do
    stub_current_geo_node(secondary)
  end

  it_behaves_like 'a geo RequestService'

  it 'expires the geo cache on success' do
    response = double(success?: true, code: 200)
    allow(Gitlab::HTTP).to receive(:perform_request).and_return(response)

    expect(Gitlab::Geo).to receive(:expire_cache!)
    # #execute only accepts keywords, so the Hash must be splatted: Ruby 3 no
    # longer converts a positional Hash into keyword arguments.
    expect(subject.execute(**args)).to be_truthy
  end

  it 'does not expire the geo cache on failure' do
    response = double(success?: false,
                      code: 401,
                      message: 'Unauthorized',
                      parsed_response: { 'message' => 'Test' })
    allow(Gitlab::HTTP).to receive(:perform_request).and_return(response)

    expect(Gitlab::Geo).not_to receive(:expire_cache!)
    expect(subject.execute(**args)).to be_falsey
  end
end
# frozen_string_literal: true
# Shared failure-handling examples for services built on Geo::RequestService.
#
# The including group must provide:
#   subject - the service instance under test
#   args    - what to pass to #execute: a Hash is sent as keyword arguments,
#             any other object is sent as the single positional argument
RSpec.shared_examples 'a geo RequestService' do
  include ::EE::GeoHelpers
  include ApiHelpers

  let_it_be(:primary) { create(:geo_node, :primary) } unless method_defined?(:primary)
  let(:args) { raise 'args must be supplied in a let variable in order to execute the request' } unless method_defined?(:args)

  # Invokes #execute with `args`. A Hash is splatted explicitly because Ruby 3
  # no longer converts a positional Hash into keyword arguments, which
  # keyword-only #execute signatures require.
  def execute_request
    args.is_a?(Hash) ? subject.execute(**args) : subject.execute(args)
  end

  describe '#execute' do
    it 'parses a 401 response' do
      response = double(success?: false,
                        code: 401,
                        message: 'Unauthorized',
                        parsed_response: { 'message' => 'Test' })
      allow(Gitlab::HTTP).to receive(:perform_request).and_return(response)

      expect(subject).to receive(:log_error).with("Could not connect to Geo primary node - HTTP Status Code: 401 Unauthorized\nTest")
      expect(execute_request).to be_falsey
    end

    it 'alerts on bad SSL certificate' do
      allow(Gitlab::HTTP).to receive(:perform_request).and_raise(OpenSSL::SSL::SSLError.new('bad certificate'))

      expect(subject).to receive(:log_error).with(/Failed to Net::HTTP::(Put|Post) to primary url: /, kind_of(OpenSSL::SSL::SSLError))
      expect(execute_request).to be_falsey
    end

    it 'handles connection refused' do
      allow(Gitlab::HTTP).to receive(:perform_request).and_raise(Errno::ECONNREFUSED.new('bad connection'))

      expect(subject).to receive(:log_error).with(/Failed to Net::HTTP::(Put|Post) to primary url: /, kind_of(Errno::ECONNREFUSED))
      expect(execute_request).to be_falsey
    end

    it 'returns meaningful error message when primary uses incorrect db key' do
      allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError)

      expect(subject).to receive(:log_error).with(
        "Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.",
        kind_of(OpenSSL::Cipher::CipherError)
      )
      expect(execute_request).to be_falsey
    end

    it 'gracefully handles case when primary is deleted' do
      primary.destroy!

      expect(execute_request).to be_falsey
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment