Commit 71dbfa0c authored by Alex Ives

Pause replication and PG WAL from secondary

- Add rake task to call node toggle service
- Add service for a secondary to request that the
  primary disable it
- Refactor node_status_post_service to deduplicate shared
  code with new service

Relates to https://gitlab.com/gitlab-org/gitlab/issues/35913
parent 5b6fafe9
...@@ -29,7 +29,7 @@ module Geo ...@@ -29,7 +29,7 @@ module Geo
end end
def send_status_to_primary(node, status) def send_status_to_primary(node, status)
if !NodeStatusPostService.new.execute(status) && prometheus_enabled? if !NodeStatusRequestService.new.execute(status) && prometheus_enabled?
increment_failed_status_counter(node) increment_failed_status_counter(node)
end end
end end
......
# frozen_string_literal: true
module Geo
  # Request service that sends a node status to the status URL of the Geo
  # primary node, delegating the HTTP work to RequestService#execute.
  class NodeStatusRequestService < RequestService
    include Gitlab::Geo::LogHelpers

    # Sends +status+ to the primary node.
    #
    # @param status [#attributes] record whose attributes (minus 'id') become
    #   the request body
    # @return [Boolean] false when no primary node is available; otherwise the
    #   result of RequestService#execute
    def execute(status)
      return false if primary_node.blank?

      super(primary_status_url, payload(status))
    end

    private

    # Request body: all attributes of the status except its 'id'.
    def payload(status)
      attributes = status.attributes
      attributes.except('id')
    end

    # Status URL of the primary node, or nil when there is no primary node.
    def primary_status_url
      primary_node&.status_url
    end
  end
end
# frozen_string_literal: true
module Geo
  # Request service that asks the Geo primary node to enable or disable
  # replication for the current node, by sending a PUT request to the
  # primary's node API URL for that node.
  class ReplicationToggleRequestService < RequestService
    include Gitlab::Geo::LogHelpers

    # Sends the toggle request to the primary node.
    #
    # @param enabled [Boolean] value sent as the `enabled` field of the body
    # @return [Boolean] false when no primary node is available; otherwise the
    #   result of RequestService#execute
    def execute(enabled:)
      return false unless primary_node.present?

      success = super(primary_node_api_url, payload(enabled), method: Net::HTTP::Put)
      # Only expire the Geo cache once the request has actually succeeded.
      Gitlab::Geo.expire_cache! if success
      success
    end

    private

    # Request body carrying the desired replication state.
    def payload(enabled)
      { enabled: enabled }
    end

    # API URL on the primary for the current node, or nil without a primary.
    def primary_node_api_url
      primary_node&.node_api_url(Gitlab::Geo.current_node)
    end
  end
end
# frozen_string_literal: true # frozen_string_literal: true
module Geo module Geo
class NodeStatusPostService class RequestService
include Gitlab::Geo::LogHelpers private
def execute(url, body, method: Net::HTTP::Post)
return false if url.nil?
def execute(status) response = Gitlab::HTTP.perform_request(method, url, body: body, allow_local_requests: true, headers: headers, timeout: timeout)
response = Gitlab::HTTP.post(primary_status_url, body: payload(status), allow_local_requests: true, headers: headers, timeout: timeout)
unless response.success? unless response.success?
handle_failure_for(response) handle_failure_for(response)
...@@ -13,23 +15,11 @@ module Geo ...@@ -13,23 +15,11 @@ module Geo
end end
true true
rescue Gitlab::Geo::GeoNodeNotFoundError => e
log_error(e.to_s)
false
rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
false
rescue Gitlab::HTTP::Error, Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError => e rescue Gitlab::HTTP::Error, Timeout::Error, SocketError, SystemCallError, OpenSSL::SSL::SSLError => e
log_error('Failed to post status data to primary', e) log_error("Failed to #{method} to primary url: #{url}", e)
false false
end end
private
def payload(status)
status.attributes.except('id')
end
def handle_failure_for(response) def handle_failure_for(response)
message = "Could not connect to Geo primary node - HTTP Status Code: #{response.code} #{response.message}" message = "Could not connect to Geo primary node - HTTP Status Code: #{response.code} #{response.message}"
payload = response.parsed_response payload = response.parsed_response
...@@ -44,15 +34,20 @@ module Geo ...@@ -44,15 +34,20 @@ module Geo
log_error([message, details].compact.join("\n")) log_error([message, details].compact.join("\n"))
end end
def primary_status_url def primary_node
primary_node = Gitlab::Geo.primary_node Gitlab::Geo.primary_node
raise Gitlab::Geo::GeoNodeNotFoundError.new('Failed to look up Geo primary node in the database') unless primary_node rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
primary_node.status_url nil
end end
def headers def headers
Gitlab::Geo::BaseRequest.new(scope: ::Gitlab::Geo::API_SCOPE).headers Gitlab::Geo::BaseRequest.new(scope: ::Gitlab::Geo::API_SCOPE).headers
rescue Gitlab::Geo::GeoNodeNotFoundError => e
log_error('Geo primary node could not be found', e)
rescue OpenSSL::Cipher::CipherError => e
log_error('Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.', e)
nil
end end
def timeout def timeout
......
---
title: Add rake geo:replication:pause to pause replication from a secondary node
merge_request: 29515
author:
type: added
namespace :geo do
  namespace :replication do
    # Both tasks call Geo::ReplicationToggleRequestService, which returns a
    # falsey value when the request could not be made or was rejected. The
    # result is surfaced to the operator: a message on success, and a non-zero
    # exit status (via `abort`) on failure so scripts can detect it.

    desc 'GitLab | Geo | Replication | Pause replication from this secondary node'
    task pause: :gitlab_environment do
      succeeded = Geo::ReplicationToggleRequestService.new.execute(enabled: false)
      abort 'Failed to pause replication.' unless succeeded

      puts 'Replication paused.'
    end

    desc 'GitLab | Geo | Replication | Resume replication from this secondary node'
    task resume: :gitlab_environment do
      succeeded = Geo::ReplicationToggleRequestService.new.execute(enabled: true)
      abort 'Failed to resume replication.' unless succeeded

      puts 'Replication resumed.'
    end
  end
end
...@@ -82,8 +82,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do ...@@ -82,8 +82,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
describe '#execute' do describe '#execute' do
before do before do
response = double(success?: true, parsed_response: data.stringify_keys, code: 200) allow_any_instance_of(Geo::NodeStatusRequestService).to receive(:execute).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(response)
end end
context 'when current node is nil' do context 'when current node is nil' do
...@@ -92,7 +91,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do ...@@ -92,7 +91,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
end end
it 'skips posting the status' do it 'skips posting the status' do
expect(Gitlab::HTTP).to receive(:post).never expect_any_instance_of(Geo::NodeStatusRequestService).to receive(:execute).never
subject.execute subject.execute
end end
...@@ -195,7 +194,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do ...@@ -195,7 +194,7 @@ RSpec.describe Geo::MetricsUpdateService, :geo, :prometheus do
end end
it 'increments a counter when metrics fail to retrieve' do it 'increments a counter when metrics fail to retrieve' do
allow_next_instance_of(Geo::NodeStatusPostService) do |instance| allow_next_instance_of(Geo::NodeStatusRequestService) do |instance|
allow(instance).to receive(:execute).and_return(false) allow(instance).to receive(:execute).and_return(false)
end end
......
...@@ -2,71 +2,30 @@ ...@@ -2,71 +2,30 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::NodeStatusPostService, :geo do RSpec.describe Geo::NodeStatusRequestService, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
include ApiHelpers include ApiHelpers
let_it_be(:primary) { create(:geo_node, :primary) } let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) } let_it_be(:secondary) { create(:geo_node) }
subject { described_class.new }
describe '#execute' do
before do before do
stub_current_geo_node(primary) stub_current_geo_node(primary)
end end
it 'parses a 401 response' do it_behaves_like 'a geo RequestService' do
response = double(success?: false, let(:args) { secondary.find_or_build_status }
code: 401,
message: 'Unauthorized',
parsed_response: { 'message' => 'Test' } )
allow(Gitlab::HTTP).to receive(:post).and_return(response)
expect(subject).to receive(:log_error).with("Could not connect to Geo primary node - HTTP Status Code: 401 Unauthorized\nTest")
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'alerts on bad SSL certficate' do
message = 'bad certificate'
allow(Gitlab::HTTP).to receive(:post).and_raise(OpenSSL::SSL::SSLError.new(message))
expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(OpenSSL::SSL::SSLError))
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'handles connection refused' do
allow(Gitlab::HTTP).to receive(:post).and_raise(Errno::ECONNREFUSED.new('bad connection'))
expect(subject).to receive(:log_error).with('Failed to post status data to primary', kind_of(Errno::ECONNREFUSED))
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end end
it 'returns meaningful error message when primary uses incorrect db key' do describe '#execute' do
allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError) before do
stub_current_geo_node(primary)
expect(subject).to receive(:log_error).with(
"Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.",
kind_of(OpenSSL::Cipher::CipherError)
)
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end
it 'gracefully handles case when primary is deleted' do
primary.destroy!
expect(subject).to receive(:log_error).with(
'Failed to look up Geo primary node in the database'
)
expect(subject.execute(secondary.find_or_build_status)).to be_falsey
end end
it 'does not include id in the payload' do it 'does not include id in the payload' do
expect(Gitlab::HTTP).to receive(:post) expect(Gitlab::HTTP).to receive(:perform_request)
.with( .with(
Net::HTTP::Post,
primary.status_url, primary.status_url,
hash_including(body: hash_not_including('id'))) hash_including(body: hash_not_including('id')))
.and_return(double(success?: true)) .and_return(double(success?: true))
...@@ -80,8 +39,9 @@ RSpec.describe Geo::NodeStatusPostService, :geo do ...@@ -80,8 +39,9 @@ RSpec.describe Geo::NodeStatusPostService, :geo do
end end
it 'sends geo_node_id in the request' do it 'sends geo_node_id in the request' do
expect(Gitlab::HTTP).to receive(:post) expect(Gitlab::HTTP).to receive(:perform_request)
.with( .with(
Net::HTTP::Post,
primary.status_url, primary.status_url,
hash_including(body: hash_including('geo_node_id' => secondary.id))) hash_including(body: hash_including('geo_node_id' => secondary.id)))
.and_return(double(success?: true)) .and_return(double(success?: true))
......
# frozen_string_literal: true
require 'spec_helper'
# Covers the behaviour specific to the replication toggle service; the
# generic request handling is exercised through the shared examples.
RSpec.describe Geo::ReplicationToggleRequestService, :geo do
  include ::EE::GeoHelpers
  include ApiHelpers

  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:primary) { create(:geo_node, :primary) }

  # Keyword arguments for #execute, kept as a Hash so the shared examples can
  # consume them through `args` as well.
  let(:args) { { enabled: false } }

  before do
    stub_current_geo_node(secondary)
  end

  it_behaves_like 'a geo RequestService'

  it 'expires the geo cache on success' do
    response = double(success?: true, code: 200)
    allow(Gitlab::HTTP).to receive(:perform_request).and_return(response)

    expect(Gitlab::Geo).to receive(:expire_cache!)
    # #execute only accepts keywords, so splat the Hash explicitly; implicit
    # Hash-to-keyword conversion no longer happens on Ruby 3.
    expect(subject.execute(**args)).to be_truthy
  end

  it 'does not expire the geo cache on failure' do
    response = double(success?: false,
                      code: 401,
                      message: 'Unauthorized',
                      parsed_response: { 'message' => 'Test' })
    allow(Gitlab::HTTP).to receive(:perform_request).and_return(response)

    expect(Gitlab::Geo).not_to receive(:expire_cache!)
    expect(subject.execute(**args)).to be_falsey
  end
end
# frozen_string_literal: true
# Shared examples for services built on Geo::RequestService. The including
# group must expose `args` (what is passed to `subject.execute`) and may expose
# its own `primary` node; the defaults below are only defined when a method of
# that name is not already defined.
RSpec.shared_examples 'a geo RequestService' do
  include ::EE::GeoHelpers
  include ApiHelpers

  let_it_be(:primary) { create(:geo_node, :primary) } unless method_defined?(:primary)
  let(:args) { raise 'args must be supplied in a let variable in order to execute the request' } unless method_defined?(:args)

  describe '#execute' do
    it 'parses a 401 response' do
      unauthorized = double(
        success?: false,
        code: 401,
        message: 'Unauthorized',
        parsed_response: { 'message' => 'Test' }
      )
      allow(Gitlab::HTTP).to receive(:perform_request).and_return(unauthorized)

      expect(subject).to receive(:log_error).with("Could not connect to Geo primary node - HTTP Status Code: 401 Unauthorized\nTest")

      outcome = subject.execute(args)
      expect(outcome).to be_falsey
    end

    it 'alerts on bad SSL certficate' do
      ssl_failure = OpenSSL::SSL::SSLError.new('bad certificate')
      allow(Gitlab::HTTP).to receive(:perform_request).and_raise(ssl_failure)

      expect(subject).to receive(:log_error).with(/Failed to Net::HTTP::(Put|Post) to primary url: /, kind_of(OpenSSL::SSL::SSLError))

      outcome = subject.execute(args)
      expect(outcome).to be_falsey
    end

    it 'handles connection refused' do
      refused = Errno::ECONNREFUSED.new('bad connection')
      allow(Gitlab::HTTP).to receive(:perform_request).and_raise(refused)

      expect(subject).to receive(:log_error).with(/Failed to Net::HTTP::(Put|Post) to primary url: /, kind_of(Errno::ECONNREFUSED))

      outcome = subject.execute(args)
      expect(outcome).to be_falsey
    end

    it 'returns meaningful error message when primary uses incorrect db key' do
      allow_any_instance_of(GeoNode).to receive(:secret_access_key).and_raise(OpenSSL::Cipher::CipherError)

      expect(subject).to receive(:log_error).with(
        "Error decrypting the Geo secret from the database. Check that the primary uses the correct db_key_base.",
        kind_of(OpenSSL::Cipher::CipherError)
      )

      outcome = subject.execute(args)
      expect(outcome).to be_falsey
    end

    it 'gracefully handles case when primary is deleted' do
      primary.destroy!

      outcome = subject.execute(args)
      expect(outcome).to be_falsey
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment