Commit 248debde authored by Bob Van Landuyt's avatar Bob Van Landuyt

Implement backoff for the circuitbreaker

The circuitbreaker now has 2 failure modes:

- Backing off: This will raise the `Gitlab::Git::Storage::Failing`
  exception. Access to the shard is blocked temporarily.
- Circuit broken: This will raise the
  `Gitlab::Git::Storage::CircuitBroken` exception. Access to the shard
  will be blocked until the failures are reset.
parent c1f4d7d6
...@@ -16,17 +16,16 @@ module StorageHealthHelper ...@@ -16,17 +16,16 @@ module StorageHealthHelper
def message_for_circuit_breaker(circuit_breaker) def message_for_circuit_breaker(circuit_breaker)
maximum_failures = circuit_breaker.failure_count_threshold maximum_failures = circuit_breaker.failure_count_threshold
current_failures = circuit_breaker.failure_count current_failures = circuit_breaker.failure_count
permanently_broken = circuit_breaker.circuit_broken? && current_failures >= maximum_failures
translation_params = { number_of_failures: current_failures, translation_params = { number_of_failures: current_failures,
maximum_failures: maximum_failures, maximum_failures: maximum_failures,
number_of_seconds: circuit_breaker.failure_wait_time } number_of_seconds: circuit_breaker.failure_wait_time }
if permanently_broken if circuit_breaker.circuit_broken?
s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\ s_("%{number_of_failures} of %{maximum_failures} failures. GitLab will not "\
"retry automatically. Reset storage information when the problem is "\ "retry automatically. Reset storage information when the problem is "\
"resolved.") % translation_params "resolved.") % translation_params
elsif circuit_breaker.circuit_broken? elsif circuit_breaker.backing_off?
_("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\ _("%{number_of_failures} of %{maximum_failures} failures. GitLab will "\
"block access for %{number_of_seconds} seconds.") % translation_params "block access for %{number_of_seconds} seconds.") % translation_params
else else
......
...@@ -12,6 +12,7 @@ module Gitlab ...@@ -12,6 +12,7 @@ module Gitlab
CircuitOpen = Class.new(Inaccessible) CircuitOpen = Class.new(Inaccessible)
Misconfiguration = Class.new(Inaccessible) Misconfiguration = Class.new(Inaccessible)
Failing = Class.new(Inaccessible)
REDIS_KEY_PREFIX = 'storage_accessible:'.freeze REDIS_KEY_PREFIX = 'storage_accessible:'.freeze
......
...@@ -64,12 +64,20 @@ module Gitlab ...@@ -64,12 +64,20 @@ module Gitlab
def circuit_broken? def circuit_broken?
return false if no_failures? return false if no_failures?
failure_count > failure_count_threshold
end
def backing_off?
return false if no_failures?
recent_failure = last_failure > failure_wait_time.seconds.ago recent_failure = last_failure > failure_wait_time.seconds.ago
too_many_failures = failure_count > failure_count_threshold too_many_failures = failure_count > backoff_threshold
recent_failure || too_many_failures recent_failure && too_many_failures
end end
private
def failure_info def failure_info
@failure_info ||= get_failure_info @failure_info ||= get_failure_info
end end
...@@ -94,7 +102,11 @@ module Gitlab ...@@ -94,7 +102,11 @@ module Gitlab
def check_storage_accessible! def check_storage_accessible!
if circuit_broken? if circuit_broken?
raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_wait_time) raise Gitlab::Git::Storage::CircuitOpen.new("Circuit for #{storage} is broken", failure_reset_time)
end
if backing_off?
raise Gitlab::Git::Storage::Failing.new("Backing off access to #{storage}", failure_wait_time)
end end
unless storage_available? unless storage_available?
...@@ -131,12 +143,6 @@ module Gitlab ...@@ -131,12 +143,6 @@ module Gitlab
end end
end end
def cache_key
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
private
def get_failure_info def get_failure_info
last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis| last_failure, failure_count = Gitlab::Git::Storage.redis.with do |redis|
redis.hmget(cache_key, :last_failure, :failure_count) redis.hmget(cache_key, :last_failure, :failure_count)
...@@ -146,6 +152,10 @@ module Gitlab ...@@ -146,6 +152,10 @@ module Gitlab
FailureInfo.new(last_failure, failure_count.to_i) FailureInfo.new(last_failure, failure_count.to_i)
end end
def cache_key
@cache_key ||= "#{Gitlab::Git::Storage::REDIS_KEY_PREFIX}#{storage}:#{hostname}"
end
end end
end end
end end
......
...@@ -18,6 +18,14 @@ module Gitlab ...@@ -18,6 +18,14 @@ module Gitlab
application_settings.circuitbreaker_storage_timeout application_settings.circuitbreaker_storage_timeout
end end
def access_retries
application_settings.circuitbreaker_access_retries
end
def backoff_threshold
application_settings.circuitbreaker_backoff_threshold
end
private private
def application_settings def application_settings
......
...@@ -25,6 +25,10 @@ module Gitlab ...@@ -25,6 +25,10 @@ module Gitlab
!!@error !!@error
end end
def backing_off?
false
end
def last_failure def last_failure
circuit_broken? ? Time.now : nil circuit_broken? ? Time.now : nil
end end
......
...@@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do ...@@ -65,17 +65,6 @@ describe Gitlab::Git::Storage::NullCircuitBreaker do
ours = described_class.public_instance_methods ours = described_class.public_instance_methods
theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods theirs = Gitlab::Git::Storage::CircuitBreaker.public_instance_methods
# These methods are not part of the public API, but are public to allow the expect(theirs - ours).to be_empty
# CircuitBreaker specs to operate. They should be made private over time.
exceptions = %i[
cache_key
check_storage_accessible!
no_failures?
storage_available?
track_storage_accessible
track_storage_inaccessible
]
expect(theirs - ours).to contain_exactly(*exceptions)
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment