Commit ba4114d2 authored by Dylan Griffith's avatar Dylan Griffith

Refactor ingress IP address waiting code (#42643)

parent f0b27f9b
...@@ -5,6 +5,7 @@ module Clusters ...@@ -5,6 +5,7 @@ module Clusters
include ::Clusters::Concerns::ApplicationCore include ::Clusters::Concerns::ApplicationCore
include ::Clusters::Concerns::ApplicationStatus include ::Clusters::Concerns::ApplicationStatus
include AfterCommitQueue
default_value_for :ingress_type, :nginx default_value_for :ingress_type, :nginx
default_value_for :version, :nginx default_value_for :version, :nginx
...@@ -15,6 +16,15 @@ module Clusters ...@@ -15,6 +16,15 @@ module Clusters
IP_ADDRESS_FETCH_RETRIES = 3 IP_ADDRESS_FETCH_RETRIES = 3
state_machine :status do
before_transition any => [:installed] do |application|
application.run_after_commit do
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, application.name, application.id, IP_ADDRESS_FETCH_RETRIES)
end
end
end
def chart def chart
'stable/nginx-ingress' 'stable/nginx-ingress'
end end
...@@ -26,11 +36,6 @@ module Clusters ...@@ -26,11 +36,6 @@ module Clusters
def install_command def install_command
Gitlab::Kubernetes::Helm::InstallCommand.new(name, chart: chart, chart_values_file: chart_values_file) Gitlab::Kubernetes::Helm::InstallCommand.new(name, chart: chart, chart_values_file: chart_values_file)
end end
def post_install
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, name, id, IP_ADDRESS_FETCH_RETRIES)
end
end end
end end
end end
...@@ -23,10 +23,6 @@ module Clusters ...@@ -23,10 +23,6 @@ module Clusters
def name def name
self.class.application_name self.class.application_name
end end
def post_install
# Override for any extra work that needs to be done after install
end
end end
end end
end end
......
module Clusters module Clusters
module Applications module Applications
class CheckIngressIpAddressService < BaseHelmService class CheckIngressIpAddressService < BaseHelmService
Error = Class.new(StandardError)
LEASE_TIMEOUT = 3.seconds.to_i LEASE_TIMEOUT = 3.seconds.to_i
def execute(retries_remaining) def execute
return if app.external_ip return true if app.external_ip
return unless try_obtain_lease return true unless try_obtain_lease
service = get_service service = get_service
if service.status.loadBalancer.ingress if service.status.loadBalancer.ingress
resolve_external_ip(service) resolve_external_ip(service)
else else
retry_if_necessary(retries_remaining) false
end end
rescue KubeException rescue KubeException => e
retry_if_necessary(retries_remaining) raise Error, "#{e.class}: #{e.message}"
end end
private private
...@@ -28,19 +30,12 @@ module Clusters ...@@ -28,19 +30,12 @@ module Clusters
end end
def resolve_external_ip(service) def resolve_external_ip(service)
app.update!( external_ip: service.status.loadBalancer.ingress[0].ip) app.update!(external_ip: service.status.loadBalancer.ingress[0].ip)
end end
def get_service def get_service
kubeclient.get_service('ingress-nginx-ingress-controller', Gitlab::Kubernetes::Helm::NAMESPACE) kubeclient.get_service('ingress-nginx-ingress-controller', Gitlab::Kubernetes::Helm::NAMESPACE)
end end
def retry_if_necessary(retries_remaining)
if retries_remaining > 0
ClusterWaitForIngressIpAddressWorker.perform_in(
ClusterWaitForIngressIpAddressWorker::INTERVAL, app.name, app.id, retries_remaining - 1)
end
end
end end
end end
end end
...@@ -20,7 +20,6 @@ module Clusters ...@@ -20,7 +20,6 @@ module Clusters
def on_success def on_success
app.make_installed! app.make_installed!
app.post_install
ensure ensure
remove_installation_pod remove_installation_pod
end end
......
...@@ -4,11 +4,22 @@ class ClusterWaitForIngressIpAddressWorker ...@@ -4,11 +4,22 @@ class ClusterWaitForIngressIpAddressWorker
include ClusterApplications include ClusterApplications
INTERVAL = 10.seconds INTERVAL = 10.seconds
TIMEOUT = 20.minutes
def perform(app_name, app_id, retries_remaining) def perform(app_name, app_id, retries_remaining)
find_application(app_name, app_id) do |app| find_application(app_name, app_id) do |app|
Clusters::Applications::CheckIngressIpAddressService.new(app).execute(retries_remaining) result = Clusters::Applications::CheckIngressIpAddressService.new(app).execute
retry_if_necessary(app_name, app_id, retries_remaining) unless result
end
rescue Clusters::Applications::CheckIngressIpAddressService::Error => e
retry_if_necessary(app_name, app_id, retries_remaining)
raise e
end
private
def retry_if_necessary(app_name, app_id, retries_remaining)
if retries_remaining > 0
self.class.perform_in(INTERVAL, app_name, app_id, retries_remaining - 1)
end end
end end
end end
# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.
class AddExternalIpToClustersApplicationsIngress < ActiveRecord::Migration class AddExternalIpToClustersApplicationsIngress < ActiveRecord::Migration
include Gitlab::Database::MigrationHelpers include Gitlab::Database::MigrationHelpers
# Set this constant to true if this migration requires downtime.
DOWNTIME = false DOWNTIME = false
# When a migration requires downtime you **must** uncomment the following
# constant and define a short and easy to understand explanation as to why the
# migration requires downtime.
# DOWNTIME_REASON = ''
# When using the methods "add_concurrent_index", "remove_concurrent_index" or
# "add_column_with_default" you must disable the use of transactions
# as these methods can not run in an existing transaction.
# When using "add_concurrent_index" or "remove_concurrent_index" methods make sure
# that either of them is the _only_ method called in the migration,
# any other changes should go in a separate migration.
# This ensures that upon failure _only_ the index creation or removing fails
# and can be retried or reverted easily.
#
# To disable transactions uncomment the following line and remove these
# comments:
# disable_ddl_transaction!
def change def change
add_column :clusters_applications_ingress, :external_ip, :string add_column :clusters_applications_ingress, :external_ip, :string
end end
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
} }
}, },
"status_reason": { "type": ["string", "null"] }, "status_reason": { "type": ["string", "null"] },
"external_ip": { "type": ["string", null] } "external_ip": { "type": ["string", "null"] }
}, },
"required" : [ "name", "status" ] "required" : [ "name", "status" ]
} }
......
...@@ -4,16 +4,19 @@ describe Clusters::Applications::Ingress do ...@@ -4,16 +4,19 @@ describe Clusters::Applications::Ingress do
it { is_expected.to belong_to(:cluster) } it { is_expected.to belong_to(:cluster) }
it { is_expected.to validate_presence_of(:cluster) } it { is_expected.to validate_presence_of(:cluster) }
include_examples 'cluster application specs', described_class before do
allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in)
end
describe '#post_install' do include_examples 'cluster application specs', described_class
let(:application) { create(:clusters_applications_ingress, :installed) }
describe '#make_installed!' do
before do before do
allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in) application.make_installed!
application.post_install
end end
let(:application) { create(:clusters_applications_ingress, :installing) }
it 'schedules a ClusterWaitForIngressIpAddressWorker' do it 'schedules a ClusterWaitForIngressIpAddressWorker' do
expect(ClusterWaitForIngressIpAddressWorker).to have_received(:perform_in) expect(ClusterWaitForIngressIpAddressWorker).to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 3) .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 3)
......
require 'spec_helper' require 'spec_helper'
describe Clusters::Applications::CheckIngressIpAddressService do describe Clusters::Applications::CheckIngressIpAddressService do
subject { service.execute }
let(:application) { create(:clusters_applications_ingress, :installed) } let(:application) { create(:clusters_applications_ingress, :installed) }
let(:service) { described_class.new(application) } let(:service) { described_class.new(application) }
let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
let(:ingress) { [{ ip: '111.222.111.222' }] }
let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }
let(:kube_service) do let(:kube_service) do
::Kubeclient::Resource.new( ::Kubeclient::Resource.new(
{ {
...@@ -14,9 +19,6 @@ describe Clusters::Applications::CheckIngressIpAddressService do ...@@ -14,9 +19,6 @@ describe Clusters::Applications::CheckIngressIpAddressService do
} }
) )
end end
let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
let(:ingress) { [{ ip: '111.222.111.222' }] }
let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }
before do before do
allow(application.cluster).to receive(:kubeclient).and_return(kubeclient) allow(application.cluster).to receive(:kubeclient).and_return(kubeclient)
...@@ -28,10 +30,10 @@ describe Clusters::Applications::CheckIngressIpAddressService do ...@@ -28,10 +30,10 @@ describe Clusters::Applications::CheckIngressIpAddressService do
describe '#execute' do describe '#execute' do
context 'when the ingress ip address is available' do context 'when the ingress ip address is available' do
it 'updates the external_ip for the app and does not schedule another worker' do it { is_expected.to eq(true) }
expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
service.execute(1) it 'updates the external_ip for the app' do
subject
expect(application.external_ip).to eq('111.222.111.222') expect(application.external_ip).to eq('111.222.111.222')
end end
...@@ -40,21 +42,7 @@ describe Clusters::Applications::CheckIngressIpAddressService do ...@@ -40,21 +42,7 @@ describe Clusters::Applications::CheckIngressIpAddressService do
context 'when the ingress ip address is not available' do context 'when the ingress ip address is not available' do
let(:ingress) { nil } let(:ingress) { nil }
it 'it schedules another worker with 1 less retry' do it { is_expected.to eq(false) }
expect(ClusterWaitForIngressIpAddressWorker)
.to receive(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
service.execute(1)
end
context 'when no more retries remaining' do
it 'does not schedule another worker' do
expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
service.execute(0)
end
end
end end
context 'when the exclusive lease cannot be obtained' do context 'when the exclusive lease cannot be obtained' do
...@@ -64,22 +52,24 @@ describe Clusters::Applications::CheckIngressIpAddressService do ...@@ -64,22 +52,24 @@ describe Clusters::Applications::CheckIngressIpAddressService do
.and_return(false) .and_return(false)
end end
it { is_expected.to eq(true) }
it 'does not call kubeclient' do it 'does not call kubeclient' do
expect(kubeclient).not_to receive(:get_service) subject
service.execute(1) expect(kubeclient).not_to have_received(:get_service)
end end
end end
context 'when there is already an external_ip' do context 'when there is already an external_ip' do
let(:application) { create(:clusters_applications_ingress, :installed, external_ip: '001.111.002.111') } let(:application) { create(:clusters_applications_ingress, :installed, external_ip: '001.111.002.111') }
it 'does nothing' do it { is_expected.to eq(true) }
expect(kubeclient).not_to receive(:get_service)
service.execute(1) it 'does not call kubeclient' do
subject
expect(application.external_ip).to eq('001.111.002.111') expect(kubeclient).not_to have_received(:get_service)
end end
end end
...@@ -88,12 +78,9 @@ describe Clusters::Applications::CheckIngressIpAddressService do ...@@ -88,12 +78,9 @@ describe Clusters::Applications::CheckIngressIpAddressService do
allow(kubeclient).to receive(:get_service).and_raise(KubeException.new(500, 'something blew up', nil)) allow(kubeclient).to receive(:get_service).and_raise(KubeException.new(500, 'something blew up', nil))
end end
it 'it schedules another worker with 1 less retry' do it 'it raises Clusters::Applications::CheckIngressIpAddressServiceError' do
expect(ClusterWaitForIngressIpAddressWorker) expect { subject }
.to receive(:perform_in) .to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "KubeException: something blew up")
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
service.execute(1)
end end
end end
end end
......
...@@ -2,19 +2,94 @@ require 'spec_helper' ...@@ -2,19 +2,94 @@ require 'spec_helper'
describe ClusterWaitForIngressIpAddressWorker do describe ClusterWaitForIngressIpAddressWorker do
describe '#perform' do describe '#perform' do
let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService) } let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService, execute: true) }
let(:application) { instance_double(Clusters::Applications::Ingress) } let(:application) { instance_double(Clusters::Applications::Ingress) }
let(:worker) { described_class.new } let(:worker) { described_class.new }
it 'finds the application and calls CheckIngressIpAddressService#execute' do before do
expect(worker).to receive(:find_application).with('ingress', 117).and_yield(application) allow(worker)
expect(Clusters::Applications::CheckIngressIpAddressService) .to receive(:find_application)
.with('ingress', 117)
.and_yield(application)
allow(Clusters::Applications::CheckIngressIpAddressService)
.to receive(:new) .to receive(:new)
.with(application) .with(application)
.and_return(service) .and_return(service)
expect(service).to receive(:execute).with(2)
allow(described_class)
.to receive(:perform_in)
end
it 'finds the application and calls CheckIngressIpAddressService#execute' do
worker.perform('ingress', 117, 2) worker.perform('ingress', 117, 2)
expect(service).to have_received(:execute)
end
context 'when the service succeeds' do
it 'does not schedule another worker' do
worker.perform('ingress', 117, 2)
expect(described_class)
.not_to have_received(:perform_in)
end
end
context 'when the service fails' do
before do
allow(service)
.to receive(:execute)
.and_return(false)
end
context 'when there are retries remaining' do
it 'schedules another worker with 1 less retry' do
worker.perform('ingress', 117, 2)
expect(described_class)
.to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
end
end
context 'when there are no retries_remaining' do
it 'does not schedule another worker' do
worker.perform('ingress', 117, 0)
expect(described_class)
.not_to have_received(:perform_in)
end
end
end
context 'when the update raises exception' do
before do
allow(service)
.to receive(:execute)
.and_raise(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
end
context 'when there are retries remaining' do
it 'schedules another worker with 1 less retry and re-raises the error' do
expect { worker.perform('ingress', 117, 2) }
.to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
expect(described_class)
.to have_received(:perform_in)
.with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
end
end
context 'when there are no retries_remaining' do
it 'does not schedule another worker but re-raises the error' do
expect { worker.perform('ingress', 117, 0) }
.to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
expect(described_class)
.not_to have_received(:perform_in)
end
end
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment