Refactor ingress IP address waiting code (#42643)

ba4114d2 · Dylan Griffith · f0b27f9b · ba4114d2 · ba4114d2 · ba4114d2
Commit ba4114d2 authored Feb 20, 2018 by Dylan Griffith
10 changed files
--- a/app/models/clusters/applications/ingress.rb
+++ b/app/models/clusters/applications/ingress.rb
@@ -5,6 +5,7 @@ module Clusters

      include ::Clusters::Concerns::ApplicationCore
      include ::Clusters::Concerns::ApplicationStatus
+      include AfterCommitQueue

      default_value_for :ingress_type, :nginx
      default_value_for :version, :nginx
@@ -15,6 +16,15 @@ module Clusters

      IP_ADDRESS_FETCH_RETRIES = 3

+      state_machine :status do
+        before_transition any => [:installed] do |application|
+          application.run_after_commit do
+            ClusterWaitForIngressIpAddressWorker.perform_in(
+              ClusterWaitForIngressIpAddressWorker::INTERVAL, application.name, application.id, IP_ADDRESS_FETCH_RETRIES)
+          end
+        end
+      end
+
      def chart
        'stable/nginx-ingress'
      end
@@ -26,11 +36,6 @@ module Clusters
      def install_command
        Gitlab::Kubernetes::Helm::InstallCommand.new(name, chart: chart, chart_values_file: chart_values_file)
      end
-
-      def post_install
-        ClusterWaitForIngressIpAddressWorker.perform_in(
-          ClusterWaitForIngressIpAddressWorker::INTERVAL, name, id, IP_ADDRESS_FETCH_RETRIES)
-      end
    end
  end
 end
--- a/app/models/clusters/concerns/application_core.rb
+++ b/app/models/clusters/concerns/application_core.rb
@@ -23,10 +23,6 @@ module Clusters
        def name
          self.class.application_name
        end
-
-        def post_install
-          # Override for any extra work that needs to be done after install
-        end
      end
    end
  end

--- a/app/services/clusters/applications/check_ingress_ip_address_service.rb
+++ b/app/services/clusters/applications/check_ingress_ip_address_service.rb
 module Clusters
  module Applications
    class CheckIngressIpAddressService < BaseHelmService
+      Error = Class.new(StandardError)
+
      LEASE_TIMEOUT = 3.seconds.to_i

-      def execute(retries_remaining)
-        return if app.external_ip
-        return unless try_obtain_lease
+      def execute
+        return true if app.external_ip
+        return true unless try_obtain_lease

        service = get_service

        if service.status.loadBalancer.ingress
          resolve_external_ip(service)
        else
-          retry_if_necessary(retries_remaining)
+          false
        end

-      rescue KubeException
-        retry_if_necessary(retries_remaining)
+      rescue KubeException => e
+        raise Error, "#{e.class}: #{e.message}"
      end

      private
@@ -28,19 +30,12 @@ module Clusters
      end

      def resolve_external_ip(service)
-        app.update!( external_ip: service.status.loadBalancer.ingress[0].ip)
+        app.update!(external_ip: service.status.loadBalancer.ingress[0].ip)
      end

      def get_service
        kubeclient.get_service('ingress-nginx-ingress-controller', Gitlab::Kubernetes::Helm::NAMESPACE)
      end
-
-      def retry_if_necessary(retries_remaining)
-        if retries_remaining > 0
-          ClusterWaitForIngressIpAddressWorker.perform_in(
-            ClusterWaitForIngressIpAddressWorker::INTERVAL, app.name, app.id, retries_remaining - 1)
-        end
-      end
    end
  end
 end
--- a/app/services/clusters/applications/check_installation_progress_service.rb
+++ b/app/services/clusters/applications/check_installation_progress_service.rb
@@ -20,7 +20,6 @@ module Clusters

      def on_success
        app.make_installed!
-        app.post_install
      ensure
        remove_installation_pod
      end

--- a/app/workers/cluster_wait_for_ingress_ip_address_worker.rb
+++ b/app/workers/cluster_wait_for_ingress_ip_address_worker.rb
@@ -4,11 +4,22 @@ class ClusterWaitForIngressIpAddressWorker
  include ClusterApplications

  INTERVAL = 10.seconds
-  TIMEOUT = 20.minutes

  def perform(app_name, app_id, retries_remaining)
    find_application(app_name, app_id) do |app|
-      Clusters::Applications::CheckIngressIpAddressService.new(app).execute(retries_remaining)
+      result = Clusters::Applications::CheckIngressIpAddressService.new(app).execute
+      retry_if_necessary(app_name, app_id, retries_remaining) unless result
+    end
+  rescue Clusters::Applications::CheckIngressIpAddressService::Error => e
+    retry_if_necessary(app_name, app_id, retries_remaining)
+    raise e
+  end
+
+  private
+
+  def retry_if_necessary(app_name, app_id, retries_remaining)
+    if retries_remaining > 0
+      self.class.perform_in(INTERVAL, app_name, app_id, retries_remaining - 1)
    end
  end
 end
--- a/db/migrate/20180212030105_add_external_ip_to_clusters_applications_ingress.rb
+++ b/db/migrate/20180212030105_add_external_ip_to_clusters_applications_ingress.rb
-# See http://doc.gitlab.com/ce/development/migration_style_guide.html
-# for more information on how to write migrations for GitLab.
-
 class AddExternalIpToClustersApplicationsIngress < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

-  # Set this constant to true if this migration requires downtime.
  DOWNTIME = false

-  # When a migration requires downtime you **must** uncomment the following
-  # constant and define a short and easy to understand explanation as to why the
-  # migration requires downtime.
-  # DOWNTIME_REASON = ''
-
-  # When using the methods "add_concurrent_index", "remove_concurrent_index" or
-  # "add_column_with_default" you must disable the use of transactions
-  # as these methods can not run in an existing transaction.
-  # When using "add_concurrent_index" or "remove_concurrent_index" methods make sure
-  # that either of them is the _only_ method called in the migration,
-  # any other changes should go in a separate migration.
-  # This ensures that upon failure _only_ the index creation or removing fails
-  # and can be retried or reverted easily.
-  #
-  # To disable transactions uncomment the following line and remove these
-  # comments:
-  # disable_ddl_transaction!
-
  def change
    add_column :clusters_applications_ingress, :external_ip, :string
  end

--- a/spec/fixtures/api/schemas/cluster_status.json
+++ b/spec/fixtures/api/schemas/cluster_status.json
@@ -31,7 +31,7 @@
          }
        },
        "status_reason": { "type": ["string", "null"] },
-        "external_ip": { "type": ["string", null] }
+        "external_ip": { "type": ["string", "null"] }
      },
      "required" : [ "name", "status" ]
    }

--- a/spec/models/clusters/applications/ingress_spec.rb
+++ b/spec/models/clusters/applications/ingress_spec.rb
@@ -4,16 +4,19 @@ describe Clusters::Applications::Ingress do
  it { is_expected.to belong_to(:cluster) }
  it { is_expected.to validate_presence_of(:cluster) }

-  include_examples 'cluster application specs', described_class
+  before do
+    allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in)
+  end

-  describe '#post_install' do
-    let(:application) { create(:clusters_applications_ingress, :installed) }
+  include_examples 'cluster application specs', described_class

+  describe '#make_installed!' do
    before do
-      allow(ClusterWaitForIngressIpAddressWorker).to receive(:perform_in)
-      application.post_install
+      application.make_installed!
    end

+    let(:application) { create(:clusters_applications_ingress, :installing) }
+
    it 'schedules a ClusterWaitForIngressIpAddressWorker' do
      expect(ClusterWaitForIngressIpAddressWorker).to have_received(:perform_in)
        .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 3)

--- a/spec/services/clusters/applications/check_ingress_ip_address_service_spec.rb
+++ b/spec/services/clusters/applications/check_ingress_ip_address_service_spec.rb
 require 'spec_helper'

 describe Clusters::Applications::CheckIngressIpAddressService do
+  subject { service.execute }
  let(:application) { create(:clusters_applications_ingress, :installed) }
  let(:service) { described_class.new(application) }
+  let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
+  let(:ingress) { [{ ip: '111.222.111.222' }] }
+  let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }
+
  let(:kube_service) do
    ::Kubeclient::Resource.new(
      {
@@ -14,9 +19,6 @@ describe Clusters::Applications::CheckIngressIpAddressService do
      }
    )
  end
-  let(:kubeclient) { double(::Kubeclient::Client, get_service: kube_service) }
-  let(:ingress) { [{ ip: '111.222.111.222' }] }
-  let(:exclusive_lease) { instance_double(Gitlab::ExclusiveLease, try_obtain: true) }

  before do
    allow(application.cluster).to receive(:kubeclient).and_return(kubeclient)
@@ -28,10 +30,10 @@ describe Clusters::Applications::CheckIngressIpAddressService do

  describe '#execute' do
    context 'when the ingress ip address is available' do
-      it 'updates the external_ip for the app and does not schedule another worker' do
-        expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
+      it { is_expected.to eq(true) }

-        service.execute(1)
+      it 'updates the external_ip for the app' do
+        subject

        expect(application.external_ip).to eq('111.222.111.222')
      end
@@ -40,21 +42,7 @@ describe Clusters::Applications::CheckIngressIpAddressService do
    context 'when the ingress ip address is not available' do
      let(:ingress) { nil }

-      it 'it schedules another worker with 1 less retry' do
-        expect(ClusterWaitForIngressIpAddressWorker)
-          .to receive(:perform_in)
-          .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
-
-        service.execute(1)
-      end
-
-      context 'when no more retries remaining' do
-        it 'does not schedule another worker' do
-          expect(ClusterWaitForIngressIpAddressWorker).not_to receive(:perform_in)
-
-          service.execute(0)
-        end
-      end
+      it { is_expected.to eq(false) }
    end

    context 'when the exclusive lease cannot be obtained' do
@@ -64,22 +52,24 @@ describe Clusters::Applications::CheckIngressIpAddressService do
          .and_return(false)
      end

+      it { is_expected.to eq(true) }
+
      it 'does not call kubeclient' do
-        expect(kubeclient).not_to receive(:get_service)
+        subject

-        service.execute(1)
+        expect(kubeclient).not_to have_received(:get_service)
      end
    end

    context 'when there is already an external_ip' do
      let(:application) { create(:clusters_applications_ingress, :installed, external_ip: '001.111.002.111') }

-      it 'does nothing' do
-        expect(kubeclient).not_to receive(:get_service)
+      it { is_expected.to eq(true) }

-        service.execute(1)
+      it 'does not call kubeclient' do
+        subject

-        expect(application.external_ip).to eq('001.111.002.111')
+        expect(kubeclient).not_to have_received(:get_service)
      end
    end

@@ -88,12 +78,9 @@ describe Clusters::Applications::CheckIngressIpAddressService do
        allow(kubeclient).to receive(:get_service).and_raise(KubeException.new(500, 'something blew up', nil))
      end

-      it 'it schedules another worker with 1 less retry' do
-        expect(ClusterWaitForIngressIpAddressWorker)
-          .to receive(:perform_in)
-          .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', application.id, 0)
-
-        service.execute(1)
+      it 'it raises Clusters::Applications::CheckIngressIpAddressServiceError' do
+        expect { subject }
+          .to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "KubeException: something blew up")
      end
    end
  end

--- a/spec/workers/cluster_wait_for_ingress_ip_address_worker_spec.rb
+++ b/spec/workers/cluster_wait_for_ingress_ip_address_worker_spec.rb
@@ -2,19 +2,94 @@ require 'spec_helper'

 describe ClusterWaitForIngressIpAddressWorker do
  describe '#perform' do
-    let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService) }
+    let(:service) { instance_double(Clusters::Applications::CheckIngressIpAddressService, execute: true) }
    let(:application) { instance_double(Clusters::Applications::Ingress) }
    let(:worker) { described_class.new }

-    it 'finds the application and calls CheckIngressIpAddressService#execute' do
-      expect(worker).to receive(:find_application).with('ingress', 117).and_yield(application)
-      expect(Clusters::Applications::CheckIngressIpAddressService)
+    before do
+      allow(worker)
+        .to receive(:find_application)
+        .with('ingress', 117)
+        .and_yield(application)
+
+      allow(Clusters::Applications::CheckIngressIpAddressService)
        .to receive(:new)
        .with(application)
        .and_return(service)
-      expect(service).to receive(:execute).with(2)

+      allow(described_class)
+        .to receive(:perform_in)
+    end
+
+    it 'finds the application and calls CheckIngressIpAddressService#execute' do
      worker.perform('ingress', 117, 2)
+
+      expect(service).to have_received(:execute)
+    end
+
+    context 'when the service succeeds' do
+      it 'does not schedule another worker' do
+        worker.perform('ingress', 117, 2)
+
+        expect(described_class)
+          .not_to have_received(:perform_in)
+      end
+    end
+
+    context 'when the service fails' do
+      before do
+        allow(service)
+          .to receive(:execute)
+          .and_return(false)
+      end
+
+      context 'when there are retries remaining' do
+        it 'schedules another worker with 1 less retry' do
+          worker.perform('ingress', 117, 2)
+
+          expect(described_class)
+            .to have_received(:perform_in)
+            .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
+        end
+      end
+
+      context 'when there are no retries_remaining' do
+        it 'does not schedule another worker' do
+          worker.perform('ingress', 117, 0)
+
+          expect(described_class)
+            .not_to have_received(:perform_in)
+        end
+      end
+    end
+
+    context 'when the update raises exception' do
+      before do
+        allow(service)
+          .to receive(:execute)
+          .and_raise(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
+      end
+
+      context 'when there are retries remaining' do
+        it 'schedules another worker with 1 less retry and re-raises the error' do
+          expect { worker.perform('ingress', 117, 2) }
+            .to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
+
+          expect(described_class)
+            .to have_received(:perform_in)
+            .with(ClusterWaitForIngressIpAddressWorker::INTERVAL, 'ingress', 117, 1)
+        end
+      end
+
+      context 'when there are no retries_remaining' do
+        it 'does not schedule another worker but re-raises the error' do
+          expect { worker.perform('ingress', 117, 0) }
+            .to raise_error(Clusters::Applications::CheckIngressIpAddressService::Error, "something went wrong")
+
+          expect(described_class)
+            .not_to have_received(:perform_in)
+        end
+      end
    end
  end
 end