Commit 4f700f11 authored by Hordur Freyr Yngvason's avatar Hordur Freyr Yngvason Committed by Tetiana Chupryna

Stop uninstalling applications on cluster removal

parent 7a9043b6
<script>
/* eslint-disable vue/no-v-html */
import { GlModal, GlButton, GlFormInput, GlSprintf } from '@gitlab/ui';
import { GlModal, GlButton, GlFormInput } from '@gitlab/ui';
import { escape } from 'lodash';
import csrf from '~/lib/utils/csrf';
import { s__, sprintf } from '~/locale';
......@@ -30,7 +30,6 @@ export default {
GlModal,
GlButton,
GlFormInput,
GlSprintf,
},
props: {
clusterPath: {
......@@ -135,17 +134,6 @@ export default {
<div v-if="confirmCleanup">
{{ s__('ClusterIntegration|This will permanently delete the following resources:') }}
<ul>
<li>
{{ s__('ClusterIntegration|All installed applications and related resources') }}
</li>
<li>
<gl-sprintf :message="s__('ClusterIntegration|The %{gitlabNamespace} namespace')">
<template #gitlabNamespace>
<!-- eslint-disable-next-line @gitlab/vue-require-i18n-strings -->
<code>{{ 'gitlab-managed-apps' }}</code>
</template>
</gl-sprintf>
</li>
<li>{{ s__('ClusterIntegration|Any project namespaces') }}</li>
<!-- eslint-disable @gitlab/vue-require-i18n-strings -->
<li><code>clusterroles</code></li>
......
......@@ -168,18 +168,16 @@ module Clusters
state_machine :cleanup_status, initial: :cleanup_not_started do
state :cleanup_not_started, value: 1
state :cleanup_uninstalling_applications, value: 2
state :cleanup_removing_project_namespaces, value: 3
state :cleanup_removing_service_account, value: 4
state :cleanup_errored, value: 5
event :start_cleanup do |cluster|
transition [:cleanup_not_started, :cleanup_errored] => :cleanup_uninstalling_applications
transition [:cleanup_not_started, :cleanup_errored] => :cleanup_removing_project_namespaces
end
event :continue_cleanup do
transition(
cleanup_uninstalling_applications: :cleanup_removing_project_namespaces,
cleanup_removing_project_namespaces: :cleanup_removing_service_account)
end
......@@ -192,13 +190,7 @@ module Clusters
cluster.cleanup_status_reason = status_reason if status_reason
end
after_transition [:cleanup_not_started, :cleanup_errored] => :cleanup_uninstalling_applications do |cluster|
cluster.run_after_commit do
Clusters::Cleanup::AppWorker.perform_async(cluster.id)
end
end
after_transition cleanup_uninstalling_applications: :cleanup_removing_project_namespaces do |cluster|
after_transition [:cleanup_not_started, :cleanup_errored] => :cleanup_removing_project_namespaces do |cluster|
cluster.run_after_commit do
Clusters::Cleanup::ProjectNamespaceWorker.perform_async(cluster.id)
end
......
# frozen_string_literal: true
module Clusters
module Cleanup
class AppService < Clusters::Cleanup::BaseService
def execute
persisted_applications = @cluster.persisted_applications
persisted_applications.each do |app|
next unless app.available?
next unless app.can_uninstall?
log_event(:uninstalling_app, application: app.class.application_name)
uninstall_app_async(app)
end
# Keep calling the worker untill all dependencies are uninstalled
return schedule_next_execution(Clusters::Cleanup::AppWorker) if persisted_applications.any?
log_event(:schedule_remove_project_namespaces)
cluster.continue_cleanup!
end
private
def uninstall_app_async(application)
application.make_scheduled!
Clusters::Applications::UninstallWorker.perform_async(application.name, application.id)
end
end
end
end
......@@ -2,7 +2,7 @@
module Clusters
module Cleanup
class ProjectNamespaceService < BaseService
class ProjectNamespaceService < ::Clusters::Cleanup::BaseService
KUBERNETES_NAMESPACE_BATCH_SIZE = 100
def execute
......
......@@ -2,7 +2,7 @@
module Clusters
module Cleanup
class ServiceAccountService < BaseService
class ServiceAccountService < ::Clusters::Cleanup::BaseService
def execute
delete_gitlab_service_account
......
......@@ -808,15 +808,6 @@
:weight: 1
:idempotent:
:tags: []
- :name: gcp_cluster:clusters_cleanup_app
:worker_name: Clusters::Cleanup::AppWorker
:feature_category: :kubernetes_management
:has_external_dependencies: true
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: gcp_cluster:clusters_cleanup_project_namespace
:worker_name: Clusters::Cleanup::ProjectNamespaceWorker
:feature_category: :kubernetes_management
......
# frozen_string_literal: true
module Clusters
module Cleanup
class AppWorker # rubocop:disable Scalability/IdempotentWorker
include ClusterCleanupMethods
def perform(cluster_id, execution_count = 0)
Clusters::Cluster.with_persisted_applications.find_by_id(cluster_id).try do |cluster|
break unless cluster.cleanup_uninstalling_applications?
break exceeded_execution_limit(cluster) if exceeded_execution_limit?(execution_count)
::Clusters::Cleanup::AppService.new(cluster, execution_count).execute
end
end
end
end
end
......@@ -6979,9 +6979,6 @@ msgstr ""
msgid "ClusterIntegration|All data will be deleted and cannot be restored."
msgstr ""
msgid "ClusterIntegration|All installed applications and related resources"
msgstr ""
msgid "ClusterIntegration|Allow GitLab to manage namespace and service accounts for this cluster. %{linkStart}More information%{linkEnd}"
msgstr ""
......@@ -7687,9 +7684,6 @@ msgstr ""
msgid "ClusterIntegration|Subnets"
msgstr ""
msgid "ClusterIntegration|The %{gitlabNamespace} namespace"
msgstr ""
msgid "ClusterIntegration|The Amazon Resource Name (ARN) associated with your role. If you do not have a provision role, first create one on %{startAwsLink}Amazon Web Services %{externalLinkIcon}%{endLink} using the above account and external IDs. %{startMoreInfoLink}More information%{endLink}"
msgstr ""
......
......@@ -138,10 +138,6 @@ FactoryBot.define do
cleanup_status { 1 }
end
trait :cleanup_uninstalling_applications do
cleanup_status { 2 }
end
trait :cleanup_removing_project_namespaces do
cleanup_status { 3 }
end
......
......@@ -1021,7 +1021,6 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
where(:status_name, :cleanup_status) do
provider_status | :cleanup_not_started
:cleanup_ongoing | :cleanup_uninstalling_applications
:cleanup_ongoing | :cleanup_removing_project_namespaces
:cleanup_ongoing | :cleanup_removing_service_account
:cleanup_errored | :cleanup_errored
......@@ -1077,8 +1076,8 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end
describe '#start_cleanup!' do
let(:expected_worker_class) { Clusters::Cleanup::AppWorker }
let(:to_state) { :cleanup_uninstalling_applications }
let(:expected_worker_class) { Clusters::Cleanup::ProjectNamespaceWorker }
let(:to_state) { :cleanup_removing_project_namespaces }
subject { cluster.start_cleanup! }
......@@ -1116,17 +1115,6 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
end
describe '#continue_cleanup!' do
context 'when cleanup_status is cleanup_uninstalling_applications' do
let(:expected_worker_class) { Clusters::Cleanup::ProjectNamespaceWorker }
let(:from_state) { :cleanup_uninstalling_applications }
let(:to_state) { :cleanup_removing_project_namespaces }
subject { cluster.continue_cleanup! }
it_behaves_like 'cleanup_status transition'
end
context 'when cleanup_status is cleanup_removing_project_namespaces' do
let(:expected_worker_class) { Clusters::Cleanup::ServiceAccountWorker }
let(:from_state) { :cleanup_removing_project_namespaces }
let(:to_state) { :cleanup_removing_service_account }
......@@ -1136,7 +1124,6 @@ RSpec.describe Clusters::Cluster, :use_clean_rails_memory_store_caching do
it_behaves_like 'cleanup_status transition'
end
end
end
describe '#connection_status' do
let(:cluster) { create(:cluster) }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Clusters::Cleanup::AppService do
describe '#execute' do
let!(:cluster) { create(:cluster, :project, :cleanup_uninstalling_applications, provider_type: :gcp) }
let(:service) { described_class.new(cluster) }
let(:logger) { service.send(:logger) }
let(:log_meta) do
{
service: described_class.name,
cluster_id: cluster.id,
execution_count: 0
}
end
subject { service.execute }
shared_examples 'does not reschedule itself' do
it 'does not reschedule itself' do
expect(Clusters::Cleanup::AppWorker).not_to receive(:perform_in)
end
end
context 'when cluster has no applications available or transitioning applications' do
it_behaves_like 'does not reschedule itself'
it 'transitions cluster to cleanup_removing_project_namespaces' do
expect { subject }
.to change { cluster.reload.cleanup_status_name }
.from(:cleanup_uninstalling_applications)
.to(:cleanup_removing_project_namespaces)
end
it 'schedules Clusters::Cleanup::ProjectNamespaceWorker' do
expect(Clusters::Cleanup::ProjectNamespaceWorker).to receive(:perform_async).with(cluster.id)
subject
end
it 'logs all events' do
expect(logger).to receive(:info)
.with(log_meta.merge(event: :schedule_remove_project_namespaces))
subject
end
end
context 'when cluster has uninstallable applications' do
shared_examples 'reschedules itself' do
it 'reschedules itself' do
expect(Clusters::Cleanup::AppWorker)
.to receive(:perform_in)
.with(1.minute, cluster.id, 1)
subject
end
end
context 'has applications with dependencies' do
let!(:helm) { create(:clusters_applications_helm, :installed, cluster: cluster) }
let!(:ingress) { create(:clusters_applications_ingress, :installed, cluster: cluster) }
let!(:cert_manager) { create(:clusters_applications_cert_manager, :installed, cluster: cluster) }
let!(:jupyter) { create(:clusters_applications_jupyter, :installed, cluster: cluster) }
it_behaves_like 'reschedules itself'
it 'only uninstalls apps that are not dependencies for other installed apps' do
expect(Clusters::Applications::UninstallWorker)
.to receive(:perform_async).with(helm.name, helm.id)
.and_call_original
expect(Clusters::Applications::UninstallWorker)
.not_to receive(:perform_async).with(ingress.name, ingress.id)
expect(Clusters::Applications::UninstallWorker)
.to receive(:perform_async).with(cert_manager.name, cert_manager.id)
.and_call_original
expect(Clusters::Applications::UninstallWorker)
.to receive(:perform_async).with(jupyter.name, jupyter.id)
.and_call_original
subject
end
it 'logs application uninstalls and next execution' do
expect(logger).to receive(:info)
.with(log_meta.merge(event: :uninstalling_app, application: kind_of(String))).exactly(3).times
expect(logger).to receive(:info)
.with(log_meta.merge(event: :scheduling_execution, next_execution: 1))
subject
end
context 'cluster is not cleanup_uninstalling_applications' do
let!(:cluster) { create(:cluster, :project, provider_type: :gcp) }
it_behaves_like 'does not reschedule itself'
end
end
context 'when applications are still uninstalling/scheduled/depending on others' do
let!(:helm) { create(:clusters_applications_helm, :installed, cluster: cluster) }
let!(:ingress) { create(:clusters_applications_ingress, :scheduled, cluster: cluster) }
let!(:runner) { create(:clusters_applications_runner, :uninstalling, cluster: cluster) }
it_behaves_like 'reschedules itself'
it 'does not call the uninstallation service' do
expect(Clusters::Applications::UninstallWorker).not_to receive(:new)
subject
end
end
end
end
end
......@@ -37,7 +37,7 @@ RSpec.describe Clusters::DestroyService do
let(:params) { { cleanup: 'true' } }
before do
allow(Clusters::Cleanup::AppWorker).to receive(:perform_async)
allow(Clusters::Cleanup::ProjectNamespaceWorker).to receive(:perform_async)
end
it 'does not destroy cluster' do
......@@ -45,10 +45,10 @@ RSpec.describe Clusters::DestroyService do
expect(Clusters::Cluster.where(id: cluster.id).exists?).not_to be_falsey
end
it 'transition cluster#cleanup_status from cleanup_not_started to cleanup_uninstalling_applications' do
it 'transition cluster#cleanup_status from cleanup_not_started to cleanup_removing_project_namespaces' do
expect { subject }.to change { cluster.cleanup_status_name }
.from(:cleanup_not_started)
.to(:cleanup_uninstalling_applications)
.to(:cleanup_removing_project_namespaces)
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Clusters::Cleanup::AppWorker do
describe '#perform' do
subject { worker_instance.perform(cluster.id) }
let!(:worker_instance) { described_class.new }
let!(:cluster) { create(:cluster, :project, :cleanup_uninstalling_applications, provider_type: :gcp) }
let!(:logger) { worker_instance.send(:logger) }
it_behaves_like 'cluster cleanup worker base specs'
context 'when exceeded the execution limit' do
subject { worker_instance.perform(cluster.id, worker_instance.send(:execution_limit)) }
let(:worker_instance) { described_class.new }
let(:logger) { worker_instance.send(:logger) }
let!(:helm) { create(:clusters_applications_helm, :installed, cluster: cluster) }
let!(:ingress) { create(:clusters_applications_ingress, :scheduled, cluster: cluster) }
it 'logs the error' do
expect(logger).to receive(:error)
.with(
hash_including(
exception: 'ClusterCleanupMethods::ExceededExecutionLimitError',
cluster_id: kind_of(Integer),
class_name: described_class.name,
applications: "helm:installed,ingress:scheduled",
cleanup_status: cluster.cleanup_status_name,
event: :failed_to_remove_cluster_and_resources,
message: "exceeded execution limit of 10 tries"
)
)
subject
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment