Commit bcfd9e32 authored by Sean Arnold

Add logic to create/delete/process escalations

- include specs for each case
parent e5cc8675
......@@ -225,6 +225,10 @@ module AlertManagement
open_statuses.include?(status)
end
# Whether this record's current status_name counts as open, as decided by the
# class-level open_status? check.
def open?
self.class.open_status?(status_name)
end
# Finds the status state-machine event that would move this record to the
# given status.
#
# @param status [#to_s] target status; normalised with to_s.to_sym
# @return the event of the first transition reported by transitions_for, or
#   nil when there is none
def status_event_for(status)
  status_events = self.class.state_machines[:status].events
  matching_transitions = status_events.transitions_for(self, to: status.to_s.to_sym)

  matching_transitions.first&.event
end
......
......@@ -37,7 +37,6 @@ module AlertManagement
private
attr_reader :alert, :current_user, :params, :param_errors, :status
delegate :resolved?, to: :alert
def allowed?
current_user&.can?(:update_alert_management_alert, alert)
......@@ -129,7 +128,7 @@ module AlertManagement
# Follow-up work after the alert's status has changed: calls
# add_status_change_system_note, then resolve_todos when the alert is resolved.
def handle_status_change
add_status_change_system_note
# NOTE(review): this listing is a flattened diff. The next two lines look like
# the removed (delegated `resolved?`) and added (`alert.resolved?`) forms of
# the same statement — confirm only the second one is kept.
resolve_todos if resolved?
resolve_todos if alert.resolved?
end
def add_status_change_system_note
......@@ -177,3 +176,5 @@ module AlertManagement
end
end
end
AlertManagement::Alerts::UpdateService.prepend_mod
......@@ -13,9 +13,13 @@ module IncidentManagement
# @option oncall_at [ActiveSupport::TimeWithZone]
# Limits users to only those
# on-call at the specified time.
def initialize(project, oncall_at: Time.current)
# @option schedule [IncidentManagement::OncallSchedule]
# Limits the users to rotations within a
# specific schedule
def initialize(project, oncall_at: Time.current, schedule: nil)
@project = project
@oncall_at = oncall_at
# nil (the default) means no schedule restriction: #rotations then falls back
# to the project's rotations.
@schedule = schedule
end
# @return [User::ActiveRecord_Relation]
......@@ -28,7 +32,7 @@ module IncidentManagement
private
attr_reader :project, :oncall_at
attr_reader :project, :oncall_at, :schedule
def user_ids
strong_memoize(:user_ids) do
......@@ -44,11 +48,17 @@ module IncidentManagement
ids_for_persisted_shifts.flat_map(&:first)
end
# Rotations the finder works from, memoized under :rotations: the given
# schedule's rotations when a schedule was supplied, otherwise the project's
# on-call rotations.
def rotations
  strong_memoize(:rotations) do
    next schedule.rotations if schedule

    project.incident_management_oncall_rotations
  end
end
# @return [Array<[rotation_id, user_id]>]
# @example - [ [1, 16], [2, 200] ]
# Memoized [rotation_id, user_id] pairs for rotations that have a shift
# matching oncall_at (via OncallShift.for_timestamp).
def ids_for_persisted_shifts
strong_memoize(:ids_for_persisted_shifts) do
# NOTE(review): this listing is a flattened diff. The bare `project....` line
# looks like the removed receiver and `rotations` like its replacement —
# confirm only `rotations` starts the chain.
project.incident_management_oncall_rotations
rotations
.merge(IncidentManagement::OncallShift.for_timestamp(oncall_at))
.pluck_id_and_user_id
end
end
......@@ -63,7 +73,7 @@ module IncidentManagement
end
# Rotations not listed in rotation_ids_for_persisted_shifts, with the
# with_shift_generation_associations scope applied.
def rotations_without_persisted_shifts
# NOTE(review): this listing is a flattened diff. The bare `project....` line
# looks like the removed receiver and `rotations` like its replacement —
# confirm only `rotations` starts the chain.
project.incident_management_oncall_rotations
rotations
.except_ids(rotation_ids_for_persisted_shifts)
.with_shift_generation_associations
end
......
......@@ -13,5 +13,7 @@ module IncidentManagement
validates :rules, presence: true
accepts_nested_attributes_for :rules
scope :with_rules, -> { includes(:rules) }
end
end
......@@ -12,6 +12,15 @@ module EE
super
notify_oncall if oncall_notification_recipients.present? && notifying_alert?
process_escalations
end
# Keeps the alert's pending escalations in line with its status: a resolved or
# ignored alert has them deleted; any other alert gets new ones created.
def process_escalations
  return delete_pending_escalations if alert.resolved? || alert.ignored?

  create_pending_escalations
end
def notify_oncall
......@@ -25,6 +34,15 @@ module EE
::IncidentManagement::OncallUsersFinder.new(project).execute
end
end
# Removes every pending escalation attached to the alert.
def delete_pending_escalations
# Pass :delete_all explicitly: the default strategy (:nullify) would leave the
# rows behind with a nulled reference and trip a null constraint.
alert.pending_escalations.delete_all(:delete_all)
end
# Hands the alert to IncidentManagement::PendingEscalations::CreateService,
# which creates its pending escalations.
def create_pending_escalations
::IncidentManagement::PendingEscalations::CreateService.new(alert).execute
end
end
end
end
# frozen_string_literal: true
module EE
  module AlertManagement
    module Alerts
      # EE extension of AlertManagement::Alerts::UpdateService that keeps an
      # alert's pending escalations in step with status changes made through
      # the update service.
      module UpdateService
        extend ::Gitlab::Utils::Override

        private

        # Extends the status-change hook: after the base behaviour runs,
        # escalations are deleted for a resolved or ignored alert and created
        # for an alert that has just become open.
        #
        # Defined under `private` so that prepending this module does not make
        # the hook public; the base implementation is private.
        override :handle_status_change
        def handle_status_change
          super

          delete_pending_escalations if alert.resolved? || alert.ignored?
          create_pending_escalations if reopened?
        end

        # True when the previous status was not an open one and the alert is
        # open now.
        #
        # NOTE(review): status_previously_was is handed to open_status? as-is,
        # whereas Alert#open? passes status_name — confirm both use the same
        # representation of a status.
        def reopened?
          old_status = alert.status_previously_was

          !::AlertManagement::Alert.open_status?(old_status) && alert.open?
        end

        # Removes every pending escalation attached to the alert. :delete_all
        # is passed explicitly instead of relying on the association default.
        def delete_pending_escalations
          alert.pending_escalations.delete_all(:delete_all)
        end

        # Creates pending escalations for the alert via CreateService.
        def create_pending_escalations
          ::IncidentManagement::PendingEscalations::CreateService.new(alert).execute
        end
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Creates one pending escalation per rule of the project's first escalation
    # policy, and immediately processes those whose rule has no delay.
    class CreateService < BaseService
      # @param target the record to escalate; it must respond to project,
      #   resolved? and id (the specs pass an AlertManagement::Alert)
      def initialize(target)
        @target = target
        @project = target.project
        # Captured once so every escalation's process_at is offset from the
        # same instant.
        @process_time = Time.current
      end

      # Does nothing when escalation policies are unavailable for the project,
      # when the target is already resolved, or when the project has no policy.
      def execute
        return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project)
        return if target.resolved?

        policy = escalation_policies.first
        return unless policy

        create_escalations(policy.rules)
      end

      private

      attr_reader :target, :project, :process_time

      def escalation_policies
        project.incident_management_escalation_policies.with_rules
      end

      # Creates an escalation for each rule; a rule with zero elapsed time is
      # processed straight away.
      #
      # Errors are reported to error tracking instead of being raised, so a
      # failure part-way through leaves the escalations created so far in place.
      def create_escalations(rules)
        rules.each do |rule|
          pending_escalation = create_escalation(rule)
          process_escalation(pending_escalation) if rule.elapsed_time_seconds.zero?
        end
      rescue StandardError => e
        Gitlab::ErrorTracking.track_exception(e, target_type: target.class.to_s, target_id: target.id)
      end

      # Persists the escalation, due elapsed_time_seconds after process_time.
      def create_escalation(rule)
        IncidentManagement::PendingEscalations::Alert.create!(
          target: target,
          rule: rule,
          schedule_id: rule.oncall_schedule_id,
          status: rule.status,
          process_at: rule.elapsed_time_seconds.seconds.after(process_time)
        )
      end

      def process_escalation(escalation)
        ::IncidentManagement::PendingEscalations::ProcessService.new(escalation).execute
      end
    end
  end
end
# frozen_string_literal: true
module IncidentManagement
  module PendingEscalations
    # Processes a single pending escalation: notifies the users found by
    # OncallUsersFinder for the escalation's schedule about the target, then
    # destroys the escalation.
    class ProcessService < BaseService
      # @param escalation the pending escalation to process; its project,
      #   oncall_schedule and target are read up front
      def initialize(escalation)
        @escalation = escalation
        @project = escalation.project
        @oncall_schedule = escalation.oncall_schedule
        @target = escalation.target
      end

      # Outcomes, in order of precedence:
      # - escalation policies unavailable: nothing happens;
      # - target resolved: the escalation is destroyed without notifying;
      # - target status at or past the escalation's status: nothing happens and
      #   the escalation is kept;
      # - otherwise: recipients are notified and the escalation is destroyed.
      def execute
        return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project)

        if target.resolved?
          escalation.destroy!
          return
        end

        return if target_status_exceeded_rule?

        notify_recipients
        escalation.destroy!
      end

      private

      attr_reader :escalation, :project, :target, :oncall_schedule

      # Compares the target's status with the escalation's raw (pre-type-cast)
      # status value.
      def target_status_exceeded_rule?
        escalation_status = escalation.status_before_type_cast

        target.status >= escalation_status
      end

      # Sends the on-call alert notification asynchronously to the users found
      # for the escalation's schedule.
      def notify_recipients
        async_notifier = NotificationService.new.async
        recipients = ::IncidentManagement::OncallUsersFinder.new(project, schedule: oncall_schedule).execute

        async_notifier.notify_oncall_users_of_alert(recipients.to_a, target)
      end
    end
  end
end
......@@ -10,5 +10,17 @@ FactoryBot.define do
trait :utc do
timezone { 'Etc/UTC' }
end
# Adds rotation_count rotations (1 by default), each created with the
# :with_participants trait, to the schedule after it has been created.
trait :with_rotation do
  transient do
    rotation_count { 1 }
  end

  after(:create) do |schedule, evaluator|
    evaluator.rotation_count.times do
      rotation = create(:incident_management_oncall_rotation, :with_participants, schedule: schedule)
      schedule.rotations << rotation
    end
  end
end
end
end
......@@ -54,9 +54,10 @@ RSpec.describe IncidentManagement::OncallUsersFinder do
let_it_be(:proj2_s1_r1_shift2) { create(:incident_management_oncall_shift, participant: proj2_s1_r1_p2, starts_at: proj2_s1_r1_shift1.ends_at) }
let(:oncall_at) { Time.current }
let(:schedule) { nil }
describe '#execute' do
subject(:execute) { described_class.new(project, oncall_at: oncall_at).execute }
subject(:execute) { described_class.new(project, oncall_at: oncall_at, schedule: schedule).execute }
context 'when feature is available' do
before do
......@@ -69,6 +70,12 @@ RSpec.describe IncidentManagement::OncallUsersFinder do
it { is_expected.to contain_exactly(user_1, user_2, user_4) }
end
# Passing a schedule limits the result to the users expected for that schedule.
context 'with :schedule parameter specified' do
  let(:schedule) { s1 }

  it { is_expected.to contain_exactly(user_1, user_2) }
end
context 'with :oncall_at parameter specified' do
let(:during_first_shift) { Time.current }
let(:during_second_shift) { s1_r2_shift2.starts_at + 5.minutes }
......
......@@ -13,7 +13,8 @@ RSpec.describe AlertManagement::ProcessPrometheusAlertService do
context 'when alert payload is valid' do
let_it_be(:starts_at) { '2020-04-27T10:10:22.265949279Z' }
let_it_be(:title) { 'Alert title' }
let_it_be(:gitlab_fingerprint) { Digest::SHA1.hexdigest([starts_at, title, 'vector(1)'].join('/')) }
let_it_be(:plain_fingerprint) { [starts_at, title, 'vector(1)'].join('/') }
let_it_be(:gitlab_fingerprint) { Digest::SHA1.hexdigest(plain_fingerprint) }
let(:payload) { raw_payload }
let(:raw_payload) do
......@@ -44,6 +45,35 @@ RSpec.describe AlertManagement::ProcessPrometheusAlertService do
include_examples 'oncall users are correctly notified of recovery alert'
end
# With the licensed features and the feature flag on, a firing alert should
# both notify on-call users and create one escalation.
context 'with escalation policies ready' do
# `schedule` comes from the enclosing context (not visible in this hunk).
let_it_be(:project) { schedule.project }
let_it_be(:policy) { create(:incident_management_escalation_policy, project: project) }
before do
stub_licensed_features(oncall_schedules: true, escalation_policies: true)
stub_feature_flags(escalation_policies_mvc: project)
end
include_examples 'oncall users are correctly notified of firing alert'
include_examples 'creates an escalation', 1
# A resolving payload for an alert that already has a pending escalation.
context 'existing alert is now resolved' do
let(:payload) { raw_payload.merge('status' => 'resolved') }
let!(:target) { create(:alert_management_alert, :from_payload, project: project, payload: payload, fingerprint: gitlab_fingerprint) }
let!(:pending_escalation) { create(:incident_management_pending_alert_escalation, alert: target) }
include_examples "deletes the target's escalations"
# Deletion is expected even when the feature flag is off.
context 'with escalation policy feature disabled' do
before do
stub_feature_flags(escalation_policies_mvc: false)
end
include_examples "deletes the target's escalations"
end
end
end
end
end
end
......
# frozen_string_literal: true
require 'spec_helper'
# EE behaviour of the alert update service: pending escalations are created
# when an alert is reopened and deleted when it is closed.
RSpec.describe AlertManagement::Alerts::UpdateService do
  let_it_be(:user_with_permissions) { create(:user) }
  let_it_be(:project) { create(:project) }
  let_it_be(:escalation_policy) { create(:incident_management_escalation_policy, project: project) }
  let_it_be(:alert, reload: true) { create(:alert_management_alert, :triggered, project: project) }

  let(:current_user) { user_with_permissions }
  let(:params) { {} }
  let(:service) { described_class.new(alert, current_user, params) }

  before do
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  before_all do
    project.add_developer(user_with_permissions)
  end

  describe '#execute' do
    context 'when a status is included' do
      let(:params) { { status: new_status } }

      subject(:execute) { service.execute }

      context 'when moving from a closed status to an open status' do
        let_it_be(:alert, reload: true) { create(:alert_management_alert, :resolved, project: project) }

        let(:new_status) { :triggered }

        it 'creates an escalation' do
          expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(1)
          expect(IncidentManagement::PendingEscalations::Alert.last.alert).to eq(alert)
        end
      end

      context 'moving from an open status to closed status' do
        # reload: true, like the other alert definitions in this file: the
        # service mutates the alert and two examples share this record.
        let_it_be(:alert, reload: true) { create(:alert_management_alert, :triggered, project: project) }
        let_it_be(:escalation) { create(:incident_management_pending_alert_escalation, alert: alert) }

        let(:new_status) { :resolved }
        let(:target) { alert }

        include_examples "deletes the target's escalations"

        context 'with escalation policy feature disabled' do
          before do
            stub_feature_flags(escalation_policies_mvc: false)
          end

          include_examples "deletes the target's escalations"
        end
      end

      # NOTE(review): the alert here is triggered and the service under test
      # treats :ignored like :resolved when deleting escalations, so this may
      # not be a same-group move — confirm whether :acknowledged was intended.
      context 'moving from a status of the same group' do
        let(:new_status) { :ignored }

        it 'does not create or delete escalations' do
          expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers creation of pending escalations for an alert from the project's
# escalation policy rules.
RSpec.describe IncidentManagement::PendingEscalations::CreateService do
  let_it_be(:project) { create(:project) }
  let_it_be(:target) { create(:alert_management_alert, project: project) }
  let_it_be(:rule_count) { 2 }

  let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rule_count: rule_count) }
  let(:rules) { escalation_policy.rules }
  let(:service) { described_class.new(target) }

  subject(:execute) { service.execute }

  context 'feature not available' do
    it 'does nothing' do
      expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
    end
  end

  context 'feature available' do
    before do
      stub_licensed_features(oncall_schedules: true, escalation_policies: true)
      stub_feature_flags(escalation_policies_mvc: project)
    end

    context 'target is resolved' do
      let(:target) { create(:alert_management_alert, :resolved, project: project) }

      it 'does nothing' do
        expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
      end
    end

    it 'creates an escalation for each rule for the policy' do
      execution_time = Time.current

      expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(rule_count)

      first_escalation, second_escalation = target.pending_escalations.order(created_at: :asc)
      first_rule, second_rule = rules

      expect_escalation_attributes_with(escalation: first_escalation, target: target, rule: first_rule, execution_time: execution_time)
      expect_escalation_attributes_with(escalation: second_escalation, target: target, rule: second_rule, execution_time: execution_time)
    end

    context 'when there is no escalation policy for the project' do
      let!(:escalation_policy) { nil }

      it 'does nothing' do
        expect { execute }.not_to change { IncidentManagement::PendingEscalations::Alert.count }
      end
    end

    context 'when the first rule has no delay' do
      # These two helpers were referenced by the example below but never
      # defined, so it could only fail with a NameError.
      let(:first_escalation_rule) { rules.first }
      let(:process_service_spy) { instance_double(IncidentManagement::PendingEscalations::ProcessService) }

      before do
        # The service only processes rules whose elapsed time is zero.
        # NOTE(review): assumes the factory's other rules have a non-zero
        # elapsed time — confirm against the escalation rule factory.
        first_escalation_rule.update!(elapsed_time_seconds: 0)
      end

      it 'processes the escalation' do
        expect(IncidentManagement::PendingEscalations::ProcessService)
          .to receive(:new)
          .with(having_attributes(rule_id: first_escalation_rule.id))
          .and_return(process_service_spy)
        expect(process_service_spy).to receive(:execute)

        expect { execute }.to change { IncidentManagement::PendingEscalations::Alert.count }.by(rule_count)
      end
    end

    # Asserts that the escalation mirrors the rule and is due
    # rule.elapsed_time_seconds after execution_time (within a minute).
    def expect_escalation_attributes_with(escalation:, target:, rule:, execution_time: Time.current)
      expect(escalation).to have_attributes(
        rule_id: rule.id,
        alert_id: target.id,
        schedule_id: rule.oncall_schedule_id,
        status: rule.status,
        process_at: be_within(1.minute).of(rule.elapsed_time_seconds.seconds.after(execution_time))
      )
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Covers processing of one pending escalation: who gets notified and whether
# the escalation record survives.
RSpec.describe IncidentManagement::PendingEscalations::ProcessService do
  let_it_be(:project) { create(:project) }
  let_it_be(:schedule_1) { create(:incident_management_oncall_schedule, :with_rotation, project: project) }
  let_it_be(:schedule_2) { create(:incident_management_oncall_schedule, :with_rotation, project: project) }
  let_it_be(:schedule_1_users) { schedule_1.participants.map(&:user) }
  let_it_be(:schedule_2_users) { schedule_2.participants.map(&:user) }

  let(:escalation_rule) { build(:incident_management_escalation_rule, oncall_schedule: schedule_1) }
  let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rules: [escalation_rule]) }

  let(:alert) { create(:alert_management_alert, project: project, **alert_params) }
  let(:alert_params) { { status: AlertManagement::Alert::STATUSES[:triggered] } }

  let(:target) { alert }
  let(:escalation) { create(:incident_management_pending_alert_escalation, rule: escalation_rule, oncall_schedule: schedule_1, target: target, status: IncidentManagement::EscalationRule.statuses[:acknowledged]) }

  let(:service) { described_class.new(escalation) }

  before do
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  describe '#execute' do
    subject(:execute) { service.execute }

    shared_examples 'it does not escalate' do
      it_behaves_like 'does not send on-call notification'

      it 'does not delete the escalation' do
        subject

        # No error class is given: a negated raise_error with a specific class
        # would also pass when any other error is raised.
        expect { escalation.reload }.not_to raise_error
      end
    end

    shared_examples 'deletes the escalation' do
      specify do
        subject

        expect { escalation.reload }.to raise_error(ActiveRecord::RecordNotFound)
      end
    end

    context 'all conditions are met' do
      let(:users) { schedule_1_users }

      it_behaves_like 'sends on-call notification'
      it_behaves_like 'deletes the escalation'

      context 'feature flag is off' do
        before do
          stub_feature_flags(escalation_policies_mvc: false)
        end

        it_behaves_like 'it does not escalate'
      end
    end

    context 'target is already resolved' do
      let(:target) { create(:alert_management_alert, :resolved, project: project) }

      it_behaves_like 'does not send on-call notification'
      it_behaves_like 'deletes the escalation'
    end

    context 'target status is not above threshold' do
      let(:target) { create(:alert_management_alert, :acknowledged, project: project) }

      it_behaves_like 'it does not escalate'
    end
  end
end
......@@ -16,6 +16,10 @@ RSpec.describe Projects::Alerting::NotifyService do
}
end
before do
stub_feature_flags(escalation_policies_mvc: false)
end
subject { service.execute(token, integration) }
context 'existing alert with same payload fingerprint' do
......@@ -81,6 +85,18 @@ RSpec.describe Projects::Alerting::NotifyService do
include_examples 'oncall users are correctly notified of firing alert'
# With the licensed features and the feature flag on, a firing alert should
# both notify on-call users and create one escalation.
context 'with escalation policies ready' do
  let_it_be(:policy) { create(:incident_management_escalation_policy, project: project) }

  before do
    # Licensed features are switched with booleans, as in the other specs of
    # this change; only the feature flag is scoped to the project.
    stub_licensed_features(oncall_schedules: true, escalation_policies: true)
    stub_feature_flags(escalation_policies_mvc: project)
  end

  include_examples 'oncall users are correctly notified of firing alert'
  include_examples 'creates an escalation', 1
end
context 'with resolving payload' do
let(:payload) do
{
......@@ -90,6 +106,23 @@ RSpec.describe Projects::Alerting::NotifyService do
end
include_examples 'oncall users are correctly notified of recovery alert'
# An ignored alert matching the payload fingerprint already has a pending
# escalation; processing the payload should delete it.
context 'with existing alert escalation' do
let_it_be(:alert) { create(:alert_management_alert, :ignored, fingerprint: gitlab_fingerprint, project: project) }
let_it_be(:pending_escalation) { create(:incident_management_pending_alert_escalation, alert: alert) }
# The shared examples read the record under test through `target`.
let(:target) { alert }
include_examples "deletes the target's escalations"
# Deletion is expected even when the feature flag is off.
context 'with escalation policy feature disabled' do
before do
stub_feature_flags(escalation_policies_mvc: false)
end
include_examples "deletes the target's escalations"
end
end
end
end
end
......
# frozen_string_literal: true
# Expects `subject` to create exactly `count` pending alert escalations via
# create!, each with the expected argument kinds.
#
# The block parameter is used directly. The former `let(:count) { count }`
# only re-exposed that same local and, with include_examples, added a `count`
# helper to the including group.
RSpec.shared_examples 'creates an escalation' do |count|
  specify do
    expect(IncidentManagement::PendingEscalations::Alert).to receive(:create!)
      .with(target: a_kind_of(AlertManagement::Alert), rule: a_kind_of(IncidentManagement::EscalationRule), schedule_id: a_kind_of(Integer), status: a_kind_of(String), process_at: a_kind_of(ActiveSupport::TimeWithZone))
      .exactly(count).times
      .and_call_original

    subject
  end
end
# Expects `subject` to remove all of `target`'s pending escalations. The
# including context must define `target`, and it must start with at least one
# escalation so the example cannot pass vacuously.
RSpec.shared_examples "deletes the target's escalations" do
  specify do
    initial_count = target.pending_escalations.count
    expect(initial_count).to be_positive

    expect { subject }
      .to change { target.pending_escalations.reload.count }
      .from(initial_count)
      .to(0)
  end
end
......@@ -5,16 +5,17 @@
# - `gitlab_fingerprint`, SHA which is used to uniquely identify the alert
RSpec.shared_examples 'sends on-call notification if enabled' do
context 'with on-call schedules enabled' do
let(:notification_async) { double(NotificationService::Async) }
let(:alert) { having_attributes(class: AlertManagement::Alert, fingerprint: gitlab_fingerprint) }
it 'sends on-call notification' do
allow(NotificationService).to receive_message_chain(:new, :async).and_return(notification_async)
expect(notification_async).to receive(:notify_oncall_users_of_alert).with(
users,
having_attributes(class: AlertManagement::Alert, fingerprint: gitlab_fingerprint)
)
it_behaves_like 'sends on-call notification'
subject
context 'escalation policy features are disabled' do
before do
stub_licensed_features(oncall_schedules: true, escalation_policies: false)
stub_feature_flags(escalation_policies_mvc: false)
end
it_behaves_like 'sends on-call notification'
end
end
......@@ -27,6 +28,20 @@ RSpec.shared_examples 'sends on-call notification if enabled' do
end
end
# Expects `subject` to notify `users` about `alert` through the async
# notification service. The including context must define `users`, `alert`
# and `subject`.
RSpec.shared_examples 'sends on-call notification' do
  let(:notification_async) { double(NotificationService::Async) }

  specify do
    allow(NotificationService).to receive_message_chain(:new, :async).and_return(notification_async)
    expect(notification_async).to receive(:notify_oncall_users_of_alert).with(users, alert)

    subject
  end
end
RSpec.shared_examples 'does not send on-call notification' do
specify do
expect(NotificationService).not_to receive(:new)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment