Commit 1bd8f8e3 authored by Sean Arnold

Add handling for pending incident escalations

- Update process service
- Email templates for incident escalations

Changelog: added
EE: true
parent ba80ab37
...@@ -59,6 +59,7 @@ module Emails ...@@ -59,6 +59,7 @@ module Emails
def prometheus_alert_fired_email(project, user, alert) def prometheus_alert_fired_email(project, user, alert)
@project = project @project = project
@alert = alert.present @alert = alert.present
@incident = alert.issue
add_project_headers add_project_headers
add_alert_headers add_alert_headers
...@@ -80,11 +81,10 @@ module Emails ...@@ -80,11 +81,10 @@ module Emails
end end
def add_incident_headers def add_incident_headers
incident = @alert.issue return unless @incident
return unless incident
headers['X-GitLab-Incident-ID'] = incident.id headers['X-GitLab-Incident-ID'] = @incident.id
headers['X-GitLab-Incident-IID'] = incident.iid headers['X-GitLab-Incident-IID'] = @incident.iid
end end
end end
end end
......
---
# Definition of the `incident_escalations` flag: a `development`-type flag owned by
# group::monitor, introduced in milestone 14.6 and disabled by default.
# NOTE(review): presumably gates the incident escalation handling introduced by the
# merge request linked below — confirm against the code that checks this flag.
name: incident_escalations
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/74337
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/345769
milestone: '14.6'
type: development
group: group::monitor
default_enabled: false
...@@ -41,6 +41,20 @@ module EE ...@@ -41,6 +41,20 @@ module EE
format.text { render layout: 'mailer' } format.text { render layout: 'mailer' }
end end
end end
# Builds the email announcing that an incident escalation has fired.
#
# Stores the project, the presented issue and the issue's escalation status in
# instance variables for the view, then sets the project headers, the
# notification-reason header and the incident headers before composing the mail.
#
# @param project the project the incident belongs to; its group selects the recipient address
# @param user the recipient; must respond to `notification_email_for`
# @param issue the incident issue; must respond to `present` and
#   `incident_management_issuable_escalation_status`
# @return the result of `mail`
def incident_escalation_fired_email(project, user, issue)
  @project = project
  @incident = issue.present
  @escalation_status = issue.incident_management_issuable_escalation_status

  add_project_headers
  # The reason header carries the current escalation status name, e.g. "incident_<status>".
  headers['X-GitLab-NotificationReason'] = "incident_#{@escalation_status.status_name}"
  add_incident_headers

  title_line = "Incident: #{@incident.title}"
  recipient = user.notification_email_for(@project.group)

  mail(to: recipient, subject: subject(title_line))
end
end end
end end
end end
...@@ -9,14 +9,15 @@ module IncidentManagement ...@@ -9,14 +9,15 @@ module IncidentManagement
@escalation = escalation @escalation = escalation
@project = escalation.project @project = escalation.project
@rule = escalation.rule @rule = escalation.rule
@escalatable = escalation.escalatable
@target = escalation.target @target = escalation.target
end end
def execute def execute
return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project) return unless ::Gitlab::IncidentManagement.escalation_policies_available?(project)
return if too_early_to_process? return if too_early_to_process?
return if target_already_resolved? return if escalatable_already_resolved?
return if target_status_exceeded_rule? return if escalatable_status_exceeded_rule?
notify_recipients notify_recipients
create_system_notes create_system_notes
...@@ -25,16 +26,16 @@ module IncidentManagement ...@@ -25,16 +26,16 @@ module IncidentManagement
private private
attr_reader :escalation, :project, :target, :rule attr_reader :escalation, :project, :target, :rule, :escalatable
def target_already_resolved? def escalatable_already_resolved?
return false unless target.resolved? return false unless escalatable.resolved?
destroy_escalation! destroy_escalation!
end end
def target_status_exceeded_rule? def escalatable_status_exceeded_rule?
target.status >= rule.status_before_type_cast escalatable.status >= rule.status_before_type_cast
end end
def too_early_to_process? def too_early_to_process?
...@@ -45,11 +46,11 @@ module IncidentManagement ...@@ -45,11 +46,11 @@ module IncidentManagement
NotificationService NotificationService
.new .new
.async .async
.notify_oncall_users_of_alert(oncall_notification_recipients, target) .send("notify_oncall_users_of_#{escalation.type}", oncall_notification_recipients, target) # rubocop: disable GitlabSecurity/PublicSend
end end
def create_system_notes def create_system_notes
SystemNoteService.notify_via_escalation(target, project, oncall_notification_recipients, rule.policy) SystemNoteService.notify_via_escalation(target, project, oncall_notification_recipients, rule.policy, escalation.type)
end end
def oncall_notification_recipients def oncall_notification_recipients
......
-# HTML body for an incident notification; reads @escalation_status and @incident.
-# Prints a "resolved" or "triggered" sentence containing the project path, a link to the
-# incident, and its title. When present, it also prints the description (rendered through
-# markdown with the :email pipeline), the escalation policy name, and one link per metric image.
-# NOTE(review): presumably the HTML view for incident_escalation_fired_email — confirm the path.
-# NOTE(review): nesting indentation is not visible in this view — confirm that the children of
-# each %p / - if / - each are indented in the real file.
- body = @escalation_status.resolved? ? _('An incident has been resolved in %{project_path}.') : _('An incident has been triggered in %{project_path}.')
%p
= body % { project_path: @incident.project.full_path }
%p
= link_to(_('View incident details.'), @incident.web_url)
%p
= _('Title:')
= @incident.title
- if @incident.description
%p
= _('Description:')
= markdown(@incident.description, pipeline: :email, author: @incident.author)
- if @escalation_status.policy
%p
= _('Escalation policy:')
= @escalation_status.policy.name
- if @incident.metric_images.any?
%p
= _('Metrics:')
- @incident.metric_images.each do |image|
= link_to image.filename, image.url
<%# Plain-text incident notification body: status sentence with the project path, incident URL, title, then description and escalation policy name when present. Reads @escalation_status and @incident; unlike the HAML body it prints the raw description and lists no metric images. %>
<% body = @escalation_status.resolved? ? _('An incident has been resolved in %{project_path}.') : _('An incident has been triggered in %{project_path}.') %>
<%= body % { project_path: @incident.project.full_path } %>
<%= _('View incident details at') %> <%= @incident.web_url %>
<%= _('Title:') %> <%= @incident.title %>
<% if @incident.description %>
<%= _('Description:') %> <%= @incident.description %>
<% end %>
<% if @escalation_status.policy %>
<%= _('Escalation policy:') %> <%= @escalation_status.policy.name %>
<% end %>
...@@ -28,4 +28,58 @@ RSpec.describe Emails::Projects do ...@@ -28,4 +28,58 @@ RSpec.describe Emails::Projects do
is_expected.to have_body_text("It is recommended that you reach out to the current on-call responder to ensure continuity of on-call coverage") is_expected.to have_body_text("It is recommended that you reach out to the current on-call responder to ensure continuity of on-call coverage")
end end
end end
# Exercises Notify.incident_escalation_fired_email for an incident that has an
# escalation status: X-GitLab headers, subject line and body content.
describe '#incident_escalation_fired_email' do
  let_it_be(:project) { create(:project) }
  let_it_be(:user) { create(:user) }

  # Created eagerly (let!) so that both records exist before the mail is built.
  let!(:incident) { create(:issue, :incident, project: project) }
  let!(:escalation_status) { create(:incident_management_issuable_escalation_status, issue: incident) }

  subject do
    Notify.incident_escalation_fired_email(project, user, incident)
  end

  include_context 'gitlab email notification'

  it_behaves_like 'an email with X-GitLab headers containing project details'

  # The headers asserted here identify the incident, so the example is named
  # after incidents rather than alerts.
  it 'has expected X-GitLab incident headers', :aggregate_failures do
    is_expected.to have_header('X-GitLab-NotificationReason', "incident_#{escalation_status.status_name}")
    is_expected.to have_header('X-GitLab-Incident-ID', /#{incident.id}/)
    is_expected.to have_header('X-GitLab-Incident-IID', /#{incident.iid}/)
  end

  it_behaves_like 'an email sent from GitLab'
  it_behaves_like 'it should not have Gmail Actions links'
  it_behaves_like 'a user cannot unsubscribe through footer link'

  it 'has expected subject' do
    is_expected.to have_subject("#{project.name} | Incident: #{incident.title}")
  end

  it 'has expected content' do
    is_expected.to have_body_text('Title:')
    is_expected.to have_body_text(incident.title)
  end

  # The description section is only expected when the incident has a description.
  context 'with description' do
    let!(:incident) { create(:issue, :incident, project: project, description: 'some descripition') }

    it 'has expected content' do
      is_expected.to have_body_text('Description:')
      is_expected.to have_body_text('some descripition')
    end
  end

  # The policy section is only expected when the escalation status has a policy.
  context 'with escalation status policy' do
    let!(:policy) { create(:incident_management_escalation_policy, project: project) }
    let!(:escalation_status) { create(:incident_management_issuable_escalation_status, issue: incident, policy: policy, escalations_started_at: Time.current) }

    it 'has expected content' do
      is_expected.to have_body_text('Escalation policy:')
      is_expected.to have_body_text(policy.name)
    end
  end
end
end end
...@@ -10,12 +10,7 @@ RSpec.describe IncidentManagement::PendingEscalations::ProcessService do ...@@ -10,12 +10,7 @@ RSpec.describe IncidentManagement::PendingEscalations::ProcessService do
let(:escalation_rule) { build(:incident_management_escalation_rule, oncall_schedule: schedule_1) } let(:escalation_rule) { build(:incident_management_escalation_rule, oncall_schedule: schedule_1) }
let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rules: [escalation_rule]) } let!(:escalation_policy) { create(:incident_management_escalation_policy, project: project, rules: [escalation_rule]) }
let(:alert) { create(:alert_management_alert, project: project, **alert_params) }
let(:alert_params) { { status: ::IncidentManagement::Escalatable::STATUSES[:triggered] } }
let(:target) { alert }
let(:process_at) { 5.minutes.ago } let(:process_at) { 5.minutes.ago }
let(:escalation) { create(:incident_management_pending_alert_escalation, rule: escalation_rule, alert: target, process_at: process_at) }
let(:service) { described_class.new(escalation) } let(:service) { described_class.new(escalation) }
...@@ -42,47 +37,102 @@ RSpec.describe IncidentManagement::PendingEscalations::ProcessService do ...@@ -42,47 +37,102 @@ RSpec.describe IncidentManagement::PendingEscalations::ProcessService do
end end
end end
context 'all conditions are met' do shared_examples 'creates a system note' do
let(:users) { schedule_1_users } specify do
it_behaves_like 'sends on-call notification'
it_behaves_like 'deletes the escalation'
it 'creates a system note' do
expect(SystemNoteService) expect(SystemNoteService)
.to receive(:notify_via_escalation).with(alert, project, [a_kind_of(User)], escalation_policy) .to receive(:notify_via_escalation).with(target, project, [a_kind_of(User)], escalation_policy, escalation.type)
.and_call_original .and_call_original
expect { execute }.to change(Note, :count).by(1) expect { execute }.to change(Note, :count).by(1)
end end
end
# Shared expectation: running the subject calls `notification_action` on the async
# notification object with the expected users and the escalation target.
# Including contexts must define `notification_action`, `users` and `target`.
shared_examples 'sends an on-call notification email' do
  let(:notification_async) { double(NotificationService::Async) }

  specify do
    # Route NotificationService.new.async to the double so the call can be observed.
    allow(NotificationService).to receive_message_chain(:new, :async).and_return(notification_async)

    expect(notification_async).to receive(notification_action).with(users, target)

    subject
  end
end
shared_examples 'escalates correctly when all conditions are met' do
let(:users) { schedule_1_users }
it_behaves_like 'sends an on-call notification email'
it_behaves_like 'deletes the escalation'
it_behaves_like 'creates a system note'
context 'when escalation rule is for a user' do context 'when escalation rule is for a user' do
let(:escalation_rule) { build(:incident_management_escalation_rule, :with_user) } let(:escalation_rule) { build(:incident_management_escalation_rule, :with_user) }
let(:users) { [escalation_rule.user] } let(:users) { [escalation_rule.user] }
it_behaves_like 'sends on-call notification' it_behaves_like 'sends an on-call notification email'
it_behaves_like 'deletes the escalation' it_behaves_like 'deletes the escalation'
end end
end end
context 'target is already resolved' do shared_examples 'does not escalate if escalation is not ready to be processed' do
let(:target) { create(:alert_management_alert, :resolved, project: project) } context 'does not escalate if escalation is not ready to be processed' do
let(:process_at) { 5.minutes.from_now }
it_behaves_like 'does not send on-call notification' it_behaves_like 'it does not escalate'
end
it_behaves_like 'deletes the escalation'
end end
context 'target status is not above threshold' do context 'alert escalation' do
let(:target) { create(:alert_management_alert, :acknowledged, project: project) } let(:alert) { create(:alert_management_alert, project: project, **alert_params) }
let(:alert_params) { { status: ::IncidentManagement::Escalatable::STATUSES[:triggered] } }
let(:target) { alert }
let(:escalation) { create(:incident_management_pending_alert_escalation, rule: escalation_rule, alert: target, process_at: process_at) }
let(:notification_action) { :notify_oncall_users_of_alert }
include_examples 'escalates correctly when all conditions are met'
include_examples 'does not escalate if escalation is not ready to be processed'
context 'target is already resolved' do
let(:target) { create(:alert_management_alert, :resolved, project: project) }
it_behaves_like 'does not send on-call notification'
it_behaves_like 'deletes the escalation'
end
context 'target status is not above threshold' do
let(:target) { create(:alert_management_alert, :acknowledged, project: project) }
it_behaves_like 'it does not escalate' it_behaves_like 'it does not escalate'
end
end end
context 'escalation is not ready to be processed' do context 'issue escalation' do
let(:process_at) { 5.minutes.from_now } let(:issue) { create(:issue, :incident, project: project) }
let!(:issue_escalation_status) { create(:incident_management_issuable_escalation_status, issue: target) }
let(:target) { issue }
let(:escalation) { create(:incident_management_pending_issue_escalation, rule: escalation_rule, issue: target, process_at: process_at) }
let(:notification_action) { :notify_oncall_users_of_incident }
include_examples 'escalates correctly when all conditions are met'
include_examples 'does not escalate if escalation is not ready to be processed'
it_behaves_like 'it does not escalate' context 'target escalation status is resolved' do
before do
target.incident_management_issuable_escalation_status.resolve!
end
it_behaves_like 'does not send on-call notification'
it_behaves_like 'deletes the escalation'
end
context 'target status is not above threshold' do
let!(:issue_escalation_status) { create(:incident_management_issuable_escalation_status, :acknowledged, issue: issue) }
it_behaves_like 'it does not escalate'
end
end end
end end
end end
...@@ -9,17 +9,18 @@ RSpec.describe SystemNotes::EscalationsService do ...@@ -9,17 +9,18 @@ RSpec.describe SystemNotes::EscalationsService do
let_it_be(:author) { User.alert_bot } let_it_be(:author) { User.alert_bot }
describe '#notify_via_escalation' do describe '#notify_via_escalation' do
subject { described_class.new(noteable: noteable, project: project).notify_via_escalation([user, user_2], escalation_policy: escalation_policy) } subject { described_class.new(noteable: noteable, project: project).notify_via_escalation([user, user_2], escalation_policy: escalation_policy, type: type) }
let_it_be(:escalation_policy) { create(:incident_management_escalation_policy, project: project) } let_it_be(:escalation_policy) { create(:incident_management_escalation_policy, project: project) }
let_it_be(:noteable) { create(:alert_management_alert, project: project) } let_it_be(:noteable) { create(:alert_management_alert, project: project) }
let_it_be(:type) { :alert }
it_behaves_like 'a system note' do it_behaves_like 'a system note' do
let(:action) { 'new_alert_added' } let(:action) { 'new_alert_added' }
end end
it 'posts the correct text to the system note' do it 'posts the correct text to the system note' do
expect(subject.note).to match("notified #{user.to_reference} and #{user_2.to_reference} of this alert via escalation policy **#{escalation_policy.name}**") expect(subject.note).to match("notified #{user.to_reference} and #{user_2.to_reference} of this #{type} via escalation policy **#{escalation_policy.name}**")
end end
end end
end end
...@@ -3988,6 +3988,12 @@ msgstr "" ...@@ -3988,6 +3988,12 @@ msgstr ""
msgid "An example showing how to use Jsonnet with GitLab dynamic child pipelines" msgid "An example showing how to use Jsonnet with GitLab dynamic child pipelines"
msgstr "" msgstr ""
msgid "An incident has been resolved in %{project_path}."
msgstr ""
msgid "An incident has been triggered in %{project_path}."
msgstr ""
msgid "An integer value is required for seconds" msgid "An integer value is required for seconds"
msgstr "" msgstr ""
...@@ -13930,6 +13936,9 @@ msgstr "" ...@@ -13930,6 +13936,9 @@ msgstr ""
msgid "Escalation policies must have at least one rule" msgid "Escalation policies must have at least one rule"
msgstr "" msgstr ""
msgid "Escalation policy:"
msgstr ""
msgid "EscalationPolicies|%{clockIcon} IF alert is not %{alertStatus} in %{minutes}" msgid "EscalationPolicies|%{clockIcon} IF alert is not %{alertStatus} in %{minutes}"
msgstr "" msgstr ""
...@@ -22249,6 +22258,9 @@ msgstr "" ...@@ -22249,6 +22258,9 @@ msgstr ""
msgid "Metrics and profiling" msgid "Metrics and profiling"
msgstr "" msgstr ""
msgid "Metrics:"
msgstr ""
msgid "MetricsDashboardAnnotation|Annotation can't belong to both a cluster and an environment at the same time" msgid "MetricsDashboardAnnotation|Annotation can't belong to both a cluster and an environment at the same time"
msgstr "" msgstr ""
...@@ -38619,6 +38631,12 @@ msgstr "" ...@@ -38619,6 +38631,12 @@ msgstr ""
msgid "View group labels" msgid "View group labels"
msgstr "" msgstr ""
msgid "View incident details at"
msgstr ""
msgid "View incident details."
msgstr ""
msgid "View incident issues." msgid "View incident issues."
msgstr "" msgstr ""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment