Commit 0cc23c86 authored by Sarah Yasonik's avatar Sarah Yasonik Committed by Toon Claes

Email participants of on-call rotation when alerts occur

When an alert comes in through an HTTP integration or Prometheus
integration, we'll find the user(s) who are currently on-call
and notify them of the alert. Also sends out a notification
when an alert is resolved. The email sent matches the email
sent if the project has alert emails configured.
parent 948e03df
......@@ -125,3 +125,5 @@ module AlertManagement
end
end
end
AlertManagement::AlertProcessing.prepend_ee_mod
# frozen_string_literal: true
module IncidentManagement
# Returns users who are oncall by reading shifts from the DB.
# The DB is the historical record, so we should adhere to it
# when it is available. If rotations are missing persited
# shifts for some reason, we'll fallback to a generated shift.
# It may also be possible that no one is on call for a rotation.
class OncallUsersFinder
include Gitlab::Utils::StrongMemoize
# @param project [Project]
# @option oncall_at [ActiveSupport::TimeWithZone]
# Limits users to only those
# on-call at the specified time.
def initialize(project, oncall_at: Time.current)
@project = project
@oncall_at = oncall_at
end
# @return [User::ActiveRecord_Relation]
def execute
return User.none unless Gitlab::IncidentManagement.oncall_schedules_available?(project)
return User.none unless user_ids.present?
User.id_in(user_ids)
end
private
attr_reader :project, :oncall_at
def user_ids
strong_memoize(:user_ids) do
user_ids_for_persisted_shifts.concat(user_ids_for_predicted_shifts).uniq
end
end
def user_ids_for_persisted_shifts
ids_for_persisted_shifts.flat_map(&:last)
end
def rotation_ids_for_persisted_shifts
ids_for_persisted_shifts.flat_map(&:first)
end
# @return [Array<[rotation_id, user_id]>]
# @example - [ [1, 16], [2, 200] ]
def ids_for_persisted_shifts
strong_memoize(:ids_for_persisted_shifts) do
project.incident_management_oncall_rotations
.merge(IncidentManagement::OncallShift.for_timestamp(oncall_at))
.pluck_id_and_user_id
end
end
def user_ids_for_predicted_shifts
rotations_without_persisted_shifts.map do |rotation|
next unless shift = IncidentManagement::OncallShiftGenerator.new(rotation).for_timestamp(oncall_at)
shift.participant.user_id
end
end
def rotations_without_persisted_shifts
project.incident_management_oncall_rotations
.except_ids(rotation_ids_for_persisted_shifts)
.with_shift_generation_associations
end
end
end
......@@ -103,6 +103,7 @@ module EE
has_many :sourced_pipelines, class_name: 'Ci::Sources::Project', foreign_key: :source_project_id
has_many :incident_management_oncall_schedules, class_name: 'IncidentManagement::OncallSchedule', inverse_of: :project
has_many :incident_management_oncall_rotations, class_name: 'IncidentManagement::OncallRotation', through: :incident_management_oncall_schedules, source: :rotations
elastic_index_dependant_association :issues, on_change: :visibility_level
......
......@@ -11,8 +11,6 @@ module IncidentManagement
belongs_to :user, class_name: 'User', foreign_key: :user_id
has_many :shifts, class_name: 'OncallShift', inverse_of: :participant, foreign_key: :participant_id
scope :ordered_asc, -> { order(id: :asc) }
# Uniqueness validations added here should be duplicated
# in IncidentManagement::OncallRotation::CreateService
# as bulk insertion skips validations
......
......@@ -13,7 +13,8 @@ module IncidentManagement
NAME_LENGTH = 200
belongs_to :schedule, class_name: 'OncallSchedule', inverse_of: 'rotations', foreign_key: 'oncall_schedule_id'
has_many :participants, class_name: 'OncallParticipant', inverse_of: :rotation
# Note! If changing the order of participants, also change the :with_shift_generation_associations scope.
has_many :participants, -> { order(id: :asc) }, class_name: 'OncallParticipant', inverse_of: :rotation
has_many :users, through: :participants
has_many :shifts, class_name: 'OncallShift', inverse_of: :rotation, foreign_key: :rotation_id
......@@ -23,9 +24,20 @@ module IncidentManagement
validates :length_unit, presence: true
scope :started, -> { where('starts_at < ?', Time.current) }
scope :except_ids, -> (ids) { where.not(id: ids) }
scope :with_shift_generation_associations, -> do
joins(:participants, :schedule)
.distinct
.includes(:participants, :schedule)
.order(:id, 'incident_management_oncall_participants.id ASC')
end
delegate :project, to: :schedule
def self.pluck_id_and_user_id
joins(shifts: { participant: :user }).pluck(:id, 'users.id')
end
def shift_duration
# As length_unit is an enum, input is guaranteed to be appropriate
length.public_send(length_unit) # rubocop:disable GitlabSecurity/PublicSend
......
......@@ -11,7 +11,7 @@ module IncidentManagement
DESCRIPTION_LENGTH = 1000
belongs_to :project, inverse_of: :incident_management_oncall_schedules
has_many :rotations, class_name: 'OncallRotation'
has_many :rotations, class_name: 'OncallRotation', inverse_of: :schedule
has_many :participants, class_name: 'OncallParticipant', through: :rotations
has_internal_id :iid, scope: :project
......
......@@ -21,6 +21,10 @@ module IncidentManagement
where("tstzrange(starts_at, ends_at, '[)') && tstzrange(?, ?, '[)')", starts_at, ends_at)
end
scope :for_timestamp, -> (timestamp) do
where('incident_management_oncall_shifts.starts_at <= :timestamp AND '\
'incident_management_oncall_shifts.ends_at > :timestamp', timestamp: timestamp)
end
private
......
# frozen_string_literal: true
module EE
module AlertManagement
module AlertProcessing
extend ::Gitlab::Utils::Override
private
override :complete_post_processing_tasks
def complete_post_processing_tasks
super
notify_oncall if oncall_notification_recipients.present?
end
def notify_oncall
notification_service
.async
.notify_oncall_users_of_alert(oncall_notification_recipients.to_a, alert)
end
def oncall_notification_recipients
strong_memoize(:oncall_notification_recipients) do
::IncidentManagement::OncallUsersFinder.new(project).execute
end
end
end
end
end
......@@ -66,6 +66,12 @@ module EE
mailer.provisioned_member_access_granted_email(group_member.id).deliver_later
end
def notify_oncall_users_of_alert(users, alert)
users.each do |user|
mailer.prometheus_alert_fired_email(alert.project, user, alert).deliver_later
end
end
private
def add_mr_approvers_email(merge_request, approvers, current_user)
......
......@@ -124,7 +124,7 @@ module IncidentManagement
end
def participants
@participants ||= rotation.participants.ordered_asc
@participants ||= rotation.participants
end
def rotation_starts_at
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe IncidentManagement::OncallUsersFinder do
let_it_be_with_refind(:project) { create(:project) }
# Project 1 Shift 1 Shift 2 Shift 3
# s1 -> r1 [p1] | user 1; saved | user 1; unsaved | user 1; unsaved |
# -> r2 [p1, p2] | user 2; saved | user 3; saved | user 2; unsaved |
# s2 -> r1 [p1] | user 4; saved | user 4; unsaved | user 4; unsaved |
# -> r2 [p1] | user 1; saved | user 1; unsaved | user 1; unsaved |
# -> r3 | N/A | N/A | N/A |
#
# Project 2
# s1 -> r1 [p1, p2] | user 5; saved | user 2; saved | user 5; unsaved |
# Schedule 1 / Rotation 1
let_it_be(:s1) { create(:incident_management_oncall_schedule, project: project) }
let_it_be(:s1_r1) { create(:incident_management_oncall_rotation, :with_participant, schedule: s1) }
let_it_be(:s1_r1_p1) { s1_r1.participants.first }
let_it_be(:user_1) { s1_r1_p1.user }
let_it_be(:s1_r1_shift1) { create(:incident_management_oncall_shift, participant: s1_r1_p1) }
# Schedule 1 / Rotation 2
let_it_be(:s1_r2) { create(:incident_management_oncall_rotation, :with_participant, schedule: s1) }
let_it_be(:s1_r2_p1) { s1_r2.participants.first }
let_it_be(:user_2) { s1_r2_p1.user }
let_it_be(:s1_r2_shift1) { create(:incident_management_oncall_shift, participant: s1_r2_p1) }
let_it_be(:s1_r2_p2) { create(:incident_management_oncall_participant, rotation: s1_r2) }
let_it_be(:user_3) { s1_r2_p2.user }
let_it_be(:s1_r2_shift2) { create(:incident_management_oncall_shift, participant: s1_r2_p2, starts_at: s1_r2_shift1.ends_at) }
# Schedule 2 / Rotation 1
let_it_be(:s2) { create(:incident_management_oncall_schedule, project: project) }
let_it_be(:s2_r1) { create(:incident_management_oncall_rotation, :with_participant, schedule: s2) }
let_it_be(:s2_r1_p1) { s2_r1.participants.first }
let_it_be(:user_4) { s2_r1_p1.user }
let_it_be(:s2_r1_shift1) { create(:incident_management_oncall_shift, participant: s2_r1_p1) }
# Schedule 2 / Rotation 2 - has same user as s1_r1_p1
let_it_be(:s2_r2) { create(:incident_management_oncall_rotation, schedule: s2) }
let_it_be(:s2_r2_p1) { create(:incident_management_oncall_participant, user: user_1, rotation: s2_r2) }
let_it_be(:s2_r2_shift1) { create(:incident_management_oncall_shift, participant: s2_r2_p1) }
# Schedule 2 / Rotation 3 - has no participants
let_it_be(:s2_r3) { create(:incident_management_oncall_rotation, schedule: s2) }
# Other Project - has same user as s1_r2_p1
let_it_be(:proj2_s1_r1_p1) { create(:incident_management_oncall_participant) } # user_5
let_it_be(:proj2_s1_r1_shift1) { create(:incident_management_oncall_shift, participant: proj2_s1_r1_p1) }
let_it_be(:proj2_s1_r1) { proj2_s1_r1_p1.rotation }
let_it_be(:proj2_s1_r1_p2) { create(:incident_management_oncall_participant, :with_developer_access, user: user_2, rotation: proj2_s1_r1) }
let_it_be(:proj2_s1_r1_shift2) { create(:incident_management_oncall_shift, participant: proj2_s1_r1_p2, starts_at: proj2_s1_r1_shift1.ends_at) }
let(:oncall_at) { Time.current }
describe '#execute' do
subject(:execute) { described_class.new(project, oncall_at: oncall_at).execute }
context 'when feature is available' do
before do
stub_licensed_features(oncall_schedules: true)
end
context 'without parameters uses current time' do
subject(:execute) { described_class.new(project).execute }
it { is_expected.to contain_exactly(user_1, user_2, user_4) }
end
context 'with :oncall_at parameter specified' do
let(:during_first_shift) { Time.current }
let(:during_second_shift) { s1_r2_shift2.starts_at + 5.minutes }
let(:after_second_shift) { s1_r2_shift2.ends_at + 5.minutes }
let(:before_shifts) { s1_r1.starts_at - 15.minutes }
context 'with all persisted shifts for oncall_at time' do
let(:oncall_at) { during_first_shift }
it { is_expected.to contain_exactly(user_1, user_2, user_4) }
it 'does not attempt to generate shifts' do
expect(IncidentManagement::OncallShiftGenerator).not_to receive(:new)
execute
end
end
context 'with some persisted shifts for oncall_at time' do
let(:oncall_at) { during_second_shift }
it { is_expected.to contain_exactly(user_1, user_3, user_4) }
it 'does not run additional queries for each persisted shift' do
query_count = ActiveRecord::QueryRecorder.new { execute }
create(:incident_management_oncall_shift, participant: s1_r1_p1, starts_at: s1_r1_shift1.ends_at)
expect { described_class.new(project, oncall_at: oncall_at).execute }.not_to exceed_query_limit(query_count)
end
end
context 'with no persisted shifts for oncall_at time' do
let(:oncall_at) { after_second_shift }
it { is_expected.to contain_exactly(user_1, user_2, user_4) }
end
context 'before rotations have started' do
let(:oncall_at) { before_shifts }
it { is_expected.to be_empty }
end
it 'does not require additional queries to generate shifts' do
query_count = ActiveRecord::QueryRecorder.new { described_class.new(project, oncall_at: during_first_shift).execute }
expect { described_class.new(project, oncall_at: after_second_shift).execute }
.not_to exceed_query_limit(query_count)
end
end
end
context 'when feature is not avaiable' do
before do
stub_licensed_features(oncall_schedules: false)
end
it { is_expected.to eq(IncidentManagement::OncallParticipant.none) }
end
end
end
......@@ -4,7 +4,7 @@ require 'spec_helper'
RSpec.describe IncidentManagement::OncallShiftGenerator do
let_it_be(:rotation_start_time) { Time.parse('2020-12-08 00:00:00 UTC').utc }
let_it_be(:rotation) { create(:incident_management_oncall_rotation, starts_at: rotation_start_time, length: 5, length_unit: :days) }
let_it_be_with_reload(:rotation) { create(:incident_management_oncall_rotation, starts_at: rotation_start_time, length: 5, length_unit: :days) }
let(:current_time) { Time.parse('2020-12-08 15:00:00 UTC').utc }
let(:shift_length) { rotation.shift_duration }
......
......@@ -38,18 +38,6 @@ RSpec.describe IncidentManagement::OncallParticipant do
end
end
describe 'scopes' do
describe '.ordered_asc' do
let_it_be(:participant1) { create(:incident_management_oncall_participant, :with_developer_access, rotation: rotation) }
let_it_be(:participant2) { create(:incident_management_oncall_participant, :with_developer_access, rotation: rotation) }
let_it_be(:participant3) { create(:incident_management_oncall_participant, :with_developer_access, rotation: rotation) }
subject { described_class.ordered_asc }
it { is_expected.to eq([participant1, participant2, participant3]) }
end
end
private
def remove_user_from_project(user, project)
......
......@@ -6,10 +6,10 @@ RSpec.describe IncidentManagement::OncallRotation do
let_it_be(:schedule) { create(:incident_management_oncall_schedule) }
describe '.associations' do
it { is_expected.to belong_to(:schedule) }
it { is_expected.to have_many(:participants) }
it { is_expected.to belong_to(:schedule).class_name('OncallSchedule').inverse_of(:rotations) }
it { is_expected.to have_many(:participants).order(id: :asc).class_name('OncallParticipant').inverse_of(:rotation) }
it { is_expected.to have_many(:users).through(:participants) }
it { is_expected.to have_many(:shifts) }
it { is_expected.to have_many(:shifts).class_name('OncallShift').inverse_of(:rotation) }
end
describe '.validations' do
......
......@@ -7,7 +7,7 @@ RSpec.describe IncidentManagement::OncallSchedule do
describe '.associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to have_many(:rotations) }
it { is_expected.to have_many(:rotations).inverse_of(:schedule) }
it { is_expected.to have_many(:participants).through(:rotations) }
end
......
......@@ -45,7 +45,6 @@ RSpec.describe IncidentManagement::OncallShift do
end
describe 'scopes' do
describe '.for_timeframe' do
let_it_be(:monday) { Time.current.next_week(:monday) }
let_it_be(:tuesday) { monday + 1.day }
let_it_be(:wednesday) { tuesday + 1.day }
......@@ -73,8 +72,9 @@ RSpec.describe IncidentManagement::OncallShift do
let_it_be(:fri_to_sun) { create_shift(friday, sunday, participant2) }
# Third rotation
let_it_be(:tue_to_sun) { create_shift(wednesday, sunday, participant3) }
let_it_be(:tue_to_sun) { create_shift(tuesday, sunday, participant3) }
describe '.for_timeframe' do
subject(:shifts) { described_class.for_timeframe(wednesday, saturday) }
it 'includes shifts which cover the timeframe' do
......@@ -102,10 +102,32 @@ RSpec.describe IncidentManagement::OncallShift do
describe '.order_starts_at_desc' do
subject { described_class.order_starts_at_desc }
let_it_be(:shift1) { create_shift(Time.current, Time.current + 1.hour, participant) }
let_it_be(:shift2) { create_shift(Time.current + 2.hours, Time.current + 3.hours, participant) }
it do
is_expected.to eq [
sat_to_sun,
fri_to_sun,
fri_to_sat,
thu_to_fri,
wed_to_thu,
tue_to_sun,
tue_to_wed,
mon_to_thu,
mon_to_tue
]
end
end
describe '.for_timestamp' do
subject(:shifts) { described_class.for_timestamp(wednesday) }
it { is_expected.to eq [shift2, shift1]}
it 'includes shifts which cover the timestamp' do
expect(shifts).to contain_exactly(
mon_to_thu, # Covers timestamp
wed_to_thu, # Starts at timestamp
tue_to_sun # Covers timestamp
)
# Notable excluded shift: tue_to_wed (Ends at timestamp)
end
end
end
......
......@@ -55,6 +55,7 @@ RSpec.describe Project do
it { is_expected.to have_many(:approval_rules) }
it { is_expected.to have_many(:incident_management_oncall_schedules).class_name('IncidentManagement::OncallSchedule') }
it { is_expected.to have_many(:incident_management_oncall_rotations).through(:incident_management_oncall_schedules).source(:rotations) }
describe 'approval_rules association' do
let_it_be(:rule, reload: true) { create(:approval_project_rule) }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe AlertManagement::ProcessPrometheusAlertService do
let_it_be(:project, refind: true) { create(:project) }
before do
allow(ProjectServiceWorker).to receive(:perform_async)
end
describe '#execute' do
let(:service) { described_class.new(project, payload) }
subject(:execute) { service.execute }
context 'when alert payload is valid' do
let(:parsed_payload) { Gitlab::AlertManagement::Payload.parse(project, payload, monitoring_tool: 'Prometheus') }
let(:fingerprint) { parsed_payload.gitlab_fingerprint }
let(:payload) do
{
'status' => 'firing',
'labels' => { 'alertname' => 'GitalyFileServerDown' },
'annotations' => { 'title' => 'Alert title' },
'startsAt' => '2020-04-27T10:10:22.265949279Z',
'generatorURL' => 'http://8d467bd4607a:9090/graph?g0.expr=vector%281%29&g0.tab=1'
}
end
context 'with on-call schedule' do
let_it_be(:schedule) { create(:incident_management_oncall_schedule, project: project) }
let_it_be(:rotation) { create(:incident_management_oncall_rotation, schedule: schedule) }
let_it_be(:participant) { create(:incident_management_oncall_participant, :with_developer_access, rotation: rotation) }
let(:notification_args) do
[
[participant.user],
having_attributes(class: AlertManagement::Alert, fingerprint: fingerprint)
]
end
it_behaves_like 'Alert Notification Service sends notification email to on-call users'
end
end
end
end
......@@ -862,4 +862,18 @@ RSpec.describe EE::NotificationService, :mailer do
end
end
end
context 'IncidentManagement::Oncall' do
describe '#notify_oncall_users_of_alert' do
let_it_be(:user) { create(:user) }
let_it_be(:alert) { create(:alert_management_alert) }
let_it_be(:project) { alert.project }
it 'sends an email to the specified users' do
expect(Notify).to receive(:prometheus_alert_fired_email).with(project, user, alert).and_call_original
subject.notify_oncall_users_of_alert([user], alert)
end
end
end
end
......@@ -6,6 +6,7 @@ RSpec.describe Projects::Alerting::NotifyService do
let_it_be(:project, refind: true) { create(:project) }
describe '#execute' do
let_it_be(:integration) { create(:alert_management_http_integration, project: project) }
let(:service) { described_class.new(project, payload) }
let(:token) { integration.token }
let(:payload) do
......@@ -14,8 +15,6 @@ RSpec.describe Projects::Alerting::NotifyService do
}
end
let(:integration) { create(:alert_management_http_integration, project: project) }
subject { service.execute(token, integration) }
context 'existing alert with same payload fingerprint' do
......@@ -64,5 +63,19 @@ RSpec.describe Projects::Alerting::NotifyService do
end
end
end
context 'with on-call schedules' do
let_it_be(:schedule) { create(:incident_management_oncall_schedule, project: project) }
let_it_be(:rotation) { create(:incident_management_oncall_rotation, schedule: schedule) }
let_it_be(:participant) { create(:incident_management_oncall_participant, :with_developer_access, rotation: rotation) }
let(:notification_args) do
[
[participant.user],
having_attributes(class: AlertManagement::Alert, title: payload['title'])
]
end
it_behaves_like 'Alert Notification Service sends notification email to on-call users'
end
end
end
# frozen_string_literal: true
RSpec.shared_examples 'Alert Notification Service sends notification email to on-call users' do
let(:notification_service) { instance_double(NotificationService) }
context 'with oncall schedules enabled' do
before do
stub_licensed_features(oncall_schedules: project)
end
it 'sends a notification' do
expect(NotificationService).to receive(:new).and_return(notification_service)
expect(notification_service)
.to receive_message_chain(:async, :notify_oncall_users_of_alert)
.with(*notification_args)
expect(subject).to be_success
end
end
context 'with oncall schedules disabled' do
it 'does not notify the on-call users' do
expect(NotificationService).not_to receive(:new)
expect(subject).to be_success
end
end
end
......@@ -561,6 +561,7 @@ project:
- alert_management_http_integrations
- exported_protected_branches
- incident_management_oncall_schedules
- incident_management_oncall_rotations
- debian_distributions
- merge_request_metrics
award_emoji:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment