Commit c91abc0d authored by Adam Hegyi's avatar Adam Hegyi

BG migration for populating stage event hash

This change adds a BG migration to backfill the stage_event_hash_id column
for the Value Stream Analytics group and project stages.

Changelog: added
parent 17389a8b
# frozen_string_literal: true
# Populates +stage_event_hash_id+ on the Value Stream Analytics group and
# project stage tables and then adds a NOT NULL constraint on that column.
#
# For every stage a SHA256 digest is derived from its start and end events
# (including the label id for label-based events). One row per distinct digest
# is stored in +analytics_cycle_analytics_stage_event_hashes+, and each stage
# is pointed at the row matching its digest. Stages that reference an unknown
# event identifier are deleted.
class BackfillStageEventHash < ActiveRecord::Migration[6.1]
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  # Number of stage rows processed per batch; each batch runs in its own transaction.
  BATCH_SIZE = 100

  # Integer stored in start_event_identifier / end_event_identifier => event
  # name used as input for the digest.
  EVENT_ID_IDENTIFIER_MAPPING = {
    1 => :issue_created,
    2 => :issue_first_mentioned_in_commit,
    3 => :issue_closed,
    4 => :issue_first_added_to_board,
    5 => :issue_first_associated_with_milestone,
    7 => :issue_last_edited,
    8 => :issue_label_added,
    9 => :issue_label_removed,
    10 => :issue_deployed_to_production,
    100 => :merge_request_created,
    101 => :merge_request_first_deployed_to_production,
    102 => :merge_request_last_build_finished,
    103 => :merge_request_last_build_started,
    104 => :merge_request_merged,
    105 => :merge_request_closed,
    106 => :merge_request_last_edited,
    107 => :merge_request_label_added,
    108 => :merge_request_label_removed,
    109 => :merge_request_first_commit_at,
    1000 => :code_stage_start,
    1001 => :issue_stage_end,
    1002 => :plan_stage_start
  }.freeze

  # Event identifiers whose digest input also contains the associated label id.
  LABEL_BASED_EVENTS = Set.new([8, 9, 107, 108]).freeze

  # Migration-local model for group-level stages.
  class GroupStage < ActiveRecord::Base
    include EachBatch

    self.table_name = 'analytics_cycle_analytics_group_stages'
  end

  # Migration-local model for project-level stages.
  class ProjectStage < ActiveRecord::Base
    include EachBatch

    self.table_name = 'analytics_cycle_analytics_project_stages'
  end

  # Migration-local model for the shared stage event hash rows.
  class StageEventHash < ActiveRecord::Base
    self.table_name = 'analytics_cycle_analytics_stage_event_hashes'
  end

  # Backfills both stage tables (group stages first), then adds the NOT NULL
  # constraints.
  def up
    # NOTE(review): presumably needed so the models see a recently added
    # stage_event_hash_id column - confirm against the preceding migration.
    [GroupStage, ProjectStage, StageEventHash].each(&:reset_column_information)

    [GroupStage, ProjectStage].each { |stage_class| update_stage_table(stage_class) }

    constrained_tables.each { |table| add_not_null_constraint table, :stage_event_hash_id }
  end

  # Only drops the NOT NULL constraints; the backfilled values are left in place.
  def down
    constrained_tables.each { |table| remove_not_null_constraint table, :stage_event_hash_id }
  end

  private

  # Tables that receive (or lose) the NOT NULL constraint, in processing order.
  def constrained_tables
    %i[analytics_cycle_analytics_group_stages analytics_cycle_analytics_project_stages]
  end

  # Walks +klass+ in batches and fills in stage_event_hash_id for every stage
  # in the batch that does not have one yet.
  def update_stage_table(klass)
    klass.each_batch(of: BATCH_SIZE) do |relation|
      klass.transaction do
        # Row locks guard against concurrent modification (unlikely to happen).
        unprocessed = relation.where(stage_event_hash_id: nil).lock!.to_a
        stages = delete_invalid_records(unprocessed)

        assign_stage_event_hashes(stages) unless stages.empty?
      end
    end
  end

  # Makes sure a StageEventHash row exists for each distinct digest among
  # +stages+ and links every stage to the row carrying its digest.
  def assign_stage_event_hashes(stages)
    digest_by_stage = stages.to_h { |stage| [stage, calculate_stage_events_hash(stage)] }
    digests = digest_by_stage.values.uniq

    # NOTE(review): presumably relies on a unique index on hash_sha256 so that
    # digests inserted by an earlier batch are skipped - confirm in the schema.
    StageEventHash.insert_all(digests.map { |digest| { hash_sha256: digest } })
    row_by_digest = StageEventHash.where(hash_sha256: digests).index_by(&:hash_sha256)

    digest_by_stage.each do |stage, digest|
      stage.update!(stage_event_hash_id: row_by_digest[digest].id)
    end
  end

  # SHA256 hex digest of "<start event digest>-<end event digest>" for +stage+.
  def calculate_stage_events_hash(stage)
    start_digest = calculate_event_hash(stage.start_event_identifier, stage.start_event_label_id)
    end_digest = calculate_event_hash(stage.end_event_identifier, stage.end_event_label_id)

    Digest::SHA256.hexdigest([start_digest, end_digest].join('-'))
  end

  # SHA256 hex digest of the event name; for label-based events the input is
  # "<event name>-<label id>".
  #
  # Raises KeyError when +event_identifier+ is not in EVENT_ID_IDENTIFIER_MAPPING.
  def calculate_event_hash(event_identifier, label_id = nil)
    event_name = EVENT_ID_IDENTIFIER_MAPPING.fetch(event_identifier)
    labelled = LABEL_BASED_EVENTS.include?(event_identifier)
    digest_input = labelled ? "#{event_name}-#{label_id}" : event_name.to_s

    Digest::SHA256.hexdigest(digest_input)
  end

  # Deletes the records whose start or end event identifier is unknown and
  # returns the remaining ones in their original order.
  #
  # Invalid records are safe to delete, since they are not working properly anyway.
  def delete_invalid_records(records)
    invalid, valid = records.partition do |record|
      !(EVENT_ID_IDENTIFIER_MAPPING.key?(record.start_event_identifier) &&
        EVENT_ID_IDENTIFIER_MAPPING.key?(record.end_event_identifier))
    end

    invalid.each(&:delete)
    valid
  end
end
97d968bba0eb2bf6faa19de8a3e4fe93dc03a623b623dc802ab0fe0a4afb0370
\ No newline at end of file
......@@ -9102,7 +9102,8 @@ CREATE TABLE analytics_cycle_analytics_group_stages (
custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL,
group_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
stage_event_hash_id bigint,
CONSTRAINT check_e6bd4271b5 CHECK ((stage_event_hash_id IS NOT NULL))
);
CREATE SEQUENCE analytics_cycle_analytics_group_stages_id_seq
......@@ -9146,7 +9147,8 @@ CREATE TABLE analytics_cycle_analytics_project_stages (
custom boolean DEFAULT true NOT NULL,
name character varying(255) NOT NULL,
project_value_stream_id bigint NOT NULL,
stage_event_hash_id bigint
stage_event_hash_id bigint,
CONSTRAINT check_8f6019de1e CHECK ((stage_event_hash_id IS NOT NULL))
);
CREATE SEQUENCE analytics_cycle_analytics_project_stages_id_seq
# frozen_string_literal: true
require 'spec_helper'
require_migration!
# Migration spec for BackfillStageEventHash: checks that stage_event_hash_id is
# populated for group and project stages, that stages with identical events
# share one hash row, and that stages with an unknown event identifier are
# removed while valid ones are kept.
RSpec.describe BackfillStageEventHash, schema: 20210730103808 do
  let(:namespaces) { table(:namespaces) }
  let(:projects) { table(:projects) }
  let(:labels) { table(:labels) }
  let(:group_stages) { table(:analytics_cycle_analytics_group_stages) }
  let(:project_stages) { table(:analytics_cycle_analytics_project_stages) }
  let(:group_value_streams) { table(:analytics_cycle_analytics_group_value_streams) }
  let(:project_value_streams) { table(:analytics_cycle_analytics_project_value_streams) }
  let(:stage_event_hashes) { table(:analytics_cycle_analytics_stage_event_hashes) }

  # Event identifiers as stored in the stage tables (see the migration's
  # EVENT_ID_IDENTIFIER_MAPPING); -1 is deliberately absent from that mapping.
  let(:issue_created) { 1 }
  let(:issue_closed) { 3 }
  let(:issue_label_removed) { 9 }
  let(:unknown_stage_event) { -1 }

  let(:namespace) { namespaces.create!(name: 'ns', path: 'ns', type: 'Group') }
  let(:project) { projects.create!(name: 'project', path: 'project', namespace_id: namespace.id) }
  let(:group_label) { labels.create!(title: 'label', type: 'GroupLabel', group_id: namespace.id) }
  let(:group_value_stream) { group_value_streams.create!(name: 'group vs', group_id: namespace.id) }
  let(:project_value_stream) { project_value_streams.create!(name: 'project vs', project_id: project.id) }

  let(:group_stage_1) do
    group_stages.create!(
      name: 'stage 1',
      group_id: namespace.id,
      start_event_identifier: issue_created,
      end_event_identifier: issue_closed,
      group_value_stream_id: group_value_stream.id
    )
  end

  # Label-based end event, so its hash differs from group_stage_1's.
  let(:group_stage_2) do
    group_stages.create!(
      name: 'stage 2',
      group_id: namespace.id,
      start_event_identifier: issue_created,
      end_event_identifier: issue_label_removed,
      end_event_label_id: group_label.id,
      group_value_stream_id: group_value_stream.id
    )
  end

  # Same start/end events as group_stage_1.
  let(:project_stage_1) do
    project_stages.create!(
      name: 'stage 1',
      project_id: project.id,
      start_event_identifier: issue_created,
      end_event_identifier: issue_closed,
      project_value_stream_id: project_value_stream.id
    )
  end

  let(:invalid_group_stage) do
    group_stages.create!(
      name: 'stage 3',
      group_id: namespace.id,
      start_event_identifier: issue_created,
      end_event_identifier: unknown_stage_event,
      group_value_stream_id: group_value_stream.id
    )
  end

  describe '#up' do
    it 'populates stage_event_hash_id column' do
      group_stage_1
      group_stage_2
      project_stage_1

      migrate!

      group_stage_1.reload
      group_stage_2.reload
      project_stage_1.reload

      expect(group_stage_1.stage_event_hash_id).not_to be_nil
      expect(group_stage_2.stage_event_hash_id).not_to be_nil
      expect(project_stage_1.stage_event_hash_id).not_to be_nil

      expect(stage_event_hashes.count).to eq(2) # group_stage_1 and project_stage_1 have the same hash
    end

    it 'runs without problem without stages' do
      expect { migrate! }.not_to raise_error
    end

    context 'when invalid event identifier is discovered' do
      it 'removes the stage' do
        group_stage_1
        invalid_group_stage

        migrate!

        # Look the rows up again instead of comparing the memoized `let`
        # objects, which would be equal to themselves even if the row was gone.
        expect(group_stages.find_by_id(group_stage_1.id)).not_to be_nil
        expect(group_stages.find_by_id(invalid_group_stage.id)).to be_nil
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment