Commit e280a837 authored by Andreas Brandl's avatar Andreas Brandl

Merge branch '12079-productivity-data-migration' into 'master'

Add data migration for productivity analytics

Closes #32049

See merge request gitlab-org/gitlab!15137
parents 406b0ea8 cbf35906
---
title: Schedule productivity analytics recalculation for EE
merge_request: 15137
author:
type: other
# frozen_string_literal: true
class ScheduleProductivityAnalyticsBackfill < ActiveRecord::Migration[5.2]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
BATCH_SIZE = 10_000
INTERVAL = 3.minutes
MIGRATION = 'Gitlab::BackgroundMigration::RecalculateProductivityAnalytics'.freeze
disable_ddl_transaction!
def up
return unless Gitlab.ee?
metrics_model = Class.new(ActiveRecord::Base) do
self.table_name = 'merge_request_metrics'
include ::EachBatch
end
scope = metrics_model.where("merged_at >= ?", 3.months.ago)
queue_background_migration_jobs_by_range_at_intervals(scope, MIGRATION, INTERVAL, batch_size: BATCH_SIZE)
end
def down
# no-op
end
end
# frozen_string_literal: true
module Gitlab
module BackgroundMigration
# Execution time estimates: 55 records per second,
# with 3 month period it will be 3.2mln records affected
# and with 320 batches it will be ~16h total execution time
class RecalculateProductivityAnalytics
BATCH_SIZE = 1_000
METRICS_TO_CALCULATE = %w[first_comment_at first_commit_at last_commit_at diff_size commits_count modified_paths_size].freeze
module Migratable
class Metrics < ActiveRecord::Base
include EachBatch
belongs_to :merge_request, class_name: 'Migratable::MergeRequest', foreign_key: :merge_request_id, inverse_of: :metrics
self.table_name = 'merge_request_metrics'
end
class MergeRequest < ActiveRecord::Base
self.table_name = 'merge_requests'
has_many :diffs, class_name: 'Migratable::MergeRequestDiff', foreign_key: :merge_request_id
has_many :user_notes, -> { where(noteable_type: 'MergeRequest').where.not(author_id: User.bots) }, class_name: 'Migratable::Note', foreign_key: :noteable_id
has_one :metrics, class_name: 'Migratable::Metrics', foreign_key: :merge_request_id, inverse_of: :merge_request
attr_writer :diff, :first_user_note
def first_user_note
@first_user_note ||= user_notes.order(created_at: :asc).first
end
def diff
@diff ||= diffs.order(id: :desc).includes(:files).first # max_by(&:id)
end
end
class Note < ActiveRecord::Base
self.table_name = 'notes'
end
class User < ActiveRecord::Base
self.table_name = 'users'
def self.bots
@bots ||= where.not(bot_type: nil).select(:id).to_a
end
end
class MergeRequestDiff < ActiveRecord::Base
self.table_name = 'merge_request_diffs'
has_many :commits, class_name: 'Migratable::MergeRequestDiffCommit', foreign_key: :merge_request_diff_id
has_many :files, class_name: 'Migratable::MergeRequestDiffFile', foreign_key: :merge_request_diff_id
attr_writer :first_commit, :last_commit
def first_commit
@first_commit ||= commits.order(relative_order: :desc).first
end
def last_commit
@last_commit ||= commits.order(relative_order: :desc).last
end
def lines_count
@lines_count ||= Gitlab::Git::DiffCollection.new(files.map(&:to_hash), limits: false).sum(&:line_count)
end
def modified_paths
@modified_paths ||= files.map { |f| [f.new_path, f.old_path] }.flatten.uniq
end
end
class MergeRequestDiffCommit < ActiveRecord::Base
self.table_name = 'merge_request_diff_commits'
end
class MergeRequestDiffFile < ActiveRecord::Base
include DiffFile
self.table_name = 'merge_request_diff_files'
end
end
def perform(start_id, end_id)
Migratable::Metrics.where("merged_at > ? ", 3.months.ago - 1.day)
.where(id: start_id...end_id).each_batch(of: BATCH_SIZE) do |batch|
ActiveRecord::Base.transaction do
preload(batch).each do |merge_request_metrics|
update_merge_request_metrics(merge_request_metrics)
end
end
end
end
private
def update_merge_request_metrics(metrics)
merge_request = metrics.merge_request
diff = merge_request.diff
return unless diff
metrics.first_comment_at = merge_request.first_user_note&.created_at
metrics.first_commit_at = diff.first_commit&.authored_date
metrics.last_commit_at = diff.last_commit&.committed_date
metrics.commits_count = diff.commits_count
metrics.diff_size = diff.lines_count
metrics.modified_paths_size = diff.modified_paths.size
metrics.save!
end
def preload(metrics_batch)
metrics_batch.includes(:merge_request).tap do |scope|
preload_diffs(scope)
preload_notes(scope)
end
end
def preload_notes(scope)
first_user_notes_ids = Migratable::Note
.where(noteable_id: scope.map(&:merge_request_id), noteable_type: 'MergeRequest')
.where.not(author_id: Migratable::User.bots).group(:noteable_id).pluck(Arel.sql('noteable_id, MIN(id)')).to_h
notes = Migratable::Note.where(id: first_user_notes_ids.values)
scope.each do |metric|
first_note_id = first_user_notes_ids[metric.merge_request_id]
metric.merge_request.first_user_note = notes.detect { |note| note.id == first_note_id}
end
end
def preload_diffs(scope)
last_diffs_ids = Migratable::MergeRequestDiff
.where(merge_request_id: scope.map(&:merge_request_id))
.group(:merge_request_id)
.pluck(Arel.sql('merge_request_id, MAX(id)')).to_h
last_diffs = Migratable::MergeRequestDiff.where(id: last_diffs_ids.values).includes(:files)
preload_commits(last_diffs)
scope.each do |metric|
diff_id = last_diffs_ids[metric.merge_request.id]
next unless diff_id
diff = last_diffs.detect { |d| d.id == diff_id }
metric.merge_request.diff = diff
end
end
def preload_commits(scope)
commits_map = Migratable::MergeRequestDiffCommit.where(merge_request_diff_id: scope.map(&:id))
.group(:merge_request_diff_id)
.pluck(Arel.sql('merge_request_diff_id, MIN(relative_order), MAX(relative_order)'))
commits_cond = Arel.sql(commits_map.map do |info|
"(merge_request_diff_id = #{info[0]} AND relative_order IN(#{info[1]}, #{info[2]}))"
end.join(' OR '))
commits = Migratable::MergeRequestDiffCommit.where(commits_cond)
scope.each do |diff|
related_commits = commits.select { |c| c.merge_request_diff_id == diff.id }
diff.first_commit = related_commits.max_by(&:relative_order)
diff.last_commit = related_commits.min_by(&:relative_order)
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::BackgroundMigration::RecalculateProductivityAnalytics, migration: true, schema: 20190802012622 do
include MigrationHelpers::RecalculateProductivityAnalyticsHelpers
let(:background_migration) { described_class.new }
describe '#perform' do
let(:merged_at_after) { 6.weeks.ago }
subject { background_migration.perform(*id_boundaries) }
let(:id_boundaries) do
[merged_mr.id, open_mr.id].minmax
end
let(:merged_mr) { create_populated_mr(user, project, metrics: { merged_at: merged_at_after + 1.week }) }
let(:open_mr) { create_populated_mr(user, project) }
let(:user) do
table(:users).create!(
email: 'sample_user@user.com',
projects_limit: 10,
name: 'Test User',
username: 'sample_user'
)
end
let(:bot) do
table(:users).create!(
email: 'bot@bot.com',
projects_limit: 10,
name: 'Test Bot',
username: 'sample_bot',
bot_type: 1 # support bot
)
end
let(:group) { table(:namespaces).create!(path: 'test_group', name: 'test_group') }
let(:project) do
table(:projects).create!(name: 'test project', path: 'test_project', namespace_id: group.id, creator_id: user.id)
end
it 'updates productivity metrics for merged MRs' do
Timecop.freeze(Time.zone.now.change(nsec: 0)) do
merged_mr
expect { subject }
.to change {
table(:merge_request_metrics).find_by(merge_request_id: merged_mr.id).attributes.slice(*described_class::METRICS_TO_CALCULATE)
}.to({ "commits_count" => 2, "diff_size" => 20, "first_comment_at" => 4.weeks.ago + 1.day, "first_commit_at" => 4.weeks.ago, "last_commit_at" => 1.week.ago, "modified_paths_size" => 2 })
end
end
it 'does not update productivity metrics for open MR' do
open_mr
expect { subject }
.not_to change {
table(:merge_request_metrics).find_by(merge_request_id: open_mr.id).attributes.slice(*described_class::METRICS_TO_CALCULATE)
}
end
end
end
# frozen_string_literal: true
module MigrationHelpers
module RecalculateProductivityAnalyticsHelpers
def create_populated_mr(user, project, attributes: {}, metrics: {})
base = {
author_id: user.id,
source_project_id: project.id,
target_project_id: project.id,
source_branch: 'master',
target_branch: 'feature'
}
mr = table(:merge_requests).create!(base.merge(attributes))
table(:merge_request_metrics).create!(metrics.merge(merge_request_id: mr.id))
create_mr_activity(mr)
create_notes(mr)
mr
end
def create_notes(mr)
# 4 notes per MR
table(:notes).create!(note: 'sample note', noteable_type: 'MergeRequest', noteable_id: mr.id, author_id: user.id, created_at: 4.weeks.ago + 1.day)
table(:notes).create!(note: 'sample note 2', noteable_type: 'MergeRequest', noteable_id: mr.id, author_id: user.id, created_at: 4.weeks.ago + 2.days)
table(:notes).create!(note: 'system note', noteable_type: 'MergeRequest', noteable_id: mr.id, created_at: 4.weeks.ago + 3.days)
table(:notes).create!(note: 'bot note', noteable_type: 'MergeRequest', noteable_id: mr.id, author_id: bot.id, created_at: 4.weeks.ago + 2.days)
end
def create_mr_activity(mr)
diff = table(:merge_request_diffs).create!(merge_request_id: mr.id, commits_count: 2)
# 2 commits per MR
table(:merge_request_diff_commits).create!(sha: '456', merge_request_diff_id: diff.id, authored_date: 2.weeks.ago, committed_date: 1.week.ago, relative_order: 0)
table(:merge_request_diff_commits).create!(sha: '123', merge_request_diff_id: diff.id, authored_date: 4.weeks.ago, committed_date: 3.weeks.ago, relative_order: 1)
# 2 diff files per MR
base_diff_files_attributes = {
merge_request_diff_id: diff.id,
new_file: true,
renamed_file: false,
deleted_file: false,
too_large: false,
a_mode: '0',
b_mode: '160000',
diff: "@@ -1,6 +1,6 @@\n class Commit\n constructor: ->\n $('.files .diff-file').each ->\n- new CommitFile(this)\n+ new CommitFile(@)\n \n-@Commit = Commit\n+@Commit = Commit\n\\ No newline at end of file\n",
binary: false
}
table(:merge_request_diff_files).create!(
base_diff_files_attributes.merge(relative_order: 1, new_path: 'test_file', old_path: 'test_file')
)
table(:merge_request_diff_files).create!(
base_diff_files_attributes.merge(relative_order: 2, new_path: 'test_file_2', old_path: 'test_file_2')
)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment