Commit bf33d5e5 authored by Adam Hegyi

Merge branch 'vsa-with-in-optimization' into 'master'

Use in-operator optimization in VSA

See merge request gitlab-org/gitlab!77763
parents 849528be 7b46faed
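
This change moves Value Stream Analytics (VSA) aggregated queries onto GitLab's keyset-based in-operator optimization, which keeps ORDER BY ... LIMIT queries over a large IN (...) id set efficient by looking up rows per array item and merging them in keyset order instead of sorting the whole combined result. The sketch below is only an illustration of the general, documented usage pattern of Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder; the Issue/Group scopes, the group id and the limit are assumptions, not code from this merge request.

# Illustrative usage pattern only; Issue, Group and the ids are assumptions.
scope = Issue.order(:created_at, :id)

records = Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
  scope: scope,                                           # keyset-ordered scope
  array_scope: Group.find(9970).all_projects.select(:id), # ids to iterate over
  array_mapping_scope: -> (id_expression) { Issue.where(Issue.arel_table[:project_id].eq(id_expression)) },
  finder_query: -> (_created_at_expression, id_expression) { Issue.where(Issue.arel_table[:id].eq(id_expression)) }
).execute.limit(20)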
# frozen_string_literal: true
module Analytics
module CycleAnalytics
module StageEventModel
......@@ -16,12 +15,39 @@ module Analytics
scope :authored, ->(user) { where(author_id: user) }
scope :with_milestone_id, ->(milestone_id) { where(milestone_id: milestone_id) }
scope :end_event_is_not_happened_yet, -> { where(end_event_timestamp: nil) }
scope :order_by_end_event, -> (direction) do
# ORDER BY end_event_timestamp, merge_request_id/issue_id, start_event_timestamp
# start_event_timestamp must be included in the ORDER BY clause for the duration
# calculation to work: SELECT end_event_timestamp - start_event_timestamp
keyset_order(
:end_event_timestamp => { order_expression: arel_order(arel_table[:end_event_timestamp], direction), distinct: false },
issuable_id_column => { order_expression: arel_order(arel_table[issuable_id_column], direction), distinct: true },
:start_event_timestamp => { order_expression: arel_order(arel_table[:start_event_timestamp], direction), distinct: false }
)
end
scope :order_by_duration, -> (direction) do
# ORDER BY EXTRACT('epoch', end_event_timestamp - start_event_timestamp)
duration = Arel::Nodes::Subtraction.new(
arel_table[:end_event_timestamp],
arel_table[:start_event_timestamp]
)
duration_in_seconds = Arel::Nodes::Extract.new(duration, :epoch)
keyset_order(
:total_time => { order_expression: arel_order(duration_in_seconds, direction), distinct: false, sql_type: 'double precision' },
issuable_id_column => { order_expression: arel_order(arel_table[issuable_id_column], direction), distinct: true }
)
end
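# Illustrative only: for the issue-based stage event model these scopes order
# roughly as (direction shown as :desc, not generated verbatim):
#
#   ORDER BY end_event_timestamp DESC, issue_id DESC, start_event_timestamp DESC
#   ORDER BY EXTRACT(EPOCH FROM (end_event_timestamp - start_event_timestamp)) DESC, issue_id DESC
#
# Because the order is built through Gitlab::Pagination::Keyset::Order, the
# scopes remain compatible with keyset pagination and with
# InOperatorOptimization::QueryBuilder, e.g. (hypothetical call)
# Analytics::CycleAnalytics::IssueStageEvent.order_by_duration(:desc).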
end
def issuable_id
attributes[self.class.issuable_id_column.to_s]
end
def total_time
read_attribute(:total_time) || (end_event_timestamp - start_event_timestamp).to_f
end
class_methods do
def upsert_data(data)
upsert_values = data.map do |row|
......@@ -68,6 +94,18 @@ module Analytics
result = connection.execute(query)
result.cmd_tuples
end
def keyset_order(column_definition_options)
built_definitions = column_definition_options.map do |attribute_name, column_options|
::Gitlab::Pagination::Keyset::ColumnOrderDefinition.new(attribute_name: attribute_name, **column_options)
end
order(Gitlab::Pagination::Keyset::Order.build(built_definitions))
end
def arel_order(arel_node, direction)
direction.to_sym == :desc ? arel_node.desc : arel_node.asc
end
end
end
end
......
......@@ -8,8 +8,31 @@ module EE::Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder
filter_by_project_ids(super)
end
# rubocop: disable CodeReuse/ActiveRecord
def build_sorted_query
return super unless stage.parent.instance_of?(Group)
::Gitlab::Pagination::Keyset::InOperatorOptimization::QueryBuilder.new(
scope: super.unscope(where: [:project_id, :group_id]), # unscope the project_id and group_id filters because the in-operator optimization applies them itself.
array_scope: in_optimization_array_scope,
array_mapping_scope: method(:in_optimization_array_mapping_scope)
).execute
end
# rubocop: enable CodeReuse/ActiveRecord
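# Conceptual wiring, not the generated SQL: the keyset-ordered scope from
# `super` provides the ORDER BY; array_scope yields the ids to iterate over
# (the selected project ids, or the group's self_and_descendant_ids); and
# array_mapping_scope tells the optimization how a single array id selects
# stage event rows (project_id = id or group_id = id). The optimization then
# walks the array values and merges their rows in keyset order instead of
# sorting one large IN (...) result set.
#
# Hypothetical call site (stage and params are assumptions):
#
#   Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder
#     .new(stage: stage, params: { sort: :duration, direction: :desc })
#     .build_sorted_query
#     .limit(20)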
private
def in_optimization_array_scope
projects_filter_present? ? project_ids : stage.parent.self_and_descendant_ids.reselect(:id)
end
# rubocop: disable CodeReuse/ActiveRecord
def in_optimization_array_mapping_scope(id_expression)
issuable_id_column = projects_filter_present? ? :project_id : :group_id
stage_event_model.where(stage_event_model.arel_table[issuable_id_column].eq(id_expression))
end
# rubocop: enable CodeReuse/ActiveRecord
override :filter_by_stage_parent
def filter_by_stage_parent(query)
return super unless stage.parent.instance_of?(Group)
......@@ -19,15 +42,19 @@ module EE::Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder
def filter_by_project_ids(query)
return query unless stage.parent.instance_of?(Group)
return query if params[:project_ids].blank?
return query unless projects_filter_present?
query.by_project_id(project_ids)
end
project_ids = Project
def project_ids
@project_ids ||= Project
.id_in(params[:project_ids])
.in_namespace(stage.parent.self_and_descendant_ids)
.select(:id)
end
return query if project_ids.empty?
query.by_project_id(project_ids)
def projects_filter_present?
Array(params[:project_ids]).any?
end
end
......@@ -24,6 +24,8 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder d
stage_event_hash_id: stage.stage_event_hash_id,
group_id: sub_group.id,
project_id: project_1.id,
start_event_timestamp: 4.weeks.ago,
end_event_timestamp: 1.week.ago,
issue_id: 1
)
end
......@@ -33,6 +35,8 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder d
stage_event_hash_id: stage.stage_event_hash_id,
group_id: sub_group.id,
project_id: project_2.id,
start_event_timestamp: 2.weeks.ago,
end_event_timestamp: 1.week.ago,
issue_id: 2
)
end
......@@ -49,11 +53,15 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder d
let(:params) do
{
from: 1.year.ago.to_date,
to: Date.today
to: Date.today,
sort: :end_event_timestamp,
direction: :desc
}
end
subject(:issue_ids) { described_class.new(stage: stage, params: params).build.pluck(:issue_id) }
let(:query_builder) { described_class.new(stage: stage, params: params) }
subject(:issue_ids) { query_builder.build.pluck(:issue_id) }
it 'looks up items within the group hierarchy' do
expect(issue_ids).to eq([stage_event_1.issue_id, stage_event_2.issue_id])
......@@ -65,4 +73,24 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder d
expect(issue_ids).to eq([stage_event_1.issue_id])
end
describe '#build_sorted_query' do
subject(:issue_ids) { query_builder.build_sorted_query.pluck(:issue_id) }
it 'returns the items in order (by end_event)' do
expect(issue_ids).to eq([stage_event_2.issue_id, stage_event_1.issue_id])
end
it 'returns the items in order (by duration)' do
params[:sort] = :duration
expect(issue_ids).to eq([stage_event_1.issue_id, stage_event_2.issue_id])
end
it 'handles the project_ids filter' do
params[:project_ids] = [project_1.id]
expect(issue_ids).to eq([stage_event_1.issue_id])
end
end
end
......@@ -121,6 +121,11 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::DataCollector do
iids = data_collector.serialized_records.map { |record| record[:iid].to_i }
expect(iids).to eq(expected_ordered_iids)
if aggregated_data_collector_enabled
iids = aggregated_data_collector.serialized_records.map { |record| record[:iid].to_i }
expect(iids).to eq(expected_ordered_iids)
end
end
end
......
......@@ -228,17 +228,34 @@ RSpec.shared_examples 'Value Stream Analytics Stages controller' do
it 'accepts sort params' do
if Feature.enabled?(:use_vsa_aggregated_tables)
event_1 = create(:cycle_analytics_merge_request_stage_event, merge_request_id: 1, start_event_timestamp: 4.months.ago, end_event_timestamp: Date.today)
event_2 = create(:cycle_analytics_merge_request_stage_event, merge_request_id: 2, start_event_timestamp: 2.months.ago, end_event_timestamp: Date.today)
event_3 = create(:cycle_analytics_merge_request_stage_event, merge_request_id: 3, start_event_timestamp: 3.months.ago, end_event_timestamp: Date.today)
allow_any_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher).to receive(:query).and_return(Analytics::CycleAnalytics::MergeRequestStageEvent.all)
travel_to DateTime.new(2019, 1, 5) do
event_1 = create(:cycle_analytics_merge_request_stage_event,
stage_event_hash_id: stage.stage_event_hash_id,
group_id: stage.group.id,
merge_request_id: 1,
start_event_timestamp: Time.current,
end_event_timestamp: 20.days.from_now)
event_2 = create(:cycle_analytics_merge_request_stage_event,
stage_event_hash_id: stage.stage_event_hash_id,
group_id: stage.group.id,
merge_request_id: 2,
start_event_timestamp: Time.current,
end_event_timestamp: 1.day.from_now)
event_3 = create(:cycle_analytics_merge_request_stage_event,
stage_event_hash_id: stage.stage_event_hash_id,
group_id: stage.group.id,
merge_request_id: 3,
start_event_timestamp: Time.current,
end_event_timestamp: 3.days.from_now)
expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher) do |records_fetcher|
records_fetcher.serialized_records do |raw_active_record_scope|
expect(raw_active_record_scope.pluck(:merge_request_id)).to eq([event_2.merge_request_id, event_3.merge_request_id, event_1.merge_request_id])
end
end
end
else
expect_next_instance_of(Gitlab::Analytics::CycleAnalytics::Sorting) do |sort|
expect(sort).to receive(:apply).with(:duration, :asc).and_call_original
......
......@@ -37,6 +37,16 @@ module Gitlab
filter_assignees(query)
end
def build_sorted_query
direction = params[:direction] || :desc
if params[:sort] == :duration
build.order_by_duration(direction)
else
build.order_by_end_event(direction)
end
end
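# For example (parameter values are illustrative):
#   { sort: :duration, direction: :asc } -> build.order_by_duration(:asc)
#   { }                                  -> build.order_by_end_event(:desc)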
def filter_author(query)
return query if params[:author_username].blank?
......
......@@ -32,7 +32,7 @@ module Gitlab
def records_fetcher
strong_memoize(:records_fetcher) do
RecordsFetcher.new(stage: stage, query: query, params: params)
RecordsFetcher.new(stage: stage, query: query_builder.build_sorted_query, params: params)
end
end
......@@ -41,7 +41,11 @@ module Gitlab
attr_reader :stage, :params
def query
BaseQueryBuilder.new(stage: stage, params: params).build
query_builder.build
end
def query_builder
@query_builder = BaseQueryBuilder.new(stage: stage, params: params)
end
def limit_count
......
......@@ -36,7 +36,13 @@ module Gitlab
def serialized_records
strong_memoize(:serialized_records) do
records = ordered_and_limited_query.select(stage_event_model.arel_table[Arel.star], duration_in_seconds.as('total_time'))
# When RecordsFetcher is used with a query sourced from
# InOperatorOptimization::QueryBuilder, only the columns used in the
# ORDER BY statement are selected by the Arel.star operation.
selections = [stage_event_model.arel_table[Arel.star]]
selections << duration_in_seconds.as('total_time') if params[:sort] != :duration # duration sorting already exposes this data
records = limited_query.select(*selections)
yield records if block_given?
issuables_and_records = load_issuables(records)
......@@ -56,27 +62,12 @@ module Gitlab
end
end
# rubocop: disable CodeReuse/ActiveRecord
def ordered_and_limited_query
sorting_options = {
end_event: {
asc: -> { query.order(end_event_timestamp: :asc) },
desc: -> { query.order(end_event_timestamp: :desc) }
},
duration: {
asc: -> { query.order(duration.asc) },
desc: -> { query.order(duration.desc) }
}
}
sort_lambda = sorting_options.dig(sort, direction) || sorting_options.dig(:end_event, :desc)
sort_lambda.call
def limited_query
query
.page(page)
.per(per_page)
.without_count
end
# rubocop: enable CodeReuse/ActiveRecord
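# Ordering now comes from the query passed in (built via
# BaseQueryBuilder#build_sorted_query), so this method only applies offset
# pagination via Kaminari. With page = 2 and per_page = 20 (illustrative
# values) it adds roughly LIMIT 20 OFFSET 20, and without_count skips the
# usual COUNT(*) query.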
private
......
......@@ -62,7 +62,11 @@ module Gitlab
attr_reader :stage, :params
def query
BaseQueryBuilder.new(stage: stage, params: params).build
query_builder.build
end
def query_builder
@query_builder ||= BaseQueryBuilder.new(stage: stage, params: params)
end
# Limiting the maximum number of records so the COUNT(*) query stays efficient for large groups.
......
......@@ -8,16 +8,17 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher do
let_it_be(:issue_2) { create(:issue, project: project) }
let_it_be(:issue_3) { create(:issue, project: project) }
let_it_be(:stage_event_1) { create(:cycle_analytics_issue_stage_event, issue_id: issue_1.id, start_event_timestamp: 2.years.ago, end_event_timestamp: 1.year.ago) } # duration: 1 year
let_it_be(:stage_event_2) { create(:cycle_analytics_issue_stage_event, issue_id: issue_2.id, start_event_timestamp: 5.years.ago, end_event_timestamp: 2.years.ago) } # duration: 3 years
let_it_be(:stage_event_3) { create(:cycle_analytics_issue_stage_event, issue_id: issue_3.id, start_event_timestamp: 6.years.ago, end_event_timestamp: 3.months.ago) } # duration: 5+ years
let_it_be(:stage) { create(:cycle_analytics_project_stage, start_event_identifier: :issue_created, end_event_identifier: :issue_deployed_to_production, project: project) }
let(:params) { {} }
let_it_be(:stage_event_1) { create(:cycle_analytics_issue_stage_event, stage_event_hash_id: stage.stage_event_hash_id, project_id: project.id, issue_id: issue_1.id, start_event_timestamp: 2.years.ago, end_event_timestamp: 1.year.ago) } # duration: 1 year
let_it_be(:stage_event_2) { create(:cycle_analytics_issue_stage_event, stage_event_hash_id: stage.stage_event_hash_id, project_id: project.id, issue_id: issue_2.id, start_event_timestamp: 5.years.ago, end_event_timestamp: 2.years.ago) } # duration: 3 years
let_it_be(:stage_event_3) { create(:cycle_analytics_issue_stage_event, stage_event_hash_id: stage.stage_event_hash_id, project_id: project.id, issue_id: issue_3.id, start_event_timestamp: 6.years.ago, end_event_timestamp: 3.months.ago) } # duration: 5+ years
let(:params) { { from: 10.years.ago, to: Date.today } }
subject(:records_fetcher) do
described_class.new(stage: stage, query: Analytics::CycleAnalytics::IssueStageEvent.all, params: params)
query_builder = Gitlab::Analytics::CycleAnalytics::Aggregated::BaseQueryBuilder.new(stage: stage, params: params)
described_class.new(stage: stage, query: query_builder.build_sorted_query, params: params)
end
shared_examples 'match returned records' do
......@@ -123,6 +124,8 @@ RSpec.describe Gitlab::Analytics::CycleAnalytics::Aggregated::RecordsFetcher do
it 'skips non-existing issue records' do
create(:cycle_analytics_issue_stage_event, {
issue_id: 0, # non-existing id
stage_event_hash_id: stage.stage_event_hash_id,
project_id: project.id,
start_event_timestamp: 5.months.ago,
end_event_timestamp: 3.months.ago
})
......
......@@ -178,4 +178,22 @@ RSpec.shared_examples 'StageEventModel' do
end
end
end
describe '#total_time' do
it 'calculates total time from the start_event_timestamp and end_event_timestamp columns' do
model = described_class.new(start_event_timestamp: Time.new(2022, 1, 1, 12, 5, 0), end_event_timestamp: Time.new(2022, 1, 1, 12, 6, 30))
expect(model.total_time).to eq(90)
end
context 'when total time is calculated in SQL as an extra column' do
it 'returns the SQL calculated time' do
create(stage_event_factory) # rubocop:disable Rails/SaveBang
model = described_class.select('*, 5 AS total_time').first
expect(model.total_time).to eq(5)
end
end
end
end