Commit 1956550b authored by Alina Mihaila's avatar Alina Mihaila Committed by Adam Hegyi

Update according to review

Use user min and max as start and finish for batch counting
Rename method to actual
parent 1d5a6efb
---
title: Optimize service desk enabled projects counter
merge_request: 27589
author:
type: performance
# frozen_string_literal: true
class AddIndexOnIdCreatorIdAndCreatedAtToProjectsTable < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_service_desk_enabled_projects_on_id_creator_id_created_at'
disable_ddl_transaction!
def up
add_concurrent_index :projects, [:id, :creator_id, :created_at], where: '"projects"."service_desk_enabled" = TRUE', name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :projects, INDEX_NAME
end
end
......@@ -9924,6 +9924,8 @@ CREATE INDEX index_serverless_domain_cluster_on_creator_id ON public.serverless_
CREATE INDEX index_serverless_domain_cluster_on_pages_domain_id ON public.serverless_domain_cluster USING btree (pages_domain_id);
CREATE INDEX index_service_desk_enabled_projects_on_id_creator_id_created_at ON public.projects USING btree (id, creator_id, created_at) WHERE (service_desk_enabled = true);
CREATE INDEX index_services_on_project_id_and_type ON public.services USING btree (project_id, type);
CREATE INDEX index_services_on_template ON public.services USING btree (template);
......@@ -12845,6 +12847,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200323080714
20200323122201
20200323134519
20200324093258
20200324115359
20200325160952
20200325183636
......
......@@ -281,7 +281,7 @@ module EE
projects_jira_active: distinct_count(::Project.with_active_jira_services.where(time_period), :creator_id),
projects_jira_dvcs_cloud_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_cloud.where(time_period), :creator_id),
projects_jira_dvcs_server_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_server.where(time_period), :creator_id),
service_desk_enabled_projects: distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), :creator_id),
service_desk_enabled_projects: distinct_count_service_desk_enabled_projects(time_period),
service_desk_issues: count(::Issue.service_desk.where(time_period)),
todos: distinct_count(::Todo.where(time_period), :author_id)
}
......@@ -335,6 +335,16 @@ module EE
results
end
private
def distinct_count_service_desk_enabled_projects(time_period)
project_creator_id_start = ::User.minimum(:id)
project_creator_id_finish = ::User.maximum(:id)
distinct_count(::Project.service_desk_enabled.where(time_period), :creator_id, start: project_creator_id_start, finish: project_creator_id_finish)
end
# rubocop:enable CodeReuse/ActiveRecord
end
end
......
......@@ -4,12 +4,18 @@
# Implements a distinct and ordinary batch counter
# Needs indexes on the column below to calculate max, min and range queries
# For larger tables just set use higher batch_size with index optimization
#
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
module Gitlab
module Database
module BatchCount
......@@ -17,8 +23,8 @@ module Gitlab
BatchCounter.new(relation, column: column).count(batch_size: batch_size)
end
def batch_distinct_count(relation, column = nil, batch_size: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size)
def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
end
class << self
......@@ -31,9 +37,10 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
# Each query should take <<500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000
DEFAULT_BATCH_SIZE = 100_000
# Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 100_000
DEFAULT_BATCH_SIZE = 10_000
def initialize(relation, column: nil)
@relation = relation
......@@ -46,15 +53,15 @@ module Gitlab
start > finish
end
def count(batch_size: nil, mode: :itself)
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
# non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
start = @relation.minimum(@column) || 0
finish = @relation.maximum(@column) || 0
start = actual_start(start)
finish = actual_finish(finish)
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
......@@ -84,6 +91,16 @@ module Gitlab
# rubocop:disable GitlabSecurity/PublicSend
@relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count
end
private
def actual_start(start)
start || @relation.minimum(@column) || 0
end
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
end
end
end
end
......@@ -240,9 +240,9 @@ module Gitlab
fallback
end
def distinct_count(relation, column = nil, fallback: -1, batch: true)
def distinct_count(relation, column = nil, fallback: -1, batch: true, start: nil, finish: nil)
if batch && Feature.enabled?(:usage_ping_batch_counter, default_enabled: true)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column, start: start, finish: finish)
else
relation.distinct_count_by(column)
end
......
......@@ -90,5 +90,13 @@ describe Gitlab::Database::BatchCount do
[1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) }
end
it 'counts with a start and finish' do
expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2)
end
it 'counts with User min and max as start and finish' do
expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment