Commit 1956550b authored by Alina Mihaila's avatar Alina Mihaila Committed by Adam Hegyi

Update according to review

Use user min and max as start and finish for batch counting
Rename method to actual
parent 1d5a6efb
---
title: Optimize service desk enabled projects counter
merge_request: 27589
author:
type: performance
# frozen_string_literal: true
class AddIndexOnIdCreatorIdAndCreatedAtToProjectsTable < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
INDEX_NAME = 'index_service_desk_enabled_projects_on_id_creator_id_created_at'
disable_ddl_transaction!
def up
add_concurrent_index :projects, [:id, :creator_id, :created_at], where: '"projects"."service_desk_enabled" = TRUE', name: INDEX_NAME
end
def down
remove_concurrent_index_by_name :projects, INDEX_NAME
end
end
...@@ -9924,6 +9924,8 @@ CREATE INDEX index_serverless_domain_cluster_on_creator_id ON public.serverless_ ...@@ -9924,6 +9924,8 @@ CREATE INDEX index_serverless_domain_cluster_on_creator_id ON public.serverless_
CREATE INDEX index_serverless_domain_cluster_on_pages_domain_id ON public.serverless_domain_cluster USING btree (pages_domain_id); CREATE INDEX index_serverless_domain_cluster_on_pages_domain_id ON public.serverless_domain_cluster USING btree (pages_domain_id);
CREATE INDEX index_service_desk_enabled_projects_on_id_creator_id_created_at ON public.projects USING btree (id, creator_id, created_at) WHERE (service_desk_enabled = true);
CREATE INDEX index_services_on_project_id_and_type ON public.services USING btree (project_id, type); CREATE INDEX index_services_on_project_id_and_type ON public.services USING btree (project_id, type);
CREATE INDEX index_services_on_template ON public.services USING btree (template); CREATE INDEX index_services_on_template ON public.services USING btree (template);
...@@ -12845,6 +12847,7 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -12845,6 +12847,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200323080714 20200323080714
20200323122201 20200323122201
20200323134519 20200323134519
20200324093258
20200324115359 20200324115359
20200325160952 20200325160952
20200325183636 20200325183636
......
...@@ -281,7 +281,7 @@ module EE ...@@ -281,7 +281,7 @@ module EE
projects_jira_active: distinct_count(::Project.with_active_jira_services.where(time_period), :creator_id), projects_jira_active: distinct_count(::Project.with_active_jira_services.where(time_period), :creator_id),
projects_jira_dvcs_cloud_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_cloud.where(time_period), :creator_id), projects_jira_dvcs_cloud_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_cloud.where(time_period), :creator_id),
projects_jira_dvcs_server_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_server.where(time_period), :creator_id), projects_jira_dvcs_server_active: distinct_count(::Project.with_active_jira_services.with_jira_dvcs_server.where(time_period), :creator_id),
service_desk_enabled_projects: distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), :creator_id), service_desk_enabled_projects: distinct_count_service_desk_enabled_projects(time_period),
service_desk_issues: count(::Issue.service_desk.where(time_period)), service_desk_issues: count(::Issue.service_desk.where(time_period)),
todos: distinct_count(::Todo.where(time_period), :author_id) todos: distinct_count(::Todo.where(time_period), :author_id)
} }
...@@ -335,6 +335,16 @@ module EE ...@@ -335,6 +335,16 @@ module EE
results results
end end
private
def distinct_count_service_desk_enabled_projects(time_period)
project_creator_id_start = ::User.minimum(:id)
project_creator_id_finish = ::User.maximum(:id)
distinct_count(::Project.service_desk_enabled.where(time_period), :creator_id, start: project_creator_id_start, finish: project_creator_id_finish)
end
# rubocop:enable CodeReuse/ActiveRecord # rubocop:enable CodeReuse/ActiveRecord
end end
end end
......
...@@ -4,12 +4,18 @@ ...@@ -4,12 +4,18 @@
# Implements a distinct and ordinary batch counter # Implements a distinct and ordinary batch counter
# Needs indexes on the column below to calculate max, min and range queries # Needs indexes on the column below to calculate max, min and range queries
# For larger tables just set use higher batch_size with index optimization # For larger tables just set use higher batch_size with index optimization
#
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically
#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705 # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples: # Examples:
# extend ::Gitlab::Database::BatchCount # extend ::Gitlab::Database::BatchCount
# batch_count(User.active) # batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id) # batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
# batch_distinct_count(::Project, :creator_id) # batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
module Gitlab module Gitlab
module Database module Database
module BatchCount module BatchCount
...@@ -17,8 +23,8 @@ module Gitlab ...@@ -17,8 +23,8 @@ module Gitlab
BatchCounter.new(relation, column: column).count(batch_size: batch_size) BatchCounter.new(relation, column: column).count(batch_size: batch_size)
end end
def batch_distinct_count(relation, column = nil, batch_size: nil) def batch_distinct_count(relation, column = nil, batch_size: nil, start: nil, finish: nil)
BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size) BatchCounter.new(relation, column: column).count(mode: :distinct, batch_size: batch_size, start: start, finish: finish)
end end
class << self class << self
...@@ -31,9 +37,10 @@ module Gitlab ...@@ -31,9 +37,10 @@ module Gitlab
MIN_REQUIRED_BATCH_SIZE = 1_250 MIN_REQUIRED_BATCH_SIZE = 1_250
MAX_ALLOWED_LOOPS = 10_000 MAX_ALLOWED_LOOPS = 10_000
SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep SLEEP_TIME_IN_SECONDS = 0.01 # 10 msec sleep
# Each query should take <<500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_DISTINCT_BATCH_SIZE = 10_000 # Each query should take < 500ms https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
DEFAULT_BATCH_SIZE = 100_000 DEFAULT_DISTINCT_BATCH_SIZE = 100_000
DEFAULT_BATCH_SIZE = 10_000
def initialize(relation, column: nil) def initialize(relation, column: nil)
@relation = relation @relation = relation
...@@ -46,15 +53,15 @@ module Gitlab ...@@ -46,15 +53,15 @@ module Gitlab
start > finish start > finish
end end
def count(batch_size: nil, mode: :itself) def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open? raise 'BatchCount can not be run inside a transaction' if ActiveRecord::Base.connection.transaction_open?
raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode) raise "The mode #{mode.inspect} is not supported" unless [:itself, :distinct].include?(mode)
# non-distinct have better performance # non-distinct have better performance
batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE batch_size ||= mode == :distinct ? DEFAULT_DISTINCT_BATCH_SIZE : DEFAULT_BATCH_SIZE
start = @relation.minimum(@column) || 0 start = actual_start(start)
finish = @relation.maximum(@column) || 0 finish = actual_finish(finish)
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0 raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start) return FALLBACK if unwanted_configuration?(finish, batch_size, start)
...@@ -84,6 +91,16 @@ module Gitlab ...@@ -84,6 +91,16 @@ module Gitlab
# rubocop:disable GitlabSecurity/PublicSend # rubocop:disable GitlabSecurity/PublicSend
@relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count @relation.select(@column).public_send(mode).where(@column => start..(finish - 1)).count
end end
private
def actual_start(start)
start || @relation.minimum(@column) || 0
end
def actual_finish(finish)
finish || @relation.maximum(@column) || 0
end
end end
end end
end end
...@@ -240,9 +240,9 @@ module Gitlab ...@@ -240,9 +240,9 @@ module Gitlab
fallback fallback
end end
def distinct_count(relation, column = nil, fallback: -1, batch: true) def distinct_count(relation, column = nil, fallback: -1, batch: true, start: nil, finish: nil)
if batch && Feature.enabled?(:usage_ping_batch_counter, default_enabled: true) if batch && Feature.enabled?(:usage_ping_batch_counter, default_enabled: true)
Gitlab::Database::BatchCount.batch_distinct_count(relation, column) Gitlab::Database::BatchCount.batch_distinct_count(relation, column, start: start, finish: finish)
else else
relation.distinct_count_by(column) relation.distinct_count_by(column)
end end
......
...@@ -90,5 +90,13 @@ describe Gitlab::Database::BatchCount do ...@@ -90,5 +90,13 @@ describe Gitlab::Database::BatchCount do
[1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) } [1, 2, 4, 5, 6].each { |i| expect(described_class.batch_distinct_count(model, column, batch_size: i)).to eq(2) }
end end
it 'counts with a start and finish' do
expect(described_class.batch_distinct_count(model, column, start: model.minimum(column), finish: model.maximum(column))).to eq(2)
end
it 'counts with User min and max as start and finish' do
expect(described_class.batch_distinct_count(model, column, start: User.minimum(:id), finish: User.maximum(:id))).to eq(2)
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment