Commit 1f443ede authored by Rajendra Kadam, committed by Alper Akgun

Override min/max for usage data queries in EE module

parent 31871f08
......@@ -137,7 +137,7 @@ module EE
end
# Counts `relation` in batches of 10_000, bounded by the smallest and largest
# id of `ApprovalProjectRule.regular`, and returns the `size` of that result.
#
# The id bounds are read through the `minimum_id` / `maximum_id` helpers and
# the batch count is issued exactly once per invocation.
def count_approval_rules_with_users(relation)
  regular_rules = ApprovalProjectRule.regular

  count(relation, batch_size: 10_000, start: minimum_id(regular_rules), finish: maximum_id(regular_rules)).size
end
# rubocop:enable CodeReuse/ActiveRecord, UsageData/LargeTable
......@@ -196,15 +196,15 @@ module EE
epic_issues: count(::EpicIssue),
feature_flags: count(Operations::FeatureFlag),
geo_nodes: count(::GeoNode),
geo_event_log_max_id: alt_usage_data { Geo::EventLog.maximum(:id) || 0 },
geo_event_log_max_id: alt_usage_data { maximum_id(Geo::EventLog) || 0 },
ldap_group_links: count(::LdapGroupLink),
issues_with_health_status: count(::Issue.with_health_status, start: minimum_id(::Issue), finish: maximum_id(::Issue)),
ldap_keys: count(::LDAPKey),
ldap_users: count(::User.ldap, 'users.id'),
pod_logs_usages_total: redis_usage_data { ::Gitlab::UsageCounters::PodLogs.usage_totals[:total] },
merged_merge_requests_using_approval_rules: count(::MergeRequest.merged.joins(:approval_rules), # rubocop: disable CodeReuse/ActiveRecord
start: merge_request_minimum_id,
finish: merge_request_maximum_id),
start: minimum_id(::MergeRequest),
finish: maximum_id(::MergeRequest)),
projects_mirrored_with_pipelines_enabled: count(::Project.mirrored_with_enabled_pipelines),
projects_reporting_ci_cd_back_to_github: count(::GithubService.active),
status_page_projects: count(::StatusPage::ProjectSetting.enabled),
......@@ -226,12 +226,6 @@ module EE
)
end
# Wraps `::Epic.deepest_relationship_level`, coerced with `to_i`, in a
# single-entry hash keyed by `:epics_deepest_relationship_level`.
def epics_deepest_relationship_level
  # rubocop: disable UsageData/LargeTable
  deepest_level = ::Epic.deepest_relationship_level.to_i
  # rubocop: enable UsageData/LargeTable

  { epics_deepest_relationship_level: deepest_level }
end
# Omitted because no user, creator or author associated: `auto_devops_disabled`, `auto_devops_enabled`
# Omitted because not in use anymore: `gcp_clusters`, `gcp_clusters_disabled`, `gcp_clusters_enabled`
# rubocop:disable CodeReuse/ActiveRecord
......@@ -252,8 +246,8 @@ module EE
projects_with_sectional_code_owner_rules: projects_with_sectional_code_owner_rules(time_period),
merge_requests_with_added_rules: distinct_count(::ApprovalMergeRequestRule.where(time_period).with_added_approval_rules,
:merge_request_id,
start: approval_merge_request_rule_minimum_id,
finish: approval_merge_request_rule_maximum_id),
start: minimum_id(::ApprovalMergeRequestRule, :merge_request_id),
finish: maximum_id(::ApprovalMergeRequestRule, :merge_request_id)),
merge_requests_with_optional_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_optional.where(time_period), :merge_request_id),
merge_requests_with_overridden_project_rules: merge_requests_with_overridden_project_rules(time_period),
merge_requests_with_required_codeowners: distinct_count(::ApprovalMergeRequestRule.code_owner_approval_required.where(time_period), :merge_request_id),
......@@ -426,8 +420,8 @@ module EE
# rubocop:disable CodeReuse/ActiveRecord
# rubocop: disable UsageData/LargeTable
def count_secure_scans(time_period)
start = ::Security::Scan.minimum(:build_id)
finish = ::Security::Scan.maximum(:build_id)
start = minimum_id(::Security::Scan, :build_id)
finish = maximum_id(::Security::Scan, :build_id)
{}.tap do |secure_jobs|
::Security::Scan.scan_types.each do |name, scan_type|
......@@ -479,8 +473,8 @@ module EE
end
end
else
start = ::Ci::Pipeline.minimum(:id)
finish = ::Ci::Pipeline.maximum(:id)
start = minimum_id(::Ci::Pipeline)
finish = maximum_id(::Ci::Pipeline)
::Security::Scan.scan_types.each do |name, scan_type|
relation = ::Ci::Build.joins(:security_scans)
......@@ -527,30 +521,6 @@ module EE
Arel::Nodes::NamedFunction.new('DATE', [locked_timezone])
end
# Smallest `merge_request_id` in `::ApprovalMergeRequestRule`, evaluated inside
# `strong_memoize` with this method's own name as the key.
def approval_merge_request_rule_minimum_id
  strong_memoize(__method__) { ::ApprovalMergeRequestRule.minimum(:merge_request_id) }
end
# Largest `merge_request_id` in `::ApprovalMergeRequestRule`, evaluated inside
# `strong_memoize` with this method's own name as the key.
def approval_merge_request_rule_maximum_id
  strong_memoize(__method__) { ::ApprovalMergeRequestRule.maximum(:merge_request_id) }
end
# Smallest `id` in `::MergeRequest`, evaluated inside `strong_memoize` with
# this method's own name as the key.
def merge_request_minimum_id
  strong_memoize(__method__) { ::MergeRequest.minimum(:id) }
end
# Largest `id` in `::MergeRequest`, evaluated inside `strong_memoize` with
# this method's own name as the key.
def merge_request_maximum_id
  strong_memoize(__method__) { ::MergeRequest.maximum(:id) }
end
# True when at least one entry of `ldap_available_servers` holds a truthy value
# under the string form of `configuration_item`.
def ldap_config_present_for_any_provider?(configuration_item)
  item_key = configuration_item.to_s

  ldap_available_servers.any? do |server_config|
    server_config[item_key]
  end
end
......@@ -590,15 +560,15 @@ module EE
distinct_count(
::ApprovalMergeRequestRule.where(time_period).where(sql),
:merge_request_id,
start: approval_merge_request_rule_minimum_id,
finish: approval_merge_request_rule_maximum_id
start: minimum_id(::ApprovalMergeRequestRule, :merge_request_id),
finish: maximum_id(::ApprovalMergeRequestRule, :merge_request_id)
)
end
# Counts active Jira services, joined with their tracker data, where
# `issues_enabled` is true.
#
# The batch bounds are the smallest and largest `service_id` among
# `JiraTrackerData` rows with `issues_enabled: true`, read once each through
# the `minimum_id` / `maximum_id` helpers (no separate direct min/max queries).
def projects_jira_issuelist_active
  # rubocop: disable UsageData/LargeTable:
  issues_enabled_tracker_data = JiraTrackerData.where(issues_enabled: true)
  min_id = minimum_id(issues_enabled_tracker_data, :service_id)
  max_id = maximum_id(issues_enabled_tracker_data, :service_id)
  # rubocop: enable UsageData/LargeTable:

  count(::JiraService.active.includes(:jira_tracker_data).where(jira_tracker_data: { issues_enabled: true }), start: min_id, finish: max_id)
end
......
......@@ -15,7 +15,7 @@ RSpec.describe Gitlab::UsageDataNonSqlMetrics do
described_class.uncached_data
end
expect(recorder.count).to eq(74)
expect(recorder.count).to eq(50)
end
end
end
......@@ -435,18 +435,10 @@ module Gitlab
projects_jira_dvcs_server_active: count(ProjectFeatureUsage.with_jira_dvcs_integration_enabled(cloud: false))
}
# rubocop: disable UsageData/LargeTable:
JiraService.active.includes(:jira_tracker_data).find_in_batches(batch_size: 100) do |services|
counts = services.group_by do |service|
# TODO: Simplify as part of https://gitlab.com/gitlab-org/gitlab/issues/29404
service_url = service.data_fields&.url || (service.properties && service.properties['url'])
service_url&.include?('.atlassian.net') ? :cloud : :server
end
jira_service_data_hash = jira_service_data
results[:projects_jira_server_active] = jira_service_data_hash[:projects_jira_server_active]
results[:projects_jira_cloud_active] = jira_service_data_hash[:projects_jira_cloud_active]
results[:projects_jira_server_active] += counts[:server].size if counts[:server]
results[:projects_jira_cloud_active] += counts[:cloud].size if counts[:cloud]
end
# rubocop: enable UsageData/LargeTable:
results
rescue ActiveRecord::StatementInvalid
{ projects_jira_server_active: FALLBACK, projects_jira_cloud_active: FALLBACK }
......
......@@ -25,10 +25,17 @@ module Gitlab
SQL_METRIC_DEFAULT
end
def maximum_id(model)
# Stub variant: performs no lookup and returns nil for any model/column.
def maximum_id(model, column = nil)
  nil
end
def minimum_id(model)
# Stub variant: performs no lookup and returns nil for any model/column.
def minimum_id(model, column = nil)
  nil
end
# Returns a fresh hash with both Jira activity counters
# (`projects_jira_server_active`, `projects_jira_cloud_active`) set to 0.
def jira_service_data
  %i[projects_jira_server_active projects_jira_cloud_active].each_with_object({}) do |metric, data|
    data[metric] = 0
  end
end
end
end
......
......@@ -25,6 +25,27 @@ module Gitlab
relation.select(relation.all.table[column].sum).to_sql
end
# rubocop: disable CodeReuse/ActiveRecord
# Builds (but does not run) the SQL for a histogram over `relation`.
#
# A CTE named `count_cte` groups `relation` by `column` and selects COUNT(*)
# as `count_grouped`. The outer query projects
# WIDTH_BUCKET(count_grouped, buckets.first, buckets.last, bucket_size - 1)
# aliased as `buckets` together with the CTE table's `count` attribute, and
# groups and orders by `buckets`.
#
# relation    - relation to group
# column      - column to group `relation` by
# buckets     - object responding to `first` / `last` (and `size` when
#               `bucket_size` is omitted), giving the bucket bounds
# bucket_size - defaults to `buckets.size`; one less than this is passed to
#               WIDTH_BUCKET as the number of segments
#
# Returns the generated SQL string.
def histogram(relation, column, buckets:, bucket_size: buckets.size)
  grouped_counts = relation.group(column).select(Arel.star.count.as('count_grouped'))
  count_cte = Gitlab::SQL::CTE.new(:count_cte, grouped_counts)
  cte_table = count_cte.table

  width_bucket_args = [cte_table[:count_grouped], buckets.first, buckets.last, bucket_size - 1]
  bucket_column = Arel::Nodes::NamedFunction.new('WIDTH_BUCKET', width_bucket_args).as('buckets')

  cte_table
    .project(bucket_column, cte_table[:count])
    .group('buckets')
    .order('buckets')
    .with(count_cte.to_arel)
    .to_sql
end
# rubocop: enable CodeReuse/ActiveRecord
# For estimated distinct count use exact query instead of hll
# buckets query, because it can't be used to obtain estimations without
# supplementary ruby code present in Gitlab::Database::PostgresHll::BatchDistinctCounter
......@@ -36,10 +57,21 @@ module Gitlab
'SELECT ' + args.map {|arg| "(#{arg})" }.join(' + ')
end
def maximum_id(model)
# Stub variant: performs no lookup and returns nil for any model/column.
def maximum_id(model, column = nil)
  nil
end
# Stub variant: performs no lookup and returns nil for any model/column.
def minimum_id(model, column = nil)
  nil
end
# Returns a fresh hash with both Jira activity counters
# (`projects_jira_server_active`, `projects_jira_cloud_active`) set to 0.
def jira_service_data
  %i[projects_jira_server_active projects_jira_cloud_active].each_with_object({}) do |metric, data|
    data[metric] = 0
  end
end
def minimum_id(model)
# Returns a fixed level of 0 under the `:epics_deepest_relationship_level` key;
# nothing is queried here.
def epics_deepest_relationship_level
  fixed_level = 0

  { epics_deepest_relationship_level: fixed_level }
end
private
......
......@@ -210,18 +210,52 @@ module Gitlab
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(event_name.to_s, values: values)
end
def maximum_id(model)
key = :"#{model.name.downcase}_maximum_id"
# Returns the largest value of `column` (default `:id`) for `model`, memoized
# through `strong_memoize`.
#
# The memoization key starts from `model.name`, downcased with `::` replaced
# by `_`. For the default `:id` column the key keeps the `<model>_maximum_id`
# form. For any other column the column name is added to the key, so lookups
# on different columns of the same model never share a cached value.
#
# NOTE(review): the key does not capture scopes, so a scoped relation and its
# bare model presumably map to the same key when both are passed — confirm
# against callers.
def maximum_id(model, column = nil)
  column_to_read = column || :id
  key_prefix = model.name.downcase.gsub('::', '_')

  unless column_to_read.to_s == 'id'
    # Replace non-word characters (e.g. the dot in 'table.column') so the key
    # stays a plain identifier-like symbol.
    key_prefix += "_#{column_to_read.to_s.gsub(/\W/, '_')}"
  end

  key = :"#{key_prefix}_maximum_id"

  strong_memoize(key) { model.maximum(column_to_read) }
end
# rubocop: disable UsageData/LargeTable:
def jira_service_data
data = {
projects_jira_server_active: 0,
projects_jira_cloud_active: 0
}
# rubocop: disable CodeReuse/ActiveRecord
JiraService.active.includes(:jira_tracker_data).find_in_batches(batch_size: 100) do |services|
counts = services.group_by do |service|
# TODO: Simplify as part of https://gitlab.com/gitlab-org/gitlab/issues/29404
service_url = service.data_fields&.url || (service.properties && service.properties['url'])
service_url&.include?('.atlassian.net') ? :cloud : :server
end
data[:projects_jira_server_active] += counts[:server].size if counts[:server]
data[:projects_jira_cloud_active] += counts[:cloud].size if counts[:cloud]
end
def minimum_id(model)
key = :"#{model.name.downcase}_minimum_id"
data
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: enable UsageData/LargeTable:
# Returns the smallest value of `column` (default `:id`) for `model`, memoized
# through `strong_memoize`.
#
# The memoization key starts from `model.name`, downcased with `::` replaced
# by `_`. For the default `:id` column the key keeps the `<model>_minimum_id`
# form. For any other column the column name is added to the key, so lookups
# on different columns of the same model never share a cached value.
#
# NOTE(review): the key does not capture scopes, so a scoped relation and its
# bare model presumably map to the same key when both are passed — confirm
# against callers.
def minimum_id(model, column = nil)
  column_to_read = column || :id
  key_prefix = model.name.downcase.gsub('::', '_')

  unless column_to_read.to_s == 'id'
    # Replace non-word characters (e.g. the dot in 'table.column') so the key
    # stays a plain identifier-like symbol.
    key_prefix += "_#{column_to_read.to_s.gsub(/\W/, '_')}"
  end

  key = :"#{key_prefix}_minimum_id"

  strong_memoize(key) { model.minimum(column_to_read) }
end
# Wraps `::Epic.deepest_relationship_level`, coerced with `to_i`, in a
# single-entry hash keyed by `:epics_deepest_relationship_level`.
def epics_deepest_relationship_level
  # rubocop: disable UsageData/LargeTable
  deepest_level = ::Epic.deepest_relationship_level.to_i
  # rubocop: enable UsageData/LargeTable

  { epics_deepest_relationship_level: deepest_level }
end
private
......
......@@ -59,6 +59,14 @@ RSpec.describe Gitlab::UsageDataQueries do
end
end
describe '.histogram' do
  # Pins the exact SQL produced for active HTTP integrations grouped by
  # project_id with buckets 1..2 and a bucket_size of 101.
  it 'returns the histogram sql' do
    relation = AlertManagement::HttpIntegration.active
    expected_sql = 'WITH "count_cte" AS (SELECT COUNT(*) AS count_grouped FROM "alert_management_http_integrations" WHERE "alert_management_http_integrations"."active" = TRUE GROUP BY "alert_management_http_integrations"."project_id") SELECT WIDTH_BUCKET("count_cte"."count_grouped", 1, 2, 100) AS buckets, "count_cte"."count" FROM "count_cte" GROUP BY buckets ORDER BY buckets'

    histogram_sql = described_class.histogram(relation, :project_id, buckets: 1..2, bucket_size: 101)

    expect(histogram_sql).to eq(expected_sql)
  end
end
describe 'min/max methods' do
it 'returns nil' do
# user min/max
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment