Commit 15d08ccd authored by Adam Hegyi's avatar Adam Hegyi Committed by Yannis Roussos

Persist instance statistics object counts

This change will periodically count various objects in the database and
store the data in a database table.
parent f84ce98f
......@@ -12,6 +12,15 @@ module Analytics
pipelines: 6
}
# Maps each measurement identifier value to a lambda returning the model
# whose records are counted for that measurement. Callers look up the lambda
# by identifier and `call` it to obtain the scope to count.
# NOTE(review): wrapping the models in lambdas presumably defers resolving the
# model constants until a count is actually requested — confirm.
IDENTIFIER_QUERY_MAPPING = {
identifiers[:projects] => -> { Project },
identifiers[:users] => -> { User },
identifiers[:issues] => -> { Issue },
identifiers[:merge_requests] => -> { MergeRequest },
identifiers[:groups] => -> { Group },
identifiers[:pipelines] => -> { Ci::Pipeline }
}.freeze
validates :recorded_at, :identifier, :count, presence: true
validates :recorded_at, uniqueness: { scope: :identifier }
......
......@@ -115,6 +115,14 @@
:weight: 1
:idempotent:
:tags: []
- :name: cronjob:analytics_instance_statistics_count_job_trigger
:feature_category: :instance_statistics
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: cronjob:authorized_project_update_periodic_recalculate
:feature_category: :source_code_management
:has_external_dependencies:
......@@ -1204,6 +1212,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: analytics_instance_statistics_counter_job
:feature_category: :instance_statistics
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: authorized_keys
:feature_category: :source_code_management
:has_external_dependencies:
......
# frozen_string_literal: true

module Analytics
  module InstanceStatistics
    # Cron-triggered worker that schedules one CounterJobWorker job per
    # measurement identifier.
    #
    # The jobs are staggered: the first one is scheduled DEFAULT_DELAY after
    # the trigger runs and each following job DEFAULT_DELAY after the previous
    # one, so the counting queries do not all start at the same moment.
    class CountJobTriggerWorker
      include ApplicationWorker
      include CronjobQueue # rubocop:disable Scalability/CronWorkerContext

      # Gap before the first counter job and between consecutive counter jobs.
      DEFAULT_DELAY = 3.minutes.freeze

      feature_category :instance_statistics
      urgency :low

      idempotent!

      # Schedules the counter jobs. Does nothing while the
      # `store_instance_statistics_measurements` feature flag is disabled.
      #
      # All scheduled jobs share the same `recorded_at` timestamp, taken when
      # the trigger runs.
      def perform
        return if Feature.disabled?(:store_instance_statistics_measurements)

        recorded_at = Time.zone.now
        measurement_identifiers = Analytics::InstanceStatistics::Measurement.identifiers

        worker_arguments = Gitlab::Analytics::InstanceStatistics::WorkersArgumentBuilder.new(
          measurement_identifiers: measurement_identifiers.values,
          recorded_at: recorded_at
        ).execute

        # DEFAULT_DELAY is already a duration. Calling `.minutes` on it again
        # would reinterpret its 180 seconds as 180 minutes and push the first
        # job roughly three hours into the future.
        perform_in = DEFAULT_DELAY.from_now
        worker_arguments.each do |args|
          CounterJobWorker.perform_in(perform_in, *args)

          perform_in += DEFAULT_DELAY
        end
      end
    end
  end
end
# frozen_string_literal: true

module Analytics
  module InstanceStatistics
    # Counts the records of one measurement within a fixed id range and stores
    # the result as a measurement row.
    class CounterJobWorker
      include ApplicationWorker

      feature_category :instance_statistics
      urgency :low

      idempotent!

      # @param measurement_identifier key into Measurement::IDENTIFIER_QUERY_MAPPING
      # @param min_id lower bound of the id range, or nil
      # @param max_id upper bound of the id range, or nil
      # @param recorded_at timestamp stored with the measurement
      def perform(measurement_identifier, min_id, max_id, recorded_at)
        scope_builder = ::Analytics::InstanceStatistics::Measurement::IDENTIFIER_QUERY_MAPPING[measurement_identifier]
        relation = scope_builder.call

        # A missing boundary id means the table is empty, so the count stays 0.
        total = 0
        if !min_id.nil? && !max_id.nil?
          total = Gitlab::Database::BatchCount.batch_count(relation, start: min_id, finish: max_id)
        end

        # Store nothing when the batch counter reports its fallback value.
        return if total == Gitlab::Database::BatchCounter::FALLBACK

        row = { recorded_at: recorded_at, count: total, identifier: measurement_identifier }
        InstanceStatistics::Measurement.insert_all([row])
      end
    end
  end
end
......@@ -65,6 +65,7 @@
- integrations
- interactive_application_security_testing
- internationalization
- instance_statistics
- issue_tracking
- jenkins_importer
- jira_importer
......
---
name: store_instance_statistics_measurements
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/41300
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/247871
group: group::analytics
type: development
default_enabled: false
......@@ -514,6 +514,9 @@ Settings.cron_jobs['postgres_dynamic_partitions_creator']['job_class'] ||= 'Part
# CI platform metrics update: default schedule '47 9 * * *' runs daily at 09:47.
# Note that job_class is assigned unconditionally here (`=`, not `||=`).
Settings.cron_jobs['ci_platform_metrics_update_cron_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['cron'] ||= '47 9 * * *'
Settings.cron_jobs['ci_platform_metrics_update_cron_worker']['job_class'] = 'CiPlatformMetricsUpdateCronWorker'
# Instance statistics count trigger: default schedule '50 23 */1 * *' runs daily
# at 23:50 (`*/1` in the day-of-month field is a step of one, i.e. every day).
# Both cron and job_class use `||=`, so existing settings take precedence.
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['cron'] ||= '50 23 */1 * *'
Settings.cron_jobs['analytics_instance_statistics_count_job_trigger_worker']['job_class'] ||= 'Analytics::InstanceStatistics::CountJobTriggerWorker'
Gitlab.ee do
Settings.cron_jobs['adjourned_group_deletion_worker'] ||= Settingslogic.new({})
......
......@@ -30,6 +30,8 @@
- 1
- - analytics_code_review_metrics
- 1
- - analytics_instance_statistics_counter_job
- 1
- - authorized_keys
- 2
- - authorized_project_update
......
# frozen_string_literal: true

module Gitlab
  module Analytics
    module InstanceStatistics
      # Turns a list of measurement identifiers into the argument lists that
      # are handed to the counter jobs.
      class WorkersArgumentBuilder
        # @param measurement_identifiers keys of Measurement::IDENTIFIER_QUERY_MAPPING
        # @param recorded_at timestamp passed through unchanged to every argument list
        def initialize(measurement_identifiers: [], recorded_at: Time.zone.now)
          @measurement_identifiers = measurement_identifiers
          @recorded_at = recorded_at
        end

        # Returns one `[identifier, min_id, max_id, recorded_at]` array per
        # known identifier, in input order. Identifiers without an entry in the
        # query mapping are skipped.
        def execute
          measurement_identifiers.each_with_object([]) do |identifier, arguments|
            scope_builder = ::Analytics::InstanceStatistics::Measurement::IDENTIFIER_QUERY_MAPPING[identifier]
            scope = scope_builder&.call
            next if scope.nil?

            # The id range is fixed here, as early as possible, so that the
            # counts computed later are more accurate.
            lowest_id = scope.minimum(:id)
            highest_id = scope.maximum(:id)

            arguments << [identifier, lowest_id, highest_id, recorded_at]
          end
        end

        private

        attr_reader :measurement_identifiers, :recorded_at
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the builder that turns measurement identifiers into
# `[identifier, min_id, max_id, recorded_at]` argument lists.
RSpec.describe Gitlab::Analytics::InstanceStatistics::WorkersArgumentBuilder do
context 'when no measurement identifiers are given' do
it 'returns empty array' do
expect(described_class.new(measurement_identifiers: []).execute).to be_empty
end
end
context 'when measurement identifiers are given' do
# One user and three projects. The expectations below treat project_1 as the
# lowest and project_3 as the highest project id, and user_1 as the only user.
# NOTE(review): this presumably relies on ids increasing in creation order and
# on no other users/projects existing — confirm.
let_it_be(:user_1) { create(:user) }
let_it_be(:project_1) { create(:project, namespace: user_1.namespace, creator: user_1) }
let_it_be(:project_2) { create(:project, namespace: user_1.namespace, creator: user_1) }
let_it_be(:project_3) { create(:project, namespace: user_1.namespace, creator: user_1) }
let(:recorded_at) { 2.days.ago }
let(:projects_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:projects) }
let(:users_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:users) }
let(:measurement_identifiers) { [projects_measurement_identifier, users_measurement_identifier] }
subject { described_class.new(measurement_identifiers: measurement_identifiers, recorded_at: recorded_at).execute }
it 'returns worker arguments' do
expect(subject).to eq([
[projects_measurement_identifier, project_1.id, project_3.id, recorded_at],
[users_measurement_identifier, user_1.id, user_1.id, recorded_at]
])
end
context 'when bogus measurement identifiers are given' do
# Appends unknown identifiers to the array that `subject` passes to the
# builder; the result must be the same as without them.
before do
measurement_identifiers << 'bogus1'
measurement_identifiers << 'bogus2'
end
it 'skips bogus measurement identifiers' do
expect(subject).to eq([
[projects_measurement_identifier, project_1.id, project_3.id, recorded_at],
[users_measurement_identifier, user_1.id, user_1.id, recorded_at]
])
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the cron worker that schedules the counter jobs.
RSpec.describe Analytics::InstanceStatistics::CountJobTriggerWorker do
it_behaves_like 'an idempotent worker'
context 'triggers a job for each measurement identifiers' do
# One CounterJobWorker job is expected per entry in Measurement.identifiers.
let(:expected_count) { Analytics::InstanceStatistics::Measurement.identifiers.size }
it 'triggers CounterJobWorker jobs' do
# NOTE(review): no explicit subject is defined in this file, so `subject` is
# presumably RSpec's implicit `described_class.new` — confirm.
subject.perform
expect(Analytics::InstanceStatistics::CounterJobWorker.jobs.count).to eq(expected_count)
end
end
context 'when the `store_instance_statistics_measurements` feature flag is off' do
before do
stub_feature_flags(store_instance_statistics_measurements: false)
end
it 'does not trigger any CounterJobWorker job' do
subject.perform
expect(Analytics::InstanceStatistics::CounterJobWorker.jobs.count).to eq(0)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the worker that counts one measurement and stores the result.
RSpec.describe Analytics::InstanceStatistics::CounterJobWorker do
let_it_be(:user_1) { create(:user) }
let_it_be(:user_2) { create(:user) }
let(:users_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:users) }
let(:recorded_at) { Time.zone.now }
# Arguments for the worker: identifier, min id, max id, recorded_at.
let(:job_args) { [users_measurement_identifier, user_1.id, user_2.id, recorded_at] }
before do
# NOTE(review): presumably needed because the batch counter refuses to run
# inside an open transaction, which the test suite wraps examples in — confirm.
allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false)
end
# NOTE(review): `subject` is not defined in this file for the examples that use
# it below; it presumably comes from the 'an idempotent worker' shared examples
# pulled into this group by `include_examples` — confirm.
include_examples 'an idempotent worker' do
it 'counts a scope and stores the result' do
subject
measurement = Analytics::InstanceStatistics::Measurement.first
expect(measurement.recorded_at).to be_like_time(recorded_at)
expect(measurement.identifier).to eq('users')
expect(measurement.count).to eq(2)
end
end
context 'when no records are in the database' do
# Despite its name, this override holds the *groups* identifier; no groups
# are created in this spec.
let(:users_measurement_identifier) { ::Analytics::InstanceStatistics::Measurement.identifiers.fetch(:groups) }
# nil min/max ids are what the worker receives for an empty table.
subject { described_class.new.perform(users_measurement_identifier, nil, nil, recorded_at) }
it 'sets 0 as the count' do
subject
measurement = Analytics::InstanceStatistics::Measurement.first
expect(measurement.recorded_at).to be_like_time(recorded_at)
expect(measurement.identifier).to eq('groups')
expect(measurement.count).to eq(0)
end
end
it 'does not raise error when inserting duplicated measurement' do
subject
expect { subject }.not_to raise_error
end
it 'does not insert anything when BatchCount returns error' do
# The worker returns early when the count equals BatchCounter::FALLBACK.
allow(Gitlab::Database::BatchCount).to receive(:batch_count).and_return(Gitlab::Database::BatchCounter::FALLBACK)
expect { subject }.not_to change { Analytics::InstanceStatistics::Measurement.count }
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment