Commit 71385691 authored by Peter Leitzen's avatar Peter Leitzen

Merge branch...

Merge branch '263532-instrument-usage-ping-count-number-of-enabled-integrations-per-project' into 'master'

Instrument usage ping: Count number of enabled integrations per project as histogram

See merge request gitlab-org/gitlab!55782
parents f1ce497f be509ff2
---
title: 'Usage ping: Histogram for enabled integrations per project'
merge_request: 55782
author:
type: added
---
key_path: usage_activity_by_stage.monitor.projects_with_enabled_alert_integrations_histogram
description: Histogram (buckets 1 to 100) of projects with at least 1 enabled integration.
product_section: ops
product_stage: monitor
product_group: group::monitor
product_category: incident_management
value_type: object
status: data_available
milestone: "13.10"
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/55782
time_frame: all
data_source: database
distribution:
- ce
- ee
tier:
- free
- premium
- ultimate
......@@ -22,7 +22,7 @@
},
"value_type": {
"type": "string",
"enum": ["string", "number", "boolean"]
"enum": ["string", "number", "boolean", "object"]
},
"status": {
"type": ["string"],
......
......@@ -14852,6 +14852,18 @@ Status: `data_available`
Tiers: `free`
### `usage_activity_by_stage.monitor.projects_with_enabled_alert_integrations_histogram`
Histogram (buckets 1 to 100) of projects with at least 1 enabled integration.
[YAML definition](https://gitlab.com/gitlab-org/gitlab/-/blob/master/config/metrics/counts_all/20210309165717_projects_with_enabled_alert_integrations_histogram.yml)
Group: `group::monitor`
Status: `data_available`
Tiers: `free`, `premium`, `ultimate`
### `usage_activity_by_stage.monitor.projects_with_error_tracking_enabled`
Projects where error tracking is enabled
......
......@@ -32,7 +32,7 @@ Each metric is defined in a separate YAML file consisting of a number of fields:
| `product_stage` | no | The [stage](https://gitlab.com/gitlab-com/www-gitlab-com/blob/master/data/stages.yml) for the metric. |
| `product_group` | yes | The [group](https://gitlab.com/gitlab-com/www-gitlab-com/blob/master/data/stages.yml) that owns the metric. |
| `product_category` | no | The [product category](https://gitlab.com/gitlab-com/www-gitlab-com/blob/master/data/categories.yml) for the metric. |
| `value_type` | yes | `string`; one of `string`, `number`, `boolean`. |
| `value_type` | yes | `string`; one of `string`, `number`, `boolean`, `object`. |
| `status` | yes | `string`; status of the metric, may be set to `data_available`, `planned`, `in_progress`, `implemented`, `not_used`, `deprecated` |
| `time_frame` | yes | `string`; may be set to a value like `7d`, `28d`, `all`, `none`. |
| `data_source` | yes | `string`; may be set to a value like `database`, `redis`, `redis_hll`, `prometheus`, `ruby`. |
......
......@@ -134,14 +134,6 @@ RSpec.describe Gitlab::UsageData do
expect(count_data[:epic_issues]).to eq(2)
end
it 'has integer value for epic relationship level' do
expect(count_data[:epics_deepest_relationship_level]).to be_a_kind_of(Integer)
end
it 'has integer values for all counts' do
expect(count_data.values).to all(be_a_kind_of(Integer))
end
it 'gathers security products usage data' do
expect(count_data[:container_scanning_jobs]).to eq(1)
expect(count_data[:dast_jobs]).to eq(1)
......
......@@ -629,6 +629,9 @@ module Gitlab
# rubocop: disable CodeReuse/ActiveRecord
def usage_activity_by_stage_monitor(time_period)
# Calculate histogram only for overall as other time periods aren't available/useful here.
integrations_histogram = time_period.empty? ? histogram(::AlertManagement::HttpIntegration.active, :project_id, buckets: 1..100) : nil
{
clusters: distinct_count(::Clusters::Cluster.where(time_period), :user_id),
clusters_applications_prometheus: cluster_applications_user_distinct_count(::Clusters::Applications::Prometheus, time_period),
......@@ -638,8 +641,9 @@ module Gitlab
projects_with_tracing_enabled: distinct_count(::Project.with_tracing_enabled.where(time_period), :creator_id),
projects_with_error_tracking_enabled: distinct_count(::Project.with_enabled_error_tracking.where(time_period), :creator_id),
projects_with_incidents: distinct_count(::Issue.incident.where(time_period), :project_id),
projects_with_alert_incidents: distinct_count(::Issue.incident.with_alert_management_alerts.where(time_period), :project_id)
}
projects_with_alert_incidents: distinct_count(::Issue.incident.with_alert_management_alerts.where(time_period), :project_id),
projects_with_enabled_alert_integrations_histogram: integrations_histogram
}.compact
end
# rubocop: enable CodeReuse/ActiveRecord
......
......@@ -39,10 +39,12 @@ module Gitlab
extend self
FALLBACK = -1
HISTOGRAM_FALLBACK = { '-1' => -1 }.freeze
DISTRIBUTED_HLL_FALLBACK = -2
ALL_TIME_TIME_FRAME_NAME = "all"
SEVEN_DAYS_TIME_FRAME_NAME = "7d"
TWENTY_EIGHT_DAYS_TIME_FRAME_NAME = "28d"
MAX_BUCKET_SIZE = 100
def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
if batch
......@@ -87,6 +89,73 @@ module Gitlab
FALLBACK
end
# Builds a histogram over the per-group row counts of `relation`.
#
# Rows of `relation` are grouped by `column` and counted; each per-group count
# is then assigned to a bucket with SQL `WIDTH_BUCKET`, and the number of
# groups per bucket is returned.
#
# We don't support batching with histograms.
# Please avoid using this method on large tables.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/323949.
#
# @param relation - relation to aggregate; must respond to `group`, `connection` and `table_name`
# @param column - column to group by (e.g. `:project_id`)
# @param buckets - bounds of the histogram; `first` and `last` are passed to
#   `WIDTH_BUCKET` as lower/upper bound (callers visible here pass a Range such as `1..100`)
# @param bucket_size - number of buckets, defaults to `buckets.size`;
#   must be non-zero and at most MAX_BUCKET_SIZE
#
# @return [Hash] bucket (as String key) => number of groups in that bucket.
#   Returns HISTOGRAM_FALLBACK if the parameters are invalid (and the error
#   tracker did not raise) or if the query fails with ActiveRecord::StatementInvalid.
#
# rubocop: disable CodeReuse/ActiveRecord
def histogram(relation, column, buckets:, bucket_size: buckets.size)
# Using lambda to avoid exposing histogram specific methods
parameters_valid = lambda do
error_message =
if buckets.first == buckets.last
'Lower bucket bound cannot equal to upper bucket bound'
elsif bucket_size == 0
'Bucket size cannot be zero'
elsif bucket_size > MAX_BUCKET_SIZE
"Bucket size #{bucket_size} exceeds the limit of #{MAX_BUCKET_SIZE}"
end
# `return` inside a lambda only leaves the lambda, not `histogram`.
return true unless error_message
exception = ArgumentError.new(error_message)
# The exception is never raised here, so it has no backtrace yet; attach one manually.
exception.set_backtrace(caller)
# NOTE(review): judging by its name this raises in dev/test and only tracks
# otherwise, in which case `false` below triggers the fallback - confirm.
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(exception)
false
end
return HISTOGRAM_FALLBACK unless parameters_valid.call
# Inner query: one row per distinct `column` value holding its row count.
count_grouped = relation.group(column).select(Arel.star.count.as('count_grouped'))
cte = Gitlab::SQL::CTE.new(:count_cte, count_grouped)
# For example, 9 segments gives 10 buckets
bucket_segments = bucket_size - 1
width_bucket = Arel::Nodes::NamedFunction
.new('WIDTH_BUCKET', [cte.table[:count_grouped], buckets.first, buckets.last, bucket_segments])
.as('buckets')
# NOTE(review): `cte.table[:count]` presumably renders as "count_cte"."count",
# which PostgreSQL would evaluate as count(count_cte) per bucket - confirm.
query = cte
.table
.project(width_bucket, cte.table[:count])
.group('buckets')
.order('buckets')
.with(cte.to_arel)
# Return the histogram as a Hash because buckets are unique.
relation
.connection
.exec_query(query.to_sql)
.rows
.to_h
# Keys are converted to strings in Usage Ping JSON
.stringify_keys
# The handler below reads `bucket_segments` and `query`, which are assigned
# in the method body above.
rescue ActiveRecord::StatementInvalid => e
Gitlab::AppJsonLogger.error(
event: 'histogram',
relation: relation.table_name,
operation: 'histogram',
operation_args: [column, buckets.first, buckets.last, bucket_segments],
query: query.to_sql,
message: e.message
)
HISTOGRAM_FALLBACK
end
# rubocop: enable CodeReuse/ActiveRecord
def add(*args)
return -1 if args.any?(&:negative?)
......
......@@ -34,6 +34,7 @@ UsageData/LargeTable:
CountMethods:
- :count
- :distinct_count
- :histogram
AllowedMethods:
- :arel_table
- :minimum
......
......@@ -384,12 +384,13 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
for_defined_days_back do
user = create(:user, dashboard: 'operations')
cluster = create(:cluster, user: user)
create(:project, creator: user)
project = create(:project, creator: user)
create(:clusters_applications_prometheus, :installed, cluster: cluster)
create(:project_tracing_setting)
create(:project_error_tracking_setting)
create(:incident)
create(:incident, alert_management_alert: create(:alert_management_alert))
create(:alert_management_http_integration, :active, project: project)
end
expect(described_class.usage_activity_by_stage_monitor({})).to include(
......@@ -399,10 +400,12 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
projects_with_tracing_enabled: 2,
projects_with_error_tracking_enabled: 2,
projects_with_incidents: 4,
projects_with_alert_incidents: 2
projects_with_alert_incidents: 2,
projects_with_enabled_alert_integrations_histogram: { '1' => 2 }
)
expect(described_class.usage_activity_by_stage_monitor(described_class.last_28_days_time_period)).to include(
data_28_days = described_class.usage_activity_by_stage_monitor(described_class.last_28_days_time_period)
expect(data_28_days).to include(
clusters: 1,
clusters_applications_prometheus: 1,
operations_dashboard_default_dashboard: 1,
......@@ -411,6 +414,8 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
projects_with_incidents: 2,
projects_with_alert_incidents: 1
)
expect(data_28_days).not_to include(:projects_with_enabled_alert_integrations_histogram)
end
end
......@@ -528,14 +533,14 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
expect(subject.keys).to include(*UsageDataHelpers::USAGE_DATA_KEYS)
end
it 'gathers usage counts' do
it 'gathers usage counts', :aggregate_failures do
count_data = subject[:counts]
expect(count_data[:boards]).to eq(1)
expect(count_data[:projects]).to eq(4)
expect(count_data.values_at(*UsageDataHelpers::SMAU_KEYS)).to all(be_an(Integer))
expect(count_data.keys).to include(*UsageDataHelpers::COUNTS_KEYS)
expect(UsageDataHelpers::COUNTS_KEYS - count_data.keys).to be_empty
expect(count_data.values).to all(be_a_kind_of(Integer))
end
it 'gathers usage counts correctly' do
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Gitlab::Utils::UsageData do
include Database::DatabaseHelpers
describe '#count' do
let(:relation) { double(:relation) }
......@@ -183,6 +185,102 @@ RSpec.describe Gitlab::Utils::UsageData do
end
end
# Specs for `histogram`, exercised against active
# AlertManagement::HttpIntegration records grouped by `project_id`.
describe '#histogram' do
let_it_be(:projects) { create_list(:project, 3) }
let(:project1) { projects.first }
let(:project2) { projects.second }
let(:project3) { projects.third }
let(:fallback) { described_class::HISTOGRAM_FALLBACK }
let(:relation) { AlertManagement::HttpIntegration.active }
let(:column) { :project_id }
# Expects the given block to raise `exception` with `message` and a backtrace,
# and expects the error to be passed through Gitlab::ErrorTracking.
def expect_error(exception, message, &block)
expect(Gitlab::ErrorTracking)
.to receive(:track_and_raise_for_dev_exception)
.with(instance_of(exception))
.and_call_original
expect(&block).to raise_error(
an_instance_of(exception).and(
having_attributes(message: message, backtrace: be_kind_of(Array)))
)
end
it 'checks bucket bounds to be not equal' do
expect_error(ArgumentError, 'Lower bucket bound cannot equal to upper bucket bound') do
described_class.histogram(relation, column, buckets: 1..1)
end
end
it 'checks bucket_size being non-zero' do
expect_error(ArgumentError, 'Bucket size cannot be zero') do
described_class.histogram(relation, column, buckets: 1..2, bucket_size: 0)
end
end
# `1..101` has 101 elements, so the default `bucket_size` (`buckets.size`) is 101.
it 'limits the amount of buckets without providing bucket_size argument' do
expect_error(ArgumentError, 'Bucket size 101 exceeds the limit of 100') do
described_class.histogram(relation, column, buckets: 1..101)
end
end
it 'limits the amount of buckets when providing bucket_size argument' do
expect_error(ArgumentError, 'Bucket size 101 exceeds the limit of 100') do
described_class.histogram(relation, column, buckets: 1..2, bucket_size: 101)
end
end
# No integrations are created here, so an empty Hash is expected.
it 'without data' do
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq({})
end
# Only integrations created with the :active trait are expected to be counted:
# project1 and project3 have one each, project2 has two.
it 'aggregates properly within bounds' do
create(:alert_management_http_integration, :active, project: project1)
create(:alert_management_http_integration, :inactive, project: project1)
create(:alert_management_http_integration, :active, project: project2)
create(:alert_management_http_integration, :active, project: project2)
create(:alert_management_http_integration, :inactive, project: project2)
create(:alert_management_http_integration, :active, project: project3)
create(:alert_management_http_integration, :inactive, project: project3)
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq('1' => 2, '2' => 1)
end
# Three integrations on one project exceed the upper bound of 2; the project
# is expected to be reported in bucket '2' rather than dropped.
it 'aggregates properly out of bounds' do
create_list(:alert_management_http_integration, 3, :active, project: project1)
histogram = described_class.histogram(relation, column, buckets: 1..2)
expect(histogram).to eq('2' => 1)
end
# Uses a statement timeout (0.001) shorter than the pg_sleep (0.002) in the
# query so that the query is canceled.
it 'returns fallback and logs canceled queries' do
create(:alert_management_http_integration, :active, project: project1)
# operation_args: column, lower bound, upper bound, bucket_segments (100 - 1).
expect(Gitlab::AppJsonLogger).to receive(:error).with(
event: 'histogram',
relation: relation.table_name,
operation: 'histogram',
operation_args: [column, 1, 100, 99],
query: kind_of(String),
message: /PG::QueryCanceled/
)
with_statement_timeout(0.001) do
# This local `relation` shadows the `let(:relation)` inside this block only.
relation = AlertManagement::HttpIntegration.select('pg_sleep(0.002)')
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq(fallback)
end
end
end
describe '#add' do
it 'adds given values' do
expect(described_class.add(1, 3)).to eq(4)
......
......@@ -5,11 +5,65 @@ module Database
# In order to directly work with views using factories,
# we can swapout the view for a table of identical structure.
def swapout_view_for_table(view)
ActiveRecord::Base.connection.execute(<<~SQL)
ActiveRecord::Base.connection.execute(<<~SQL.squish)
CREATE TABLE #{view}_copy (LIKE #{view});
DROP VIEW #{view};
ALTER TABLE #{view}_copy RENAME TO #{view};
SQL
end
# Set statement timeout temporarily, for the duration of the given block.
# Useful when testing query timeouts.
#
# The previous timeout is read with `SHOW statement_timeout` and restored via
# `set_statement_timeout` once the block has finished (normally or by raising).
#
# Note that this method cannot restore the timeout if a query
# was canceled due to e.g. a statement timeout.
# Refrain from using this transaction in these situations.
#
# @param timeout - Statement timeout in seconds. The sign is ignored and the
#   value is rounded up to whole milliseconds (so the minimum is 1ms).
# @yield runs the block while the temporary timeout is in effect
# @return the value returned by the block
# @raise [ArgumentError] if the timeout amounts to 0ms, which would disable
#   the statement timeout instead of shortening it
#
# Example:
#
#   with_statement_timeout(0.1) do
#     model.select('pg_sleep(0.11)')
#   end
def with_statement_timeout(timeout)
  # Force a positive value and a minimum of 1ms for very small values.
  timeout_ms = (timeout * 1000).abs.ceil
  raise ArgumentError, 'Using a timeout of `0` means to disable statement timeout.' if timeout_ms == 0

  # Validate and capture the previous value BEFORE entering the protected
  # region: otherwise the `ensure` below would run with no previous timeout
  # and issue an invalid `SET LOCAL statement_timeout = ''` statement.
  previous_timeout = ActiveRecord::Base.connection
    .exec_query('SHOW statement_timeout')[0].fetch('statement_timeout')

  begin
    set_statement_timeout("#{timeout_ms}ms")
    yield
  ensure
    begin
      set_statement_timeout(previous_timeout)
    rescue ActiveRecord::StatementInvalid
      # After a transaction was canceled/aborted due to e.g. a statement
      # timeout commands are ignored and will raise in PG::InFailedSqlTransaction.
      # We can safely ignore this error because the statement timeout was set
      # for the current transaction which will be closed anyway.
    end
  end
end
# Set statement timeout for the current transaction (`SET LOCAL`).
#
# Note, that it does not restore the previous statement timeout.
# Use `with_statement_timeout` instead.
#
# @param timeout - Value for `statement_timeout`, interpolated verbatim into
#   the SQL statement. Pass a string with an explicit unit (e.g. '100ms' or
#   '1s'): this method performs no seconds-to-milliseconds conversion, and
#   PostgreSQL treats a value without a unit as milliseconds.
#
# Example:
#
#   set_statement_timeout('100ms')
#   model.select('pg_sleep(0.11)')
def set_statement_timeout(timeout)
  ActiveRecord::Base.connection.execute(
    format(%(SET LOCAL statement_timeout = '%s'), timeout)
  )
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment