Commit 2940e3fd authored by Peter Leitzen's avatar Peter Leitzen

Implement usage ping `histogram`

The implementation utilizes `WIDTH_BUCKET` SQL function.

Note that this iteration does not support batching.

Convert histogram keys to strings in Usage Ping

Return a more meaningful histogram fallback

Avoid raising exceptions during usage ping calculations

Return fallback also for invalid statements

Avoid control flow with exceptions
parent 67809477
...@@ -39,10 +39,12 @@ module Gitlab ...@@ -39,10 +39,12 @@ module Gitlab
extend self extend self
FALLBACK = -1 FALLBACK = -1
HISTOGRAM_FALLBACK = { '-1' => -1 }.freeze
DISTRIBUTED_HLL_FALLBACK = -2 DISTRIBUTED_HLL_FALLBACK = -2
ALL_TIME_TIME_FRAME_NAME = "all" ALL_TIME_TIME_FRAME_NAME = "all"
SEVEN_DAYS_TIME_FRAME_NAME = "7d" SEVEN_DAYS_TIME_FRAME_NAME = "7d"
TWENTY_EIGHT_DAYS_TIME_FRAME_NAME = "28d" TWENTY_EIGHT_DAYS_TIME_FRAME_NAME = "28d"
MAX_BUCKET_SIZE = 100
def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil)
if batch if batch
...@@ -87,6 +89,73 @@ module Gitlab ...@@ -87,6 +89,73 @@ module Gitlab
FALLBACK FALLBACK
end end
# Builds a histogram of grouped row counts with the SQL `WIDTH_BUCKET`
# function.
#
# Batching is not supported for histograms, so please avoid calling this
# method on large tables.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/323949.
#
# relation    - relation that is grouped by `column` and counted.
# column      - column used for the grouping.
# buckets     - object answering `first` and `last` (lower and upper bucket
#               bound) and, for the default `bucket_size`, `size`.
# bucket_size - number of buckets; must be non-zero and must not exceed
#               MAX_BUCKET_SIZE.
#
# Returns a Hash with stringified bucket keys, or HISTOGRAM_FALLBACK when
# the parameters are rejected or the database reports an invalid statement.
#
# rubocop: disable CodeReuse/ActiveRecord
def histogram(relation, column, buckets:, bucket_size: buckets.size)
  lower_bound = buckets.first
  upper_bound = buckets.last

  # Validation is kept in a lambda so that no histogram specific helper
  # methods become part of this module's interface.
  arguments_ok = lambda do
    problem =
      if lower_bound == upper_bound
        'Lower bucket bound cannot equal to upper bucket bound'
      elsif bucket_size == 0
        'Bucket size cannot be zero'
      elsif bucket_size > MAX_BUCKET_SIZE
        "Bucket size #{bucket_size} exceeds the limit of #{MAX_BUCKET_SIZE}"
      end

    if problem
      # Report the problem through error tracking rather than raising it
      # directly from here; the caller then falls back.
      error = ArgumentError.new(problem)
      error.set_backtrace(caller)
      Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)

      false
    else
      true
    end
  end

  return HISTOGRAM_FALLBACK unless arguments_ok.call

  grouped_counts = relation.group(column).select(Arel.star.count.as('count_grouped'))
  cte = Gitlab::SQL::CTE.new(:count_cte, grouped_counts)
  cte_table = cte.table

  # For example, 9 segments gives 10 buckets
  segment_count = bucket_size - 1

  bucket_column = Arel::Nodes::NamedFunction.new(
    'WIDTH_BUCKET',
    [cte_table[:count_grouped], lower_bound, upper_bound, segment_count]
  ).as('buckets')

  histogram_query = cte_table
    .project(bucket_column, cte_table[:count])
    .group('buckets')
    .order('buckets')
    .with(cte.to_arel)

  # Buckets are unique, which is why the result rows can be turned into a Hash.
  rows = relation.connection.exec_query(histogram_query.to_sql).rows

  # Keys are converted to strings in Usage Ping JSON
  rows.to_h.stringify_keys
rescue ActiveRecord::StatementInvalid => e
  # Log the failing statement and hand back the fallback instead of raising.
  Gitlab::AppJsonLogger.error(
    event: 'histogram',
    relation: relation.table_name,
    operation: 'histogram',
    operation_args: [column, lower_bound, upper_bound, segment_count],
    query: histogram_query.to_sql,
    message: e.message
  )

  HISTOGRAM_FALLBACK
end
# rubocop: enable CodeReuse/ActiveRecord
def add(*args) def add(*args)
return -1 if args.any?(&:negative?) return -1 if args.any?(&:negative?)
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::Utils::UsageData do RSpec.describe Gitlab::Utils::UsageData do
include Database::DatabaseHelpers
describe '#count' do describe '#count' do
let(:relation) { double(:relation) } let(:relation) { double(:relation) }
...@@ -183,6 +185,102 @@ RSpec.describe Gitlab::Utils::UsageData do ...@@ -183,6 +185,102 @@ RSpec.describe Gitlab::Utils::UsageData do
end end
end end
# Specs for `histogram`: parameter validation, aggregation results and the
# fallback returned when the statement is canceled.
describe '#histogram' do
# Three project fixtures that the examples attach HTTP integrations to.
let_it_be(:projects) { create_list(:project, 3) }
let(:project1) { projects.first }
let(:project2) { projects.second }
let(:project3) { projects.third }
let(:fallback) { described_class::HISTOGRAM_FALLBACK }
# Only integrations returned by the `active` scope are passed to `histogram`.
let(:relation) { AlertManagement::HttpIntegration.active }
let(:column) { :project_id }
# Expects the block to report an exception of class `exception` to
# Gitlab::ErrorTracking and to raise it with the given `message` and an
# Array backtrace.
def expect_error(exception, message, &block)
# The expectation is set before the block runs and calls through to the
# original implementation.
expect(Gitlab::ErrorTracking)
.to receive(:track_and_raise_for_dev_exception)
.with(instance_of(exception))
.and_call_original
expect(&block).to raise_error(
an_instance_of(exception).and(
having_attributes(message: message, backtrace: be_kind_of(Array)))
)
end
it 'checks bucket bounds to be not equal' do
expect_error(ArgumentError, 'Lower bucket bound cannot equal to upper bucket bound') do
described_class.histogram(relation, column, buckets: 1..1)
end
end
it 'checks bucket_size being non-zero' do
expect_error(ArgumentError, 'Bucket size cannot be zero') do
described_class.histogram(relation, column, buckets: 1..2, bucket_size: 0)
end
end
# bucket_size defaults to buckets.size, which is 101 for 1..101.
it 'limits the amount of buckets without providing bucket_size argument' do
expect_error(ArgumentError, 'Bucket size 101 exceeds the limit of 100') do
described_class.histogram(relation, column, buckets: 1..101)
end
end
it 'limits the amount of buckets when providing bucket_size argument' do
expect_error(ArgumentError, 'Bucket size 101 exceeds the limit of 100') do
described_class.histogram(relation, column, buckets: 1..2, bucket_size: 101)
end
end
# No integrations are created here, so an empty Hash is expected.
it 'without data' do
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq({})
end
# Active integrations per project: project1 => 1, project2 => 2, project3 => 1.
# The :inactive records are presumably filtered out by the `active` scope.
# Expected result: two projects in bucket '1' and one project in bucket '2'.
it 'aggregates properly within bounds' do
create(:alert_management_http_integration, :active, project: project1)
create(:alert_management_http_integration, :inactive, project: project1)
create(:alert_management_http_integration, :active, project: project2)
create(:alert_management_http_integration, :active, project: project2)
create(:alert_management_http_integration, :inactive, project: project2)
create(:alert_management_http_integration, :active, project: project3)
create(:alert_management_http_integration, :inactive, project: project3)
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq('1' => 2, '2' => 1)
end
# A count of 3 lies above the upper bound of 1..2; the example expects it
# to be reported under key '2'.
it 'aggregates properly out of bounds' do
create_list(:alert_management_http_integration, 3, :active, project: project1)
histogram = described_class.histogram(relation, column, buckets: 1..2)
expect(histogram).to eq('2' => 1)
end
it 'returns fallback and logs canceled queries' do
create(:alert_management_http_integration, :active, project: project1)
# operation_args are [column, lower bound, upper bound, bucket_size - 1];
# buckets 1..100 gives a bucket_size of 100, hence 99.
# `relation` here still refers to the `let` defined at the top of the group.
expect(Gitlab::AppJsonLogger).to receive(:error).with(
event: 'histogram',
relation: relation.table_name,
operation: 'histogram',
operation_args: [column, 1, 100, 99],
query: kind_of(String),
message: /PG::QueryCanceled/
)
with_statement_timeout(0.001) do
# This local variable shadows the `let(:relation)` inside the block; the
# pg_sleep call is longer than the statement timeout set above.
relation = AlertManagement::HttpIntegration.select('pg_sleep(0.002)')
histogram = described_class.histogram(relation, column, buckets: 1..100)
expect(histogram).to eq(fallback)
end
end
end
describe '#add' do describe '#add' do
it 'adds given values' do it 'adds given values' do
expect(described_class.add(1, 3)).to eq(4) expect(described_class.add(1, 3)).to eq(4)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment