Commit 4eaed32a authored by Vitali Tatarintev's avatar Vitali Tatarintev

Merge branch '235697-add-basic-redis-hll-class-2' into 'master'

Add Basic Redis HLL module

See merge request gitlab-org/gitlab!39689
parents 779fd7e1 c9986945
......@@ -3,77 +3,36 @@
module Gitlab
module Analytics
class UniqueVisits
ANALYTICS_IDS = Set[
'g_analytics_contribution',
'g_analytics_insights',
'g_analytics_issues',
'g_analytics_productivity',
'g_analytics_valuestream',
'p_analytics_pipelines',
'p_analytics_code_reviews',
'p_analytics_valuestream',
'p_analytics_insights',
'p_analytics_issues',
'p_analytics_repo',
'i_analytics_cohorts',
'i_analytics_dev_ops_score'
]
COMPLIANCE_IDS = Set[
'g_compliance_dashboard',
'g_compliance_audit_events',
'i_compliance_credential_inventory',
'i_compliance_audit_events'
].freeze
KEY_EXPIRY_LENGTH = 12.weeks
def track_visit(visitor_id, target_id, time = Time.zone.now)
target_key = key(target_id, time)
Gitlab::Redis::HLL.add(key: target_key, value: visitor_id, expiry: KEY_EXPIRY_LENGTH)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(visitor_id, target_id, time)
end
# Returns number of unique visitors for given targets in given time frame
#
# @param [String, Array[<String>]] targets ids of targets to count visits on. Special case for :any
# @param [ActiveSupport::TimeWithZone] start_week start of time frame
# @param [Integer] weeks time frame length in weeks
# @param [ActiveSupport::TimeWithZone] start_date start of time frame
# @param [ActiveSupport::TimeWithZone] end_date end of time frame
# @return [Integer] number of unique visitors
def unique_visits_for(targets:, start_week: 7.days.ago, weeks: 1)
def unique_visits_for(targets:, start_date: 7.days.ago, end_date: start_date + 1.week)
target_ids = if targets == :analytics
ANALYTICS_IDS
self.class.analytics_ids
elsif targets == :compliance
COMPLIANCE_IDS
self.class.compliance_ids
else
Array(targets)
end
timeframe_start = [start_week, weeks.weeks.ago].min
redis_keys = keys(targets: target_ids, timeframe_start: timeframe_start, weeks: weeks)
Gitlab::Redis::HLL.count(keys: redis_keys)
Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: target_ids, start_date: start_date, end_date: end_date)
end
private
def key(target_id, time)
target_ids = ANALYTICS_IDS + COMPLIANCE_IDS
raise "Invalid target id #{target_id}" unless target_ids.include?(target_id.to_s)
target_key = target_id.to_s.gsub('analytics', '{analytics}').gsub('compliance', '{compliance}')
year_week = time.strftime('%G-%V')
"#{target_key}-#{year_week}"
end
class << self
def analytics_ids
Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('analytics')
end
def keys(targets:, timeframe_start:, weeks:)
(0..(weeks - 1)).map do |week_increment|
targets.map { |target_id| key(target_id, timeframe_start + week_increment * 7.days) }
end.flatten
def compliance_ids
Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('compliance')
end
end
end
end
......
......@@ -584,21 +584,21 @@ module Gitlab
end
def analytics_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each_with_object({}) do |target_id, hash|
results = ::Gitlab::Analytics::UniqueVisits.analytics_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['analytics_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics) }
results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, weeks: 4) }
results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ analytics_unique_visits: results }
end
def compliance_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each_with_object({}) do |target_id, hash|
results = ::Gitlab::Analytics::UniqueVisits.compliance_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['compliance_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance) }
results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, weeks: 4) }
results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ compliance_unique_visits: results }
end
......
# frozen_string_literal: true
module Gitlab
module UsageDataCounters
module HLLRedisCounter
DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
DEFAULT_REDIS_SLOT = ''.freeze
UnknownEvent = Class.new(StandardError)
UnknownAggregation = Class.new(StandardError)
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
#
# All events should be added to know_events file lib/gitlab/usage_data_counters/known_events.yml
#
# Event example:
#
# - name: g_compliance_dashboard # Unique event name
# redis_slot: compliance # Optional slot name, if not defined it will use name as a slot, used for totals
# category: compliance # Group events in categories
# expiry: 29 # Optional expiration time in days, default value 29 days for daily and 6.weeks for weekly
# aggregation: daily # Aggregation level, keys are stored daily or weekly
#
# Usage:
#
# * Track event: Gitlab::UsageDataCounters::HLLRedisCounter.track_event(user_id, 'g_compliance_dashboard')
# * Get unique counts per user: Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_dashboard', start_date: 28.days.ago, end_date: Date.current)
class << self
def track_event(entity_id, event_name, time = Time.zone.now)
event = event_for(event_name)
raise UnknownEvent.new("Unknown event #{event_name}") unless event.present?
Gitlab::Redis::HLL.add(key: redis_key(event, time), value: entity_id, expiry: expiry(event))
end
def unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names))
raise 'Events should be in same slot' unless events_in_same_slot?(events)
raise 'Events should be in same category' unless events_in_same_category?(events)
raise 'Events should have same aggregation level' unless events_same_aggregation?(events)
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
Gitlab::Redis::HLL.count(keys: keys)
end
def events_for_category(category)
known_events.select { |event| event[:category] == category }.map { |event| event[:name] }
end
private
def keys_for_aggregation(aggregation, events:, start_date:, end_date:)
if aggregation.to_sym == :daily
daily_redis_keys(events: events, start_date: start_date, end_date: end_date)
else
weekly_redis_keys(events: events, start_date: start_date, end_date: end_date)
end
end
def known_events
@known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access)
end
def known_events_names
known_events.map { |event| event[:name] }
end
def events_in_same_slot?(events)
slot = events.first[:redis_slot]
events.all? { |event| event[:redis_slot] == slot }
end
def events_in_same_category?(events)
category = events.first[:category]
events.all? { |event| event[:category] == category }
end
def events_same_aggregation?(events)
aggregation = events.first[:aggregation]
events.all? { |event| event[:aggregation] == aggregation }
end
def expiry(event)
return event[:expiry] if event[:expiry].present?
event[:aggregation].to_sym == :daily ? DEFAULT_DAILY_KEY_EXPIRY_LENGTH : DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH
end
def event_for(event_name)
known_events.find { |event| event[:name] == event_name }
end
def events_for(event_names)
known_events.select { |event| event_names.include?(event[:name]) }
end
def redis_slot(event)
event[:redis_slot] || DEFAULT_REDIS_SLOT
end
# Compose the key in order to store events daily or weekly
def redis_key(event, time)
raise UnknownEvent.new("Unknown event #{event[:name]}") unless known_events_names.include?(event[:name].to_s)
raise UnknownAggregation.new("Use :daily or :weekly aggregation") unless ALLOWED_AGGREGATIONS.include?(event[:aggregation].to_sym)
slot = redis_slot(event)
key = if slot.present?
event[:name].to_s.gsub(slot, "{#{slot}}")
else
"{#{event[:name]}}"
end
if event[:aggregation].to_sym == :daily
year_day = time.strftime('%G-%j')
"#{year_day}-#{key}"
else
year_week = time.strftime('%G-%V')
"#{key}-#{year_week}"
end
end
def daily_redis_keys(events:, start_date:, end_date:)
(start_date.to_date..end_date.to_date).map do |date|
events.map { |event| redis_key(event, date) }
end.flatten
end
def weekly_redis_keys(events:, start_date:, end_date:)
weeks = end_date.to_date.cweek - start_date.to_date.cweek
weeks = 1 if weeks == 0
(0..(weeks - 1)).map do |week_increment|
events.map { |event| redis_key(event, start_date + week_increment * 7.days) }
end.flatten
end
end
end
end
end
---
# Compliance category
- name: g_compliance_dashboard
redis_slot: compliance
category: compliance
expiry: 84 # expiration time in days, equivalent to 12 weeks
aggregation: weekly
- name: g_compliance_audit_events
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
- name: i_compliance_audit_events
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
- name: i_compliance_credential_inventory
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
# Analytics category
- name: g_analytics_contribution
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_insights
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_issues
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_productivity
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_valuestream
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_pipelines
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_code_reviews
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_valuestream
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_insights
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_issues
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_repo
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: i_analytics_cohorts
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: i_analytics_dev_ops_score
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
......@@ -4,6 +4,7 @@ UsageData/LargeTable:
- 'lib/gitlab/usage_data.rb'
- 'ee/lib/ee/gitlab/usage_data.rb'
NonRelatedClasses:
- :Date
- :Feature
- :Gitlab
- :Gitlab::AppLogger
......
......@@ -41,23 +41,23 @@ RSpec.describe Gitlab::Analytics::UniqueVisits, :clean_gitlab_redis_shared_state
expect(unique_visits.unique_visits_for(targets: target2_id)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target4_id)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target2_id, start_week: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target2_id, start_date: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target3_id)).to eq(0)
expect(unique_visits.unique_visits_for(targets: target5_id, start_week: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: target5_id, start_date: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics, start_week: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :analytics, start_week: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :analytics, weeks: 4)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :compliance, start_week: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_week: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :compliance, weeks: 4)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2)
end
it 'sets the keys in Redis to expire automatically after 12 weeks' do
......@@ -75,7 +75,7 @@ RSpec.describe Gitlab::Analytics::UniqueVisits, :clean_gitlab_redis_shared_state
expect do
unique_visits.track_visit(visitor1_id, invalid_target_id)
end.to raise_error("Invalid target id #{invalid_target_id}")
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_shared_state do
let(:entity1) { 'dfb9d2d2-f56c-4c77-8aeb-6cddc4a1f857' }
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
let(:weekly_event) { 'g_analytics_contribution' }
let(:daily_event) { 'g_search' }
let(:different_aggregation) { 'different_aggregation' }
let(:known_events) do
[
{ name: "g_analytics_contribution", redis_slot: "analytics", category: "analytics", expiry: 84, aggregation: "weekly" },
{ name: "g_analytics_valuestream", redis_slot: "analytics", category: "analytics", expiry: 84, aggregation: "daily" },
{ name: "g_analytics_productivity", redis_slot: "analytics", category: "productivity", expiry: 84, aggregation: "weekly" },
{ name: "g_compliance_dashboard", redis_slot: "compliance", category: "compliance", aggregation: "weekly" },
{ name: "g_search", category: "global", aggregation: "daily" },
{ name: "different_aggregation", category: "global", aggregation: "monthly" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
end
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 6th of June
reference_time = Time.utc(2020, 6, 1)
Timecop.freeze(reference_time) { example.run }
end
describe '.track_event' do
it "raise error if metrics don't have same aggregation" do
expect { described_class.track_event(entity1, different_aggregation, Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
end
it 'raise error if metrics of unknown aggregation' do
expect { described_class.track_event(entity1, 'unknown', Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
end
describe '.unique_events' do
before do
# events in current week, should not be counted as week is not complete
described_class.track_event(entity1, weekly_event, Date.current)
described_class.track_event(entity2, weekly_event, Date.current)
# Events last week
described_class.track_event(entity1, weekly_event, 2.days.ago)
described_class.track_event(entity1, weekly_event, 2.days.ago)
# Events 2 weeks ago
described_class.track_event(entity1, weekly_event, 2.weeks.ago)
# Events 4 weeks ago
described_class.track_event(entity3, weekly_event, 4.weeks.ago)
described_class.track_event(entity4, weekly_event, 29.days.ago)
# events in current day should be counted in daily aggregation
described_class.track_event(entity1, daily_event, Date.current)
described_class.track_event(entity2, daily_event, Date.current)
# Events last week
described_class.track_event(entity1, daily_event, 2.days.ago)
described_class.track_event(entity1, daily_event, 2.days.ago)
# Events 2 weeks ago
described_class.track_event(entity1, daily_event, 14.days.ago)
# Events 4 weeks ago
described_class.track_event(entity3, daily_event, 28.days.ago)
described_class.track_event(entity4, daily_event, 29.days.ago)
end
it 'raise error if metrics are not in the same slot' do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_compliance_dashboard), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same slot')
end
it 'raise error if metrics are not in the same category' do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_analytics_productivity), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same category')
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_analytics_valuestream), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should have same aggregation level')
end
context 'when data for the last complete week' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 1.week.ago, end_date: Date.current)).to eq(1) }
end
context 'when data for the last 4 complete weeks' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2) }
end
context 'when data for the week 4 weeks ago' do
it { expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1) }
end
context 'when using daily aggregation' do
it { expect(described_class.unique_events(event_names: daily_event, start_date: 7.days.ago, end_date: Date.current)).to eq(2) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: Date.current)).to eq(3) }
it { expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1) }
end
end
end
......@@ -942,12 +942,12 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
subject { described_class.analytics_unique_visits_data }
it 'returns the number of unique visits to pages with analytics features' do
::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each do |target_id|
::Gitlab::Analytics::UniqueVisits.analytics_ids.each do |target_id|
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: target_id).and_return(123)
end
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics).and_return(543)
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics, weeks: 4).and_return(987)
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current).and_return(987)
expect(subject).to eq({
analytics_unique_visits: {
......@@ -978,13 +978,13 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
described_class.clear_memoization(:unique_visit_service)
allow_next_instance_of(::Gitlab::Analytics::UniqueVisits) do |instance|
::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each do |target_id|
::Gitlab::Analytics::UniqueVisits.compliance_ids.each do |target_id|
allow(instance).to receive(:unique_visits_for).with(targets: target_id).and_return(123)
end
allow(instance).to receive(:unique_visits_for).with(targets: :compliance).and_return(543)
allow(instance).to receive(:unique_visits_for).with(targets: :compliance, weeks: 4).and_return(987)
allow(instance).to receive(:unique_visits_for).with(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current).and_return(987)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment