Commit fba29932 authored by alinamihaila

Add Basic Redis HLL Counter module

  - Add track_event method
  - Add unique_events method
  - Adjust existing UniqueVisits class and tests
  - Add known_events.yml file
  - Fix usage_data tests
  - Add missing event
  - Add tests for HLLRedisCounter
  - Use Date.current, add comments
parent bbd825bd
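For orientation, a minimal usage sketch of the counter this commit introduces, based on the examples documented in the new HLLRedisCounter module (event names must be registered in known_events.yml; user_id stands in for any entity id):

    # Track a unique event for an entity
    Gitlab::UsageDataCounters::HLLRedisCounter.track_event(user_id, 'g_compliance_dashboard')

    # Count unique entities for that event over the last 4 weeks
    Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(
      event_names: 'g_compliance_dashboard',
      start_date: 4.weeks.ago,
      end_date: Date.current
    )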
......@@ -3,35 +3,8 @@
module Gitlab
module Analytics
class UniqueVisits
ANALYTICS_IDS = Set[
'g_analytics_contribution',
'g_analytics_insights',
'g_analytics_issues',
'g_analytics_productivity',
'g_analytics_valuestream',
'p_analytics_pipelines',
'p_analytics_code_reviews',
'p_analytics_valuestream',
'p_analytics_insights',
'p_analytics_issues',
'p_analytics_repo',
'i_analytics_cohorts',
'i_analytics_dev_ops_score'
]
COMPLIANCE_IDS = Set[
'g_compliance_dashboard',
'g_compliance_audit_events',
'i_compliance_credential_inventory',
'i_compliance_audit_events'
].freeze
KEY_EXPIRY_LENGTH = 12.weeks
def track_visit(visitor_id, target_id, time = Time.zone.now)
target_key = key(target_id, time)
Gitlab::Redis::HLL.add(key: target_key, value: visitor_id, expiry: KEY_EXPIRY_LENGTH)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event(visitor_id, target_id, time)
end
# Returns number of unique visitors for given targets in given time frame
......@@ -40,40 +13,26 @@ module Gitlab
# @param [ActiveSupport::TimeWithZone] start_date start of time frame
# @param [ActiveSupport::TimeWithZone] end_date end of time frame
# @return [Integer] number of unique visitors
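# Example (illustrative):
#   unique_visits_for(targets: :analytics, start_date: 4.weeks.ago, end_date: Date.current)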
def unique_visits_for(targets:, start_week: 7.days.ago, weeks: 1)
def unique_visits_for(targets:, start_date: 7.days.ago, end_date: start_date + 1.week)
target_ids = if targets == :analytics
ANALYTICS_IDS
self.class.analytics_ids
elsif targets == :compliance
COMPLIANCE_IDS
self.class.compliance_ids
else
Array(targets)
end
timeframe_start = [start_week, weeks.weeks.ago].min
redis_keys = keys(targets: target_ids, timeframe_start: timeframe_start, weeks: weeks)
Gitlab::Redis::HLL.count(keys: redis_keys)
Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: target_ids, start_date: start_date, end_date: end_date)
end
private
def key(target_id, time)
target_ids = ANALYTICS_IDS + COMPLIANCE_IDS
raise "Invalid target id #{target_id}" unless target_ids.include?(target_id.to_s)
target_key = target_id.to_s.gsub('analytics', '{analytics}').gsub('compliance', '{compliance}')
year_week = time.strftime('%G-%V')
"#{target_key}-#{year_week}"
class << self
def analytics_ids
Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('analytics')
end
def keys(targets:, timeframe_start:, weeks:)
(0..(weeks - 1)).map do |week_increment|
targets.map { |target_id| key(target_id, timeframe_start + week_increment * 7.days) }
end.flatten
def compliance_ids
Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('compliance')
end
end
end
end
......
......@@ -584,21 +584,21 @@ module Gitlab
end
def analytics_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each_with_object({}) do |target_id, hash|
results = ::Gitlab::Analytics::UniqueVisits.analytics_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['analytics_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics) }
results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, weeks: 4) }
results['analytics_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ analytics_unique_visits: results }
end
def compliance_unique_visits_data
results = ::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each_with_object({}) do |target_id, hash|
results = ::Gitlab::Analytics::UniqueVisits.compliance_ids.each_with_object({}) do |target_id, hash|
hash[target_id] = redis_usage_data { unique_visit_service.unique_visits_for(targets: target_id) }
end
results['compliance_unique_visits_for_any_target'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance) }
results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, weeks: 4) }
results['compliance_unique_visits_for_any_target_monthly'] = redis_usage_data { unique_visit_service.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current) }
{ compliance_unique_visits: results }
end
......
# frozen_string_literal: true
module Gitlab
module UsageDataCounters
module HLLRedisCounter
DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH = 6.weeks
DEFAULT_DAILY_KEY_EXPIRY_LENGTH = 29.days
DEFAULT_REDIS_SLOT = ''.freeze
UnknownEvent = Class.new(StandardError)
UnknownAggregation = Class.new(StandardError)
KNOWN_EVENTS_PATH = 'lib/gitlab/usage_data_counters/known_events.yml'.freeze
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
#
# All events should be added to the known_events file: lib/gitlab/usage_data_counters/known_events.yml
#
# Event example:
#
# - name: g_compliance_dashboard # Unique event name
# redis_slot: compliance # Optional slot name; if not defined, the event name is used as the slot (used for totals)
# category: compliance # Group events in categories
# expiry: 29 # Optional expiration time in days; default value is 29 days for daily and 6 weeks for weekly aggregation
# aggregation: daily # Aggregation level, keys are stored daily or weekly
#
# Usage:
#
# * Track event: Gitlab::UsageDataCounters::HLLRedisCounter.track_event(user_id, 'g_compliance_dashboard')
# * Get unique counts per user: Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: 'g_compliance_dashboard', start_date: 28.days.ago, end_date: Date.current)
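# * Get unique counts for a whole category (illustrative; compliance events share a slot and aggregation level):
#     events = Gitlab::UsageDataCounters::HLLRedisCounter.events_for_category('compliance')
#     Gitlab::UsageDataCounters::HLLRedisCounter.unique_events(event_names: events, start_date: 4.weeks.ago, end_date: Date.current)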
class << self
def track_event(entity_id, event_name, time = Time.zone.now)
event = event_for(event_name)
raise UnknownEvent.new("Unknown event #{event_name}") unless event.present?
key = redis_key(event, time)
Gitlab::Redis::HLL.add(key: key, value: entity_id, expiry: expiry(event))
end
def unique_events(event_names:, start_date:, end_date:)
events = events_for(Array(event_names))
raise 'Events should be in same slot' unless events_in_same_slot?(events)
raise 'Events should be in same category' unless events_in_same_category?(events)
raise 'Events should have same aggregation level' unless events_same_aggregation?(events)
aggregation = events.first[:aggregation]
keys = keys_for_aggregation(aggregation, events: events, start_date: start_date, end_date: end_date)
Gitlab::Redis::HLL.count(keys: keys)
end
def events_for_category(category)
known_events.select { |event| event[:category] == category }.map { |event| event[:name] }
end
private
def keys_for_aggregation(aggregation, events:, start_date:, end_date:)
if aggregation.to_sym == :daily
daily_redis_keys(events: events, start_date: start_date, end_date: end_date)
else
weekly_redis_keys(events: events, start_date: start_date, end_date: end_date)
end
end
def known_events
@known_events ||= YAML.load_file(Rails.root.join(KNOWN_EVENTS_PATH)).map(&:with_indifferent_access)
end
def known_events_names
known_events.map { |event| event[:name] }
end
def events_in_same_slot?(events)
slot = events.first[:redis_slot]
events.all? { |event| event[:redis_slot] == slot }
end
def events_in_same_category?(events)
category = events.first[:category]
events.all? { |event| event[:category] == category }
end
def events_same_aggregation?(events)
aggregation = events.first[:aggregation]
events.all? { |event| event[:aggregation] == aggregation }
end
def expiry(event)
if event[:expiry].present?
event[:expiry]
else
event[:aggregation].to_sym == :daily ? DEFAULT_DAILY_KEY_EXPIRY_LENGTH : DEFAULT_WEEKLY_KEY_EXPIRY_LENGTH
end
end
def event_for(event_name)
known_events.find { |event| event[:name] == event_name }
end
def events_for(event_names)
known_events.select { |event| event_names.include?(event[:name]) }
end
def redis_slot(event)
event[:redis_slot] || DEFAULT_REDIS_SLOT
end
# Compose the key in order to store events daily or weekly
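# e.g. (illustrative) daily key:  "2020-153-g_{compliance}_dashboard"
#                     weekly key: "g_{compliance}_dashboard-2020-23"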
def redis_key(event, time)
raise UnknownEvent.new("Unknown event #{event[:name]}") unless known_events_names.include?(event[:name].to_s)
raise UnknownAggregation.new("Use :daily or :weekly aggregation") unless ALLOWED_AGGREGATIONS.include?(event[:aggregation].to_sym)
slot = redis_slot(event)
key = if slot.present?
event[:name].to_s.gsub(slot, "{#{slot}}")
else
"{#{event[:name]}}"
end
if event[:aggregation].to_sym == :daily
year_day = time.strftime('%G-%j')
"#{year_day}-#{key}"
else
year_week = time.strftime('%G-%V')
"#{key}-#{year_week}"
end
end
def daily_redis_keys(events:, start_date:, end_date:)
(start_date.to_date..end_date.to_date).map do |date|
events.map { |event| redis_key(event, date) }
end.flatten
end
def weekly_redis_keys(events:, start_date:, end_date:)
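# Note: the week count is derived from calendar week numbers (cweek),
# so start_date and end_date are expected to fall within the same year.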
weeks = end_date.to_date.cweek - start_date.to_date.cweek
weeks = 1 if weeks == 0
(0..(weeks - 1)).map do |week_increment|
events.map { |event| redis_key(event, start_date + week_increment * 7.days) }
end.flatten
end
end
end
end
end
---
# Compliance category
- name: g_compliance_dashboard
redis_slot: compliance
category: compliance
expiry: 84 # expiration time in days, equivalent to 12 weeks
aggregation: weekly
- name: g_compliance_audit_events
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
- name: i_compliance_audit_events
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
- name: i_compliance_credential_inventory
category: compliance
redis_slot: compliance
expiry: 84
aggregation: weekly
# Analytics category
- name: g_analytics_contribution
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_insights
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_issues
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_productivity
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: g_analytics_valuestream
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_pipelines
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_code_reviews
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_valuestream
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_insights
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_issues
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: p_analytics_repo
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: i_analytics_cohorts
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
- name: i_analytics_dev_ops_score
category: analytics
redis_slot: analytics
expiry: 84
aggregation: weekly
......@@ -4,6 +4,7 @@ UsageData/LargeTable:
- 'lib/gitlab/usage_data.rb'
- 'ee/lib/ee/gitlab/usage_data.rb'
NonRelatedClasses:
- :Date
- :Feature
- :Gitlab
- :Gitlab::AppLogger
......
......@@ -41,23 +41,23 @@ RSpec.describe Gitlab::Analytics::UniqueVisits, :clean_gitlab_redis_shared_state
expect(unique_visits.unique_visits_for(targets: target2_id)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target4_id)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target2_id, start_week: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target2_id, start_date: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: target3_id)).to eq(0)
expect(unique_visits.unique_visits_for(targets: target5_id, start_week: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: target5_id, start_date: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics, start_week: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :analytics, start_week: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 15.days.ago)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :analytics, weeks: 4)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :analytics, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance)).to eq(1)
expect(unique_visits.unique_visits_for(targets: :compliance, start_week: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_week: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 15.days.ago)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 30.days.ago)).to eq(0)
expect(unique_visits.unique_visits_for(targets: :compliance, weeks: 4)).to eq(2)
expect(unique_visits.unique_visits_for(targets: :compliance, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2)
end
it 'sets the keys in Redis to expire automatically after 12 weeks' do
......@@ -75,7 +75,7 @@ RSpec.describe Gitlab::Analytics::UniqueVisits, :clean_gitlab_redis_shared_state
expect do
unique_visits.track_visit(visitor1_id, invalid_target_id)
end.to raise_error("Invalid target id #{invalid_target_id}")
end.to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_shared_state do
let(:entity1) { 'dfb9d2d2-f56c-4c77-8aeb-6cddc4a1f857' }
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '34rfjuuy-ce56-sa35-ds34-dferddddfrf2' }
let(:weekly_event) { 'g_analytics_contribution' }
let(:daily_event) { 'g_search' }
let(:different_aggregation) { 'different_aggregation' }
let(:known_events) do
[
{ name: "g_analytics_contribution", redis_slot: "analytics", category: "analytics", expiry: 84, aggregation: "weekly" },
{ name: "g_analytics_valuestream", redis_slot: "analytics", category: "analytics", expiry: 84, aggregation: "daily" },
{ name: "g_analytics_productivity", redis_slot: "analytics", category: "productivity", expiry: 84, aggregation: "weekly" },
{ name: "g_compliance_dashboard", redis_slot: "compliance", category: "compliance", aggregation: "weekly" },
{ name: "g_search", category: "global", aggregation: "daily" },
{ name: "different_aggregation", category: "global", aggregation: "monthly" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
end
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week the test is run.
# Monday, June 1st 2020
reference_time = Time.utc(2020, 6, 1)
Timecop.freeze(reference_time) { example.run }
end
it 'raises an error if events are not in the same slot' do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_compliance_dashboard), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same slot')
end
it 'raises an error if events are not in the same category' do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_analytics_productivity), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should be in same category')
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.unique_events(event_names: %w(g_analytics_contribution g_analytics_valuestream), start_date: 4.weeks.ago, end_date: Date.current) }.to raise_error('Events should have same aggregation level')
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.track_event(entity1, different_aggregation, Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownAggregation)
end
it "raise error if metrics don't have same aggregation" do
expect { described_class.track_event(entity1, 'unknown', Date.current) } .to raise_error(Gitlab::UsageDataCounters::HLLRedisCounter::UnknownEvent)
end
context 'when tracking' do
before do
# Events in the current week; not counted because the week is not complete
described_class.track_event(entity1, weekly_event, Date.current)
described_class.track_event(entity2, weekly_event, Date.current)
# Events last week
described_class.track_event(entity1, weekly_event, 2.days.ago)
described_class.track_event(entity1, weekly_event, 2.days.ago)
# Events 2 weeks ago
described_class.track_event(entity1, weekly_event, 2.weeks.ago)
# Events 4 weeks ago
described_class.track_event(entity3, weekly_event, 4.weeks.ago)
described_class.track_event(entity4, weekly_event, 29.days.ago)
# events in current day should be counted in daily aggregation
described_class.track_event(entity1, daily_event, Date.current)
described_class.track_event(entity2, daily_event, Date.current)
# Events last week
described_class.track_event(entity1, daily_event, 2.days.ago)
described_class.track_event(entity1, daily_event, 2.days.ago)
# Events 2 weeks ago
described_class.track_event(entity1, daily_event, 14.days.ago)
# Events 4 weeks ago
described_class.track_event(entity3, daily_event, 28.days.ago)
described_class.track_event(entity4, daily_event, 29.days.ago)
end
it 'gets correct data' do
# data for last complete week
expect(described_class.unique_events(event_names: weekly_event, start_date: 1.week.ago, end_date: Date.current)).to eq(1)
# data for last 4 complete weeks
expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: Date.current)).to eq(2)
# data for week 4 weeks ago
expect(described_class.unique_events(event_names: weekly_event, start_date: 4.weeks.ago, end_date: 3.weeks.ago)).to eq(1)
# daily aggregation
expect(described_class.unique_events(event_names: daily_event, start_date: 7.days.ago, end_date: Date.current)).to eq(2)
expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: Date.current)).to eq(3)
expect(described_class.unique_events(event_names: daily_event, start_date: 28.days.ago, end_date: 21.days.ago)).to eq(1)
end
end
end
......@@ -942,12 +942,12 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
subject { described_class.analytics_unique_visits_data }
it 'returns the number of unique visits to pages with analytics features' do
::Gitlab::Analytics::UniqueVisits::ANALYTICS_IDS.each do |target_id|
::Gitlab::Analytics::UniqueVisits.analytics_ids.each do |target_id|
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: target_id).and_return(123)
end
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics).and_return(543)
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics, weeks: 4).and_return(987)
expect_any_instance_of(::Gitlab::Analytics::UniqueVisits).to receive(:unique_visits_for).with(targets: :analytics, start_date: 4.weeks.ago.to_date, end_date: Date.current).and_return(987)
expect(subject).to eq({
analytics_unique_visits: {
......@@ -978,13 +978,13 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
described_class.clear_memoization(:unique_visit_service)
allow_next_instance_of(::Gitlab::Analytics::UniqueVisits) do |instance|
::Gitlab::Analytics::UniqueVisits::COMPLIANCE_IDS.each do |target_id|
::Gitlab::Analytics::UniqueVisits.compliance_ids.each do |target_id|
allow(instance).to receive(:unique_visits_for).with(targets: target_id).and_return(123)
end
allow(instance).to receive(:unique_visits_for).with(targets: :compliance).and_return(543)
allow(instance).to receive(:unique_visits_for).with(targets: :compliance, weeks: 4).and_return(987)
allow(instance).to receive(:unique_visits_for).with(targets: :compliance, start_date: 4.weeks.ago.to_date, end_date: Date.current).and_return(987)
end
end
......