Commit 0674ef65 authored by GitLab Bot's avatar GitLab Bot

Automatic merge of gitlab-org/gitlab master

parents eb06a776 78da8949
......@@ -477,7 +477,7 @@ gem 'flipper', '~> 0.17.1'
gem 'flipper-active_record', '~> 0.17.1'
gem 'flipper-active_support_cache_store', '~> 0.17.1'
gem 'unleash', '~> 0.1.5'
gem 'gitlab-experiment', '~> 0.4.5'
gem 'gitlab-experiment', '~> 0.4.8'
# Structured logging
gem 'lograge', '~> 0.5'
......
......@@ -424,7 +424,7 @@ GEM
github-markup (1.7.0)
gitlab-chronic (0.10.5)
numerizer (~> 0.2)
gitlab-experiment (0.4.5)
gitlab-experiment (0.4.8)
activesupport (>= 3.0)
scientist (~> 1.5, >= 1.5.0)
gitlab-fog-azure-rm (1.0.0)
......@@ -1364,7 +1364,7 @@ DEPENDENCIES
gitaly (~> 13.8.0.pre.rc3)
github-markup (~> 1.7.0)
gitlab-chronic (~> 0.10.5)
gitlab-experiment (~> 0.4.5)
gitlab-experiment (~> 0.4.8)
gitlab-fog-azure-rm (~> 1.0)
gitlab-labkit (= 0.14.0)
gitlab-license (~> 1.0)
......
......@@ -59,8 +59,7 @@ class Projects::IssuesController < Projects::ApplicationController
around_action :allow_gitaly_ref_name_caching, only: [:discussions]
before_action :run_null_hypothesis_experiment,
only: [:index, :new, :create],
if: -> { Feature.enabled?(:gitlab_experiments) }
only: [:index, :new, :create]
respond_to :html
......
......@@ -67,6 +67,9 @@ class SearchController < ApplicationController
end
# rubocop: enable CodeReuse/ActiveRecord
# Action behind the `search/opensearch` route. The body is intentionally empty.
# NOTE(review): presumably relies on Rails implicit rendering of the OpenSearch
# description template for the requested format — confirm the view exists.
def opensearch
end
private
# overridden in EE
......
# frozen_string_literal: true
class ApplicationExperiment < Gitlab::Experiment
# Whether this experiment is currently allowed to run.
#
# Returns false when no feature flag definition exists for the experiment
# name, or when the environment does not allow experiments; otherwise returns
# true unless the experiment's feature flag state is :off.
def enabled?
  # there has to be a feature flag yaml file
  flag_definition = Feature::Definition.get(name)
  return false if flag_definition.nil?

  # we have to be in an environment that allows experiments
  return false unless Gitlab.dev_env_or_com?

  # any flag state other than :off counts as enabled
  flag = Feature.get(name) # rubocop:disable Gitlab/AvoidFeatureGet
  flag.state != :off
end
# Publishes the experiment assignment: tracks an :assignment event and then
# pushes the experiment signature, keyed by experiment name, to the client.
# The result argument is ignored.
def publish(_result)
  # track that we've assigned a variant for this context
  track(:assignment)

  # push to client
  gon = Gon.global
  gon.push({ experiment: { name => signature } }, true)
end
def track(action, **event_args)
return if excluded? # no events for opted out actors or excluded subjects
return unless should_track? # no events for opted out actors or excluded subjects
Gitlab::Tracking.event(name, action.to_s, **event_args.merge(
context: (event_args[:context] || []) << SnowplowTracker::SelfDescribingJson.new(
......
......@@ -79,6 +79,9 @@
= favicon_link_tag 'touch-icon-ipad-retina.png', rel: 'apple-touch-icon', sizes: '152x152'
%link{ rel: 'mask-icon', href: image_path('logo.svg'), color: 'rgb(226, 67, 41)' }
-# OpenSearch
%link{ href: search_opensearch_path(format: :xml), rel: 'search', title: 'Search GitLab', type: 'application/opensearchdescription+xml' }
-# Windows 8 pinned site tile
%meta{ name: 'msapplication-TileImage', content: image_path('msapplication-tile.png') }
%meta{ name: 'msapplication-TileColor', content: '#30353E' }
......
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
xmlns:moz="http://www.mozilla.org/2006/browser/search/">
<ShortName>GitLab</ShortName>
<Description>Search GitLab</Description>
<InputEncoding>UTF-8</InputEncoding>
<Image width="16" height="16" type="image/x-icon"><%= root_url %>favicon.ico</Image>
<Url type="text/html" method="get" template="<%= search_url %>?search={searchTerms}"/>
<moz:SearchForm><%= search_url %></moz:SearchForm>
</OpenSearchDescription>
\ No newline at end of file
---
title: Escaped markdown should not be interpreted as shortcuts
merge_request: 45922
author:
type: changed
---
title: Expose if user is a bot in the REST api
merge_request: 52003
author:
type: added
---
title: Add OpenSearch support
merge_request: 52583
author:
type: added
---
name: gitlab_experiments
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45840
rollout_issue_url:
milestone: '13.7'
name: honor_escaped_markdown
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/45922
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/300531
milestone: '13.9'
type: development
group: group::adoption
group: 'group::project management'
default_enabled: false
......@@ -60,9 +60,10 @@ Rails.application.routes.draw do
end
# Search
get 'search' => 'search#show'
get 'search' => 'search#show', as: :search
get 'search/autocomplete' => 'search#autocomplete', as: :search_autocomplete
get 'search/count' => 'search#count', as: :search_count
get 'search/opensearch' => 'search#opensearch', as: :search_opensearch
# JSON Web Token
get 'jwt/auth' => 'jwt#auth'
......
......@@ -264,6 +264,7 @@ Parameters:
"created_at": "2012-05-23T08:00:58Z",
"bio": "",
"bio_html": "",
"bot": false,
"location": null,
"public_email": "john@example.com",
"skype": "",
......
......@@ -23,107 +23,28 @@ Rewriting repository history is a destructive operation. Make sure to back up yo
you begin. The best way to back up a repository is to
[export the project](../settings/import_export.md#exporting-a-project-and-its-data).
NOTE:
Git LFS files can only be removed by an Administrator using a
[Rake task](../../../raketasks/cleanup.md). Removal of this limitation
[is planned](https://gitlab.com/gitlab-org/gitlab/-/issues/223621).
## Purge files from repository history
## Purge files from repository history and storage
To reduce the size of your repository in GitLab, you must remove references to large files from branches, tags, and
To reduce the size of your repository in GitLab, you must remove references to large files from branches, tags, *and*
other internal references (refs) that are automatically created by GitLab. These refs include:
- `refs/merge-requests/*` for merge requests.
- `refs/pipelines/*` for
[pipelines](../../../ci/troubleshooting.md#fatal-reference-is-not-a-tree-error).
- `refs/environments/*` for environments.
- `refs/keep-around/*` are created as hidden refs to prevent commits referenced in the database from being removed.
Git doesn't usually download these refs, to make cloning and fetching faster, but we can use the `--mirror` option to
download all the advertised refs.
1. [Install `git filter-repo`](https://github.com/newren/git-filter-repo/blob/main/INSTALL.md)
using a supported package manager or from source.
1. Clone a fresh copy of the repository using `--bare` and `--mirror`:
```shell
git clone --bare --mirror https://gitlab.example.com/my/project.git
```
1. Using `git filter-repo`, purge any files from the history of your repository.
To purge large files, the `--strip-blobs-bigger-than` option can be used:
```shell
git filter-repo --strip-blobs-bigger-than 10M
```
To purge large files stored using Git LFS, the `--blob-callback` option can
be used. The example below uses the callback to read the file size from the
Git LFS pointer, and removes files larger than 10MB.
```shell
git filter-repo --blob-callback '
if blob.data.startswith(b"version https://git-lfs.github.com/spec/v1"):
size_in_bytes = int.from_bytes(blob.data[124:], byteorder="big")
if size_in_bytes > 10*1000:
blob.skip()
'
```
To purge specific large files by path, the `--path` and `--invert-paths` options can be combined:
```shell
git filter-repo --path path/to/big/file.m4v --invert-paths
```
See the
[`git filter-repo` documentation](https://htmlpreview.github.io/?https://github.com/newren/git-filter-repo/blob/docs/html/git-filter-repo.html#EXAMPLES)
for more examples and the complete documentation.
1. Force push your changes to overwrite all branches on GitLab:
```shell
git push origin --force 'refs/heads/*'
```
[Protected branches](../protected_branches.md) cause this to fail. To proceed, you must
remove branch protection, push, and then re-enable protected branches.
1. To remove large files from tagged releases, force push your changes to all tags on GitLab:
```shell
git push origin --force 'refs/tags/*'
```
[Protected tags](../protected_tags.md) cause this to fail. To proceed, you must remove tag
protection, push, and then re-enable protected tags.
1. To prevent dead links to commits that no longer exist, push the `refs/replace` created by `git filter-repo`.
```shell
git push origin --force 'refs/replace/*'
```
Refer to the Git [`replace`](https://git-scm.com/book/en/v2/Git-Tools-Replace) documentation for information on how this works.
1. Run a [repository cleanup](#repository-cleanup).
NOTE:
Project statistics are cached for performance. You may need to wait 5-10 minutes
to see a reduction in storage utilization.
## Purge files from GitLab storage
In addition to the refs mentioned above, GitLab also creates hidden `refs/keep-around/*` to prevent commits being deleted. Hidden refs are not advertised, which means we can't download them using Git, but these refs are included in a project export.
These refs are not automatically downloaded and hidden refs are not advertised, but we can remove these refs using a project export.
To purge files from GitLab storage:
To purge files from a GitLab repository:
1. [Install `git filter-repo`](https://github.com/newren/git-filter-repo/blob/main/INSTALL.md)
using a supported package manager or from source.
1. Generate a fresh [export from the
project](../settings/import_export.md#exporting-a-project-and-its-data) and download it.
This project export contains a backup copy of your repository *and* refs
we can use to purge files from your repository.
1. Decompress the backup using `tar`:
......@@ -134,7 +55,7 @@ To purge files from GitLab storage:
This contains a `project.bundle` file, which was created by
[`git bundle`](https://git-scm.com/docs/git-bundle).
1. Clone a fresh copy of the repository from the bundle:
1. Clone a fresh copy of the repository from the bundle using `--bare` and `--mirror` options:
```shell
git clone --bare --mirror /path/to/project.bundle
......@@ -149,7 +70,7 @@ To purge files from GitLab storage:
the previous run. You need this file from **every** run. Do the next step every time you run
`git filter-repo`.
To purge all large files, the `--strip-blobs-bigger-than` option can be used:
To purge all files larger than 10M, the `--strip-blobs-bigger-than` option can be used:
```shell
git filter-repo --strip-blobs-bigger-than 10M
......@@ -236,14 +157,14 @@ This:
- Runs `git gc --prune=30.minutes.ago` against the repository to remove unreferenced objects. Repacking your repository temporarily
causes the size of your repository to increase significantly, because the old pack files are not removed until the
new pack files have been created.
- Unlinks any unused LFS objects currently attached to your project, freeing up storage space.
- Unlinks any unused LFS objects attached to your project, freeing up storage space.
- Recalculates the size of your repository on disk.
GitLab sends an email notification with the recalculated repository size after the cleanup has completed.
If the repository size does not decrease, this may be caused by loose objects
being kept around because they were referenced in a Git operation that happened
in the last 30 minutes. Try re-running these steps once the repository has been
in the last 30 minutes. Try re-running these steps after the repository has been
dormant for at least 30 minutes.
When using repository cleanup, note:
......
......@@ -6,6 +6,7 @@ module API
include UsersHelper
expose :created_at, if: ->(user, opts) { Ability.allowed?(opts[:current_user], :read_user_profile, user) }
expose :bio, :bio_html, :location, :public_email, :skype, :linkedin, :twitter, :website_url, :organization, :job_title
expose :bot?, as: :bot
expose :work_information do |user|
work_information(user)
end
......
# frozen_string_literal: true
module Banzai
  module Filter
    # Counterpart of MarkdownPreEscapeFilter: after Markdown rendering, turns
    # the literal marker sequences that the pre-escape filter inserted into
    # `span` tags, and cleans up places where a marker or span must not remain.
    #
    # Does nothing unless the pre-escape filter flagged
    # `result[:escaped_literals]`.
    class MarkdownPostEscapeFilter < HTML::Pipeline::Filter
      LITERAL_KEYWORD = MarkdownPreEscapeFilter::LITERAL_KEYWORD
      LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-(.*?)-#{LITERAL_KEYWORD}}.freeze
      NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze
      SPAN_REGEX = %r{<span>(.*?)</span>}.freeze

      def call
        return doc unless result[:escaped_literals]

        # Literals whose backslash survived rendering (for example inside code
        # blocks) were never escape-processed: strip the marker, keep the text.
        html.gsub!(NOT_LITERAL_REGEX, '\1')

        # Every remaining marked literal is wrapped in a `span` so that
        # reference processing is short-circuited for it.
        html.gsub!(LITERAL_REGEX, '<span>\1</span>')

        # Literals are also converted inside link and code attributes, where a
        # surrounding `span` must be removed again.
        # Note: this could have been done in the renderer,
        # Banzai::Renderer::CommonMark::HTML. However, we eventually want to use
        # the built-in compiled renderer, rather than the ruby version, for
        # speed. So the work is done here.
        doc.css('a').each { |link| unwrap_spans(link, 'href', 'title') }
        doc.css('code').each { |code| unwrap_spans(code, 'lang') }

        doc
      end

      private

      # Replaces `<span>…</span>` with its inner text in each of the named
      # attributes that is present on the node; absent attributes are skipped.
      def unwrap_spans(node, *attribute_names)
        attribute_names.each do |attribute_name|
          attribute = node.attributes[attribute_name]
          next unless attribute

          attribute.value = attribute.value.gsub(SPAN_REGEX, '\1')
        end
      end
    end
  end
end
# frozen_string_literal: true
module Banzai
  module Filter
    # Users must be able to short-circuit our reference shortcuts (such as
    # # or !) by backslash-escaping them, like \#. CommonMark supports
    # backslash escapes, but once rendered it keeps no record of which
    # characters were literals. To preserve that knowledge, every backslash
    # escaped ASCII punctuation character is surrounded with a custom marker
    # sequence before rendering. CommonMark still handles the escape itself,
    # and the marker tells us afterwards that the character was a literal.
    #
    # The character is surrounded rather than prefixed because CommonMark may
    # convert it into an entity, and we would not know how many characters
    # that produced. The whole literal eventually has to end up inside a
    # `span` tag, which short-circuits reference processing.
    #
    # A custom HTML tag can't be used as the marker, since the text being
    # surrounded may sit inside an href, and CommonMark would then fail to
    # parse the link properly. So `cmliteral-` and `-cmliteral` are used.
    #
    # https://spec.commonmark.org/0.29/#backslash-escapes
    #
    # This filter only adds the markers; MarkdownPostEscapeFilter converts
    # them into span tags.
    class MarkdownPreEscapeFilter < HTML::Pipeline::TextFilter
      # A backslash followed by one ASCII punctuation character.
      # Note: inside the character class `,-.` is a range, which is what makes
      # the hyphen itself match.
      ASCII_PUNCTUATION = %r{([\\][!"#$%&'()*+,-./:;<=>?@\[\\\]^_`{|}~])}.freeze
      LITERAL_KEYWORD = 'cmliteral'

      def call
        return @text unless escaping_enabled?

        @text.gsub(ASCII_PUNCTUATION) do |escaped_char|
          # The majority of markdown does not have literals. The flag is only
          # set when one is found, so the post filter can be bypassed otherwise.
          result[:escaped_literals] = true

          [LITERAL_KEYWORD, escaped_char, LITERAL_KEYWORD].join('-')
        end
      end

      private

      # Checks the :honor_escaped_markdown flag against the context's group,
      # falling back to the group of the context's project.
      def escaping_enabled?
        Feature.enabled?(:honor_escaped_markdown, context[:group] || context[:project]&.group)
      end
    end
  end
end
......@@ -5,7 +5,9 @@ module Banzai
class PlainMarkdownPipeline < BasePipeline
def self.filters
FilterArray[
Filter::MarkdownFilter
Filter::MarkdownPreEscapeFilter,
Filter::MarkdownFilter,
Filter::MarkdownPostEscapeFilter
]
end
end
......
# frozen_string_literal: true
module Gitlab
  module Usage
    module Metrics
      module Aggregates
        UNION_OF_AGGREGATED_METRICS = 'OR'
        INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
        ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
        AGGREGATED_METRICS_PATH = Rails.root.join('lib/gitlab/usage_data_counters/aggregated_metrics/*.yml')
        UnknownAggregationOperator = Class.new(StandardError)

        # Computes aggregated usage metrics (unions and intersections of unique
        # event counts) from the aggregation definitions found in the YAML files
        # matched by AGGREGATED_METRICS_PATH.
        #
        # Counting and the weekly/monthly time ranges are delegated to
        # Gitlab::UsageDataCounters::HLLRedisCounter.
        class Aggregate
          delegate :calculate_events_union,
                   :weekly_time_range,
                   :monthly_time_range,
                   to: Gitlab::UsageDataCounters::HLLRedisCounter

          def initialize
            @aggregated_metrics = load_events(AGGREGATED_METRICS_PATH)
          end

          # Returns a hash of aggregation name => count for the monthly time range.
          def monthly_data
            aggregated_metrics_data(**monthly_time_range)
          end

          # Returns a hash of aggregation name => count for the weekly time range.
          def weekly_data
            aggregated_metrics_data(**weekly_time_range)
          end

          private

          attr_accessor :aggregated_metrics

          # Builds the name => count hash for the given time range. Aggregations
          # that declare a feature flag are skipped while that flag is disabled.
          def aggregated_metrics_data(start_date:, end_date:)
            aggregated_metrics.each_with_object({}) do |aggregation, data|
              next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)

              data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: end_date)
            end
          end

          # Dispatches on the aggregation operator ('OR' => union, 'AND' =>
          # intersection). An unknown operator or an HLLRedisCounter EventError is
          # reported through Gitlab::ErrorTracking and, when the tracker does not
          # raise, the usage-data FALLBACK value is returned instead.
          def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
            case aggregation[:operator]
            when UNION_OF_AGGREGATED_METRICS
              calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
            when INTERSECTION_OF_AGGREGATED_METRICS
              calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
            else
              Gitlab::ErrorTracking
                .track_and_raise_for_dev_exception(UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"))
              Gitlab::Utils::UsageData::FALLBACK
            end
          rescue Gitlab::UsageDataCounters::HLLRedisCounter::EventError => error
            Gitlab::ErrorTracking.track_and_raise_for_dev_exception(error)
            Gitlab::Utils::UsageData::FALLBACK
          end

          # Calculates the size of the intersection of 'n' sets based on the
          # inclusion exclusion principle
          # https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
          # this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
          #
          # subset_powers_cache memoizes already computed counts, keyed first by
          # subset size and then by the joined event names. The default builds a
          # separate inner hash per subset size; a shared default object
          # (`Hash.new({})`) would silently collapse all sizes into one hash.
          def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new { |cache, size| cache[size] = {} })
            # The power of the intersection of all given metrics follows from the
            # inclusion exclusion principle:
            #   |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + |B & C|) + |A & B & C| =>
            #   |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + |B & C|) + |A + B + C|
            #
            #   |A + B + C + D| = (|A| + .. + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A & B & C & D| =>
            #   |A & B & C & D| = (|A| + .. + |D|) - (|A & B| + .. + |C & D|) + (|A & B & C| + .. + |B & C & D|) - |A + B + C + D|

            # Every component of the equation except the last one (the union),
            # as one summed value per subset size.
            subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)

            # The last component of the equation: the union of all events.
            union_key = event_names.join('_+_')
            power_of_union_of_all_events = (subset_powers_cache[event_names.size][union_key] ||=
              calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date))

            # Whether the wanted intersection ends up positive or negative in the
            # equation depends on the number of subset sizes being even or odd
            # (compare the three-set and four-set examples above).
            subset_powers_size_even = subset_powers_data.size.even?

            # Sum all components of the equation except the last one.
            sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)

            # Add the last component with the matching sign.
            sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
          end

          # Sums the per-size values with alternating signs (first positive),
          # then flips the sign of the whole sum when the number of sizes is even.
          def sum_subset_powers(subset_powers_data, subset_powers_size_even)
            sum_without_sign = subset_powers_data.each_with_index.sum do |value, index|
              (index + 1).odd? ? value : -value
            end

            (subset_powers_size_even ? -1 : 1) * sum_without_sign
          end

          # Returns one value per subset size from 1 to n - 1: the sum, over all
          # subsets of that size, of the power of their intersection.
          def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
            subset_sizes = (1..(event_names.size - 1))

            subset_sizes.map do |subset_size|
              if subset_size > 1
                # sum of powers of the intersections of each subset with the given
                # size: |A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
                event_names.combination(subset_size).sum do |events_subset|
                  subset_powers_cache[subset_size][events_subset.join('_&_')] ||=
                    calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
                end
              else
                # sum of powers of each set (metric) alone:
                # |A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
                event_names.sum do |event|
                  subset_powers_cache[subset_size][event] ||=
                    calculate_events_union(event_names: event, start_date: start_date, end_date: end_date)
                end
              end
            end
          end

          # Loads and concatenates the definitions from every file matching the
          # given glob pattern.
          def load_events(wildcard)
            Dir[wildcard].each_with_object([]) do |path, events|
              events.push(*load_yaml_from_path(path))
            end
          end

          # Parses one YAML file into a list of indifferent-access hashes; returns
          # nil when the file parses to nil.
          def load_yaml_from_path(path)
            YAML.safe_load(File.read(path))&.map(&:with_indifferent_access)
          end
        end
      end
    end
  end
end
......@@ -23,6 +23,7 @@ module Gitlab
deployment_minimum_id
deployment_maximum_id
auth_providers
aggregated_metrics
recorded_at
).freeze
......@@ -691,13 +692,13 @@ module Gitlab
def aggregated_metrics_monthly
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_monthly_data
aggregated_metrics: aggregated_metrics.monthly_data
}
end
def aggregated_metrics_weekly
{
aggregated_metrics: ::Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics_weekly_data
aggregated_metrics: aggregated_metrics.weekly_data
}
end
......@@ -742,6 +743,10 @@ module Gitlab
private
# Memoized Aggregate instance used to compute the aggregated metrics; it is
# built on first use and reused afterwards.
def aggregated_metrics
  return @aggregated_metrics if @aggregated_metrics

  @aggregated_metrics = ::Gitlab::Usage::Metrics::Aggregates::Aggregate.new
end
def event_monthly_active_users(date_range)
data = {
action_monthly_active_users_project_repo: Gitlab::UsageDataCounters::TrackUniqueEvents::PUSH_ACTION,
......
......@@ -13,15 +13,10 @@ module Gitlab
AggregationMismatch = Class.new(EventError)
SlotMismatch = Class.new(EventError)
CategoryMismatch = Class.new(EventError)
UnknownAggregationOperator = Class.new(EventError)
InvalidContext = Class.new(EventError)
KNOWN_EVENTS_PATH = File.expand_path('known_events/*.yml', __dir__)
ALLOWED_AGGREGATIONS = %i(daily weekly).freeze
UNION_OF_AGGREGATED_METRICS = 'OR'
INTERSECTION_OF_AGGREGATED_METRICS = 'AND'
ALLOWED_METRICS_AGGREGATIONS = [UNION_OF_AGGREGATED_METRICS, INTERSECTION_OF_AGGREGATED_METRICS].freeze
AGGREGATED_METRICS_PATH = File.expand_path('aggregated_metrics/*.yml', __dir__)
# Track event on entity_id
# Increment a Redis HLL counter for unique event_name and entity_id
......@@ -90,37 +85,40 @@ module Gitlab
events_names = events_for_category(category)
event_results = events_names.each_with_object({}) do |event, hash|
hash["#{event}_weekly"] = unique_events(event_names: [event], start_date: 7.days.ago.to_date, end_date: Date.current)
hash["#{event}_monthly"] = unique_events(event_names: [event], start_date: 4.weeks.ago.to_date, end_date: Date.current)
hash["#{event}_weekly"] = unique_events(**weekly_time_range.merge(event_names: [event]))
hash["#{event}_monthly"] = unique_events(**monthly_time_range.merge(event_names: [event]))
end
if eligible_for_totals?(events_names)
event_results["#{category}_total_unique_counts_weekly"] = unique_events(event_names: events_names, start_date: 7.days.ago.to_date, end_date: Date.current)
event_results["#{category}_total_unique_counts_monthly"] = unique_events(event_names: events_names, start_date: 4.weeks.ago.to_date, end_date: Date.current)
event_results["#{category}_total_unique_counts_weekly"] = unique_events(**weekly_time_range.merge(event_names: events_names))
event_results["#{category}_total_unique_counts_monthly"] = unique_events(**monthly_time_range.merge(event_names: events_names))
end
category_results["#{category}"] = event_results
end
end
def known_event?(event_name)
event_for(event_name).present?
def weekly_time_range
{ start_date: 7.days.ago.to_date, end_date: Date.current }
end
def aggregated_metrics_monthly_data
aggregated_metrics_data(4.weeks.ago.to_date)
def monthly_time_range
{ start_date: 4.weeks.ago.to_date, end_date: Date.current }
end
def aggregated_metrics_weekly_data
aggregated_metrics_data(7.days.ago.to_date)
def known_event?(event_name)
event_for(event_name).present?
end
def known_events
@known_events ||= load_events(KNOWN_EVENTS_PATH)
end
def aggregated_metrics
@aggregated_metrics ||= load_events(AGGREGATED_METRICS_PATH)
def calculate_events_union(event_names:, start_date:, end_date:)
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
private
......@@ -139,93 +137,6 @@ module Gitlab
Plan.all_plans
end
def aggregated_metrics_data(start_date)
aggregated_metrics.each_with_object({}) do |aggregation, weekly_data|
next if aggregation[:feature_flag] && Feature.disabled?(aggregation[:feature_flag], default_enabled: false, type: :development)
weekly_data[aggregation[:name]] = calculate_count_for_aggregation(aggregation, start_date: start_date, end_date: Date.current)
end
end
def calculate_count_for_aggregation(aggregation, start_date:, end_date:)
case aggregation[:operator]
when UNION_OF_AGGREGATED_METRICS
calculate_events_union(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
when INTERSECTION_OF_AGGREGATED_METRICS
calculate_events_intersections(event_names: aggregation[:events], start_date: start_date, end_date: end_date)
else
raise UnknownAggregationOperator, "Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}"
end
end
# calculate intersection of 'n' sets based on inclusion exclusion principle https://en.wikipedia.org/wiki/Inclusion%E2%80%93exclusion_principle
# this method will be extracted to dedicated module with https://gitlab.com/gitlab-org/gitlab/-/issues/273391
def calculate_events_intersections(event_names:, start_date:, end_date:, subset_powers_cache: Hash.new({}))
# calculate power of intersection of all given metrics from inclusion exclusion principle
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C|) =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
# calculate each components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ...
subset_powers_data = subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
# calculate last component of the equation |A & B & C & D| = .... - |A + B + C + D|
power_of_union_of_all_events = begin
subset_powers_cache[event_names.size][event_names.join('_+_')] ||= \
calculate_events_union(event_names: event_names, start_date: start_date, end_date: end_date)
end
# in order to determine if part of equation (|A & B & C|, |A & B & C & D|), that represents the intersection that we need to calculate,
# is positive or negative in particular equation we need to determine if number of subsets is even or odd. Please take a look at two examples below
# |A + B + C| = (|A| + |B| + |C|) - (|A & B| + |A & C| + .. + |C & D|) + |A & B & C| =>
# |A & B & C| = - (|A| + |B| + |C|) + (|A & B| + |A & C| + .. + |C & D|) + |A + B + C|
# |A + B + C + D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A & B & C & D| =>
# |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - |A + B + C + D|
subset_powers_size_even = subset_powers_data.size.even?
# sum all components of equation except for the last one |A & B & C & D| = (|A| + |B| + |C| + |D|) - (|A & B| + |A & C| + .. + |C & D|) + (|A & B & C| + |B & C & D|) - ... =>
sum_of_all_subset_powers = sum_subset_powers(subset_powers_data, subset_powers_size_even)
# add last component of the equation |A & B & C & D| = sum_of_all_subset_powers - |A + B + C + D|
sum_of_all_subset_powers + (subset_powers_size_even ? power_of_union_of_all_events : -power_of_union_of_all_events)
end
def sum_subset_powers(subset_powers_data, subset_powers_size_even)
sum_without_sign = subset_powers_data.to_enum.with_index.sum do |value, index|
(index + 1).odd? ? value : -value
end
(subset_powers_size_even ? -1 : 1) * sum_without_sign
end
def subsets_intersection_powers(event_names, start_date, end_date, subset_powers_cache)
subset_sizes = (1..(event_names.size - 1))
subset_sizes.map do |subset_size|
if subset_size > 1
# calculate sum of powers of intersection between each subset (with given size) of metrics: #|A + B + C + D| = ... - (|A & B| + |A & C| + .. + |C & D|)
event_names.combination(subset_size).sum do |events_subset|
subset_powers_cache[subset_size][events_subset.join('_&_')] ||= \
calculate_events_intersections(event_names: events_subset, start_date: start_date, end_date: end_date, subset_powers_cache: subset_powers_cache)
end
else
# calculate sum of powers of each set (metric) alone #|A + B + C + D| = (|A| + |B| + |C| + |D|) - ...
event_names.sum do |event|
subset_powers_cache[subset_size][event] ||= \
unique_events(event_names: event, start_date: start_date, end_date: end_date)
end
end
end
end
def calculate_events_union(event_names:, start_date:, end_date:)
count_unique_events(event_names: event_names, start_date: start_date, end_date: end_date) do |events|
raise SlotMismatch, events unless events_in_same_slot?(events)
raise AggregationMismatch, events unless events_same_aggregation?(events)
end
end
def count_unique_events(event_names:, start_date:, end_date:, context: '')
events = events_for(Array(event_names).map(&:to_s))
......@@ -340,12 +251,6 @@ module Gitlab
end.flatten
end
def validate_aggregation_operator!(operator)
return true if ALLOWED_METRICS_AGGREGATIONS.include?(operator)
raise UnknownAggregationOperator.new("Events should be aggregated with one of operators #{ALLOWED_METRICS_AGGREGATIONS}")
end
def weekly_redis_keys(events:, start_date:, end_date:, context: '')
end_date = end_date.end_of_week - 1.week
(start_date.to_date..end_date.to_date).map do |date|
......
......@@ -63,53 +63,20 @@ RSpec.describe Projects::IssuesController do
end
end
describe 'the null hypothesis experiment', :snowplow do
it 'defines the expected before actions' do
expect(controller).to use_before_action(:run_null_hypothesis_experiment)
end
context 'when rolled out to 100%' do
it 'assigns the candidate experience and tracks the event' do
get :index, params: { namespace_id: project.namespace, project_id: project }
expect_snowplow_event(
category: 'null_hypothesis',
action: 'index',
context: [{
schema: 'iglu:com.gitlab/gitlab_experiment/jsonschema/0-3-0',
data: { variant: 'candidate', experiment: 'null_hypothesis', key: anything }
}]
)
end
describe 'the null hypothesis experiment', :experiment do
before do
stub_experiments(null_hypothesis: :candidate)
end
context 'when not rolled out' do
before do
stub_feature_flags(null_hypothesis: false)
end
it 'assigns the control experience and tracks the event' do
get :index, params: { namespace_id: project.namespace, project_id: project }
expect_snowplow_event(
category: 'null_hypothesis',
action: 'index',
context: [{
schema: 'iglu:com.gitlab/gitlab_experiment/jsonschema/0-3-0',
data: { variant: 'control', experiment: 'null_hypothesis', key: anything }
}]
)
end
it 'defines the expected before actions' do
expect(controller).to use_before_action(:run_null_hypothesis_experiment)
end
context 'when gitlab_experiments is disabled' do
it 'does not run the experiment at all' do
stub_feature_flags(gitlab_experiments: false)
it 'assigns the candidate experience and tracks the event' do
expect(experiment(:null_hypothesis)).to track('index').on_any_instance.for(:candidate)
.with_context(project: project)
expect(controller).not_to receive(:run_null_hypothesis_experiment)
get :index, params: { namespace_id: project.namespace, project_id: project }
end
get :index, params: { namespace_id: project.namespace, project_id: project }
end
end
end
......
......@@ -258,6 +258,20 @@ RSpec.describe SearchController do
it_behaves_like 'with external authorization service enabled', :autocomplete, { term: 'hello' }
end
# Covers the `opensearch` controller action by rendering its XML view and
# inspecting the resulting document.
describe 'GET #opensearch' do
# Controller specs stub view rendering by default; enable it so response.body
# contains the real XML to parse.
render_views
it 'renders xml' do
get :opensearch, format: :xml
doc = Nokogiri::XML.parse(response.body)
expect(response).to have_gitlab_http_status(:ok)
expect(doc.css('OpenSearchDescription ShortName').text).to eq('GitLab')
# Checks both the set and the order of the elements nested under OpenSearchDescription.
expect(doc.css('OpenSearchDescription *').map(&:name)).to eq(%w[ShortName Description InputEncoding Image Url SearchForm])
end
end
describe '#append_info_to_payload' do
it 'appends search metadata for logging' do
last_payload = nil
......
......@@ -2,15 +2,51 @@
require 'spec_helper'
RSpec.describe ApplicationExperiment do
RSpec.describe ApplicationExperiment, :experiment do
subject { described_class.new(:stub) }
before do
allow(subject).to receive(:enabled?).and_return(true)
end
it "naively assumes a 1x1 relationship to feature flags for tests" do
expect(Feature).to receive(:persist_used!).with('stub')
described_class.new(:stub)
end
describe "enabled" do
before do
allow(subject).to receive(:enabled?).and_call_original
allow(Feature::Definition).to receive(:get).and_return('_instance_')
allow(Gitlab).to receive(:dev_env_or_com?).and_return(true)
allow(Feature).to receive(:get).and_return(double(state: :on))
end
it "is enabled when all criteria are met" do
expect(subject).to be_enabled
end
it "isn't enabled if the feature definition doesn't exist" do
expect(Feature::Definition).to receive(:get).with('stub').and_return(nil)
expect(subject).not_to be_enabled
end
it "isn't enabled if we're not in dev or dotcom environments" do
expect(Gitlab).to receive(:dev_env_or_com?).and_return(false)
expect(subject).not_to be_enabled
end
it "isn't enabled if the feature flag state is :off" do
expect(Feature).to receive(:get).with('stub').and_return(double(state: :off))
expect(subject).not_to be_enabled
end
end
describe "publishing results" do
it "tracks the assignment" do
expect(subject).to receive(:track).with(:assignment)
......@@ -37,8 +73,8 @@ RSpec.describe ApplicationExperiment do
end
describe "tracking events", :snowplow do
it "doesn't track if excluded" do
subject.exclude { true }
it "doesn't track if we shouldn't track" do
allow(subject).to receive(:should_track?).and_return(false)
subject.track(:action)
......
......@@ -170,6 +170,8 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Ignores invalid: <%= User.reference_prefix %>fake_user
- Ignored in code: `<%= user.to_reference %>`
- Ignored in links: [Link to <%= user.to_reference %>](#user-link)
- Ignored when backslash escaped: \<%= user.to_reference %>
- Ignored when backslash escaped: \<%= group.to_reference %>
- Link to user by reference: [User](<%= user.to_reference %>)
#### IssueReferenceFilter
......@@ -178,6 +180,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Issue in another project: <%= xissue.to_reference(project) %>
- Ignored in code: `<%= issue.to_reference %>`
- Ignored in links: [Link to <%= issue.to_reference %>](#issue-link)
- Ignored when backslash escaped: \<%= issue.to_reference %>
- Issue by URL: <%= urls.project_issue_url(issue.project, issue) %>
- Link to issue by reference: [Issue](<%= issue.to_reference %>)
- Link to issue by URL: [Issue](<%= urls.project_issue_url(issue.project, issue) %>)
......@@ -188,6 +191,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Merge request in another project: <%= xmerge_request.to_reference(project) %>
- Ignored in code: `<%= merge_request.to_reference %>`
- Ignored in links: [Link to <%= merge_request.to_reference %>](#merge-request-link)
- Ignored when backslash escaped: \<%= merge_request.to_reference %>
- Merge request by URL: <%= urls.project_merge_request_url(merge_request.project, merge_request) %>
- Link to merge request by reference: [Merge request](<%= merge_request.to_reference %>)
- Link to merge request by URL: [Merge request](<%= urls.project_merge_request_url(merge_request.project, merge_request) %>)
......@@ -198,6 +202,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Snippet in another project: <%= xsnippet.to_reference(project) %>
- Ignored in code: `<%= snippet.to_reference %>`
- Ignored in links: [Link to <%= snippet.to_reference %>](#snippet-link)
- Ignored when backslash escaped: \<%= snippet.to_reference %>
- Snippet by URL: <%= urls.project_snippet_url(snippet.project, snippet) %>
- Link to snippet by reference: [Snippet](<%= snippet.to_reference %>)
- Link to snippet by URL: [Snippet](<%= urls.project_snippet_url(snippet.project, snippet) %>)
......@@ -229,6 +234,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Label by name in quotes: <%= label.to_reference(format: :name) %>
- Ignored in code: `<%= simple_label.to_reference %>`
- Ignored in links: [Link to <%= simple_label.to_reference %>](#label-link)
- Ignored when backslash escaped: \<%= simple_label.to_reference %>
- Link to label by reference: [Label](<%= label.to_reference %>)
#### MilestoneReferenceFilter
......@@ -239,6 +245,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Milestone in another project: <%= xmilestone.to_reference(project) %>
- Ignored in code: `<%= simple_milestone.to_reference %>`
- Ignored in links: [Link to <%= simple_milestone.to_reference %>](#milestone-link)
- Ignored when backslash escaped: \<%= simple_milestone.to_reference %>
- Milestone by URL: <%= urls.milestone_url(milestone) %>
- Link to milestone by URL: [Milestone](<%= milestone.to_reference %>)
- Group milestone by name: <%= Milestone.reference_prefix %><%= group_milestone.name %>
......@@ -250,6 +257,7 @@ References should be parseable even inside _<%= merge_request.to_reference %>_ e
- Alert in another project: <%= xalert.to_reference(project) %>
- Ignored in code: `<%= alert.to_reference %>`
- Ignored in links: [Link to <%= alert.to_reference %>](#alert-link)
- Ignored when backslash escaped: \<%= alert.to_reference %>
- Alert by URL: <%= alert.details_url %>
- Link to alert by reference: [Alert](<%= alert.to_reference %>)
- Link to alert by URL: [Alert](<%= alert.details_url %>)
......
......@@ -23,4 +23,16 @@ RSpec.describe API::Entities::User do
expect(subject).not_to include(:created_at)
end
it 'exposes user as not a bot' do
expect(subject[:bot]).to be_falsey
end
context 'with bot user' do
let(:user) { create(:user, :security_bot) }
it 'exposes user as a bot' do
expect(subject[:bot]).to eq(true)
end
end
end
......@@ -131,4 +131,16 @@ RSpec.describe Banzai::Pipeline::FullPipeline do
expect(output).to include("test [[<em>TOC</em>]]")
end
end
# Checks that a reference preceded by a backslash is kept as literal text by the
# full pipeline.
describe 'backslash escapes' do
let_it_be(:project) { create(:project, :public) }
let_it_be(:issue) { create(:issue, project: project) }
it 'does not convert an escaped reference' do
# "\\" is a single literal backslash; the issue reference is interpolated right after it.
markdown = "\\#{issue.to_reference}"
output = described_class.to_html(markdown, project: project)
# The output must contain a `#` wrapped in its own <span>, immediately followed by the issue iid.
# NOTE(review): presumably issue.to_reference renders as "#<iid>" here - confirm.
expect(output).to include("<span>#</span>#{issue.iid}")
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Banzai::Pipeline::PlainMarkdownPipeline do
using RSpec::Parameterized::TableSyntax
describe 'backslash escapes' do
let_it_be(:project) { create(:project, :public) }
let_it_be(:issue) { create(:issue, project: project) }
# Runs the pipeline on +markdown+ with an empty context and expects the
# rendered HTML to contain +expected+.
#
# @param markdown [String] markdown source to render
# @param expected [String] HTML fragment that must be part of the output
# @return the pipeline result, so callers can make further assertions on it
def correct_html_included(markdown, expected)
  described_class.call(markdown, {}).tap do |pipeline_result|
    expect(pipeline_result[:output].to_html).to include(expected)
  end
end
context 'when feature flag honor_escaped_markdown is disabled' do
before do
stub_feature_flags(honor_escaped_markdown: false)
end
it 'does not escape the markdown' do
result = described_class.call(%q(\!), project: project)
output = result[:output].to_html
expect(output).to eq('<p data-sourcepos="1:1-1:2">!</p>')
expect(result[:escaped_literals]).to be_falsey
end
end
# Test strings taken from https://spec.commonmark.org/0.29/#backslash-escapes
describe 'CommonMark tests', :aggregate_failures do
it 'converts all ASCII punctuation to literals' do
markdown = %q(\!\"\#\$\%\&\'\*\+\,\-\.\/\:\;\<\=\>\?\@\[\]\^\_\`\{\|\}\~) + %q[\(\)\\\\]
punctuation = %w(! " # $ % &amp; ' * + , - . / : ; &lt; = &gt; ? @ [ \\ ] ^ _ ` { | } ~) + %w[( )]
result = described_class.call(markdown, project: project)
output = result[:output].to_html
punctuation.each { |char| expect(output).to include("<span>#{char}</span>") }
expect(result[:escaped_literals]).to be_truthy
end
it 'does not convert other characters to literals' do
markdown = %q(\→\A\a\ \3\φ\«)
expected = '\→\A\a\ \3\φ\«'
result = correct_html_included(markdown, expected)
expect(result[:escaped_literals]).to be_falsey
end
describe 'escaped characters are treated as regular characters and do not have their usual Markdown meanings' do
where(:markdown, :expected) do
%q(\*not emphasized*) | %q(<span>*</span>not emphasized*)
%q(\<br/> not a tag) | %q(<span>&lt;</span>br/&gt; not a tag)
%q!\[not a link](/foo)! | %q!<span>[</span>not a link](/foo)!
%q(\`not code`) | %q(<span>`</span>not code`)
%q(1\. not a list) | %q(1<span>.</span> not a list)
%q(\# not a heading) | %q(<span>#</span> not a heading)
%q(\[foo]: /url "not a reference") | %q(<span>[</span>foo]: /url "not a reference")
%q(\&ouml; not a character entity) | %q(<span>&amp;</span>ouml; not a character entity)
end
with_them do
it 'keeps them as literals' do
correct_html_included(markdown, expected)
end
end
end
it 'backslash is itself escaped, the following character is not' do
markdown = %q(\\\\*emphasis*)
expected = %q(<span>\</span><em>emphasis</em>)
correct_html_included(markdown, expected)
end
it 'backslash at the end of the line is a hard line break' do
markdown = <<~MARKDOWN
foo\\
bar
MARKDOWN
expected = "foo<br>\nbar"
correct_html_included(markdown, expected)
end
describe 'backslash escapes do not work in code blocks, code spans, autolinks, or raw HTML' do
where(:markdown, :expected) do
%q(`` \[\` ``) | %q(<code>\[\`</code>)
%q( \[\]) | %Q(<code>\\[\\]\n</code>)
%Q(~~~\n\\[\\]\n~~~) | %Q(<code>\\[\\]\n</code>)
%q(<http://example.com?find=\*>) | %q(<a href="http://example.com?find=%5C*">http://example.com?find=\*</a>)
%q[<a href="/bar\/)">] | %q[<a href="/bar%5C/)">]
end
with_them do
it { correct_html_included(markdown, expected) }
end
end
describe 'work in all other contexts, including URLs and link titles, link references, and info strings in fenced code blocks' do
where(:markdown, :expected) do
%q![foo](/bar\* "ti\*tle")! | %q(<a href="/bar*" title="ti*tle">foo</a>)
%Q![foo]\n\n[foo]: /bar\\* "ti\\*tle"! | %q(<a href="/bar*" title="ti*tle">foo</a>)
%Q(``` foo\\+bar\nfoo\n```) | %Q(<code lang="foo+bar">foo\n</code>)
end
with_them do
it { correct_html_included(markdown, expected) }
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Usage::Metrics::Aggregates::Aggregate, :clean_gitlab_redis_shared_state do
let(:entity1) { 'dfb9d2d2-f56c-4c77-8aeb-6cddc4a1f857' }
let(:entity2) { '1dd9afb2-a3ee-4de1-8ae3-a405579c8584' }
let(:entity3) { '34rfjuuy-ce56-sa35-ds34-dfer567dfrf2' }
let(:entity4) { '8b9a2671-2abf-4bec-a682-22f6a8f7bf31' }
around do |example|
# We need to freeze to a reference time
# because visits are grouped by the week number in the year
# Without freezing the time, the test may behave inconsistently
# depending on which day of the week test is run.
# Monday 1st of June 2020
reference_time = Time.utc(2020, 6, 1)
travel_to(reference_time) { example.run }
end
context 'aggregated_metrics_data' do
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return([])
end
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
context 'error handling' do
context 'development and test environment' do
it 'raises error when unknown aggregation operator is used' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect { aggregated_metrics_data }.to raise_error Gitlab::Usage::Metrics::Aggregates::UnknownAggregationOperator
end
it 're raises Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect { aggregated_metrics_data }.to raise_error error
end
end
context 'production' do
before do
stub_rails_env('production')
end
it 'rescues unknown aggregation operator error' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "SUM" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
it 'rescues Gitlab::UsageDataCounters::HLLRedisCounter::EventError' do
error = Gitlab::UsageDataCounters::HLLRedisCounter::EventError
allow(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union).and_raise(error)
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics)
.and_return([{ name: 'gmau_1', events: %w[event1_slot], operator: "OR" }])
end
expect(aggregated_metrics_data).to eq('gmau_1' => -1)
end
end
end
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.new.weekly_data }
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
it_behaves_like 'aggregated_metrics_data'
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.new.monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event1_slot', values: entity3, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity2, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event2_slot', values: entity3, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity1, time: 3.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event3_slot', values: entity2, time: 10.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity1, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity2, time: 2.days.ago)
Gitlab::UsageDataCounters::HLLRedisCounter.track_event('event4', values: entity4, time: 2.days.ago)
end
end
context 'Redis calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
aggregated_metrics[0][:events].each do |event|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: event, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
2.upto(4) do |subset_size|
aggregated_metrics[0][:events].combination(subset_size).each do |events|
expect(Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:calculate_events_union)
.with(event_names: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
.once
.and_return(0)
end
end
aggregated_metrics_data
end
end
end
end
end
......@@ -17,7 +17,7 @@ RSpec.describe 'aggregated metrics' do
Gitlab::UsageDataCounters::HLLRedisCounter.known_events
end
Gitlab::UsageDataCounters::HLLRedisCounter.aggregated_metrics.tap do |aggregated_metrics|
Gitlab::Usage::Metrics::Aggregates::Aggregate.new.send(:aggregated_metrics).tap do |aggregated_metrics|
it 'all events has unique name' do
event_names = aggregated_metrics&.map { |event| event[:name] }
......@@ -37,7 +37,7 @@ RSpec.describe 'aggregated metrics' do
end
it "uses allowed aggregation operators" do
expect(Gitlab::UsageDataCounters::HLLRedisCounter::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
expect(Gitlab::Usage::Metrics::Aggregates::ALLOWED_METRICS_AGGREGATIONS).to include aggregate[:operator]
end
it "uses events from the same Redis slot" do
......
......@@ -426,182 +426,59 @@ RSpec.describe Gitlab::UsageDataCounters::HLLRedisCounter, :clean_gitlab_redis_s
end
end
context 'aggregated_metrics_data' do
describe '.calculate_events_union' do
let(:time_range) { { start_date: 7.days.ago, end_date: DateTime.current } }
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "daily" },
{ name: 'event4', category: 'category2', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
before do
allow(described_class).to receive(:known_events).and_return(known_events)
end
shared_examples 'aggregated_metrics_data' do
context 'no aggregated metrics is defined' do
it 'returns empty hash' do
allow(described_class).to receive(:aggregated_metrics).and_return([])
expect(aggregated_metrics_data).to eq({})
end
end
context 'there are aggregated metrics defined' do
before do
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
end
context 'with AND operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event1_slot event2_slot], operator: "AND" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot], operator: "AND" },
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" },
{ name: 'gmau_4', events: %w[event4], operator: "AND" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 3,
'gmau_2' => 2,
'gmau_3' => 1,
'gmau_4' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'with OR operator' do
let(:aggregated_metrics) do
[
{ name: 'gmau_1', events: %w[event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_2', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "OR" },
{ name: 'gmau_3', events: %w[event4], operator: "OR" }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
results = {
'gmau_1' => 2,
'gmau_2' => 3,
'gmau_3' => 3
}
expect(aggregated_metrics_data).to eq(results)
end
end
context 'hidden behind feature flag' do
let(:enabled_feature_flag) { 'test_ff_enabled' }
let(:disabled_feature_flag) { 'test_ff_disabled' }
let(:aggregated_metrics) do
[
# represents stable aggregated metrics that has been fully released
{ name: 'gmau_without_ff', events: %w[event3_slot event5_slot], operator: "OR" },
# represents new aggregated metric that is under performance testing on gitlab.com
{ name: 'gmau_enabled', events: %w[event4], operator: "AND", feature_flag: enabled_feature_flag },
# represents aggregated metric that is under development and shouldn't be yet collected even on gitlab.com
{ name: 'gmau_disabled', events: %w[event4], operator: "AND", feature_flag: disabled_feature_flag }
].map(&:with_indifferent_access)
end
it 'returns the number of unique events for all known events' do
skip_feature_flags_yaml_validation
stub_feature_flags(enabled_feature_flag => true, disabled_feature_flag => false)
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
described_class.track_event('event2_slot', values: entity4, time: 8.days.ago)
expect(aggregated_metrics_data).to eq('gmau_without_ff' => 2, 'gmau_enabled' => 3)
end
end
end
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
end
describe '.aggregated_metrics_weekly_data' do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_weekly_data }
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 3.days.ago)
# events out of time scope
described_class.track_event('event2_slot', values: entity3, time: 8.days.ago)
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end
it_behaves_like 'aggregated_metrics_data'
# Expects calculate_events_union to return 2 for event4 alone and 3 for the
# three slot events combined, over the shared time_range.
#
# The metadata tag has to be the plural `:aggregate_failures`; the singular
# form is not recognised by RSpec, so the first failing expectation would abort
# the example and hide the result of the second one.
it 'calculates union of given events', :aggregate_failures do
  expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event4]))).to eq 2
  expect(described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event2_slot event3_slot]))).to eq 3
end
describe '.aggregated_metrics_monthly_data' do
subject(:aggregated_metrics_data) { described_class.aggregated_metrics_monthly_data }
it_behaves_like 'aggregated_metrics_data' do
before do
described_class.track_event('event1_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity2, time: 2.days.ago)
described_class.track_event('event1_slot', values: entity3, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity1, time: 2.days.ago)
described_class.track_event('event2_slot', values: entity2, time: 3.days.ago)
described_class.track_event('event2_slot', values: entity3, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity1, time: 3.days.ago)
described_class.track_event('event3_slot', values: entity2, time: 10.days.ago)
described_class.track_event('event5_slot', values: entity2, time: 4.weeks.ago.advance(days: 1))
# events out of time scope
described_class.track_event('event5_slot', values: entity1, time: 4.weeks.ago.advance(days: -1))
# events in different slots
described_class.track_event('event4', values: entity1, time: 2.days.ago)
described_class.track_event('event4', values: entity2, time: 2.days.ago)
described_class.track_event('event4', values: entity4, time: 2.days.ago)
end
end
context 'Redis calls' do
let(:aggregated_metrics) do
[
{ name: 'gmau_3', events: %w[event1_slot event2_slot event3_slot event5_slot], operator: "AND" }
].map(&:with_indifferent_access)
end
let(:known_events) do
[
{ name: 'event1_slot', redis_slot: "slot", category: 'category1', aggregation: "weekly" },
{ name: 'event2_slot', redis_slot: "slot", category: 'category2', aggregation: "weekly" },
{ name: 'event3_slot', redis_slot: "slot", category: 'category3', aggregation: "weekly" },
{ name: 'event5_slot', redis_slot: "slot", category: 'category4', aggregation: "weekly" }
].map(&:with_indifferent_access)
end
it 'caches intermediate operations' do
allow(described_class).to receive(:known_events).and_return(known_events)
allow(described_class).to receive(:aggregated_metrics).and_return(aggregated_metrics)
# Expects calculate_events_union to raise SlotMismatch for event1_slot + event4
# and AggregationMismatch for event5_slot + event3_slot.
#
# The metadata tag has to be the plural `:aggregate_failures`; the singular
# form is not recognised by RSpec, so a failure of the first expectation would
# prevent the second one from being reported.
it 'validates and raise exception if events has mismatched slot or aggregation', :aggregate_failures do
  expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event1_slot event4])) }.to raise_error described_class::SlotMismatch
  expect { described_class.calculate_events_union(**time_range.merge(event_names: %w[event5_slot event3_slot])) }.to raise_error described_class::AggregationMismatch
end
end
4.downto(1) do |subset_size|
known_events.combination(subset_size).each do |events|
keys = described_class.send(:weekly_redis_keys, events: events, start_date: 4.weeks.ago.to_date, end_date: Date.current)
expect(Gitlab::Redis::HLL).to receive(:count).with(keys: keys).once.and_return(0)
end
end
describe '.weekly_time_range' do
it 'return hash with weekly time range boundaries' do
expect(described_class.weekly_time_range).to eq(start_date: 7.days.ago.to_date, end_date: Date.current)
end
end
subject
end
end
describe '.monthly_time_range' do
it 'return hash with monthly time range boundaries' do
expect(described_class.monthly_time_range).to eq(start_date: 4.weeks.ago.to_date, end_date: Date.current)
end
end
end
......@@ -1286,8 +1286,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
describe '.aggregated_metrics_weekly' do
subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_weekly }
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_weekly_data).and_return(global_search_gmau: 123)
it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#weekly_data', :aggregate_failures do
expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
expect(instance).to receive(:weekly_data).and_return(global_search_gmau: 123)
end
expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
end
end
......@@ -1295,8 +1297,10 @@ RSpec.describe Gitlab::UsageData, :aggregate_failures do
describe '.aggregated_metrics_monthly' do
subject(:aggregated_metrics_payload) { described_class.aggregated_metrics_monthly }
it 'uses ::Gitlab::UsageDataCounters::HLLRedisCounter#aggregated_metrics_data', :aggregate_failures do
expect(::Gitlab::UsageDataCounters::HLLRedisCounter).to receive(:aggregated_metrics_monthly_data).and_return(global_search_gmau: 123)
it 'uses ::Gitlab::Usage::Metrics::Aggregates::Aggregate#monthly_data', :aggregate_failures do
expect_next_instance_of(::Gitlab::Usage::Metrics::Aggregates::Aggregate) do |instance|
expect(instance).to receive(:monthly_data).and_return(global_search_gmau: 123)
end
expect(aggregated_metrics_payload).to eq(aggregated_metrics: { global_search_gmau: 123 })
end
end
......
# frozen_string_literal: true
# Require the provided spec helper and matchers.
require 'gitlab/experiment/rspec'
# This is a temporary fix until we have a larger discussion around the
# challenges raised in https://gitlab.com/gitlab-org/gitlab/-/issues/300104
class ApplicationExperiment < Gitlab::Experiment # rubocop:disable Gitlab/NamespacedClass
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment