Backfill project snippet statistics

In this commit we add a background migration
to create/update project snippet statistics.

It also updates the `snippets_size` in the project
and namespace statistics.
parent 8cb0a93a
---
title: Backfill project snippet statistics
merge_request: 36444
author:
type: other
# frozen_string_literal: true
# Post-deployment migration that enqueues the `PopulateProjectSnippetStatistics`
# background migration for every project snippet, in batches of BATCH_SIZE ids.
class SchedulePopulateProjectSnippetStatistics < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  # Seconds between two consecutive batches.
  DELAY_INTERVAL = 2.minutes.to_i
  # Number of snippet ids handed to each background job.
  BATCH_SIZE = 500
  MIGRATION = 'PopulateProjectSnippetStatistics'

  disable_ddl_transaction!

  # Selects the ids of all project snippets, ordered by namespace, then project,
  # then snippet id, and schedules one job per slice of BATCH_SIZE ids.
  # The first slice is scheduled with no delay; each following slice is delayed
  # by a further DELAY_INTERVAL seconds.
  def up
    ordered_ids_sql = <<~SQL
      SELECT snippets.id
      FROM snippets
      INNER JOIN projects ON projects.id = snippets.project_id
      WHERE snippets.type = 'ProjectSnippet'
      ORDER BY projects.namespace_id ASC, snippets.project_id ASC, snippets.id ASC
    SQL

    # Every row holds a single column (the snippet id), so flattening yields a flat id list.
    id_batches = exec_query(ordered_ids_sql).rows.flatten.each_slice(BATCH_SIZE)

    id_batches.each_with_index do |batch_ids, batch_number|
      migrate_in(batch_number * DELAY_INTERVAL, MIGRATION, [batch_ids])
    end
  end

  # Nothing to undo.
  def down
    # no-op
  end
end
...@@ -23769,6 +23769,7 @@ COPY "schema_migrations" (version) FROM STDIN; ...@@ -23769,6 +23769,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200707094341 20200707094341
20200707095849 20200707095849
20200708080631 20200708080631
20200709101408
20200710102846 20200710102846
20200710105332 20200710105332
20200710130234 20200710130234
......
# frozen_string_literal: true
module Gitlab
  module BackgroundMigration
    # This class creates/updates those project snippets statistics
    # that haven't been created nor initialized.
    # It also updates the related project statistics and its root storage namespace stats
    #
    # Errors raised while processing a project or a namespace are logged
    # (including the exception message) and do not stop the remaining
    # projects/namespaces of the batch from being processed.
    class PopulateProjectSnippetStatistics
      # Processes one batch of project snippets.
      #
      # Snippets are grouped by the namespace id of their project and then by
      # project: every snippet gets its statistics refreshed, every project
      # gets its `snippets_size` refreshed, and finally the root namespace of
      # each group is refreshed once.
      #
      # @param snippet_ids [Array<Integer>] ids of the snippets to process
      def perform(snippet_ids)
        project_snippets(snippet_ids).group_by(&:namespace_id).each do |namespace_id, namespace_snippets|
          namespace_snippets.group_by(&:project).each do |project, snippets|
            upsert_snippet_statistics(snippets)
            update_project_statistics(project)
          rescue => e
            # Keep the cause in the log entry; without it the failure cannot be diagnosed.
            error_message("Error updating statistics for project #{project.id}: #{e.message}")
          end

          # Every snippet in this group has the same projects.namespace_id, so the
          # first one is used to reach the root namespace.
          update_namespace_statistics(namespace_snippets.first.project.root_namespace)
        rescue => e
          error_message("Error updating statistics for namespace #{namespace_id}: #{e.message}")
        end
      end

      private

      # Loads the requested project snippets together with `projects.namespace_id`
      # (exposed on each record as `namespace_id`) and preloads the associations
      # listed in the `includes` calls.
      def project_snippets(snippet_ids)
        ProjectSnippet
          .select('snippets.*, projects.namespace_id')
          .where(id: snippet_ids)
          .joins(:project)
          .includes(:statistics)
          .includes(snippet_repository: :shard)
          .includes(project: [:route, :statistics, :namespace])
      end

      # Runs the statistics update service for each snippet and logs an error
      # for every response that reports one; processing continues with the next snippet.
      def upsert_snippet_statistics(snippets)
        snippets.each do |snippet|
          response = Snippets::UpdateStatisticsService.new(snippet).execute

          error_message("#{response.message} snippet: #{snippet.id}") if response.error?
        end
      end

      def logger
        @logger ||= Gitlab::BackgroundMigration::Logger.build
      end

      # Logs `message` at error level, prefixed with the migration name.
      def error_message(message)
        logger.error(message: "Snippet Statistics Migration: #{message}")
      end

      # Refreshes only the `snippets_size` column; does nothing when the project
      # has no statistics record.
      def update_project_statistics(project)
        project.statistics&.refresh!(only: [:snippets_size])
      end

      def update_namespace_statistics(namespace)
        Namespaces::StatisticsRefresherService.new.execute(namespace)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the background migration that creates/updates project snippet
# statistics. Records are inserted through raw `table(...)` models and snippet
# repositories are set up by the helper methods defined at the bottom.
RSpec.describe Gitlab::BackgroundMigration::PopulateProjectSnippetStatistics do
let(:file_name) { 'file_name.rb' }
let(:content) { 'content' }
let(:snippets) { table(:snippets) }
let(:snippet_repositories) { table(:snippet_repositories) }
let(:users) { table(:users) }
let(:namespaces) { table(:namespaces) }
let(:snippet_statistics) { table(:snippet_statistics) }
let(:project_statistics) { table(:project_statistics) }
let(:projects) { table(:projects) }
let(:namespace_statistics) { table(:namespace_root_storage_statistics) }
let(:routes) { table(:routes) }
# Repository#size is stubbed to return `repo_size` (see the `before` hook below);
# the examples expect the stored repository_size to equal `repo_size.megabytes`.
let(:repo_size) { 123456 }
let(:expected_repo_size) { repo_size.megabytes }
let(:user) { users.create!(id: 1, email: 'test@example.com', projects_limit: 100, username: 'test') }
let(:group) { namespaces.create!(id: 10, type: 'Group', name: 'group1', path: 'group1') }
let(:user_namespace) { namespaces.create!(id: 20, name: 'user', path: 'user', owner_id: user.id) }
# project1 and project3 belong to the group, project2 to the user namespace.
let(:project1) { create_project(1, 'test', group) }
let(:project2) { create_project(2, 'test1', user_namespace) }
let(:project3) { create_project(3, 'test2', group) }
# Created eagerly (let!) so each project starts with a statistics row whose
# snippets_size is nil.
let!(:project_stats1) { create_project_statistics(project1) }
let!(:project_stats2) { create_project_statistics(project2) }
let!(:project_stats3) { create_project_statistics(project3) }
let(:ids) { snippets.pluck(:id) }
let(:migration) { described_class.new }
# Runs the migration over every snippet id, then reloads the project statistics
# rows that are still persisted (an example may have deleted one) so the
# expectations read fresh values.
subject do
migration.perform(ids)
project_stats1.reload if project_stats1.persisted?
project_stats2.reload if project_stats2.persisted?
project_stats3.reload if project_stats3.persisted?
end
before do
allow_any_instance_of(Repository).to receive(:size).and_return(repo_size)
end
# Removes the repository of every snippet row left by the example.
after do
snippets.all.each { |s| raw_repository(s).remove }
end
context 'with existing user and group snippets' do
let!(:snippet1) { create_snippet(1, project1) }
let!(:snippet2) { create_snippet(2, project1) }
let!(:snippet3) { create_snippet(3, project2) }
let!(:snippet4) { create_snippet(4, project2) }
let!(:snippet5) { create_snippet(5, project3) }
# Snippets 2 and 4 start with a statistics row (sizes 0 and 123); the other
# three snippets have none.
before do
create_snippet_statistics(2, 0)
create_snippet_statistics(4, 123)
end
it 'creates/updates all snippet_statistics' do
expect(snippet_statistics.count).to eq 2
subject
expect(snippet_statistics.count).to eq 5
snippet_statistics.all.each do |stat|
expect(stat.repository_size).to eq expected_repo_size
end
end
it 'updates associated snippet project statistics' do
expect(project_stats1.snippets_size).to be_nil
expect(project_stats2.snippets_size).to be_nil
subject
# Each project's snippets_size must equal the sum of its snippets' repository sizes.
snippets_size = snippet_statistics.where(snippet_id: [snippet1.id, snippet2.id]).sum(:repository_size)
expect(project_stats1.snippets_size).to eq snippets_size
snippets_size = snippet_statistics.where(snippet_id: [snippet3.id, snippet4.id]).sum(:repository_size)
expect(project_stats2.snippets_size).to eq snippets_size
snippets_size = snippet_statistics.where(snippet_id: snippet5.id).sum(:repository_size)
expect(project_stats3.snippets_size).to eq snippets_size
end
it 'forces the project statistics refresh' do
# One call per project that owns snippets (three projects here).
expect(migration).to receive(:update_project_statistics).exactly(3).times
subject
end
it 'creates/updates the associated namespace statistics' do
# Two namespaces are involved: the group and the user namespace.
expect(migration).to receive(:update_namespace_statistics).twice.and_call_original
subject
expect(namespace_statistics.find_by(namespace_id: group.id).snippets_size).to eq project_stats1.snippets_size + project_stats3.snippets_size
expect(namespace_statistics.find_by(namespace_id: user_namespace.id).snippets_size).to eq project_stats2.snippets_size
end
context 'when the project statistics does not exists' do
it 'does not raise any error' do
# Without project3's statistics row, the group total is expected to
# consist of project1's snippets_size only.
project_stats3.delete
subject
expect(namespace_statistics.find_by(namespace_id: group.id).snippets_size).to eq project_stats1.snippets_size
expect(namespace_statistics.find_by(namespace_id: user_namespace.id).snippets_size).to eq project_stats2.snippets_size
end
end
context 'when an error is raised when updating a project statistics' do
it 'logs the error and continue execution' do
# Only project1's refresh raises; the other two projects run the real method.
expect(migration).to receive(:update_project_statistics).with(Project.find(project1.id)).and_raise('Error')
expect(migration).to receive(:update_project_statistics).with(Project.find(project2.id)).and_call_original
expect(migration).to receive(:update_project_statistics).with(Project.find(project3.id)).and_call_original
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:error).with(message: /Error updating statistics for project #{project1.id}/).once
end
subject
expect(project_stats2.snippets_size).not_to be_nil
expect(project_stats3.snippets_size).not_to be_nil
end
end
context 'when an error is raised when updating a namespace statistics' do
it 'logs the error and continue execution' do
# The group's refresh raises; the user namespace must still be refreshed.
expect(migration).to receive(:update_namespace_statistics).with(Group.find(group.id)).and_raise('Error')
expect(migration).to receive(:update_namespace_statistics).with(Namespace.find(user_namespace.id)).and_call_original
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:error).with(message: /Error updating statistics for namespace/).once
end
subject
expect(namespace_statistics.find_by(namespace_id: user_namespace.id).snippets_size).to eq project_stats2.snippets_size
end
end
end
context 'when project snippet is in a subgroup' do
# project1 is overridden to live in a subgroup of `group`; its route path is
# passed explicitly as "<group path>/<subgroup path>/test".
let(:subgroup) { namespaces.create!(id: 30, type: 'Group', name: 'subgroup', path: 'subgroup', parent_id: group.id) }
let(:project1) { create_project(1, 'test', subgroup, "#{group.path}/#{subgroup.path}/test") }
let!(:snippet1) { create_snippet(1, project1) }
it 'updates the root namespace statistics' do
subject
expect(snippet_statistics.count).to eq 1
expect(project_stats1.snippets_size).to eq snippet_statistics.first.repository_size
# Statistics are expected on the root group only, never on the subgroup.
expect(namespace_statistics.find_by(namespace_id: subgroup.id)).to be_nil
expect(namespace_statistics.find_by(namespace_id: group.id).snippets_size).to eq project_stats1.snippets_size
end
end
context 'when a snippet repository is empty' do
# snippet1 is created without a repository; snippet2 gets one.
let!(:snippet1) { create_snippet(1, project1, with_repo: false) }
let!(:snippet2) { create_snippet(2, project1) }
it 'logs error and continues execution' do
expect_next_instance_of(Gitlab::BackgroundMigration::Logger) do |instance|
expect(instance).to receive(:error).with(message: /Invalid snippet repository/).once
end
subject
expect(snippet_statistics.find_by(snippet_id: snippet1.id)).to be_nil
# NOTE(review): `find` looks the row up by the table's primary key, while the
# line above uses snippet_id explicitly; presumably the primary key of
# snippet_statistics is snippet_id - confirm.
expect(project_stats1.snippets_size).to eq snippet_statistics.find(snippet2.id).repository_size
end
end
# Inserts a ProjectSnippet row authored by `user` in `project`. Unless
# `with_repo` is false, it also stubs `disk_path` on the returned record,
# copies the bare factory repository via TestEnv and creates the repository.
def create_snippet(id, project, with_repo: true)
snippets.create!(id: id, type: 'ProjectSnippet', project_id: project.id, author_id: user.id, file_name: file_name, content: content).tap do |snippet|
if with_repo
allow(snippet).to receive(:disk_path).and_return(disk_path(snippet))
TestEnv.copy_repo(snippet,
bare_repo: TestEnv.factory_repo_path_bare,
refs: TestEnv::BRANCH_SHA)
raw_repository(snippet).create_repository
end
end
end
# Inserts a project in `namespace` plus its route. The project path is `name`
# downcased with whitespace replaced by underscores; the route path defaults
# to "<namespace path>/<project path>" when `path` is not given.
def create_project(id, name, namespace, path = nil)
projects.create!(id: id, name: name, path: name.downcase.gsub(/\s/, '_'), namespace_id: namespace.id).tap do |project|
path ||= "#{namespace.path}/#{project.path}"
routes.create!(id: id, source_type: 'Project', source_id: project.id, path: path)
end
end
# Inserts a snippet_statistics row for `snippet_id` with the given repository size.
def create_snippet_statistics(snippet_id, repository_size = 0)
snippet_statistics.create!(snippet_id: snippet_id, repository_size: repository_size)
end
# Inserts a project_statistics row that reuses the project id as its own id;
# snippets_size is nil unless given.
def create_project_statistics(project, snippets_size = nil)
project_statistics.create!(id: project.id, project_id: project.id, namespace_id: project.namespace_id, snippets_size: snippets_size)
end
# Builds a Gitlab::Git::Repository on the 'default' storage at the snippet's
# hashed disk path with a ".git" suffix.
def raw_repository(snippet)
Gitlab::Git::Repository.new('default',
"#{disk_path(snippet)}.git",
Gitlab::GlRepository::SNIPPET.identifier_for_container(snippet),
"@snippets/#{snippet.id}")
end
# Hashed storage wrapper for the snippet, using the '@snippets' prefix.
def hashed_repository(snippet)
Storage::Hashed.new(snippet, prefix: '@snippets')
end
# Disk path of the snippet as reported by its hashed storage wrapper.
def disk_path(snippet)
hashed_repository(snippet).disk_path
end
end
# frozen_string_literal: true
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20200709101408_schedule_populate_project_snippet_statistics.rb')
# Verifies that the scheduling migration batches project snippet ids ordered by
# namespace, project and snippet id, and skips personal snippets.
RSpec.describe SchedulePopulateProjectSnippetStatistics do
  let(:users) { table(:users) }
  let(:snippets) { table(:snippets) }
  let(:projects) { table(:projects) }
  let(:namespaces) { table(:namespaces) }

  let(:user1) do
    users.create!(id: 1, email: 'user1@example.com', projects_limit: 10, username: 'test1', name: 'Test1', state: 'active')
  end

  let(:user2) do
    users.create!(id: 2, email: 'user2@example.com', projects_limit: 10, username: 'test2', name: 'Test2', state: 'active')
  end

  let(:namespace1) { namespaces.create!(id: 1, owner_id: user1.id, name: 'user1', path: 'user1') }
  let(:namespace2) { namespaces.create!(id: 2, owner_id: user2.id, name: 'user2', path: 'user2') }

  # project1 and project2 share namespace1; project3 lives in namespace2.
  let(:project1) { projects.create!(id: 1, namespace_id: namespace1.id) }
  let(:project2) { projects.create!(id: 2, namespace_id: namespace1.id) }
  let(:project3) { projects.create!(id: 3, namespace_id: namespace2.id) }

  # Inserts a snippet row with the given id. `project` may be nil, in which
  # case project_id is stored as nil.
  def create_snippet(id, author:, project:, type: 'ProjectSnippet')
    snippets.create!(
      id: id,
      type: type,
      author_id: author.id,
      project_id: project&.id,
      file_name: 'foo',
      content: 'bar'
    )
  end

  it 'correctly schedules background migrations' do
    # Snippet ids are assigned without following the namespace/project order
    create_snippet(1, author: user1, project: project1)
    create_snippet(2, author: user2, project: project3)
    create_snippet(3, author: user1, project: project1)
    create_snippet(4, author: user1, project: project2)
    create_snippet(5, author: user2, project: project3)
    create_snippet(6, author: user1, project: project1)

    # A personal snippet that the migration must leave out
    create_snippet(7, author: user1, project: nil, type: 'PersonalSnippet')

    stub_const("#{described_class}::BATCH_SIZE", 4)

    Sidekiq::Testing.fake! do
      Timecop.freeze do
        migrate!

        migration_name = described_class::MIGRATION

        aggregate_failures do
          # First batch: project1's snippets (1, 3, 6) followed by project2's (4)
          expect(migration_name).to be_scheduled_migration([1, 3, 6, 4])
          # Second batch: project3's snippets, delayed by one interval
          expect(migration_name).to be_scheduled_delayed_migration(2.minutes, [2, 5])
          expect(BackgroundMigrationWorker.jobs.size).to eq(2)
        end
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment