Commit 307a96f5 authored by Kassio Borges's avatar Kassio Borges

GithubImporter: Count objects imported using github bulk import

Github Labels, Releases and Milestones uses a different approach to be
imported. These objects were not being counted, or logged, in the same
way of the rest of Github objects.

Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/335199
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65773
parent 7678d206
---
name: import_redis_increment_by
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/65773
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/336226
milestone: '14.1'
type: development
group: group::import
default_enabled: false
...@@ -57,7 +57,7 @@ module Gitlab ...@@ -57,7 +57,7 @@ module Gitlab
# Sets a cache key to the given value. # Sets a cache key to the given value.
# #
# key - The cache key to write. # raw_key - The cache key to write.
# value - The value to set. # value - The value to set.
# timeout - The time after which the cache key should expire. # timeout - The time after which the cache key should expire.
def self.write(raw_key, value, timeout: TIMEOUT) def self.write(raw_key, value, timeout: TIMEOUT)
...@@ -73,7 +73,7 @@ module Gitlab ...@@ -73,7 +73,7 @@ module Gitlab
# Increment the integer value of a key by one. # Increment the integer value of a key by one.
# Sets the value to zero if missing before incrementing # Sets the value to zero if missing before incrementing
# #
# key - The cache key to increment. # raw_key - The cache key to increment.
# timeout - The time after which the cache key should expire. # timeout - The time after which the cache key should expire.
# @return - the incremented value # @return - the incremented value
def self.increment(raw_key, timeout: TIMEOUT) def self.increment(raw_key, timeout: TIMEOUT)
...@@ -85,6 +85,22 @@ module Gitlab ...@@ -85,6 +85,22 @@ module Gitlab
end end
end end
# Increment the integer value of a key by the given value.
# Sets the value to zero if missing before incrementing
#
# raw_key - The cache key to increment.
# value - The value to increment the key
# timeout - The time after which the cache key should expire.
# @return - the incremented value
def self.increment_by(raw_key, value, timeout: TIMEOUT)
key = cache_key_for(raw_key)
Redis::Cache.with do |redis|
redis.incrby(key, value)
redis.expire(key, timeout)
end
end
# Adds a value to a set. # Adds a value to a set.
# #
# raw_key - The key of the set to add the value to. # raw_key - The key of the set to add the value to.
......
...@@ -3,23 +3,60 @@ ...@@ -3,23 +3,60 @@
module Gitlab module Gitlab
module GithubImport module GithubImport
module BulkImporting module BulkImporting
attr_reader :project, :client
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
def initialize(project, client)
@project = project
@client = client
end
# Builds and returns an Array of objects to bulk insert into the # Builds and returns an Array of objects to bulk insert into the
# database. # database.
# #
# enum - An Enumerable that returns the objects to turn into database # enum - An Enumerable that returns the objects to turn into database
# rows. # rows.
def build_database_rows(enum) def build_database_rows(enum)
enum.each_with_object([]) do |(object, _), rows| rows = enum.each_with_object([]) do |(object, _), result|
rows << build(object) unless already_imported?(object) result << build(object) unless already_imported?(object)
end end
log_and_increment_counter(rows.size, :fetched)
rows
end end
# Bulk inserts the given rows into the database. # Bulk inserts the given rows into the database.
def bulk_insert(model, rows, batch_size: 100) def bulk_insert(model, rows, batch_size: 100)
rows.each_slice(batch_size) do |slice| rows.each_slice(batch_size) do |slice|
Gitlab::Database.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert Gitlab::Database.bulk_insert(model.table_name, slice) # rubocop:disable Gitlab/BulkInsert
log_and_increment_counter(slice.size, :imported)
end end
end end
def object_type
raise NotImplementedError
end
private
def log_and_increment_counter(value, operation)
Gitlab::Import::Logger.info(
import_source: :github,
project_id: project.id,
importer: self.class.name,
message: "#{value} #{object_type.to_s.pluralize} #{operation}"
)
Gitlab::GithubImport::ObjectCounter.increment(
project,
object_type,
operation,
value: value
)
end
end end
end end
end end
...@@ -6,15 +6,9 @@ module Gitlab ...@@ -6,15 +6,9 @@ module Gitlab
class LabelsImporter class LabelsImporter
include BulkImporting include BulkImporting
attr_reader :project, :client, :existing_labels
# project - An instance of `Project`.
# client - An instance of `Gitlab::GithubImport::Client`.
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client) def existing_labels
@project = project @existing_labels ||= project.labels.pluck(:title).to_set
@client = client
@existing_labels = project.labels.pluck(:title).to_set
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -51,6 +45,10 @@ module Gitlab ...@@ -51,6 +45,10 @@ module Gitlab
def each_label def each_label
client.labels(project.import_source) client.labels(project.import_source)
end end
def object_type
:label
end
end end
end end
end end
......
...@@ -6,15 +6,9 @@ module Gitlab ...@@ -6,15 +6,9 @@ module Gitlab
class MilestonesImporter class MilestonesImporter
include BulkImporting include BulkImporting
attr_reader :project, :client, :existing_milestones
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client) def existing_milestones
@project = project @existing_milestones ||= project.milestones.pluck(:iid).to_set
@client = client
@existing_milestones = project.milestones.pluck(:iid).to_set
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -55,6 +49,10 @@ module Gitlab ...@@ -55,6 +49,10 @@ module Gitlab
def each_milestone def each_milestone
client.milestones(project.import_source, state: 'all') client.milestones(project.import_source, state: 'all')
end end
def object_type
:milestone
end
end end
end end
end end
......
...@@ -6,15 +6,9 @@ module Gitlab ...@@ -6,15 +6,9 @@ module Gitlab
class ReleasesImporter class ReleasesImporter
include BulkImporting include BulkImporting
attr_reader :project, :client, :existing_tags
# project - An instance of `Project`
# client - An instance of `Gitlab::GithubImport::Client`
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def initialize(project, client) def existing_tags
@project = project @existing_tags ||= project.releases.pluck(:tag).to_set
@client = client
@existing_tags = project.releases.pluck(:tag).to_set
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
...@@ -50,6 +44,10 @@ module Gitlab ...@@ -50,6 +44,10 @@ module Gitlab
def description_for(release) def description_for(release)
release.body.presence || "Release for tag #{release.tag_name}" release.body.presence || "Release for tag #{release.tag_name}"
end end
def object_type
:release
end
end end
end end
end end
......
...@@ -14,11 +14,16 @@ module Gitlab ...@@ -14,11 +14,16 @@ module Gitlab
CACHING = Gitlab::Cache::Import::Caching CACHING = Gitlab::Cache::Import::Caching
class << self class << self
def increment(project, object_type, operation) # Increments the project and the global counters if the given value is >= 1
def increment(project, object_type, operation, value: 1)
integer = value.to_i
return if integer <= 0
validate_operation!(operation) validate_operation!(operation)
increment_project_counter(project, object_type, operation) increment_project_counter(project, object_type, operation, integer)
increment_global_counter(object_type, operation) increment_global_counter(object_type, operation, integer)
end end
def summary(project) def summary(project)
...@@ -41,7 +46,7 @@ module Gitlab ...@@ -41,7 +46,7 @@ module Gitlab
# and it's used to report the health of the Github Importer # and it's used to report the health of the Github Importer
# in the Grafana Dashboard # in the Grafana Dashboard
# https://dashboards.gitlab.net/d/2zgM_rImz/github-importer?orgId=1 # https://dashboards.gitlab.net/d/2zgM_rImz/github-importer?orgId=1
def increment_global_counter(object_type, operation) def increment_global_counter(object_type, operation, value)
key = GLOBAL_COUNTER_KEY % { key = GLOBAL_COUNTER_KEY % {
operation: operation, operation: operation,
object_type: object_type object_type: object_type
...@@ -51,18 +56,26 @@ module Gitlab ...@@ -51,18 +56,26 @@ module Gitlab
object_type: object_type.to_s.humanize object_type: object_type.to_s.humanize
} }
Gitlab::Metrics.counter(key.to_sym, description).increment Gitlab::Metrics.counter(key.to_sym, description).increment(by: value)
end end
# Project counters are short lived, in Redis, # Project counters are short lived, in Redis,
# and it's used to report how successful a project # and it's used to report how successful a project
# import was with the #summary method. # import was with the #summary method.
def increment_project_counter(project, object_type, operation) def increment_project_counter(project, object_type, operation, value)
counter_key = PROJECT_COUNTER_KEY % { project: project.id, operation: operation, object_type: object_type } counter_key = PROJECT_COUNTER_KEY % {
project: project.id,
operation: operation,
object_type: object_type
}
add_counter_to_list(project, operation, counter_key) add_counter_to_list(project, operation, counter_key)
CACHING.increment(counter_key) if Feature.disabled?(:import_redis_increment_by, default_enabled: :yaml)
CACHING.increment(counter_key)
else
CACHING.increment_by(counter_key, value)
end
end end
def add_counter_to_list(project, operation, key) def add_counter_to_list(project, operation, key)
...@@ -75,7 +88,7 @@ module Gitlab ...@@ -75,7 +88,7 @@ module Gitlab
def validate_operation!(operation) def validate_operation!(operation)
unless operation.to_s.presence_in(OPERATIONS) unless operation.to_s.presence_in(OPERATIONS)
raise ArgumentError, "Operation must be #{OPERATIONS.join(' or ')}" raise ArgumentError, "operation must be #{OPERATIONS.join(' or ')}"
end end
end end
end end
......
...@@ -3,8 +3,20 @@ ...@@ -3,8 +3,20 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Gitlab::GithubImport::BulkImporting do RSpec.describe Gitlab::GithubImport::BulkImporting do
let(:importer) do let(:project) { instance_double(Project, id: 1) }
Class.new { include(Gitlab::GithubImport::BulkImporting) }.new let(:importer) { MyImporter.new(project, double) }
let(:importer_class) do
Class.new do
include Gitlab::GithubImport::BulkImporting
def object_type
:object_type
end
end
end
before do
stub_const 'MyImporter', importer_class
end end
describe '#build_database_rows' do describe '#build_database_rows' do
...@@ -21,6 +33,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do ...@@ -21,6 +33,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
.with(object) .with(object)
.and_return(false) .and_return(false)
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '1 object_types fetched'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.with(
project,
:object_type,
:fetched,
value: 1
)
enum = [[object, 1]].to_enum enum = [[object, 1]].to_enum
expect(importer.build_database_rows(enum)).to eq([{ title: 'Foo' }]) expect(importer.build_database_rows(enum)).to eq([{ title: 'Foo' }])
...@@ -37,6 +67,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do ...@@ -37,6 +67,24 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
.with(object) .with(object)
.and_return(true) .and_return(true)
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '0 object_types fetched'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.with(
project,
:object_type,
:fetched,
value: 0
)
enum = [[object, 1]].to_enum enum = [[object, 1]].to_enum
expect(importer.build_database_rows(enum)).to be_empty expect(importer.build_database_rows(enum)).to be_empty
...@@ -48,6 +96,26 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do ...@@ -48,6 +96,26 @@ RSpec.describe Gitlab::GithubImport::BulkImporting do
rows = [{ title: 'Foo' }] * 10 rows = [{ title: 'Foo' }] * 10
model = double(:model, table_name: 'kittens') model = double(:model, table_name: 'kittens')
expect(Gitlab::Import::Logger)
.to receive(:info)
.twice
.with(
import_source: :github,
project_id: 1,
importer: 'MyImporter',
message: '5 object_types imported'
)
expect(Gitlab::GithubImport::ObjectCounter)
.to receive(:increment)
.twice
.with(
project,
:object_type,
:imported,
value: 5
)
expect(Gitlab::Database) expect(Gitlab::Database)
.to receive(:bulk_insert) .to receive(:bulk_insert)
.ordered .ordered
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do
it 'validates the operation being incremented' do it 'validates the operation being incremented' do
expect { described_class.increment(project, :issue, :unknown) } expect { described_class.increment(project, :issue, :unknown) }
.to raise_error(ArgumentError, 'Operation must be fetched or imported') .to raise_error(ArgumentError, 'operation must be fetched or imported')
end end
it 'increments the counter and saves the key to be listed in the summary later' do it 'increments the counter and saves the key to be listed in the summary later' do
...@@ -33,4 +33,20 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do ...@@ -33,4 +33,20 @@ RSpec.describe Gitlab::GithubImport::ObjectCounter, :clean_gitlab_redis_cache do
'imported' => { 'issue' => 2 } 'imported' => { 'issue' => 2 }
}) })
end end
it 'does not increment the counter if the given value is <= 0' do
expect(Gitlab::Metrics)
.not_to receive(:counter)
expect(Gitlab::Metrics)
.not_to receive(:counter)
described_class.increment(project, :issue, :fetched, value: 0)
described_class.increment(project, :issue, :imported, value: nil)
expect(described_class.summary(project)).to eq({
'fetched' => {},
'imported' => {}
})
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment