Commit 05485397 authored by Kassio Borges's avatar Kassio Borges Committed by Stan Hu

Add logs to the Github Import process

Currently is very hard to debug a Github Import. This commit logs each
step of the import process to make it easier to debug.
parent f9651d0e
...@@ -15,17 +15,25 @@ module Gitlab ...@@ -15,17 +15,25 @@ module Gitlab
feature_category :importers feature_category :importers
worker_has_external_dependencies! worker_has_external_dependencies!
def logger
@logger ||= Gitlab::Import::Logger.build
end
end end
# project - An instance of `Project` to import the data into. # project - An instance of `Project` to import the data into.
# client - An instance of `Gitlab::GithubImport::Client` # client - An instance of `Gitlab::GithubImport::Client`
# hash - A Hash containing the details of the object to import. # hash - A Hash containing the details of the object to import.
def import(project, client, hash) def import(project, client, hash)
object = representation_class.from_json_hash(hash) info(project.id, message: 'starting importer')
object = representation_class.from_json_hash(hash)
importer_class.new(object, project, client).execute importer_class.new(object, project, client).execute
counter.increment counter.increment
info(project.id, message: 'importer finished')
rescue => e
error(project.id, e)
end end
def counter def counter
...@@ -52,6 +60,35 @@ module Gitlab ...@@ -52,6 +60,35 @@ module Gitlab
def counter_description def counter_description
raise NotImplementedError raise NotImplementedError
end end
private
def info(project_id, extra = {})
logger.info(log_attributes(project_id, extra))
end
def error(project_id, exception)
logger.error(
log_attributes(
project_id,
message: 'importer failed',
'error.message': exception.message
)
)
Gitlab::ErrorTracking.track_and_raise_exception(
exception,
log_attributes(project_id)
)
end
def log_attributes(project_id, extra = {})
extra.merge(
import_source: :github,
project_id: project_id,
importer: importer_class.name
)
end
end end
end end
end end
...@@ -6,7 +6,7 @@ module Gitlab ...@@ -6,7 +6,7 @@ module Gitlab
# importing GitHub projects. # importing GitHub projects.
module ReschedulingMethods module ReschedulingMethods
# project_id - The ID of the GitLab project to import the note into. # project_id - The ID of the GitLab project to import the note into.
# hash - A Hash containing the details of the GitHub object to imoprt. # hash - A Hash containing the details of the GitHub object to import.
# notify_key - The Redis key to notify upon completion, if any. # notify_key - The Redis key to notify upon completion, if any.
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, hash, notify_key = nil) def perform(project_id, hash, notify_key = nil)
......
...@@ -5,11 +5,17 @@ module Gitlab ...@@ -5,11 +5,17 @@ module Gitlab
module StageMethods module StageMethods
# project_id - The ID of the GitLab project to import the data into. # project_id - The ID of the GitLab project to import the data into.
def perform(project_id) def perform(project_id)
info(project_id, message: 'starting stage')
return unless (project = find_project(project_id)) return unless (project = find_project(project_id))
client = GithubImport.new_client_for(project) client = GithubImport.new_client_for(project)
try_import(client, project) try_import(client, project)
info(project_id, message: 'stage finished')
rescue => e
error(project_id, e)
end end
# client - An instance of Gitlab::GithubImport::Client. # client - An instance of Gitlab::GithubImport::Client.
...@@ -27,6 +33,39 @@ module Gitlab ...@@ -27,6 +33,39 @@ module Gitlab
Project.joins_import_state.where(import_state: { status: :started }).find_by(id: id) Project.joins_import_state.where(import_state: { status: :started }).find_by(id: id)
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
private
def info(project_id, extra = {})
logger.info(log_attributes(project_id, extra))
end
def error(project_id, exception)
logger.error(
log_attributes(
project_id,
message: 'stage failed',
'error.message': exception.message
)
)
Gitlab::ErrorTracking.track_and_raise_exception(
exception,
log_attributes(project_id)
)
end
def log_attributes(project_id, extra = {})
extra.merge(
import_source: :github,
project_id: project_id,
import_stage: self.class.name
)
end
def logger
@logger ||= Gitlab::Import::Logger.build
end
end end
end end
end end
...@@ -20,12 +20,15 @@ module Gitlab ...@@ -20,12 +20,15 @@ module Gitlab
def report_import_time(project) def report_import_time(project)
duration = Time.zone.now - project.created_at duration = Time.zone.now - project.created_at
path = project.full_path
histogram.observe({ project: path }, duration) histogram.observe({ project: project.full_path }, duration)
counter.increment counter.increment
logger.info("GitHub importer finished for #{path} in #{duration.round(2)} seconds") info(
project.id,
message: "GitHub project import finished",
duration_s: duration.round(2)
)
end end
def histogram def histogram
......
...@@ -20,6 +20,7 @@ module Gitlab ...@@ -20,6 +20,7 @@ module Gitlab
# project - An instance of Project. # project - An instance of Project.
def import(client, project) def import(client, project)
IMPORTERS.each do |klass| IMPORTERS.each do |klass|
info(project.id, message: "starting importer", importer: klass.name)
klass.new(project, client).execute klass.new(project, client).execute
end end
......
...@@ -19,6 +19,7 @@ module Gitlab ...@@ -19,6 +19,7 @@ module Gitlab
# project - An instance of Project. # project - An instance of Project.
def import(client, project) def import(client, project)
waiters = IMPORTERS.each_with_object({}) do |klass, hash| waiters = IMPORTERS.each_with_object({}) do |klass, hash|
info(project.id, message: "starting importer", importer: klass.name)
waiter = klass.new(project, client).execute waiter = klass.new(project, client).execute
hash[waiter.key] = waiter.jobs_remaining hash[waiter.key] = waiter.jobs_remaining
end end
......
...@@ -16,6 +16,8 @@ module Gitlab ...@@ -16,6 +16,8 @@ module Gitlab
# project - An instance of Project. # project - An instance of Project.
def import(project) def import(project)
info(project.id, message: "starting importer", importer: 'Importer::LfsObjectsImporter')
waiter = Importer::LfsObjectsImporter waiter = Importer::LfsObjectsImporter
.new(project, nil) .new(project, nil)
.execute .execute
......
...@@ -11,6 +11,7 @@ module Gitlab ...@@ -11,6 +11,7 @@ module Gitlab
# client - An instance of Gitlab::GithubImport::Client. # client - An instance of Gitlab::GithubImport::Client.
# project - An instance of Project. # project - An instance of Project.
def import(client, project) def import(client, project)
info(project.id, message: "starting importer", importer: 'Importer::NotesImporter')
waiter = Importer::NotesImporter waiter = Importer::NotesImporter
.new(project, client) .new(project, client)
.execute .execute
......
...@@ -11,6 +11,7 @@ module Gitlab ...@@ -11,6 +11,7 @@ module Gitlab
# client - An instance of Gitlab::GithubImport::Client. # client - An instance of Gitlab::GithubImport::Client.
# project - An instance of Project. # project - An instance of Project.
def import(client, project) def import(client, project)
info(project.id, message: "starting importer", importer: 'Importer::PullRequestsImporter')
waiter = Importer::PullRequestsImporter waiter = Importer::PullRequestsImporter
.new(project, client) .new(project, client)
.execute .execute
......
...@@ -21,6 +21,7 @@ module Gitlab ...@@ -21,6 +21,7 @@ module Gitlab
# expiration time. # expiration time.
RefreshImportJidWorker.perform_in_the_future(project.id, jid) RefreshImportJidWorker.perform_in_the_future(project.id, jid)
info(project.id, message: "starting importer", importer: 'Importer::RepositoryImporter')
importer = Importer::RepositoryImporter.new(project, client) importer = Importer::RepositoryImporter.new(project, client)
return unless importer.execute return unless importer.execute
......
...@@ -28,11 +28,6 @@ module Gitlab ...@@ -28,11 +28,6 @@ module Gitlab
lfs_objects.each do |object| lfs_objects.each do |object|
yield object yield object
end end
rescue StandardError => e
Gitlab::Import::Logger.error(
message: 'The Lfs import process failed',
error: e.message
)
end end
end end
end end
......
...@@ -26,6 +26,8 @@ module Gitlab ...@@ -26,6 +26,8 @@ module Gitlab
end end
def execute def execute
info(project.id, message: "starting importer")
retval = retval =
if parallel? if parallel?
parallel_import parallel_import
...@@ -43,8 +45,11 @@ module Gitlab ...@@ -43,8 +45,11 @@ module Gitlab
# completed those jobs will just cycle through any remaining pages while # completed those jobs will just cycle through any remaining pages while
# not scheduling anything. # not scheduling anything.
Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i) Gitlab::Cache::Import::Caching.expire(already_imported_cache_key, 15.minutes.to_i)
info(project.id, message: "importer finished")
retval retval
rescue => e
error(project.id, e)
end end
# Imports all the objects in sequence in the current thread. # Imports all the objects in sequence in the current thread.
...@@ -157,6 +162,40 @@ module Gitlab ...@@ -157,6 +162,40 @@ module Gitlab
def collection_options def collection_options
{} {}
end end
private
def info(project_id, extra = {})
logger.info(log_attributes(project_id, extra))
end
def error(project_id, exception)
logger.error(
log_attributes(
project_id,
message: 'importer failed',
'error.message': exception.message
)
)
Gitlab::ErrorTracking.track_and_raise_exception(
exception,
log_attributes(project_id)
)
end
def log_attributes(project_id, extra = {})
extra.merge(
import_source: :github,
project_id: project_id,
importer: importer_class.name,
parallel: parallel?
)
end
def logger
@logger ||= Gitlab::Import::Logger.build
end
end end
end end
end end
...@@ -7,6 +7,10 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do ...@@ -7,6 +7,10 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do
Class.new do Class.new do
include(Gitlab::GithubImport::ParallelScheduling) include(Gitlab::GithubImport::ParallelScheduling)
def importer_class
Class
end
def collection_method def collection_method
:issues :issues
end end
...@@ -63,6 +67,82 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do ...@@ -63,6 +67,82 @@ RSpec.describe Gitlab::GithubImport::ParallelScheduling do
importer.execute importer.execute
end end
it 'logs the the process' do
importer = importer_class.new(project, client, parallel: false)
expect(importer)
.to receive(:sequential_import)
.and_return([])
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting importer',
import_source: :github,
parallel: false,
project_id: project.id,
importer: 'Class'
)
expect(logger)
.to receive(:info)
.with(
message: 'importer finished',
import_source: :github,
parallel: false,
project_id: project.id,
importer: 'Class'
)
end
importer.execute
end
it 'logs the error when it fails' do
exception = StandardError.new('some error')
importer = importer_class.new(project, client, parallel: false)
expect(importer)
.to receive(:sequential_import)
.and_raise(exception)
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting importer',
import_source: :github,
parallel: false,
project_id: project.id,
importer: 'Class'
)
expect(logger)
.to receive(:error)
.with(
message: 'importer failed',
import_source: :github,
project_id: project.id,
parallel: false,
importer: 'Class',
'error.message': 'some error'
)
end
expect(Gitlab::ErrorTracking)
.to receive(:track_and_raise_exception)
.with(
exception,
import_source: :github,
parallel: false,
project_id: project.id,
importer: 'Class'
)
.and_call_original
expect { importer.execute }.to raise_error(exception)
end
end end
describe '#sequential_import' do describe '#sequential_import' do
......
...@@ -22,20 +22,21 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -22,20 +22,21 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
end end
describe '#import' do describe '#import' do
it 'imports the object' do let(:representation_class) { double(:representation_class) }
representation_class = double(:representation_class) let(:importer_class) { double(:importer_class, name: 'klass_name') }
importer_class = double(:importer_class) let(:importer_instance) { double(:importer_instance) }
importer_instance = double(:importer_instance) let(:representation) { double(:representation) }
representation = double(:representation) let(:project) { double(:project, full_path: 'foo/bar', id: 1) }
project = double(:project, full_path: 'foo/bar') let(:client) { double(:client) }
client = double(:client)
before do
expect(worker) expect(worker)
.to receive(:representation_class) .to receive(:representation_class)
.and_return(representation_class) .and_return(representation_class)
expect(worker) expect(worker)
.to receive(:importer_class) .to receive(:importer_class)
.at_least(:once)
.and_return(importer_class) .and_return(importer_class)
expect(representation_class) expect(representation_class)
...@@ -47,7 +48,9 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -47,7 +48,9 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
.to receive(:new) .to receive(:new)
.with(representation, project, client) .with(representation, project, client)
.and_return(importer_instance) .and_return(importer_instance)
end
it 'imports the object' do
expect(importer_instance) expect(importer_instance)
.to receive(:execute) .to receive(:execute)
...@@ -55,8 +58,61 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do ...@@ -55,8 +58,61 @@ RSpec.describe Gitlab::GithubImport::ObjectImporter do
.to receive(:increment) .to receive(:increment)
.and_call_original .and_call_original
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting importer',
import_source: :github,
project_id: 1,
importer: 'klass_name'
)
expect(logger)
.to receive(:info)
.with(
message: 'importer finished',
import_source: :github,
project_id: 1,
importer: 'klass_name'
)
end
worker.import(project, client, { 'number' => 10 }) worker.import(project, client, { 'number' => 10 })
end end
it 'logs error when the import fails' do
exception = StandardError.new('some error')
expect(importer_instance)
.to receive(:execute)
.and_raise(exception)
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting importer',
import_source: :github,
project_id: project.id,
importer: 'klass_name'
)
expect(logger)
.to receive(:error)
.with(
message: 'importer failed',
import_source: :github,
project_id: project.id,
importer: 'klass_name',
'error.message': 'some error'
)
end
expect(Gitlab::ErrorTracking)
.to receive(:track_and_raise_exception)
.with(exception, import_source: :github, project_id: 1, importer: 'klass_name')
.and_call_original
expect { worker.import(project, client, { 'number' => 10 }) }.to raise_error(exception)
end
end end
describe '#counter' do describe '#counter' do
......
...@@ -5,7 +5,13 @@ require 'spec_helper' ...@@ -5,7 +5,13 @@ require 'spec_helper'
RSpec.describe Gitlab::GithubImport::StageMethods do RSpec.describe Gitlab::GithubImport::StageMethods do
let(:project) { create(:project) } let(:project) { create(:project) }
let(:worker) do let(:worker) do
Class.new { include(Gitlab::GithubImport::StageMethods) }.new Class.new do
def self.name
'DummyStage'
end
include(Gitlab::GithubImport::StageMethods)
end.new
end end
describe '#perform' do describe '#perform' do
...@@ -30,8 +36,72 @@ RSpec.describe Gitlab::GithubImport::StageMethods do ...@@ -30,8 +36,72 @@ RSpec.describe Gitlab::GithubImport::StageMethods do
an_instance_of(Project) an_instance_of(Project)
) )
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting stage',
import_source: :github,
project_id: project.id,
import_stage: 'DummyStage'
)
expect(logger)
.to receive(:info)
.with(
message: 'stage finished',
import_source: :github,
project_id: project.id,
import_stage: 'DummyStage'
)
end
worker.perform(project.id) worker.perform(project.id)
end end
it 'logs error when import fails' do
exception = StandardError.new('some error')
allow(worker)
.to receive(:find_project)
.with(project.id)
.and_return(project)
expect(worker)
.to receive(:try_import)
.and_raise(exception)
expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'starting stage',
import_source: :github,
project_id: project.id,
import_stage: 'DummyStage'
)
expect(logger)
.to receive(:error)
.with(
message: 'stage failed',
import_source: :github,
project_id: project.id,
import_stage: 'DummyStage',
'error.message': 'some error'
)
end
expect(Gitlab::ErrorTracking)
.to receive(:track_and_raise_exception)
.with(
exception,
import_source: :github,
project_id: project.id,
import_stage: 'DummyStage'
)
.and_call_original
expect { worker.perform(project.id) }.to raise_error(exception)
end
end end
describe '#try_import' do describe '#try_import' do
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportDiffNoteWorker do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportDiffNoteWorker do
describe '#import' do describe '#import' do
it 'imports a diff note' do it 'imports a diff note' do
project = double(:project, full_path: 'foo/bar') project = double(:project, full_path: 'foo/bar', id: 1)
client = double(:client) client = double(:client)
importer = double(:importer) importer = double(:importer)
hash = { hash = {
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportIssueWorker do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportIssueWorker do
describe '#import' do describe '#import' do
it 'imports an issue' do it 'imports an issue' do
project = double(:project, full_path: 'foo/bar') project = double(:project, full_path: 'foo/bar', id: 1)
client = double(:client) client = double(:client)
importer = double(:importer) importer = double(:importer)
hash = { hash = {
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportNoteWorker do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportNoteWorker do
describe '#import' do describe '#import' do
it 'imports a note' do it 'imports a note' do
project = double(:project, full_path: 'foo/bar') project = double(:project, full_path: 'foo/bar', id: 1)
client = double(:client) client = double(:client)
importer = double(:importer) importer = double(:importer)
hash = { hash = {
......
...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportPullRequestWorker do ...@@ -7,7 +7,7 @@ RSpec.describe Gitlab::GithubImport::ImportPullRequestWorker do
describe '#import' do describe '#import' do
it 'imports a pull request' do it 'imports a pull request' do
project = double(:project, full_path: 'foo/bar') project = double(:project, full_path: 'foo/bar', id: 1)
client = double(:client) client = double(:client)
importer = double(:importer) importer = double(:importer)
hash = { hash = {
......
...@@ -26,7 +26,17 @@ RSpec.describe Gitlab::GithubImport::Stage::FinishImportWorker do ...@@ -26,7 +26,17 @@ RSpec.describe Gitlab::GithubImport::Stage::FinishImportWorker do
.to receive(:increment) .to receive(:increment)
.and_call_original .and_call_original
expect(worker.logger).to receive(:info).with(an_instance_of(String)) expect_next_instance_of(Gitlab::Import::Logger) do |logger|
expect(logger)
.to receive(:info)
.with(
message: 'GitHub project import finished',
import_stage: 'Gitlab::GithubImport::Stage::FinishImportWorker',
import_source: :github,
project_id: project.id,
duration_s: a_kind_of(Numeric)
)
end
worker.report_import_time(project) worker.report_import_time(project)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment