Commit dd3d4221 authored by Robert Speicher's avatar Robert Speicher

Merge branch 'github' into 'master'

Improve GitHub importer

Closes #27429

See merge request !12886
parents 4c8b6668 38704e42
...@@ -34,8 +34,12 @@ module Projects ...@@ -34,8 +34,12 @@ module Projects
def import_repository def import_repository
raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url) raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url)
# We should return early for a GitHub import because the new GitHub
# importer fetch the project repositories for us.
return if project.github_import?
begin begin
if project.github_import? || project.gitea_import? if project.gitea_import?
fetch_repository fetch_repository
else else
clone_repository clone_repository
...@@ -55,7 +59,7 @@ module Projects ...@@ -55,7 +59,7 @@ module Projects
end end
def fetch_repository def fetch_repository
project.create_repository project.ensure_repository
project.repository.add_remote(project.import_type, project.import_url) project.repository.add_remote(project.import_type, project.import_url)
project.repository.set_remote_as_mirror(project.import_type) project.repository.set_remote_as_mirror(project.import_type)
project.repository.fetch_remote(project.import_type, forced: true) project.repository.fetch_remote(project.import_type, forced: true)
......
---
title: Reduce memory usage of the GitHub importer
merge_request: 12886
author:
module Github module Github
class Client class Client
TIMEOUT = 60
attr_reader :connection, :rate_limit attr_reader :connection, :rate_limit
def initialize(options) def initialize(options)
@connection = Faraday.new(url: options.fetch(:url)) do |faraday| @connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday|
faraday.options.open_timeout = options.fetch(:timeout, 60) faraday.options.open_timeout = options.fetch(:timeout, TIMEOUT)
faraday.options.timeout = options.fetch(:timeout, 60) faraday.options.timeout = options.fetch(:timeout, TIMEOUT)
faraday.authorization 'token', options.fetch(:token) faraday.authorization 'token', options.fetch(:token)
faraday.adapter :net_http faraday.adapter :net_http
faraday.ssl.verify = verify_ssl
end end
@rate_limit = RateLimit.new(connection) @rate_limit = RateLimit.new(connection)
...@@ -19,5 +22,32 @@ module Github ...@@ -19,5 +22,32 @@ module Github
Github::Response.new(connection.get(url, query)) Github::Response.new(connection.get(url, query))
end end
private
def root_endpoint
custom_endpoint || github_endpoint
end
def custom_endpoint
github_omniauth_provider.dig('args', 'client_options', 'site')
end
def verify_ssl
# If there is no config, we're connecting to github.com
# and we should verify ssl.
github_omniauth_provider.fetch('verify_ssl', true)
end
def github_endpoint
OmniAuth::Strategies::GitHub.default_options[:client_options][:site]
end
def github_omniauth_provider
@github_omniauth_provider ||=
Gitlab.config.omniauth.providers
.find { |provider| provider.name == 'github' }
.to_h
end
end end
end end
...@@ -41,13 +41,16 @@ module Github ...@@ -41,13 +41,16 @@ module Github
self.reset_callbacks :validate self.reset_callbacks :validate
end end
attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose attr_reader :project, :repository, :repo, :repo_url, :wiki_url,
:options, :errors, :cached, :verbose
def initialize(project, options) def initialize(project, options = {})
@project = project @project = project
@repository = project.repository @repository = project.repository
@repo = project.import_source @repo = project.import_source
@options = options @repo_url = project.import_url
@wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git')
@options = options.reverse_merge(token: project.import_data&.credentials&.fetch(:user))
@verbose = options.fetch(:verbose, false) @verbose = options.fetch(:verbose, false)
@cached = Hash.new { |hash, key| hash[key] = Hash.new } @cached = Hash.new { |hash, key| hash[key] = Hash.new }
@errors = [] @errors = []
...@@ -65,6 +68,8 @@ module Github ...@@ -65,6 +68,8 @@ module Github
fetch_pull_requests fetch_pull_requests
puts 'Fetching issues...'.color(:aqua) if verbose puts 'Fetching issues...'.color(:aqua) if verbose
fetch_issues fetch_issues
puts 'Fetching releases...'.color(:aqua) if verbose
fetch_releases
puts 'Cloning wiki repository...'.color(:aqua) if verbose puts 'Cloning wiki repository...'.color(:aqua) if verbose
fetch_wiki_repository fetch_wiki_repository
puts 'Expiring repository cache...'.color(:aqua) if verbose puts 'Expiring repository cache...'.color(:aqua) if verbose
...@@ -72,6 +77,7 @@ module Github ...@@ -72,6 +77,7 @@ module Github
true true
rescue Github::RepositoryFetchError rescue Github::RepositoryFetchError
expire_repository_cache
false false
ensure ensure
keep_track_of_errors keep_track_of_errors
...@@ -81,23 +87,21 @@ module Github ...@@ -81,23 +87,21 @@ module Github
def fetch_repository def fetch_repository
begin begin
project.create_repository unless project.repository.exists? project.ensure_repository
project.repository.add_remote('github', "https://#{options.fetch(:token)}@github.com/#{repo}.git") project.repository.add_remote('github', repo_url)
project.repository.set_remote_as_mirror('github') project.repository.set_remote_as_mirror('github')
project.repository.fetch_remote('github', forced: true) project.repository.fetch_remote('github', forced: true)
rescue Gitlab::Shell::Error => e rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
error(:project, "https://github.com/#{repo}.git", e.message) error(:project, repo_url, e.message)
raise Github::RepositoryFetchError raise Github::RepositoryFetchError
end end
end end
def fetch_wiki_repository def fetch_wiki_repository
wiki_url = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git" return if project.wiki.repository_exists?
wiki_path = "#{project.full_path}.wiki"
unless project.wiki.repository_exists? wiki_path = "#{project.disk_path}.wiki"
gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url) gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url)
end
rescue Gitlab::Shell::Error => e rescue Gitlab::Shell::Error => e
# GitHub error message when the wiki repo has not been created, # GitHub error message when the wiki repo has not been created,
# this means that repo has wiki enabled, but have no pages. So, # this means that repo has wiki enabled, but have no pages. So,
...@@ -309,7 +313,7 @@ module Github ...@@ -309,7 +313,7 @@ module Github
next unless representation.valid? next unless representation.valid?
release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag) release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag)
next unless relese.new_record? next unless release.new_record?
begin begin
release.description = representation.description release.description = representation.description
...@@ -337,7 +341,7 @@ module Github ...@@ -337,7 +341,7 @@ module Github
def user_id(user, fallback_id = nil) def user_id(user, fallback_id = nil)
return unless user.present? return unless user.present?
return cached[:user_ids][user.id] if cached[:user_ids].key?(user.id) return cached[:user_ids][user.id] if cached[:user_ids][user.id].present?
gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email) gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email)
...@@ -367,7 +371,7 @@ module Github ...@@ -367,7 +371,7 @@ module Github
end end
def expire_repository_cache def expire_repository_cache
repository.expire_content_cache repository.expire_content_cache if project.repository_exists?
end end
def keep_track_of_errors def keep_track_of_errors
......
...@@ -8,7 +8,7 @@ module Gitlab ...@@ -8,7 +8,7 @@ module Gitlab
ImportSource = Struct.new(:name, :title, :importer) ImportSource = Struct.new(:name, :title, :importer)
ImportTable = [ ImportTable = [
ImportSource.new('github', 'GitHub', Gitlab::GithubImport::Importer), ImportSource.new('github', 'GitHub', Github::Import),
ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer), ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer),
ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer), ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer),
ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer), ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer),
......
...@@ -7,7 +7,7 @@ class GithubImport ...@@ -7,7 +7,7 @@ class GithubImport
end end
def initialize(token, gitlab_username, project_path, extras) def initialize(token, gitlab_username, project_path, extras)
@options = { url: 'https://api.github.com', token: token, verbose: true } @options = { token: token, verbose: true }
@project_path = project_path @project_path = project_path
@current_user = User.find_by_username(gitlab_username) @current_user = User.find_by_username(gitlab_username)
@github_repo = extras.empty? ? nil : extras.first @github_repo = extras.empty? ? nil : extras.first
...@@ -62,6 +62,7 @@ class GithubImport ...@@ -62,6 +62,7 @@ class GithubImport
visibility_level: visibility_level, visibility_level: visibility_level,
import_type: 'github', import_type: 'github',
import_source: @repo['full_name'], import_source: @repo['full_name'],
import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"),
skip_wiki: @repo['has_wiki'] skip_wiki: @repo['has_wiki']
).execute ).execute
end end
......
...@@ -56,7 +56,7 @@ describe Gitlab::ImportSources do ...@@ -56,7 +56,7 @@ describe Gitlab::ImportSources do
describe '.importer' do describe '.importer' do
import_sources = { import_sources = {
'github' => Gitlab::GithubImport::Importer, 'github' => Github::Import,
'bitbucket' => Gitlab::BitbucketImport::Importer, 'bitbucket' => Gitlab::BitbucketImport::Importer,
'gitlab' => Gitlab::GitlabImport::Importer, 'gitlab' => Gitlab::GitlabImport::Importer,
'google_code' => Gitlab::GoogleCodeImport::Importer, 'google_code' => Gitlab::GoogleCodeImport::Importer,
......
...@@ -38,8 +38,7 @@ describe Projects::ImportService do ...@@ -38,8 +38,7 @@ describe Projects::ImportService do
context 'with a Github repository' do context 'with a Github repository' do
it 'succeeds if repository import is successfully' do it 'succeeds if repository import is successfully' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) expect_any_instance_of(Github::Import).to receive(:execute).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
result = subject.execute result = subject.execute
...@@ -52,16 +51,7 @@ describe Projects::ImportService do ...@@ -52,16 +51,7 @@ describe Projects::ImportService do
result = subject.execute result = subject.execute
expect(result[:status]).to eq :error expect(result[:status]).to eq :error
expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository" expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported."
end
it 'does not remove the GitHub remote' do
expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true)
expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
subject.execute
expect(project.repository.raw_repository.remote_names).to include('github')
end end
end end
...@@ -102,8 +92,7 @@ describe Projects::ImportService do ...@@ -102,8 +92,7 @@ describe Projects::ImportService do
end end
it 'succeeds if importer succeeds' do it 'succeeds if importer succeeds' do
allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) allow_any_instance_of(Github::Import).to receive(:execute).and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true)
result = subject.execute result = subject.execute
...@@ -111,10 +100,7 @@ describe Projects::ImportService do ...@@ -111,10 +100,7 @@ describe Projects::ImportService do
end end
it 'flushes various caches' do it 'flushes various caches' do
allow_any_instance_of(Repository).to receive(:fetch_remote) allow_any_instance_of(Github::Import).to receive(:execute)
.and_return(true)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute)
.and_return(true) .and_return(true)
expect_any_instance_of(Repository).to receive(:expire_content_cache) expect_any_instance_of(Repository).to receive(:expire_content_cache)
...@@ -123,8 +109,7 @@ describe Projects::ImportService do ...@@ -123,8 +109,7 @@ describe Projects::ImportService do
end end
it 'fails if importer fails' do it 'fails if importer fails' do
allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) allow_any_instance_of(Github::Import).to receive(:execute).and_return(false)
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false)
result = subject.execute result = subject.execute
...@@ -133,8 +118,7 @@ describe Projects::ImportService do ...@@ -133,8 +118,7 @@ describe Projects::ImportService do
end end
it 'fails if importer raise an error' do it 'fails if importer raise an error' do
allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true) allow_any_instance_of(Github::Import).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API'))
result = subject.execute result = subject.execute
...@@ -143,9 +127,9 @@ describe Projects::ImportService do ...@@ -143,9 +127,9 @@ describe Projects::ImportService do
end end
it 'expires content cache after error' do it 'expires content cache after error' do
allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true) allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false)
expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new)
expect_any_instance_of(Repository).to receive(:expire_content_cache) expect_any_instance_of(Repository).to receive(:expire_content_cache)
subject.execute subject.execute
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment