Commit 26ae1a72 authored by Kassio Borges's avatar Kassio Borges

Github Importer: Add Cache to Pull Request Reviews importer

Github importer have a mechanism to re-schedule an Importer job if it
gets throttled. But, repositories with large number of pull requests,
were in a _throttling loop_ for the Pull Request Reviews Importer.

In other words, when re-scheduled, the job was re-starting importing
reviews to all imported Pull Requests, regardless if already imported
or not, causing another throttling.

To fix this, already imported Pull Requests ids are being cached and
skipped, this way, when the job re-start, we don't do requests related
to already imported Pull Requests.

The same is applied to the Pull Request MergedBy importer.

Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/329552

Changelog: changed
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60668
parent 2c4abf28
---
title: 'Github Importer: Add Cache to Pull Request Reviews importer'
merge_request: 60668
author:
type: changed
...@@ -70,7 +70,7 @@ module Gitlab ...@@ -70,7 +70,7 @@ module Gitlab
end end
def pull_request_reviews(repo_name, iid) def pull_request_reviews(repo_name, iid)
with_rate_limit { octokit.pull_request_reviews(repo_name, iid) } each_object(:pull_request_reviews, repo_name, iid)
end end
# Returns the details of a GitHub repository. # Returns the details of a GitHub repository.
......
...@@ -22,14 +22,18 @@ module Gitlab ...@@ -22,14 +22,18 @@ module Gitlab
:pull_requests_merged_by :pull_requests_merged_by
end end
def id_for_already_imported_cache(pr) def id_for_already_imported_cache(merge_request)
pr.number merge_request.id
end end
def each_object_to_import def each_object_to_import
project.merge_requests.with_state(:merged).find_each do |merge_request| project.merge_requests.with_state(:merged).find_each do |merge_request|
next if already_imported?(merge_request)
pull_request = client.pull_request(project.import_source, merge_request.iid) pull_request = client.pull_request(project.import_source, merge_request.iid)
yield(pull_request) yield(pull_request)
mark_as_imported(merge_request)
end end
end end
end end
......
...@@ -22,17 +22,22 @@ module Gitlab ...@@ -22,17 +22,22 @@ module Gitlab
:pull_request_reviews :pull_request_reviews
end end
def id_for_already_imported_cache(review) def id_for_already_imported_cache(merge_request)
review.github_id merge_request.id
end end
def each_object_to_import def each_object_to_import
project.merge_requests.find_each do |merge_request| project.merge_requests.find_each do |merge_request|
reviews = client.pull_request_reviews(project.import_source, merge_request.iid) next if already_imported?(merge_request)
reviews.each do |review|
client
.pull_request_reviews(project.import_source, merge_request.iid)
.each do |review|
review.merge_request_id = merge_request.id review.merge_request_id = merge_request.id
yield(review) yield(review)
end end
mark_as_imported(merge_request)
end end
end end
end end
......
...@@ -32,8 +32,9 @@ RSpec.describe Gitlab::GithubImport::Client do ...@@ -32,8 +32,9 @@ RSpec.describe Gitlab::GithubImport::Client do
it 'returns the pull request reviews' do it 'returns the pull request reviews' do
client = described_class.new('foo') client = described_class.new('foo')
expect(client.octokit).to receive(:pull_request_reviews).with('foo/bar', 999) expect(client)
expect(client).to receive(:with_rate_limit).and_yield .to receive(:each_object)
.with(:pull_request_reviews, 'foo/bar', 999)
client.pull_request_reviews('foo/bar', 999) client.pull_request_reviews('foo/bar', 999)
end end
......
...@@ -23,12 +23,11 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do ...@@ -23,12 +23,11 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do
end end
describe '#id_for_already_imported_cache' do describe '#id_for_already_imported_cache' do
it { expect(subject.id_for_already_imported_cache(double(number: 1))).to eq(1) } it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
end end
describe '#each_object_to_import' do describe '#each_object_to_import', :clean_gitlab_redis_cache do
it 'fetchs the merged pull requests data' do it 'fetchs the merged pull requests data' do
pull_request = double
create( create(
:merged_merge_request, :merged_merge_request,
iid: 999, iid: 999,
...@@ -36,12 +35,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do ...@@ -36,12 +35,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsMergedByImporter do
target_project: project target_project: project
) )
pull_request = double
allow(client) allow(client)
.to receive(:pull_request) .to receive(:pull_request)
.exactly(:once) # ensure to be cached on the second call
.with('http://somegithub.com', 999) .with('http://somegithub.com', 999)
.and_return(pull_request) .and_return(pull_request)
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(pull_request) expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(pull_request)
subject.each_object_to_import {}
end end
end end
end end
...@@ -23,12 +23,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do ...@@ -23,12 +23,18 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do
end end
describe '#id_for_already_imported_cache' do describe '#id_for_already_imported_cache' do
it { expect(subject.id_for_already_imported_cache(double(github_id: 1))).to eq(1) } it { expect(subject.id_for_already_imported_cache(double(id: 1))).to eq(1) }
end end
describe '#each_object_to_import' do describe '#each_object_to_import', :clean_gitlab_redis_cache do
it 'fetchs the merged pull requests data' do it 'fetchs the merged pull requests data' do
merge_request = create(:merge_request, source_project: project) merge_request = create(
:merged_merge_request,
iid: 999,
source_project: project,
target_project: project
)
review = double review = double
expect(review) expect(review)
...@@ -37,10 +43,14 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do ...@@ -37,10 +43,14 @@ RSpec.describe Gitlab::GithubImport::Importer::PullRequestsReviewsImporter do
allow(client) allow(client)
.to receive(:pull_request_reviews) .to receive(:pull_request_reviews)
.exactly(:once) # ensure to be cached on the second call
.with('github/repo', merge_request.iid) .with('github/repo', merge_request.iid)
.and_return([review]) .and_return([review])
expect { |b| subject.each_object_to_import(&b) }.to yield_with_args(review) expect { |b| subject.each_object_to_import(&b) }
.to yield_with_args(review)
subject.each_object_to_import {}
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment