Commit fcfb4043 authored by David Kim's avatar David Kim

Make batch size adaptive and attempt to download LFS object lists

parent d61d11fc
...@@ -13,6 +13,7 @@ module Projects ...@@ -13,6 +13,7 @@ module Projects
DownloadLinksError = Class.new(StandardError) DownloadLinksError = Class.new(StandardError)
DownloadLinkNotFound = Class.new(StandardError) DownloadLinkNotFound = Class.new(StandardError)
DownloadLinksRequestEntityTooLargeError = Class.new(StandardError)
attr_reader :remote_uri attr_reader :remote_uri
...@@ -29,22 +30,39 @@ module Projects ...@@ -29,22 +30,39 @@ module Projects
def execute(oids) def execute(oids)
return [] unless project&.lfs_enabled? && remote_uri && oids.present? return [] unless project&.lfs_enabled? && remote_uri && oids.present?
get_download_links_in_batches(oids)
end
private
def get_download_links_in_batches(oids, batch_size = REQUEST_BATCH_SIZE)
download_links = [] download_links = []
oids.each_slice(REQUEST_BATCH_SIZE) do |batch| oids.each_slice(batch_size) do |batch|
download_links += get_download_links(batch) download_links += get_download_links(batch)
end end
download_links download_links
end
private rescue DownloadLinksRequestEntityTooLargeError => e
# Log this exceptions to see how open it happens
Gitlab::ErrorTracking
.track_exception(e, project_id: project&.id, batch_size: batch_size, oids_count: oids.count)
# Try again with a smaller batch
batch_size /= 2
retry if batch_size > REQUEST_BATCH_SIZE / 3
raise DownloadLinksError, 'Unable to download due to RequestEntityTooLarge errors'
end
def get_download_links(oids) def get_download_links(oids)
response = Gitlab::HTTP.post(remote_uri, response = Gitlab::HTTP.post(remote_uri,
body: request_body(oids), body: request_body(oids),
headers: headers) headers: headers)
raise DownloadLinksRequestEntityTooLargeError if response.request_entity_too_large?
raise DownloadLinksError, response.message unless response.success? raise DownloadLinksError, response.message unless response.success?
# Since the LFS Batch API may return a Content-Ttpe of # Since the LFS Batch API may return a Content-Ttpe of
......
...@@ -8,11 +8,15 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -8,11 +8,15 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } } let(:new_oids) { { 'oid1' => 123, 'oid2' => 125 } }
let(:remote_uri) { URI.parse(lfs_endpoint) } let(:remote_uri) { URI.parse(lfs_endpoint) }
let(:request_object) { HTTParty::Request.new(Net::HTTP::Post, '/') }
let(:parsed_block) { lambda {} }
let(:success_net_response) { Net::HTTPOK.new('', '', '') }
let(:response) { Gitlab::HTTP::Response.new(request_object, net_response, parsed_block) }
def objects_response(oids) def objects_response(oids)
body = oids.map do |oid, size| body = oids.map do |oid, size|
{ {
'oid' => oid, 'oid' => oid, 'size' => size,
'size' => size,
'actions' => { 'actions' => {
'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" } 'download' => { 'href' => "#{import_url}/gitlab-lfs/objects/#{oid}" }
} }
...@@ -22,6 +26,11 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -22,6 +26,11 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
Struct.new(:success?, :objects).new(true, body).to_json Struct.new(:success?, :objects).new(true, body).to_json
end end
def custom_response(net_response, body = nil)
allow(net_response).to receive(:body).and_return(body)
Gitlab::HTTP::Response.new(request_object, net_response, parsed_block)
end
let(:invalid_object_response) do let(:invalid_object_response) do
[ [
'oid' => 'whatever', 'oid' => 'whatever',
...@@ -33,9 +42,8 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -33,9 +42,8 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
before do before do
allow(project).to receive(:lfs_enabled?).and_return(true) allow(project).to receive(:lfs_enabled?).and_return(true)
response = instance_double(Gitlab::HTTP::Response)
allow(response).to receive(:body).and_return(objects_response(new_oids)) response = custom_response(success_net_response, objects_response(new_oids))
allow(response).to receive(:success?).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(response) allow(Gitlab::HTTP).to receive(:post).and_return(response)
end end
...@@ -47,8 +55,18 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -47,8 +55,18 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
end end
context 'when lfs objects size is larger than the batch size' do context 'when lfs objects size is larger than the batch size' do
def stub_request(batch) def stub_successful_request(batch)
response = instance_double Gitlab::HTTP::Response, body: objects_response(batch), success?: true response = custom_response(success_net_response, objects_response(batch))
stub_request(batch, response)
end
def stub_entity_too_large_error_request(batch)
entity_too_large_net_response = Net::HTTPRequestEntityTooLarge.new('', '', '')
response = custom_response(entity_too_large_net_response)
stub_request(batch, response)
end
def stub_request(batch, response)
expect(Gitlab::HTTP).to receive(:post).with( expect(Gitlab::HTTP).to receive(:post).with(
remote_uri, remote_uri,
{ {
...@@ -60,12 +78,14 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -60,12 +78,14 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
let(:new_oids) { { 'oid1' => 123, 'oid2' => 125, 'oid3' => 126, 'oid4' => 127, 'oid5' => 128 } } let(:new_oids) { { 'oid1' => 123, 'oid2' => 125, 'oid3' => 126, 'oid4' => 127, 'oid5' => 128 } }
context 'when batch size' do
before do before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 2) stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 2)
stub_request([['oid1', 123], ['oid2', 125]]) data = new_oids.to_a
stub_request([['oid3', 126], ['oid4', 127]]) stub_successful_request([data[0], data[1]])
stub_request([['oid5', 128]]) stub_successful_request([data[2], data[3]])
stub_successful_request([data[4]])
end end
it 'retreives them in batches' do it 'retreives them in batches' do
...@@ -75,6 +95,61 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -75,6 +95,61 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
end end
end end
context 'when request fails with PayloadTooLarge error' do
let(:error_class) { described_class::DownloadLinksRequestEntityTooLargeError }
context 'when the smaller batch eventually works' do
before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 5)
data = new_oids.to_a
# with the batch size of 5
stub_entity_too_large_error_request(data)
# with the batch size of 2
stub_successful_request([data[0], data[1]])
stub_successful_request([data[2], data[3]])
stub_successful_request([data[4]])
end
it 'retreives them eventually and logs exceptions' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 5, oids_count: 5
)
subject.execute(new_oids).each do |lfs_download_object|
expect(lfs_download_object.link).to eq "#{import_url}/gitlab-lfs/objects/#{lfs_download_object.oid}"
end
end
end
context 'when batch size cannot be any smaller' do
before do
stub_const("#{described_class.name}::REQUEST_BATCH_SIZE", 5)
data = new_oids.to_a
# with the batch size of 5
stub_entity_too_large_error_request(data)
# with the batch size of 2
stub_entity_too_large_error_request([data[0], data[1]])
end
it 'raises an error and logs exceptions' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 5, oids_count: 5
)
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(
an_instance_of(error_class), project_id: project.id, batch_size: 2, oids_count: 5
)
expect { subject.execute(new_oids) }.to raise_error(described_class::DownloadLinksError)
end
end
end
end
context 'credentials' do context 'credentials' do
context 'when the download link and the lfs_endpoint have the same host' do context 'when the download link and the lfs_endpoint have the same host' do
context 'when lfs_endpoint has credentials' do context 'when lfs_endpoint has credentials' do
...@@ -116,17 +191,22 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do ...@@ -116,17 +191,22 @@ describe Projects::LfsPointers::LfsDownloadLinkListService do
end end
describe '#get_download_links' do describe '#get_download_links' do
it 'raise error if request fails' do context 'if request fails' do
allow(Gitlab::HTTP).to receive(:post).and_return(Struct.new(:success?, :message).new(false, 'Failed request')) before do
request_timeout_net_response = Net::HTTPRequestTimeout.new('', '', '')
response = custom_response(request_timeout_net_response)
allow(Gitlab::HTTP).to receive(:post).and_return(response)
end
it 'raises an error' do
expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError)
end end
end
shared_examples 'JSON parse errors' do |body| shared_examples 'JSON parse errors' do |body|
it 'raises error' do it 'raises an error' do
response = instance_double(Gitlab::HTTP::Response) response = custom_response(success_net_response)
allow(response).to receive(:body).and_return(body) allow(response).to receive(:body).and_return(body)
allow(response).to receive(:success?).and_return(true)
allow(Gitlab::HTTP).to receive(:post).and_return(response) allow(Gitlab::HTTP).to receive(:post).and_return(response)
expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError) expect { subject.send(:get_download_links, new_oids) }.to raise_error(described_class::DownloadLinksError)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment