Commit fe9c323f authored by Douglas Barbosa Alexandre, committed by Michael Kozono

Skip download of blobs with foreign layers

Some images contain layers with the media type
"application/vnd.docker.image.rootfs.foreign.diff.tar.gzip";
such layers are not present on the registry.

In this case, the client does not fetch the layer from the registry,
because it is not there; instead, it follows the URL that is
specified among the other details of the layer.

Geo should skip this layer during sync instead of trying to fetch it
from the registry. It should also not follow the URL and download
the layer, because doing so might violate the license.
parent 5ae87f35
......@@ -6,21 +6,18 @@ module Geo
class ContainerRepositorySync
include Gitlab::Utils::StrongMemoize
attr_reader :name, :container_repository
FOREIGN_MEDIA_TYPE = 'application/vnd.docker.image.rootfs.foreign.diff.tar.gzip'
attr_reader :repository_path, :container_repository
# Stores the container repository to be synced together with its path.
#
# NOTE(review): this listing is a rendered diff without +/- markers, so the
# @name and @repository_path assignments presumably are the pre- and
# post-change versions of the same line — confirm against the committed file.
#
# @param container_repository [Object] must respond to #path
def initialize(container_repository)
@container_repository = container_repository
@name = container_repository.path
@repository_path = container_repository.path
end
# Runs the sync: every entry of tags_to_sync is synced, every entry of
# tags_to_remove is deleted, and true is returned afterwards.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# do/end loops and the one-line brace loops presumably are the pre- and
# post-change forms of the same two steps — confirm against the committed file.
#
# @return [true]
def execute
tags_to_sync.each do |tag|
sync_tag(tag[:name])
end
tags_to_remove.each do |tag|
container_repository.delete_tag_by_digest(tag[:digest])
end
tags_to_sync.each { |tag| sync_tag(tag) }
tags_to_remove.each { |tag| remove_tag(tag) }
true
end
......@@ -29,44 +26,51 @@ module Geo
# Syncs one tag: fetches its raw manifest through `client`, pushes every blob
# listed in the manifest that the container repository does not have yet, and
# finally pushes the manifest itself under the tag name.
#
# The manifest is pushed as the raw string that was fetched; the parsed copy is
# only used to list blobs and to read 'mediaType'.
#
# NOTE(review): this listing is a rendered diff without +/- markers; lines
# using `name` / `tag` and lines using `repository_path` / `tag[:name]`
# presumably are the pre- and post-change versions — confirm against the
# committed file.
def sync_tag(tag)
file = nil
manifest = client.repository_raw_manifest(name, tag)
manifest = client.repository_raw_manifest(repository_path, tag[:name])
manifest_parsed = Gitlab::Json.parse(manifest)
list_blobs(manifest_parsed).each do |digest|
# Blobs the container repository already has are not pulled again.
next if container_repository.blob_exists?(digest)
file = client.pull_blob(name, digest)
file = client.pull_blob(repository_path, digest)
container_repository.push_blob(digest, file.path)
file.unlink
end
container_repository.push_manifest(tag, manifest, manifest_parsed['mediaType'])
container_repository.push_manifest(tag[:name], manifest, manifest_parsed['mediaType'])
ensure
# `file` is still nil when no blob was pulled, hence `try`; otherwise this
# unlinks the most recently pulled file even if an error interrupted the loop.
file.try(:unlink)
end
# Deletes a tag from the container repository, addressing it by its digest.
#
# @param tag [Hash] tag descriptor; only its :digest entry is read here
# @return whatever container_repository.delete_tag_by_digest returns
def remove_tag(tag)
  digest = tag[:digest]
  container_repository.delete_tag_by_digest(digest)
end
# Lists the blob digests referenced by a parsed manifest: the digests of its
# 'layers' entries followed by the 'config' digest. The trailing `compact`
# drops nil entries, e.g. when the manifest has no config digest.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# `map` lines and the `filter_map` lines (which leave out layers for which
# foreign_layer? is true) presumably are the pre- and post-change versions —
# confirm against the committed file.
#
# @param manifest [Hash] parsed manifest; 'layers' must be present
# @return [Array] blob digests
def list_blobs(manifest)
layers = manifest['layers'].map do |layer|
layer['digest']
layers = manifest['layers'].filter_map do |layer|
layer['digest'] unless foreign_layer?(layer)
end
layers.push(manifest.dig('config', 'digest')).compact
end
def primary_tags
@primary_tags ||= begin
manifest = client.repository_tags(name)
# Tells whether a manifest layer is a foreign layer, judged only by its
# media type.
#
# @param layer [Hash] layer entry of a parsed manifest; its 'mediaType' is read
# @return [Boolean] true when the media type equals FOREIGN_MEDIA_TYPE
def foreign_layer?(layer)
  media_type = layer['mediaType']
  media_type == FOREIGN_MEDIA_TYPE
end
return [] unless manifest && manifest['tags']
# Memoized list of { name:, digest: } hashes, one per tag that `client`
# reports for the repository. Yields an empty array when the tag listing is
# missing or has no 'tags' entry.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the two
# hash lines inside the `map` block (one using `name`, one using
# `repository_path`) presumably are the pre- and post-change versions —
# confirm against the committed file.
def primary_tags
strong_memoize(:primary_tags) do
manifest = client.repository_tags(repository_path)
# `next` ends only this block with [] as its value.
next [] unless manifest && manifest['tags']
manifest['tags'].map do |tag|
{ name: tag, digest: client.repository_tag_digest(name, tag) }
{ name: tag, digest: client.repository_tag_digest(repository_path, tag) }
end
end
end
def secondary_tags
@secondary_tags ||= begin
strong_memoize(:secondary_tags) do
container_repository.tags.map do |tag|
{ name: tag.name, digest: tag.digest }
end
......@@ -86,7 +90,7 @@ module Geo
strong_memoize(:client) do
ContainerRegistry::Client.new(
Gitlab.config.geo.registry_replication.primary_api_url,
token: ::Auth::ContainerRegistryAuthenticationService.pull_access_token(name)
token: ::Auth::ContainerRegistryAuthenticationService.pull_access_token(repository_path)
)
end
end
......
---
title: Geo - Skip download of blobs with foreign layers
merge_request: 61072
author:
type: fixed
......@@ -3,102 +3,108 @@
require 'spec_helper'
RSpec.describe Geo::ContainerRepositorySync, :geo do
let(:group) { create(:group, name: 'group') }
let(:project) { create(:project, path: 'test', group: group) }
let(:container_repository) do
create(:container_repository, name: 'my_image', project: project)
let_it_be(:group) { create(:group, name: 'group') }
let_it_be(:project) { create(:project, path: 'test', group: group) }
let_it_be(:container_repository) { create(:container_repository, name: 'my_image', project: project) }
let(:primary_api_url) { 'http://primary.registry.gitlab' }
let(:secondary_api_url) { 'http://registry.gitlab' }
let(:primary_repository_url) { "#{primary_api_url}/v2/#{container_repository.path}" }
let(:secondary_repository_url ) { "#{secondary_api_url}/v2/#{container_repository.path}" }
# Break symbol will be removed if JSON encode/decode operation happens so we use this
# to prove that it does not happen and we preserve original human readable JSON
let(:manifest) do
"{" \
"\n\"schemaVersion\":2," \
"\n\"layers\":[" \
"{\n\"mediaType\":\"application/vnd.docker.distribution.manifest.v2+json\",\n\"size\":3333,\n\"digest\":\"sha256:3333\"}," \
"{\n\"mediaType\":\"application/vnd.docker.distribution.manifest.v2+json\",\n\"size\":4444,\n\"digest\":\"sha256:4444\"}," \
"{\n\"mediaType\":\"application/vnd.docker.image.rootfs.foreign.diff.tar.gzip\",\n\"size\":5555,\n\"digest\":\"sha256:5555\",\n\"urls\":[\"https://foo.bar/v2/zoo/blobs/sha256:5555\"]}" \
"]" \
"}"
end
# Break symbol will be removed if JSON encode/decode operation happens
# so we use this to prove that it does not happen and we preserve original
# human readable JSON
let(:manifest) { "{\"schemaVersion\":2,\n\"layers\":[]}" }
before do
stub_container_registry_config(enabled: true,
api_url: 'http://registry.gitlab',
host_port: 'registry.gitlab')
stub_registry_replication_config(enabled: true,
primary_api_url: 'http://primary.registry.gitlab')
stub_request(:get, "http://registry.gitlab/v2/group/test/my_image/tags/list")
.with(
headers: {
'Accept' => 'application/vnd.docker.distribution.manifest.v2+json, application/vnd.oci.image.manifest.v1+json',
'Authorization' => 'bearer token'
})
stub_container_registry_config(enabled: true, api_url: secondary_api_url)
stub_registry_replication_config(enabled: true, primary_api_url: primary_api_url)
end
# Spec helper: stubs the tag listing of a repository and, for every tag, the
# HEAD manifest request that reports the tag's digest through the
# 'docker-content-digest' header.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# hard-coded `%w(obsolete)` body and the hard-coded primary URL stub
# presumably are removed lines shown next to their replacements — confirm
# against the committed file.
#
# @param repository_url [String] base URL of the repository
# @param tags [Hash] tag name => digest
def stub_primary_repository_tags_requests(repository_url, tags)
stub_request(:get, "#{repository_url}/tags/list")
.to_return(
status: 200,
body: Gitlab::Json.dump(tags: %w(obsolete)),
body: Gitlab::Json.dump(tags: tags.keys),
headers: { 'Content-Type' => 'application/json' })
stub_request(:get, "http://primary.registry.gitlab/v2/group/test/my_image/tags/list")
.with(
headers: { 'Authorization' => 'bearer pull-token' })
tags.each do |tag, digest|
stub_request(:head, "#{repository_url}/manifests/#{tag}")
.to_return(status: 200, body: "", headers: { 'docker-content-digest' => digest })
end
end
# Spec helper: stubs the tag listing of a repository and, for every tag, the
# HEAD manifest request that reports the tag's digest through the
# 'docker-content-digest' header.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# hard-coded `%w(tag-to-sync)` body and the stubs with literal
# registry URLs presumably are removed lines shown next to their
# replacements — confirm against the committed file.
#
# @param repository_url [String] base URL of the repository
# @param tags [Hash] tag name => digest
def stub_secondary_repository_tags_requests(repository_url, tags)
stub_request(:get, "#{repository_url}/tags/list")
.to_return(
status: 200,
body: Gitlab::Json.dump(tags: %w(tag-to-sync)),
body: Gitlab::Json.dump(tags: tags.keys),
headers: { 'Content-Type' => 'application/json' })
stub_request(:head, "http://primary.registry.gitlab/v2/group/test/my_image/manifests/tag-to-sync")
.with(
headers: {
'Accept' => 'application/vnd.docker.distribution.manifest.v2+json, application/vnd.oci.image.manifest.v1+json',
'Authorization' => 'bearer pull-token'
})
.to_return(status: 200, body: "", headers: { 'docker-content-digest' => 'sha256:ccccc' })
stub_request(:head, "http://registry.gitlab/v2/group/test/my_image/manifests/obsolete")
.with(
headers: {
'Accept' => 'application/vnd.docker.distribution.manifest.v2+json, application/vnd.oci.image.manifest.v1+json',
'Authorization' => 'bearer token'
})
.to_return(status: 200, body: "", headers: { 'docker-content-digest' => 'sha256:aaaaa' })
stub_request(:get, "http://primary.registry.gitlab/v2/group/test/my_image/manifests/tag-to-sync")
.with(
headers: {
'Accept' => 'application/vnd.docker.distribution.manifest.v2+json, application/vnd.oci.image.manifest.v1+json',
'Authorization' => 'bearer pull-token'
})
tags.each do |tag, digest|
stub_request(:head, "#{repository_url}/manifests/#{tag}")
.to_return(status: 200, body: "", headers: { 'docker-content-digest' => digest })
end
end
# Spec helper: stubs the GET of a tag's manifest so that it answers 200 with
# +manifest+ as the response body.
#
# @param repository_url [String] base URL of the repository
# @param tag [String] tag name used in the manifest URL
# @param manifest [String] raw manifest returned as the body
def stub_primary_raw_manifest_request(repository_url, tag, manifest)
  manifest_url = "#{repository_url}/manifests/#{tag}"
  manifest_stub = stub_request(:get, manifest_url)

  manifest_stub.to_return(status: 200, body: manifest, headers: {})
end
stub_request(:put, "http://registry.gitlab/v2/group/test/my_image/manifests/tag-to-sync")
.with(
body: manifest,
headers: {
'Accept' => 'application/vnd.docker.distribution.manifest.v2+json, application/vnd.oci.image.manifest.v1+json',
'Authorization' => 'bearer token',
'Content-Type' => 'application/json'
})
# Spec helper: stubs the PUT that pushes the manifest for +tag+. The stub is
# constrained with `with(body: manifest)` and answers 200 with an empty body.
#
# @param repository_url [String] base URL of the repository
# @param tag [String] tag name used in the manifest URL
# @param manifest [String] raw manifest expected as the request body
def stub_secondary_push_manifest_request(repository_url, tag, manifest)
  push_stub = stub_request(:put, "#{repository_url}/manifests/#{tag}")
  push_stub = push_stub.with(body: manifest)
  push_stub.to_return(status: 200, body: "", headers: {})
end
describe 'execute' do
it 'determines list of tags to sync and to remove correctly' do
expect(container_repository).to receive(:delete_tag_by_digest).with('sha256:aaaaa')
expect_next_instance_of(described_class) do |instance|
expect(instance).to receive(:sync_tag).with('tag-to-sync').and_call_original
end
# Spec helper: for every digest => missing pair, stubs the HEAD blob check on
# the secondary repository (404 when +missing+ is truthy, 200 otherwise). For
# missing blobs it also stubs the GET blob download on the primary
# repository, served from a fixture file.
#
# NOTE(review): this listing is a rendered diff without +/- markers; the
# `described_class.new(container_repository).execute` line inside the loop
# presumably is a removed line from the old spec — confirm against the
# committed file.
#
# @param primary_repository_url [String] base URL of the primary repository
# @param secondary_repository_url [String] base URL of the secondary repository
# @param blobs [Hash] blob digest => whether the secondary is missing it
def stub_missing_blobs_requests(primary_repository_url, secondary_repository_url, blobs)
blobs.each do |digest, missing|
stub_request(:head, "#{secondary_repository_url}/blobs/#{digest}")
.to_return(status: (missing ? 404 : 200), body: "", headers: {})
described_class.new(container_repository).execute
next unless missing
stub_request(:get, "#{primary_repository_url}/blobs/#{digest}")
.to_return(status: 200, body: File.new(Rails.root.join('ee/spec/fixtures/ee_sample_schema.json')), headers: {})
end
end
describe '#execute' do
subject { described_class.new(container_repository) }
it 'determines list of tags to sync and to remove correctly' do
stub_primary_repository_tags_requests(primary_repository_url, { 'tag-to-sync' => 'sha256:1111' })
stub_secondary_repository_tags_requests(secondary_repository_url, { 'tag-to-remove' => 'sha256:2222' })
stub_primary_raw_manifest_request(primary_repository_url, 'tag-to-sync', manifest)
stub_missing_blobs_requests(primary_repository_url, secondary_repository_url, { 'sha256:3333' => true, 'sha256:4444' => false })
stub_secondary_push_manifest_request(secondary_repository_url, 'tag-to-sync', manifest)
expect(container_repository).to receive(:push_blob).with('sha256:3333', anything)
expect(container_repository).not_to receive(:push_blob).with('sha256:4444', anything)
expect(container_repository).not_to receive(:push_blob).with('sha256:5555', anything)
expect(container_repository).to receive(:delete_tag_by_digest).with('sha256:2222')
subject.execute
end
context 'when primary repository has no tags' do
it 'considers the primary repository empty and does not fail' do
stub_request(:get, "http://primary.registry.gitlab/v2/group/test/my_image/tags/list")
.with(
headers: { 'Authorization' => 'bearer pull-token' })
.to_return(
status: 200,
headers: { 'Content-Type' => 'application/json' })
it 'removes secondary tags and does not fail' do
stub_primary_repository_tags_requests(primary_repository_url, {})
stub_secondary_repository_tags_requests(secondary_repository_url, { 'tag-to-remove' => 'sha256:2222' })
expect(container_repository).to receive(:delete_tag_by_digest).with('sha256:aaaaa')
expect(container_repository).to receive(:delete_tag_by_digest).with('sha256:2222')
described_class.new(container_repository).execute
subject.execute
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment