Commit 900dfc27 authored by Douwe Maan

Merge branch 'ee-39345-get-raw-archive' into 'master'

Add an API endpoint to download git repository snapshots (EE)

Closes #4767

See merge request gitlab-org/gitlab-ee!5313
parents 213c9c83 0edd50c1
---
title: Add an API endpoint to download git repository snapshots
merge_request: 18173
author:
type: added
...@@ -1523,3 +1523,25 @@ Read more in the [Project Badges](project_badges.md) documentation. ...@@ -1523,3 +1523,25 @@ Read more in the [Project Badges](project_badges.md) documentation.
## Issue and merge request description templates ## Issue and merge request description templates
The non-default [issue and merge request description templates](../user/project/description_templates.md) are managed inside the project's repository. So you can manage them via the API through the [Repositories API](repositories.md) and the [Repository Files API](repository_files.md). The non-default [issue and merge request description templates](../user/project/description_templates.md) are managed inside the project's repository. So you can manage them via the API through the [Repositories API](repositories.md) and the [Repository Files API](repository_files.md).
## Download snapshot of a Git repository
> Introduced in GitLab 10.7
This endpoint may only be accessed by an administrative user.
Download a snapshot of the project's Git repository (or of its wiki
repository, if requested). The snapshot is always an uncompressed
[tar](https://en.wikipedia.org/wiki/Tar_(computing)) archive.
If a repository is corrupted to the point where `git clone` does not work, the
snapshot may allow some of the data to be retrieved.
```
GET /projects/:id/snapshot
```
| Attribute | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `id` | integer/string | yes | The ID or [URL-encoded path of the project](README.md#namespaced-path-encoding) |
| `wiki` | boolean | no | Whether to download the wiki repository rather than the project repository |
...@@ -105,6 +105,13 @@ class GeoNode < ActiveRecord::Base ...@@ -105,6 +105,13 @@ class GeoNode < ActiveRecord::Base
geo_api_url('status') geo_api_url('status')
end end
# Builds the URL of the snapshot API endpoint for the given repository.
#
# The path is `projects/<project id>/snapshot`, resolved through `api_url`.
# `?wiki=1` is appended when `repository.is_wiki` is truthy, so the
# endpoint serves the wiki repository instead of the project repository.
def snapshot_url(repository)
  base = api_url("projects/#{repository.project.id}/snapshot")
  return base unless repository.is_wiki

  "#{base}?wiki=1"
end
def oauth_callback_url def oauth_callback_url
Gitlab::Routing.url_helpers.oauth_geo_callback_url(url_helper_args) Gitlab::Routing.url_helpers.oauth_geo_callback_url(url_helper_args)
end end
...@@ -185,7 +192,11 @@ class GeoNode < ActiveRecord::Base ...@@ -185,7 +192,11 @@ class GeoNode < ActiveRecord::Base
private private
def geo_api_url(suffix) def geo_api_url(suffix)
URI.join(uri, "#{uri.path}", "api/#{API::API.version}/geo/#{suffix}").to_s api_url("geo/#{suffix}")
end
def api_url(suffix)
URI.join(uri, "#{uri.path}", "api/#{API::API.version}/#{suffix}").to_s
end end
def ensure_access_keys! def ensure_access_keys!
......
...@@ -55,11 +55,11 @@ module Geo ...@@ -55,11 +55,11 @@ module Geo
def fetch_repository(redownload) def fetch_repository(redownload)
log_info("Trying to fetch #{type}") log_info("Trying to fetch #{type}")
clean_up_temporary_repository clean_up_temporary_repository
update_registry!(started_at: DateTime.now) update_registry!(started_at: DateTime.now)
if redownload if redownload
log_info("Redownloading #{type}") redownload_repository
fetch_geo_mirror(build_temporary_repository)
set_temp_repository_as_main set_temp_repository_as_main
else else
ensure_repository ensure_repository
...@@ -67,6 +67,21 @@ module Geo ...@@ -67,6 +67,21 @@ module Geo
end end
end end
# Re-creates the repository contents in the temporary location.
#
# A snapshot is tried first (see `fetch_snapshot`); when that returns a
# truthy value nothing else is done. Otherwise an empty bare repository is
# created at the temporary path and populated with `fetch_geo_mirror`.
#
# Raises Gitlab::Shell::Error when the temporary repository cannot be created.
def redownload_repository
  log_info("Redownloading #{type}")
  return if fetch_snapshot

  log_info("Attempting to fetch repository via git")

  # `git fetch` needs an empty bare repository to fetch into
  created = gitlab_shell.create_repository(project.repository_storage, disk_path_temp)
  raise Gitlab::Shell::Error, 'Can not create a temporary repository' unless created

  fetch_geo_mirror(temp_repo)
end
def retry_count def retry_count
registry.public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend registry.public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend
end end
...@@ -99,6 +114,26 @@ module Geo ...@@ -99,6 +114,26 @@ module Geo
end end
end end
# Tries to populate the temporary repository from a snapshot served by the
# Geo primary node. Snapshotting is used for redownloads *only* when the
# `geo_redownload_with_snapshot` feature is enabled.
#
# If writes happen to the repository while the snapshot is taken, the
# snapshot may be inconsistent. That is tolerated: the log cursor enqueues
# a subsequent git fetch, which should repair whatever can be repaired.
#
# Returns nil when the feature is disabled, false when the attempt raised
# (the error is logged), and otherwise whatever `create_from_snapshot`
# returns.
def fetch_snapshot
  return unless Feature.enabled?(:geo_redownload_with_snapshot)

  log_info("Attempting to fetch repository via snapshot")

  target = temp_repo
  source_url = ::Gitlab::Geo.primary_node.snapshot_url(target)
  credentials = ::Gitlab::Geo::RepoSyncRequest.new.authorization

  target.create_from_snapshot(source_url, credentials)
rescue => error
  log_error('Snapshot attempt failed', error)
  false
end
def registry def registry
@registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id) @registry ||= Geo::ProjectRegistry.find_or_initialize_by(project_id: project.id)
end end
...@@ -171,21 +206,15 @@ module Geo ...@@ -171,21 +206,15 @@ module Geo
@deleted_path ||= "@failed-geo-sync/#{repository.disk_path}" @deleted_path ||= "@failed-geo-sync/#{repository.disk_path}"
end end
def build_temporary_repository def temp_repo
unless gitlab_shell.create_repository(project.repository_storage, disk_path_temp) @temp_repo ||= ::Repository.new(repository.full_path, repository.project, disk_path: disk_path_temp, is_wiki: repository.is_wiki)
raise Gitlab::Shell::Error, 'Can not create a temporary repository'
end
log_info("Created temporary repository")
::Repository.new(repository.full_path, repository.project, disk_path: disk_path_temp, is_wiki: repository.is_wiki)
end end
def clean_up_temporary_repository def clean_up_temporary_repository
exists = gitlab_shell.exists?(project.repository_storage_path, disk_path_temp) exists = gitlab_shell.exists?(project.repository_storage_path, disk_path_temp)
if exists && !gitlab_shell.remove_repository(project.repository_storage_path, disk_path_temp) if exists && !gitlab_shell.remove_repository(project.repository_storage_path, disk_path_temp)
raise Gitlab::Shell::Error, "Temporary #{type} can not been removed" raise Gitlab::Shell::Error, "Temporary #{type} can not be removed"
end end
end end
...@@ -218,7 +247,7 @@ module Geo ...@@ -218,7 +247,7 @@ module Geo
ensure_repository_namespace(repository.disk_path) ensure_repository_namespace(repository.disk_path)
unless gitlab_shell.mv_repository(project.repository_storage_path, disk_path_temp, repository.disk_path) unless gitlab_shell.mv_repository(project.repository_storage_path, disk_path_temp, repository.disk_path)
raise Gitlab::Shell::Error, 'Can not move temporary repository' raise Gitlab::Shell::Error, 'Can not move temporary repository to canonical location'
end end
# Purge the original repository # Purge the original repository
......
...@@ -38,23 +38,5 @@ module API ...@@ -38,23 +38,5 @@ module API
present status, with: EE::API::Entities::GeoNodeStatus present status, with: EE::API::Entities::GeoNodeStatus
end end
end end
# Request helpers for Geo node authentication.
helpers do
# Requires the `Authorization` header to be present and to decode
# successfully via Gitlab::Geo::JwtRequestDecoder; otherwise calls
# `unauthorized!`. Decryption-key and signature-time errors are reported
# as a 401 API error carrying the exception message.
def authenticate_by_gitlab_geo_node_token!
auth_header = headers['Authorization']
begin
unless auth_header && Gitlab::Geo::JwtRequestDecoder.new(auth_header).decode
unauthorized!
end
rescue Gitlab::Geo::InvalidDecryptionKeyError, Gitlab::Geo::SignatureTimeInvalidError => e
render_api_error!(e.to_s, 401)
end
end
# Calls `forbidden!` unless a current Geo node exists and reports itself
# as enabled.
def require_node_to_be_enabled!
forbidden! 'Geo node is disabled.' unless Gitlab::Geo.current_node&.enabled?
end
end
end end
end end
...@@ -3,6 +3,26 @@ module EE ...@@ -3,6 +3,26 @@ module EE
module Helpers module Helpers
extend ::Gitlab::Utils::Override extend ::Gitlab::Utils::Override
# Rejects the request via `forbidden!` unless a current Geo node exists and
# reports itself as enabled.
def require_node_to_be_enabled!
  return if ::Gitlab::Geo.current_node&.enabled?

  forbidden!('Geo node is disabled.')
end
# Reports whether the request's `Authorization` header starts with the Geo
# auth token type.
#
# Returns nil when no `Authorization` header is present, otherwise true or
# false.
def gitlab_geo_node_token?
  authorization = headers['Authorization']
  return if authorization.nil?

  authorization.start_with?(::Gitlab::Geo::BaseRequest::GITLAB_GEO_AUTH_TOKEN_TYPE)
end
# Authenticates the request using the Geo JWT in the `Authorization` header.
#
# Calls `unauthorized!` when the header is missing or does not decode to a
# truthy value. Decryption-key and signature-time errors raised while
# decoding are reported as a 401 API error carrying the exception message.
def authenticate_by_gitlab_geo_node_token!
  token = headers['Authorization']

  begin
    decoded = token && ::Gitlab::Geo::JwtRequestDecoder.new(token).decode
    unauthorized! unless decoded
  rescue ::Gitlab::Geo::InvalidDecryptionKeyError, ::Gitlab::Geo::SignatureTimeInvalidError => error
    render_api_error!(error.to_s, 401)
  end
end
override :current_user override :current_user
def current_user def current_user
strong_memoize(:current_user) do strong_memoize(:current_user) do
......
module EE
  module API
    module Helpers
      # EE overrides for the project snapshot helpers: requests carrying a
      # Geo node token are authorized and resolved without a current user.
      module ProjectSnapshotsHelpers
        extend ::Gitlab::Utils::Override

        # Allow Geo nodes to access snapshots by presenting a valid JWT.
        # Requests without a Geo token fall through to the base check.
        override :authorize_read_git_snapshot!
        def authorize_read_git_snapshot!
          return super unless gitlab_geo_node_token?

          require_node_to_be_enabled!
          authenticate_by_gitlab_geo_node_token!
        end

        # Skip checking authorization of current_user if authenticated via
        # Geo: the project is looked up directly from `params[:id]` and a
        # missing project is reported with `not_found!`.
        override :snapshot_project
        def snapshot_project
          return super unless gitlab_geo_node_token?

          geo_project = find_project(params[:id])
          not_found!('Project') if geo_project.nil?

          geo_project
        end
      end
    end
  end
end
...@@ -200,6 +200,19 @@ describe GeoNode, type: :model do ...@@ -200,6 +200,19 @@ describe GeoNode, type: :model do
end end
end end
# Covers GeoNode#snapshot_url for both project and wiki repositories.
describe '#snapshot_url' do
let(:project) { create(:project) }
# Expected URL for the project repository; the wiki case appends a query string.
let(:snapshot_url) { "https://localhost:3000/gitlab/api/#{api_version}/projects/#{project.id}/snapshot" }
it 'returns snapshot URL based on node URI' do
expect(new_node.snapshot_url(project.repository)).to eq(snapshot_url)
end
it 'adds ?wiki=1 to the snapshot URL when the repository is a wiki' do
expect(new_node.snapshot_url(project.wiki.repository)).to eq(snapshot_url + "?wiki=1")
end
end
describe '#find_or_build_status' do describe '#find_or_build_status' do
it 'returns a new status' do it 'returns a new status' do
status = new_node.find_or_build_status status = new_node.find_or_build_status
......
require 'spec_helper'
# EE request spec: the snapshot endpoint accepts a request that carries Geo
# request headers and no user.
describe API::ProjectSnapshots do
include ::EE::GeoHelpers
let(:project) { create(:project) }
describe 'GET /projects/:id/snapshot' do
let(:primary) { create(:geo_node, :primary) }
let(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(primary)
end
it 'requests project repository raw archive from Geo primary as Geo secondary' do
req = Gitlab::Geo::BaseRequest.new
# The request's headers are built as if they came from the secondary node.
allow(req).to receive(:requesting_node) { secondary }
# No user is passed to `api`; only the Geo request headers accompany the call.
get api("/projects/#{project.id}/snapshot", nil), {}, req.headers
expect(response).to have_gitlab_http_status(200)
end
end
end
...@@ -335,5 +335,9 @@ describe Geo::RepositorySyncService do ...@@ -335,5 +335,9 @@ describe Geo::RepositorySyncService do
end end
end end
end end
# Runs the shared snapshot-retry examples against the project repository.
it_behaves_like 'sync retries use the snapshot RPC' do
let(:repository) { project.repository }
end
end end
end end
...@@ -200,5 +200,9 @@ RSpec.describe Geo::WikiSyncService do ...@@ -200,5 +200,9 @@ RSpec.describe Geo::WikiSyncService do
end end
end end
end end
# Runs the shared snapshot-retry examples against the wiki repository.
it_behaves_like 'sync retries use the snapshot RPC' do
let(:repository) { project.wiki.repository }
end
end end
end end
...@@ -47,3 +47,60 @@ shared_examples 'cleans temporary repositories' do ...@@ -47,3 +47,60 @@ shared_examples 'cleans temporary repositories' do
end end
end end
end end
# Shared examples checking when a sync service uses `create_from_snapshot`
# and when it uses `fetch_geo_mirror`. The including spec supplies
# `repository`; `subject`, `project` and `primary` are also expected from
# the including context.
shared_examples 'sync retries use the snapshot RPC' do
let(:retry_count) { Geo::BaseSyncService::RETRY_BEFORE_REDOWNLOAD }
context 'snapshot synchronization method' do
before do
# Make the service use `repository` as its temporary repository so
# expectations can be set on it directly.
allow(subject).to receive(:temp_repo) { repository }
end
# Matcher for a `create_from_snapshot` call with the primary's snapshot URL
# and an auth string starting with "GL-Geo".
def receive_create_from_snapshot
receive(:create_from_snapshot).with(primary.snapshot_url(repository), match(/^GL-Geo/)) { Gitaly::CreateRepositoryFromSnapshotResponse.new }
end
it 'does not attempt to snapshot for initial sync' do
expect(repository).not_to receive_create_from_snapshot
expect(subject).to receive(:fetch_geo_mirror).with(repository)
subject.execute
end
it 'does not attempt to snapshot for ordinary retries' do
# Retry counts one below RETRY_BEFORE_REDOWNLOAD.
create(:geo_project_registry, project: project, repository_retry_count: retry_count - 1, wiki_retry_count: retry_count - 1)
expect(repository).not_to receive_create_from_snapshot
expect(subject).to receive(:fetch_geo_mirror).with(repository)
subject.execute
end
context 'registry is ready to be snapshotted' do
# Retry counts one above RETRY_BEFORE_REDOWNLOAD.
let!(:registry) { create(:geo_project_registry, project: project, repository_retry_count: retry_count + 1, wiki_retry_count: retry_count + 1) }
it 'attempts to snapshot' do
expect(repository).to receive_create_from_snapshot
expect(subject).not_to receive(:fetch_geo_mirror).with(repository)
subject.execute
end
it 'attempts to fetch if snapshotting raises an exception' do
expect(repository).to receive_create_from_snapshot.and_raise(ArgumentError)
expect(subject).to receive(:fetch_geo_mirror).with(repository)
subject.execute
end
it 'does not attempt to snapshot if the feature flag is disabled' do
stub_feature_flags(geo_redownload_with_snapshot: false)
expect(repository).not_to receive_create_from_snapshot
expect(subject).to receive(:fetch_geo_mirror).with(repository)
subject.execute
end
end
end
end
...@@ -168,6 +168,7 @@ module API ...@@ -168,6 +168,7 @@ module API
mount ::API::ProjectHooks mount ::API::ProjectHooks
mount ::API::Projects mount ::API::Projects
mount ::API::ProjectMilestones mount ::API::ProjectMilestones
mount ::API::ProjectSnapshots
mount ::API::ProjectSnippets mount ::API::ProjectSnippets
mount ::API::ProtectedBranches mount ::API::ProtectedBranches
mount ::API::Repositories mount ::API::Repositories
......
module API
  module Helpers
    # Helpers backing the project snapshot endpoint. The EE module is
    # prepended so its overrides run first and can fall back via `super`.
    module ProjectSnapshotsHelpers
      prepend ::EE::API::Helpers::ProjectSnapshotsHelpers

      # Base authorization check, delegated to
      # `authenticated_with_full_private_access!`.
      def authorize_read_git_snapshot!
        authenticated_with_full_private_access!
      end

      # Sets the response header returned by
      # Gitlab::Workhorse.send_git_snapshot for the given repository.
      def send_git_snapshot(repository)
        workhorse_header = Gitlab::Workhorse.send_git_snapshot(repository)

        header(*workhorse_header)
      end

      # Project whose repository is snapshotted.
      def snapshot_project
        user_project
      end

      # Wiki repository when the `wiki` param is truthy, project repository
      # otherwise.
      def snapshot_repository
        return snapshot_project.wiki.repository if to_boolean(params[:wiki])

        snapshot_project.repository
      end
    end
  end
end
module API
# Grape API exposing `GET projects/:id/snapshot`.
class ProjectSnapshots < Grape::API
helpers ::API::Helpers::ProjectSnapshotsHelpers
# Authorization is enforced in a `before` filter for this API class.
before { authorize_read_git_snapshot! }
resource :projects do
desc 'Download a (possibly inconsistent) snapshot of a repository' do
detail 'This feature was introduced in GitLab 10.7'
end
params do
optional :wiki, type: Boolean, desc: 'Set to true to receive the wiki repository'
end
# The route only emits the Workhorse send-data header for the selected
# repository (see the helpers module).
get ':id/snapshot' do
send_git_snapshot(snapshot_repository)
end
end
end
end
...@@ -1262,6 +1262,10 @@ module Gitlab ...@@ -1262,6 +1262,10 @@ module Gitlab
true true
end end
# Delegates to the Gitaly repository client's `create_from_snapshot`,
# passing `url` and `auth` through unchanged.
def create_from_snapshot(url, auth)
gitaly_repository_client.create_from_snapshot(url, auth)
end
def rebase(user, rebase_id, branch:, branch_sha:, remote_repository:, remote_branch:) def rebase(user, rebase_id, branch:, branch_sha:, remote_repository:, remote_branch:)
gitaly_migrate(:rebase) do |is_enabled| gitaly_migrate(:rebase) do |is_enabled|
if is_enabled if is_enabled
......
...@@ -235,6 +235,22 @@ module Gitlab ...@@ -235,6 +235,22 @@ module Gitlab
) )
end end
# Issues the `create_repository_from_snapshot` RPC on the repository service.
#
# http_url  - passed to the request as `http_url`
# http_auth - passed to the request as `http_auth`
#
# The call uses GitalyClient.default_timeout and returns whatever
# GitalyClient.call returns.
def create_from_snapshot(http_url, http_auth)
  snapshot_request = Gitaly::CreateRepositoryFromSnapshotRequest.new(
    repository: @gitaly_repo, http_url: http_url, http_auth: http_auth
  )
  call_options = { timeout: GitalyClient.default_timeout }

  GitalyClient.call(@storage, :repository_service, :create_repository_from_snapshot, snapshot_request, **call_options)
end
def write_ref(ref_path, ref, old_ref, shell) def write_ref(ref_path, ref, old_ref, shell)
request = Gitaly::WriteRefRequest.new( request = Gitaly::WriteRefRequest.new(
repository: @gitaly_repo, repository: @gitaly_repo,
......
...@@ -81,6 +81,20 @@ module Gitlab ...@@ -81,6 +81,20 @@ module Gitlab
] ]
end end
# Builds the Workhorse send-data header pair for a repository snapshot.
#
# Returns a two-element array: SEND_DATA_HEADER and a `git-snapshot:` value
# whose payload is the encoded Gitaly server details plus a JSON-serialised
# Gitaly::GetSnapshotRequest for the repository.
def send_git_snapshot(repository)
  server = gitaly_server_hash(repository)
  snapshot_request = Gitaly::GetSnapshotRequest.new(repository: repository.gitaly_repository)
  payload = {
    'GitalyServer' => server,
    'GetSnapshotRequest' => snapshot_request.to_json
  }

  [SEND_DATA_HEADER, "git-snapshot:#{encode(payload)}"]
end
def send_git_diff(repository, diff_refs) def send_git_diff(repository, diff_refs)
params = if Gitlab::GitalyClient.feature_enabled?(:workhorse_send_git_diff, status: Gitlab::GitalyClient::MigrationStatus::OPT_OUT) params = if Gitlab::GitalyClient.feature_enabled?(:workhorse_send_git_diff, status: Gitlab::GitalyClient::MigrationStatus::OPT_OUT)
{ {
......
...@@ -156,4 +156,15 @@ describe Gitlab::GitalyClient::RepositoryService do ...@@ -156,4 +156,15 @@ describe Gitlab::GitalyClient::RepositoryService do
client.calculate_checksum client.calculate_checksum
end end
end end
# Verifies that the client forwards to the `create_repository_from_snapshot`
# RPC on the repository service stub.
describe '#create_from_snapshot' do
it 'sends a create_repository_from_snapshot message' do
expect_any_instance_of(Gitaly::RepositoryService::Stub)
.to receive(:create_repository_from_snapshot)
.with(gitaly_request_with_path(storage_name, relative_path), kind_of(Hash))
.and_return(double)
client.create_from_snapshot('http://example.com?wiki=1', 'Custom xyz')
end
end
end end
...@@ -482,4 +482,26 @@ describe Gitlab::Workhorse do ...@@ -482,4 +482,26 @@ describe Gitlab::Workhorse do
}.deep_stringify_keys) }.deep_stringify_keys)
end end
end end
# Verifies the header key, command and decoded params produced by
# `.send_git_snapshot`.
describe '.send_git_snapshot' do
# NOTE(review): `url` is not referenced anywhere in this block; confirm it is needed.
let(:url) { 'http://example.com' }
subject(:request) { described_class.send_git_snapshot(repository) }
it 'sets the header correctly' do
key, command, params = decode_workhorse_header(request)
expect(key).to eq("Gitlab-Workhorse-Send-Data")
expect(command).to eq('git-snapshot')
expect(params).to eq(
'GitalyServer' => {
'address' => Gitlab::GitalyClient.address(project.repository_storage),
'token' => Gitlab::GitalyClient.token(project.repository_storage)
},
'GetSnapshotRequest' => Gitaly::GetSnapshotRequest.new(
repository: repository.gitaly_repository
).to_json
)
end
end
end end
require 'spec_helper'
# Request spec for the snapshot endpoint: access control, and the Workhorse
# send-data payload for project and wiki repositories.
describe API::ProjectSnapshots do
include WorkhorseHelpers
let(:project) { create(:project) }
let(:admin) { create(:admin) }
describe 'GET /projects/:id/snapshot' do
# Asserts that the response carries a `git-snapshot` send-data command
# whose params describe the given repository.
def expect_snapshot_response_for(repository)
type, params = workhorse_send_data
expect(type).to eq('git-snapshot')
expect(params).to eq(
'GitalyServer' => {
'address' => Gitlab::GitalyClient.address(repository.project.repository_storage),
'token' => Gitlab::GitalyClient.token(repository.project.repository_storage)
},
'GetSnapshotRequest' => Gitaly::GetSnapshotRequest.new(
repository: repository.gitaly_repository
).to_json
)
end
# The project owner is authenticated but still rejected, with 403.
it 'returns authentication error as project owner' do
get api("/projects/#{project.id}/snapshot", project.owner)
expect(response).to have_gitlab_http_status(403)
end
it 'returns authentication error as unauthenticated user' do
get api("/projects/#{project.id}/snapshot", nil)
expect(response).to have_gitlab_http_status(401)
end
it 'requests project repository raw archive as administrator' do
get api("/projects/#{project.id}/snapshot", admin), wiki: '0'
expect(response).to have_gitlab_http_status(200)
expect_snapshot_response_for(project.repository)
end
it 'requests wiki repository raw archive as administrator' do
get api("/projects/#{project.id}/snapshot", admin), wiki: '1'
expect(response).to have_gitlab_http_status(200)
expect_snapshot_response_for(project.wiki.repository)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment