Commit 89e7e0a1 authored by Stan Hu's avatar Stan Hu

Optimize ref name lookups in archive downloads

When a user requests an archive download, `RepositoriesController`
attempts to extract the given ref by finding the longest match of
available branch and tag names. This is unnecessary work because unlike
other controllers, the ref name isn't ambiguous. The `id` parameter
takes the from `:ref/:filename.ext`, where `:ref` can contain any number
of slashes, and `:filename` is simply a base filename with no slashes.

We can optimize this by doing a greedy match for the reference and using
the last part as the filename. This avoid having to call out to Redis
and load large sets of data.

Relates to https://gitlab.com/gitlab-com/gl-infra/scalability/issues/124
parent 3b9944fc
......@@ -81,7 +81,7 @@ class Projects::RepositoriesController < Projects::ApplicationController
def assign_archive_vars
if params[:id]
@ref, @filename = extract_ref(params[:id])
@ref, @filename = extract_ref_and_filename(params[:id])
else
@ref = params[:ref]
@filename = nil
......@@ -89,6 +89,26 @@ class Projects::RepositoriesController < Projects::ApplicationController
rescue InvalidPathError
render_404
end
# path can be of the form:
# master
# master/first.zip
# master/first/second.tar.gz
# master/first/second/third.zip
#
# In the archive case, we know that the last value is always the filename, so we
# do a greedy match to extract the ref. This avoid having to pull all ref names
# from Redis.
def extract_ref_and_filename(id)
path = id.strip
data = path.match(/(.*)\/(.*)/)
if data
[data[1], data[2]]
else
[path, nil]
end
end
end
Projects::RepositoriesController.prepend_if_ee('EE::Projects::RepositoriesController')
---
title: Optimize ref name lookups in archive downloads
merge_request: 23890
author:
type: performance
......@@ -17,6 +17,7 @@ describe Projects::RepositoriesController do
context 'as a user' do
let(:user) { create(:user) }
let(:archive_name) { "#{project.path}-master" }
before do
project.add_developer(user)
......@@ -30,9 +31,18 @@ describe Projects::RepositoriesController do
end
it 'responds with redirect to the short name archive if fully qualified' do
get :archive, params: { namespace_id: project.namespace, project_id: project, id: "master/#{project.path}-master" }, format: "zip"
get :archive, params: { namespace_id: project.namespace, project_id: project, id: "master/#{archive_name}" }, format: "zip"
expect(assigns(:ref)).to eq("master")
expect(assigns(:filename)).to eq(archive_name)
expect(response.header[Gitlab::Workhorse::SEND_DATA_HEADER]).to start_with("git-archive:")
end
it 'responds with redirect for a path with multiple slashes' do
get :archive, params: { namespace_id: project.namespace, project_id: project, id: "improve/awesome/#{archive_name}" }, format: "zip"
expect(assigns(:ref)).to eq("improve/awesome")
expect(assigns(:filename)).to eq(archive_name)
expect(response.header[Gitlab::Workhorse::SEND_DATA_HEADER]).to start_with("git-archive:")
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment