Commit ff8cda92 authored by Sean McGivern

Merge branch 'ab/keyset-pagination-on-id' into 'master'

Keyset pagination on primary key

See merge request gitlab-org/gitlab!21194
parents fc0bcf9b bfdb6e52
---
title: Keyset pagination for REST API (Project endpoint)
merge_request: 21194
author:
type: added
...@@ -3,14 +3,33 @@ ...@@ -3,14 +3,33 @@
module API module API
module Helpers module Helpers
module Pagination module Pagination
# This returns an ActiveRecord relation
def paginate(relation) def paginate(relation)
::Gitlab::Pagination::OffsetPagination.new(self).paginate(relation) Gitlab::Pagination::OffsetPagination.new(self).paginate(relation)
end end
# This applies pagination and executes the query # This applies pagination and executes the query
# It always returns an array instead of an ActiveRecord relation # It always returns an array instead of an ActiveRecord relation
def paginate_and_retrieve!(relation) def paginate_and_retrieve!(relation)
paginate(relation).to_a offset_or_keyset_pagination(relation).to_a
end
private
def offset_or_keyset_pagination(relation)
return paginate(relation) unless keyset_pagination_enabled?
request_context = Gitlab::Pagination::Keyset::RequestContext.new(self)
unless Gitlab::Pagination::Keyset.available?(request_context, relation)
return error!('Keyset pagination is not yet available for this type of request', 405)
end
Gitlab::Pagination::Keyset.paginate(request_context, relation)
end
def keyset_pagination_enabled?
params[:pagination] == 'keyset' && Feature.enabled?(:api_keyset_pagination, default_enabled: true)
end end
end end
end end
......
# frozen_string_literal: true
module Gitlab
  module Pagination
    # Entry points for keyset-based pagination.
    module Keyset
      class << self
        # Builds a Pager for +request_context+ and lets it paginate +relation+.
        # Returns whatever Pager#paginate returns.
        def paginate(request_context, relation)
          pager = Gitlab::Pagination::Keyset::Pager.new(request_context)
          pager.paginate(relation)
        end

        # Tells whether keyset pagination can serve this request.
        #
        # It is only available for Project relations that are ordered by
        # exactly one column, and that column must be +id+ (asc or desc).
        # Always returns +true+ or +false+.
        def available?(request_context, relation)
          ordering = request_context.page.order_by

          if relation.klass != Project
            false
          elsif ordering.size != 1
            false
          else
            ordering[:id] ? true : false
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Pagination
    module Keyset
      # A Page models the pagination information for a particular page of the collection:
      # the ordering, the lower bounds the page starts after, the requested page size
      # and whether the end of the collection has been reached.
      class Page
        # Default number of records for a page
        DEFAULT_PAGE_SIZE = 20

        # Maximum number of records for a page
        MAXIMUM_PAGE_SIZE = 100

        attr_accessor :lower_bounds, :end_reached
        attr_reader :order_by

        # order_by     - Hash of column => direction; keys are symbolized.
        # lower_bounds - Hash of column => value (or nil); keys are symbolized.
        # per_page     - Requested page size; sanitized lazily by #per_page.
        # end_reached  - Whether this page marks the end of the collection.
        def initialize(order_by: {}, lower_bounds: nil, per_page: DEFAULT_PAGE_SIZE, end_reached: false)
          @order_by = order_by.symbolize_keys
          @lower_bounds = lower_bounds&.symbolize_keys
          @per_page = per_page
          @end_reached = end_reached
        end

        # Number of records to return per page.
        #
        # The requested size is coerced with #to_i first, so a missing (nil)
        # or string value coming straight from request params does not raise.
        # Non-positive values (including nil and non-numeric strings, which
        # coerce to 0) fall back to DEFAULT_PAGE_SIZE; larger values are
        # capped at MAXIMUM_PAGE_SIZE.
        def per_page
          requested = @per_page.to_i
          return DEFAULT_PAGE_SIZE if requested <= 0

          [requested, MAXIMUM_PAGE_SIZE].min
        end

        # Determine whether this page indicates the end of the collection
        def end_reached?
          @end_reached
        end

        # Construct a Page for the next page.
        # The copy keeps this page's order_by/per_page information; only
        # lower_bounds and end_reached are replaced. The receiver is not modified.
        def next(lower_bounds, end_reached)
          dup.tap do |next_page|
            next_page.lower_bounds = lower_bounds&.symbolize_keys
            next_page.end_reached = end_reached
          end
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
module Pagination
module Keyset
class Pager
attr_reader :request
def initialize(request)
@request = request
end
def paginate(relation)
# Validate assumption: The last two columns must match the page order_by
validate_order!(relation)
# This performs the database query and retrieves records
# We retrieve one record more to check if we have data beyond this page
all_records = relation.limit(page.per_page + 1).to_a # rubocop: disable CodeReuse/ActiveRecord
records_for_page = all_records.first(page.per_page)
# If we retrieved more records than belong on this page,
# we know there's a next page
there_is_more = all_records.size > records_for_page.size
apply_headers(records_for_page.last, there_is_more)
records_for_page
end
private
def apply_headers(last_record_in_page, there_is_more)
end_reached = last_record_in_page.nil? || !there_is_more
lower_bounds = last_record_in_page&.slice(page.order_by.keys)
next_page = page.next(lower_bounds, end_reached)
request.apply_headers(next_page)
end
def page
@page ||= request.page
end
def validate_order!(rel)
present_order = rel.order_values.map { |val| [val.expr.name.to_sym, val.direction] }.last(2).to_h
unless page.order_by == present_order
raise ArgumentError, "Page's order_by does not match the relation's order: #{present_order} vs #{page.order_by}"
end
end
end
end
end
end
# frozen_string_literal: true
module Gitlab
  module Pagination
    module Keyset
      # Bridges an API request and keyset pagination: reads paging
      # information from the request params and writes the link to the
      # next page back as a response header.
      class RequestContext
        attr_reader :request

        DEFAULT_SORT_DIRECTION = :desc
        PRIMARY_KEY = :id

        # The primary key is appended as an extra order-by column so that
        # the resulting order is well-defined.
        TIE_BREAKER = { PRIMARY_KEY => DEFAULT_SORT_DIRECTION }.freeze

        def initialize(request)
          @request = request
        end

        # Page built from the request's order_by/sort/per_page params (memoized).
        def page
          @page ||= Page.new(order_by: order_by, per_page: params[:per_page])
        end

        # Sets the 'Links' header on the request; the value is nil once the
        # end of the collection has been reached.
        def apply_headers(next_page)
          links = pagination_links(next_page)
          request.header('Links', links)
        end

        private

        # Ordering requested by the client, with the tie breaker appended
        # unless the client already orders by the primary key.
        def order_by
          column = params[:order_by]
          return TIE_BREAKER.dup unless column

          direction = params[:sort]&.to_sym || DEFAULT_SORT_DIRECTION
          requested = { column.to_sym => direction }

          requested[PRIMARY_KEY] ? requested : requested.merge(TIE_BREAKER)
        end

        def params
          @params ||= request.params
        end

        # Turns the page's lower bounds into query params,
        # e.g. { id: 42 } => { "id_after" => 42 } for ascending order.
        def lower_bounds_params(page)
          page.lower_bounds.map { |column, value| [filter_with_comparator(page, column), value] }.to_h
        end

        # Name of the filter param for +column+: "<column>_before" when the
        # page sorts that column descending, "<column>_after" otherwise.
        def filter_with_comparator(page, column)
          descending = page.order_by[column]&.to_sym == :desc

          descending ? "#{column}_before" : "#{column}_after"
        end

        def page_href(page)
          uri = base_request_uri
          uri.query = query_params_for(page).to_query
          uri.to_s
        end

        def pagination_links(next_page)
          %(<#{page_href(next_page)}>; rel="next") unless next_page.end_reached?
        end

        # Request URL with host and port replaced by the configured GitLab host/port.
        def base_request_uri
          @base_request_uri ||= begin
            uri = URI.parse(request.request.url)
            uri.host = Gitlab.config.gitlab.host
            uri.port = Gitlab.config.gitlab.port
            uri
          end
        end

        def query_params_for(page)
          request.params.merge(lower_bounds_params(page))
        end
      end
    end
  end
end
...@@ -5,10 +5,16 @@ require 'spec_helper' ...@@ -5,10 +5,16 @@ require 'spec_helper'
describe API::Helpers::Pagination do describe API::Helpers::Pagination do
subject { Class.new.include(described_class).new } subject { Class.new.include(described_class).new }
let(:expected_result) { double("result", to_a: double) }
let(:relation) { double("relation") }
let(:params) { {} }
before do
allow(subject).to receive(:params).and_return(params)
end
describe '#paginate' do describe '#paginate' do
let(:relation) { double("relation") }
let(:offset_pagination) { double("offset pagination") } let(:offset_pagination) { double("offset pagination") }
let(:expected_result) { double("result") }
it 'delegates to OffsetPagination' do it 'delegates to OffsetPagination' do
expect(::Gitlab::Pagination::OffsetPagination).to receive(:new).with(subject).and_return(offset_pagination) expect(::Gitlab::Pagination::OffsetPagination).to receive(:new).with(subject).and_return(offset_pagination)
...@@ -21,16 +27,48 @@ describe API::Helpers::Pagination do ...@@ -21,16 +27,48 @@ describe API::Helpers::Pagination do
end end
describe '#paginate_and_retrieve!' do describe '#paginate_and_retrieve!' do
let(:relation) { double("relation") } context 'for offset pagination' do
before do
allow(Gitlab::Pagination::Keyset).to receive(:available?).and_return(false)
end
it 'delegates to paginate' do
expect(subject).to receive(:paginate).with(relation).and_return(expected_result)
result = subject.paginate_and_retrieve!(relation)
expect(result).to eq(expected_result.to_a)
end
end
context 'for keyset pagination' do
let(:params) { { pagination: 'keyset' } }
let(:request_context) { double('request context') }
before do
allow(Gitlab::Pagination::Keyset::RequestContext).to receive(:new).with(subject).and_return(request_context)
end
context 'when keyset pagination is available' do
it 'delegates to KeysetPagination' do
expect(Gitlab::Pagination::Keyset).to receive(:available?).and_return(true)
expect(Gitlab::Pagination::Keyset).to receive(:paginate).with(request_context, relation).and_return(expected_result)
result = subject.paginate_and_retrieve!(relation)
let(:paginated_result) { double } expect(result).to eq(expected_result.to_a)
let(:result) { double } end
end
it 'applies pagination and returns an array' do context 'when keyset pagination is not available' do
expect(subject).to receive(:paginate).with(relation).and_return(paginated_result) it 'renders a 501 error if keyset pagination isnt available yet' do
expect(paginated_result).to receive(:to_a).and_return(result) expect(Gitlab::Pagination::Keyset).to receive(:available?).with(request_context, relation).and_return(false)
expect(Gitlab::Pagination::Keyset).not_to receive(:paginate)
expect(subject).to receive(:error!).with(/not yet available/, 405)
expect(subject.paginate_and_retrieve!(relation)).to eq(result) subject.paginate_and_retrieve!(relation)
end
end
end end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Pagination::Keyset::Page do
  # Page size sanitizing: capped at the maximum, default for non-positive input.
  describe '#per_page' do
    it 'limits to a maximum of 100 records per page' do
      page = described_class.new(per_page: 101)

      expect(page.per_page).to eq(described_class::MAXIMUM_PAGE_SIZE)
    end

    it 'uses default value when given 0' do
      page = described_class.new(per_page: 0)

      expect(page.per_page).to eq(described_class::DEFAULT_PAGE_SIZE)
    end

    it 'uses default value when given negative values' do
      page = described_class.new(per_page: -1)

      expect(page.per_page).to eq(described_class::DEFAULT_PAGE_SIZE)
    end

    it 'uses the given value if it is within range' do
      page = described_class.new(per_page: 10)

      expect(page.per_page).to eq(10)
    end
  end

  # #next must return a copy carrying new bounds/end flag and leave the original alone.
  describe '#next' do
    # State of the original page
    let(:order_by) { { id: :desc } }
    let(:lower_bounds) { { id: 42 } }
    let(:per_page) { 10 }
    let(:end_reached) { false }

    # Values handed to #next
    let(:new_lower_bounds) { { id: 21 } }
    let(:new_end_reached) { true }

    let(:page) do
      described_class.new(
        order_by: order_by,
        lower_bounds: lower_bounds,
        per_page: per_page,
        end_reached: end_reached
      )
    end

    subject { page.next(new_lower_bounds, new_end_reached) }

    it 'copies over order_by' do
      expect(subject.order_by).to eq(page.order_by)
    end

    it 'copies over per_page' do
      expect(subject.per_page).to eq(page.per_page)
    end

    it 'dups the instance' do
      expect(subject).not_to eq(page)
    end

    it 'sets lower_bounds only on new instance' do
      expect(subject.lower_bounds).to eq(new_lower_bounds)
      expect(page.lower_bounds).to eq(lower_bounds)
    end

    it 'sets end_reached only on new instance' do
      expect(subject.end_reached?).to eq(new_end_reached)
      expect(page.end_reached?).to eq(end_reached)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Pagination::Keyset::Pager do
  let(:page) { Gitlab::Pagination::Keyset::Page.new(order_by: { id: :asc }, per_page: 3) }
  let(:next_page) { double('next page') }
  let(:request) { double('request', page: page, apply_headers: nil) }
  let(:relation) { Project.all.order(id: :asc) }

  # Seven projects: more than two full pages at per_page = 3.
  before_all do
    create_list(:project, 7)
  end

  describe '#paginate' do
    subject { described_class.new(request).paginate(relation) }

    it 'loads the result relation only once' do
      expect { subject }.not_to exceed_query_limit(1)
    end

    it 'passes information about next page to request' do
      # The next page starts after the last record shown on this one.
      last_shown = relation.limit(page.per_page).last
      lower_bounds = last_shown.slice(:id)

      expect(page).to receive(:next).with(lower_bounds, false).and_return(next_page)
      expect(request).to receive(:apply_headers).with(next_page)

      subject
    end

    context 'when retrieving the last page' do
      # Exactly per_page records remain, so nothing lies beyond this page.
      let(:relation) { Project.where('id > ?', Project.maximum(:id) - page.per_page).order(id: :asc) }

      it 'indicates this is the last page' do
        expect(request).to receive(:apply_headers) do |applied_page|
          expect(applied_page.end_reached?).to be_truthy
        end

        subject
      end
    end

    context 'when retrieving an empty page' do
      # No record has an id above the maximum, so the page is empty.
      let(:relation) { Project.where('id > ?', Project.maximum(:id) + 1).order(id: :asc) }

      it 'indicates this is the last page' do
        expect(request).to receive(:apply_headers) do |applied_page|
          expect(applied_page.end_reached?).to be_truthy
        end

        subject
      end
    end

    it 'returns an array with the loaded records' do
      expected_records = relation.limit(page.per_page).to_a

      expect(subject).to eq(expected_records)
    end

    context 'validating the order clause' do
      # The page asks for created_at while the relation is ordered by id.
      let(:page) { Gitlab::Pagination::Keyset::Page.new(order_by: { created_at: :asc }, per_page: 3) }

      it 'raises an error if has a different order clause than the page' do
        expect { subject }.to raise_error(ArgumentError, /order_by does not match/)
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for RequestContext: extraction of paging information from request
# params (#page) and construction of the 'Links' header (#apply_headers).
describe Gitlab::Pagination::Keyset::RequestContext do
# For #page the request double only needs to expose params.
let(:request) { double('request', params: params) }
describe '#page' do
subject { described_class.new(request).page }
context 'with only order_by given' do
let(:params) { { order_by: :id } }
# No sort param is given, so the expected direction is :desc.
it 'extracts order_by/sorting information' do
page = subject
expect(page.order_by).to eq(id: :desc)
end
end
context 'with order_by and sort given' do
let(:params) { { order_by: :created_at, sort: :desc } }
# Ordering by a column other than id: id is expected as a second column.
it 'extracts order_by/sorting information and adds tie breaker' do
page = subject
expect(page.order_by).to eq(created_at: :desc, id: :desc)
end
end
context 'with no order_by information given' do
let(:params) { {} }
it 'defaults to tie breaker' do
page = subject
expect(page.order_by).to eq({ id: :desc })
end
end
context 'with per_page params given' do
let(:params) { { per_page: 10 } }
it 'extracts per_page information' do
page = subject
expect(page.per_page).to eq(params[:per_page])
end
end
end
describe '#apply_headers' do
# Naming note: in this group `request` is the inner request double that only
# exposes `url`, while `request_context` is the double handed to
# described_class.new; it exposes params and request, and is the object
# expected to receive `header`.
let(:request) { double('request', url: "http://#{Gitlab.config.gitlab.host}/api/v4/projects?foo=bar") }
let(:params) { { foo: 'bar' } }
let(:request_context) { double('request context', params: params, request: request) }
# Ascending order on id with lower bound 42: the link should carry id_after=42.
let(:next_page) { double('next page', order_by: { id: :asc }, lower_bounds: { id: 42 }, end_reached?: false) }
subject { described_class.new(request_context).apply_headers(next_page) }
it 'sets Links header with same host/path as the original request' do
orig_uri = URI.parse(request_context.request.url)
expect(request_context).to receive(:header) do |name, header|
expect(name).to eq('Links')
# Pull the URL out of the `<url>; rel="next"` header value.
first_link, _ = /<([^>]+)>; rel="next"/.match(header).captures
uri = URI.parse(first_link)
expect(uri.host).to eq(orig_uri.host)
expect(uri.path).to eq(orig_uri.path)
end
subject
end
it 'sets Links header with a link to the next page' do
orig_uri = URI.parse(request_context.request.url)
expect(request_context).to receive(:header) do |name, header|
expect(name).to eq('Links')
first_link, _ = /<([^>]+)>; rel="next"/.match(header).captures
query = CGI.parse(URI.parse(first_link).query)
# All original query params are kept; only id_after is added.
expect(query.except('id_after')).to eq(CGI.parse(orig_uri.query).except('id_after'))
expect(query['id_after']).to eq(['42'])
end
subject
end
context 'with descending order' do
# Descending order on id: the bound is expected as id_before instead of id_after.
let(:next_page) { double('next page', order_by: { id: :desc }, lower_bounds: { id: 42 }, end_reached?: false) }
it 'sets Links header with a link to the next page' do
orig_uri = URI.parse(request_context.request.url)
expect(request_context).to receive(:header) do |name, header|
expect(name).to eq('Links')
first_link, _ = /<([^>]+)>; rel="next"/.match(header).captures
query = CGI.parse(URI.parse(first_link).query)
expect(query.except('id_before')).to eq(CGI.parse(orig_uri.query).except('id_before'))
expect(query['id_before']).to eq(['42'])
end
subject
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Gitlab::Pagination::Keyset do
  describe '.paginate' do
    let(:pager) { double }
    let(:relation) { double }
    let(:request_context) { double }
    let(:result) { double }

    subject { described_class.paginate(request_context, relation) }

    # .paginate is a thin delegation to Pager#paginate.
    it 'uses Pager to paginate the relation' do
      expect(Gitlab::Pagination::Keyset::Pager).to receive(:new).with(request_context).and_return(pager)
      expect(pager).to receive(:paginate).with(relation).and_return(result)

      expect(subject).to eq(result)
    end
  end

  describe '.available?' do
    let(:page) { double("page", order_by: order_by) }
    let(:request_context) { double("request context", page: page) }

    subject { described_class }

    shared_examples_for 'keyset pagination is available' do
      it 'returns true for Project' do
        expect(subject.available?(request_context, Project.all)).to be_truthy
      end

      it 'return false for other types of relations' do
        expect(subject.available?(request_context, User.all)).to be_falsey
      end
    end

    # Ordering by id alone is supported in either direction.
    %i[asc desc].each do |direction|
      context "with order-by id #{direction}" do
        let(:order_by) { { id: direction } }

        it_behaves_like 'keyset pagination is available'
      end
    end

    # Any additional order-by column makes keyset pagination unavailable.
    context 'with other order-by columns' do
      let(:order_by) { { created_at: :desc, id: :desc } }

      it 'returns false for Project' do
        expect(subject.available?(request_context, Project.all)).to be_falsey
      end

      it 'return false for other types of relations' do
        expect(subject.available?(request_context, User.all)).to be_falsey
      end
    end
  end
end
...@@ -599,6 +599,87 @@ describe API::Projects do ...@@ -599,6 +599,87 @@ describe API::Projects do
let(:projects) { Project.all } let(:projects) { Project.all }
end end
end end
# Request specs for GET /projects with `pagination=keyset`.
context 'with keyset pagination' do
  let(:current_user) { user }
  let(:projects) { [public_project, project, project2, project3] }

  context 'headers and records' do
    let(:params) { { pagination: 'keyset', order_by: :id, sort: :asc, per_page: 1 } }

    it 'includes a pagination header with link to the next page' do
      get api('/projects', current_user), params: params

      expect(response.header).to include('Links')
      expect(response.header['Links']).to include('pagination=keyset')
      expect(response.header['Links']).to include("id_after=#{public_project.id}")
    end

    it 'contains only the first project with per_page = 1' do
      get api('/projects', current_user), params: params

      expect(response).to have_gitlab_http_status(200)
      expect(json_response).to be_an Array
      expect(json_response.map { |p| p['id'] }).to contain_exactly(public_project.id)
    end

    it 'does not include a link if the end has reached and there is no more data' do
      get api('/projects', current_user), params: params.merge(id_after: project2.id)

      expect(response.header).not_to include('Links')
    end

    # The description previously said 501 while the assertion expects 405;
    # the wording now matches the asserted status.
    it 'responds with 405 if order_by is different from id' do
      get api('/projects', current_user), params: params.merge(order_by: :created_at)

      expect(response).to have_gitlab_http_status(405)
    end
  end

  context 'with descending sorting' do
    let(:params) { { pagination: 'keyset', order_by: :id, sort: :desc, per_page: 1 } }

    it 'includes a pagination header with link to the next page' do
      get api('/projects', current_user), params: params

      expect(response.header).to include('Links')
      expect(response.header['Links']).to include('pagination=keyset')
      expect(response.header['Links']).to include("id_before=#{project3.id}")
    end

    it 'contains only the last project with per_page = 1' do
      get api('/projects', current_user), params: params

      expect(response).to have_gitlab_http_status(200)
      expect(json_response).to be_an Array
      expect(json_response.map { |p| p['id'] }).to contain_exactly(project3.id)
    end
  end

  context 'retrieving the full relation' do
    let(:params) { { pagination: 'keyset', order_by: :id, sort: :desc, per_page: 2 } }

    # Follows the rel="next" links page by page and collects every returned id.
    it 'returns all projects' do
      url = '/projects'
      requests = 0
      ids = []

      while url && requests <= 5 # circuit breaker
        requests += 1
        get api(url, current_user), params: params

        # Continue with the path and query of the next link; nil when the
        # header is absent or does not match, which ends the loop.
        links = response.header['Links']
        url = links&.match(/<[^>]+(\/projects\?[^>]+)>; rel="next"/) do |match|
          match[1]
        end

        ids += JSON.parse(response.body).map { |p| p['id'] }
      end

      expect(ids).to contain_exactly(*projects.map(&:id))
    end
  end
end
end end
describe 'POST /projects' do describe 'POST /projects' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment