Commit 2066f29a authored by Nick Thomas, committed by Mayra Cabrera

Paginate the notes incremental fetch endpoint

Returning an unlimited number of elements from any endpoint is a bad
idea. For the notes endpoint specifically, we return all notes since
a given `X-Last-Fetched-At` value. The backend specifies, in its response, the
new value the client should send for that header next time, so if we return a
limited set of notes along with an appropriate value, we get a rudimentary
form of keyset pagination for free.
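
To make the flow concrete, here is a minimal sketch of the client-side loop
this enables. The header name and the `notes`/`last_fetched_at`/`more` response
fields are the real ones used by this endpoint; `fetch_notes` and
`process_notes` are hypothetical helpers standing in for the HTTP request and
the frontend rendering:

    # Sketch only: fetch_notes and process_notes are hypothetical helpers.
    cursor = 0 # integer microseconds; 0 means "from the beginning of time"

    loop do
      body = fetch_notes('X-Last-Fetched-At' => cursor)

      process_notes(body['notes'])
      cursor = body['last_fetched_at'] # new cursor chosen by the backend

      break unless body['more'] # final page reached; resume normal polling
    end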

The way the notes endpoint is used today, this is mostly a theoretical
worry, and in practice, basically every response will fit within a
single page anyway. However, we can use the `notes` endpoint *instead
of* the `discussions` endpoint to fill a merge request discussion from
the beginning of time. When we do this, large discussions are loaded in
batches automatically, using pre-existing frontend code. So this change
is best seen as preparatory work for removing `discussions` altogether.

This commit also fixes a pre-existing problem with the endpoint: `updated_at`
is stored with *microsecond* precision in the database, while the
X-Last-Fetched-At value only carried whole seconds. Treating the header value
as an integer number of microseconds avoids a range of boundary conditions
that would otherwise result.
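
For reference, the conversion now performed in both directions boils down to
the following sketch, which mirrors the helpers added in this diff (the method
names here are illustrative only):

    MICROSECOND = 1_000_000

    # Time -> integer microseconds, as returned in the last_fetched_at field
    def microseconds(time)
      (time.to_i * MICROSECOND) + time.usec
    end

    # Integer microseconds -> Time, as parsed from the X-Last-Fetched-At header
    def time_from_microseconds(value)
      Time.zone.at(value / MICROSECOND, value % MICROSECOND)
    end
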
parent 4cf1fbe2
@@ -5,6 +5,11 @@ module NotesActions
   include Gitlab::Utils::StrongMemoize
   extend ActiveSupport::Concern
 
+  # last_fetched_at is an integer number of microseconds, which is the same
+  # precision as PostgreSQL "timestamp" fields. It's important for them to have
+  # identical precision for accurate pagination
+  MICROSECOND = 1_000_000
+
   included do
     before_action :set_polling_interval_header, only: [:index]
     before_action :require_noteable!, only: [:index, :create]
@@ -13,30 +18,20 @@ module NotesActions
   end
 
   def index
-    notes_json = { notes: [], last_fetched_at: Time.current.to_i }
-
-    notes = notes_finder
-      .execute
-      .inc_relations_for_view
-
-    if notes_filter != UserPreference::NOTES_FILTERS[:only_comments]
-      notes =
-        ResourceEvents::MergeIntoNotesService
-          .new(noteable, current_user, last_fetched_at: last_fetched_at)
-          .execute(notes)
-    end
+    notes, meta = gather_notes
 
     notes = prepare_notes_for_rendering(notes)
     notes = notes.select { |n| n.readable_by?(current_user) }
 
-    notes_json[:notes] =
+    notes =
       if use_note_serializer?
         note_serializer.represent(notes)
       else
         notes.map { |note| note_json(note) }
       end
 
-    render json: notes_json
+    # We know there's more data, so tell the frontend to poll again after 1ms
+    set_polling_interval_header(interval: 1) if meta[:more]
+
+    render json: meta.merge(notes: notes)
   end
 
   # rubocop:disable Gitlab/ModuleWithInstanceVariables
@@ -101,6 +96,48 @@ module NotesActions
   private
 
+  # Lower bound (last_fetched_at as specified in the request) is already set in
+  # the finder. Here, we select between returning all notes since then, or a
+  # page's worth of notes.
+  def gather_notes
+    if Feature.enabled?(:paginated_notes, project)
+      gather_some_notes
+    else
+      gather_all_notes
+    end
+  end
+
+  def gather_all_notes
+    now = Time.current
+    notes = merge_resource_events(notes_finder.execute.inc_relations_for_view)
+
+    [notes, { last_fetched_at: (now.to_i * MICROSECOND) + now.usec }]
+  end
+
+  def gather_some_notes
+    paginator = Gitlab::UpdatedNotesPaginator.new(
+      notes_finder.execute.inc_relations_for_view,
+      last_fetched_at: last_fetched_at
+    )
+
+    notes = paginator.notes
+
+    # Fetch all the synthetic notes in the same time range as the real notes.
+    # Although we don't limit the number, their text is under our control so
+    # should be fairly cheap to process.
+    notes = merge_resource_events(notes, fetch_until: paginator.next_fetched_at)
+
+    [notes, paginator.metadata]
+  end
+
+  def merge_resource_events(notes, fetch_until: nil)
+    return notes if notes_filter == UserPreference::NOTES_FILTERS[:only_comments]
+
+    ResourceEvents::MergeIntoNotesService
+      .new(noteable, current_user, last_fetched_at: last_fetched_at, fetch_until: fetch_until)
+      .execute(notes)
+  end
+
   def note_html(note)
     render_to_string(
       "shared/notes/_note",
@@ -229,8 +266,8 @@ module NotesActions
     params.require(:note).permit(:note, :position)
   end
 
-  def set_polling_interval_header
-    Gitlab::PollingInterval.set_header(response, interval: 6_000)
+  def set_polling_interval_header(interval: 6000)
+    Gitlab::PollingInterval.set_header(response, interval: interval)
   end
 
   def noteable
@@ -242,7 +279,14 @@ module NotesActions
   end
 
   def last_fetched_at
-    request.headers['X-Last-Fetched-At']
+    strong_memoize(:last_fetched_at) do
+      microseconds = request.headers['X-Last-Fetched-At'].to_i
+
+      seconds = microseconds / MICROSECOND
+      frac = microseconds % MICROSECOND
+
+      Time.zone.at(seconds, frac)
+    end
   end
 
   def notes_filter
......
@@ -158,13 +158,16 @@ class NotesFinder
   end
 
   # Notes changed since last fetch
-  # Uses overlapping intervals to avoid worrying about race conditions
   def since_fetch_at(notes)
     return notes unless @params[:last_fetched_at]
 
     # Default to 0 to remain compatible with old clients
-    last_fetched_at = Time.at(@params.fetch(:last_fetched_at, 0).to_i)
+    last_fetched_at = @params.fetch(:last_fetched_at, Time.at(0))
 
-    notes.updated_after(last_fetched_at - FETCH_OVERLAP)
+    # Use overlapping intervals to avoid worrying about race conditions
+    last_fetched_at -= FETCH_OVERLAP
+
+    notes.updated_after(last_fetched_at)
   end
 
   def notes_filter?
......
@@ -38,6 +38,10 @@ class ApplicationRecord < ActiveRecord::Base
     false
   end
 
+  def self.at_most(count)
+    limit(count)
+  end
+
   def self.safe_find_or_create_by!(*args)
     safe_find_or_create_by(*args).tap do |record|
       record.validate! unless record.persisted?
......
@@ -123,6 +123,8 @@ class Note < ApplicationRecord
   scope :common, -> { where(noteable_type: ["", nil]) }
   scope :fresh, -> { order(created_at: :asc, id: :asc) }
   scope :updated_after, ->(time) { where('updated_at > ?', time) }
+  scope :with_updated_at, ->(time) { where(updated_at: time) }
+  scope :by_updated_at, -> { reorder(:updated_at, :id) }
   scope :inc_author_project, -> { includes(:project, :author) }
   scope :inc_author, -> { includes(:author) }
   scope :inc_relations_for_view, -> do
......
@@ -11,6 +11,7 @@ class ResourceEvent < ApplicationRecord
   belongs_to :user
 
   scope :created_after, ->(time) { where('created_at > ?', time) }
+  scope :created_on_or_before, ->(time) { where('created_at <= ?', time) }
 
   def discussion_id
     strong_memoize(:discussion_id) do
......
@@ -23,11 +23,25 @@ module ResourceEvents
   private
 
-  def since_fetch_at(events)
+  def apply_common_filters(events)
+    events = apply_last_fetched_at(events)
+    events = apply_fetch_until(events)
+
+    events
+  end
+
+  def apply_last_fetched_at(events)
     return events unless params[:last_fetched_at].present?
 
-    last_fetched_at = Time.zone.at(params.fetch(:last_fetched_at).to_i)
-    events.created_after(last_fetched_at - NotesFinder::FETCH_OVERLAP)
+    last_fetched_at = params[:last_fetched_at] - NotesFinder::FETCH_OVERLAP
+
+    events.created_after(last_fetched_at)
+  end
+
+  def apply_fetch_until(events)
+    return events unless params[:fetch_until].present?
+
+    events.created_on_or_before(params[:fetch_until])
   end
 
   def resource_parent
......
@@ -19,7 +19,7 @@ module ResourceEvents
       return [] unless resource.respond_to?(:resource_label_events)
 
       events = resource.resource_label_events.includes(:label, user: :status) # rubocop: disable CodeReuse/ActiveRecord
-      events = since_fetch_at(events)
+      events = apply_common_filters(events)
 
       events.group_by { |event| event.discussion_id }
     end
......
@@ -19,7 +19,7 @@ module ResourceEvents
       return [] unless resource.respond_to?(:resource_milestone_events)
 
       events = resource.resource_milestone_events.includes(user: :status) # rubocop: disable CodeReuse/ActiveRecord
-      since_fetch_at(events)
+      apply_common_filters(events)
     end
   end
 end
......
@@ -14,7 +14,7 @@ module ResourceEvents
       return [] unless resource.respond_to?(:resource_state_events)
 
       events = resource.resource_state_events.includes(user: :status) # rubocop: disable CodeReuse/ActiveRecord
-      since_fetch_at(events)
+      apply_common_filters(events)
     end
   end
 end
......
---
title: Paginate the notes incremental fetch endpoint
merge_request: 34628
author:
type: performance
......
@@ -20,7 +20,7 @@ module EE
       return [] unless resource.respond_to?(:resource_weight_events)
 
       events = resource.resource_weight_events.includes(user: :status).order(:id) # rubocop: disable CodeReuse/ActiveRecord
-      since_fetch_at(events)
+      apply_common_filters(events)
     end
   end
 end
......
@@ -81,7 +81,8 @@
         }
       }
     },
-    "last_fetched_at": { "type": "integer" }
+    "last_fetched_at": { "type": "integer" },
+    "more": { "type": "boolean" }
   },
   "additionalProperties": false
 }
......
# frozen_string_literal: true

module Gitlab
  # UpdatedNotesPaginator implements a rudimentary form of keyset pagination on
  # top of a notes relation that has been initialized with a `last_fetched_at`
  # value. This class will attempt to limit the number of notes returned, and
  # specify a new value for `last_fetched_at` that will pick up where the last
  # page of notes left off.
  class UpdatedNotesPaginator
    LIMIT = 50
    MICROSECOND = 1_000_000

    attr_reader :next_fetched_at, :notes

    def initialize(relation, last_fetched_at:)
      @last_fetched_at = last_fetched_at
      @now = Time.current

      notes, more = fetch_page(relation)
      if more
        init_middle_page(notes)
      else
        init_final_page(notes)
      end
    end

    def metadata
      { last_fetched_at: next_fetched_at_microseconds, more: more }
    end

    private

    attr_reader :last_fetched_at, :more, :now

    def next_fetched_at_microseconds
      (next_fetched_at.to_i * MICROSECOND) + next_fetched_at.usec
    end

    def fetch_page(relation)
      relation = relation.by_updated_at
      notes = relation.at_most(LIMIT + 1).to_a

      return [notes, false] unless notes.size > LIMIT

      marker = notes.pop # Remove the marker note

      # Although very unlikely, it is possible that more notes with the same
      # updated_at may exist, e.g., if created in bulk. Add them all to the page
      # if this is detected, so pagination won't get stuck indefinitely
      if notes.last.updated_at == marker.updated_at
        notes += relation
          .with_updated_at(marker.updated_at)
          .id_not_in(notes.map(&:id))
          .to_a
      end

      [notes, true]
    end

    def init_middle_page(notes)
      @more = true

      # The fetch overlap can be ignored if we're in an intermediate page.
      @next_fetched_at = notes.last.updated_at + NotesFinder::FETCH_OVERLAP

      @notes = notes
    end

    def init_final_page(notes)
      @more = false
      @next_fetched_at = now
      @notes = notes
    end
  end
end
......
@@ -38,9 +38,9 @@ RSpec.describe Projects::NotesController do
     end
 
     it 'passes last_fetched_at from headers to NotesFinder and MergeIntoNotesService' do
-      last_fetched_at = 3.hours.ago.to_i
+      last_fetched_at = Time.zone.at(3.hours.ago.to_i) # remove nanoseconds
 
-      request.headers['X-Last-Fetched-At'] = last_fetched_at
+      request.headers['X-Last-Fetched-At'] = microseconds(last_fetched_at)
 
       expect(NotesFinder).to receive(:new)
         .with(anything, hash_including(last_fetched_at: last_fetched_at))
@@ -84,6 +84,81 @@ RSpec.describe Projects::NotesController do
       end
     end
 
+    context 'for multiple pages of notes', :aggregate_failures do
+      # 3 pages worth: 1 normal page, 1 oversized due to clashing updated_at,
+      # and a final, short page
+      let!(:page_1) { create_list(:note, 2, noteable: issue, project: project, updated_at: 3.days.ago) }
+      let!(:page_2) { create_list(:note, 3, noteable: issue, project: project, updated_at: 2.days.ago) }
+      let!(:page_3) { create_list(:note, 2, noteable: issue, project: project, updated_at: 1.day.ago) }
+
+      # Include a resource event in the middle page as well
+      let!(:resource_event) { create(:resource_state_event, issue: issue, user: user, created_at: 2.days.ago) }
+
+      let(:page_1_boundary) { microseconds(page_1.last.updated_at + NotesFinder::FETCH_OVERLAP) }
+      let(:page_2_boundary) { microseconds(page_2.last.updated_at + NotesFinder::FETCH_OVERLAP) }
+
+      around do |example|
+        Timecop.freeze do
+          example.run
+        end
+      end
+
+      before do
+        stub_const('Gitlab::UpdatedNotesPaginator::LIMIT', 2)
+      end
+
+      context 'feature flag enabled' do
+        before do
+          stub_feature_flags(paginated_notes: true)
+        end
+
+        it 'returns the first page of notes' do
+          get :index, params: request_params
+
+          expect(json_response['notes'].count).to eq(page_1.count)
+          expect(json_response['more']).to be_truthy
+          expect(json_response['last_fetched_at']).to eq(page_1_boundary)
+          expect(response.headers['Poll-Interval'].to_i).to eq(1)
+        end
+
+        it 'returns the second page of notes' do
+          request.headers['X-Last-Fetched-At'] = page_1_boundary
+
+          get :index, params: request_params
+
+          expect(json_response['notes'].count).to eq(page_2.count + 1) # resource event
+          expect(json_response['more']).to be_truthy
+          expect(json_response['last_fetched_at']).to eq(page_2_boundary)
+          expect(response.headers['Poll-Interval'].to_i).to eq(1)
+        end
+
+        it 'returns the final page of notes' do
+          request.headers['X-Last-Fetched-At'] = page_2_boundary
+
+          get :index, params: request_params
+
+          expect(json_response['notes'].count).to eq(page_3.count)
+          expect(json_response['more']).to be_falsy
+          expect(json_response['last_fetched_at']).to eq(microseconds(Time.zone.now))
+          expect(response.headers['Poll-Interval'].to_i).to be > 1
+        end
+      end
+
+      context 'feature flag disabled' do
+        before do
+          stub_feature_flags(paginated_notes: false)
+        end
+
+        it 'returns all notes' do
+          get :index, params: request_params
+
+          expect(json_response['notes'].count).to eq((page_1 + page_2 + page_3).size + 1)
+          expect(json_response['more']).to be_falsy
+          expect(json_response['last_fetched_at']).to eq(microseconds(Time.zone.now))
+        end
+      end
+    end
+
     context 'for a discussion note' do
       let(:project) { create(:project, :repository) }
       let!(:note) { create(:discussion_note_on_merge_request, project: project) }
@@ -870,4 +945,9 @@ RSpec.describe Projects::NotesController do
       end
     end
   end
+
+  # Convert a time to an integer number of microseconds
+  def microseconds(time)
+    (time.to_i * 1_000_000) + time.usec
+  end
 end
......
@@ -123,7 +123,7 @@ RSpec.describe NotesFinder do
       let!(:note1) { create :note_on_commit, project: project }
       let!(:note2) { create :note_on_commit, project: project }
       let(:commit) { note1.noteable }
-      let(:params) { { project: project, target_id: commit.id, target_type: 'commit', last_fetched_at: 1.hour.ago.to_i } }
+      let(:params) { { project: project, target_id: commit.id, target_type: 'commit', last_fetched_at: 1.hour.ago } }
 
       it 'finds all notes' do
         notes = described_class.new(user, params).execute
@@ -172,7 +172,7 @@ RSpec.describe NotesFinder do
       let(:confidential_issue) { create(:issue, :confidential, project: project, author: user) }
       let!(:confidential_note) { create(:note, noteable: confidential_issue, project: confidential_issue.project) }
-      let(:params) { { project: confidential_issue.project, target_id: confidential_issue.id, target_type: 'issue', last_fetched_at: 1.hour.ago.to_i } }
+      let(:params) { { project: confidential_issue.project, target_id: confidential_issue.id, target_type: 'issue', last_fetched_at: 1.hour.ago } }
 
       it 'returns notes if user can see the issue' do
         expect(described_class.new(user, params).execute).to eq([confidential_note])
@@ -204,7 +204,7 @@ RSpec.describe NotesFinder do
     end
 
     it 'returns the expected notes when last_fetched_at is given' do
-      params = { project: project, target: commit, last_fetched_at: 1.hour.ago.to_i }
+      params = { project: project, target: commit, last_fetched_at: 1.hour.ago }
 
       expect(described_class.new(user, params).execute).to eq([note2])
     end
......
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Gitlab::UpdatedNotesPaginator do
  let(:issue) { create(:issue) }
  let(:project) { issue.project }
  let(:finder) { NotesFinder.new(user, target: issue, last_fetched_at: last_fetched_at) }
  let(:user) { issue.author }

  let!(:page_1) { create_list(:note, 2, noteable: issue, project: project, updated_at: 2.days.ago) }
  let!(:page_2) { [create(:note, noteable: issue, project: project, updated_at: 1.day.ago)] }

  let(:page_1_boundary) { page_1.last.updated_at + NotesFinder::FETCH_OVERLAP }

  around do |example|
    Timecop.freeze do
      example.run
    end
  end

  before do
    stub_const("Gitlab::UpdatedNotesPaginator::LIMIT", 2)
  end

  subject(:paginator) { described_class.new(finder.execute, last_fetched_at: last_fetched_at) }

  describe 'last_fetched_at: start of time' do
    let(:last_fetched_at) { Time.at(0) }

    it 'calculates the first page of notes', :aggregate_failures do
      expect(paginator.notes).to match_array(page_1)
      expect(paginator.metadata).to match(
        more: true,
        last_fetched_at: microseconds(page_1_boundary)
      )
    end
  end

  describe 'last_fetched_at: start of final page' do
    let(:last_fetched_at) { page_1_boundary }

    it 'calculates a final page', :aggregate_failures do
      expect(paginator.notes).to match_array(page_2)
      expect(paginator.metadata).to match(
        more: false,
        last_fetched_at: microseconds(Time.zone.now)
      )
    end
  end

  # Convert a time to an integer number of microseconds
  def microseconds(time)
    (time.to_i * 1_000_000) + time.usec
  end
end
......
@@ -58,4 +58,11 @@ RSpec.describe ApplicationRecord do
       expect(MergeRequest.underscore).to eq('merge_request')
     end
   end
+
+  describe '.at_most' do
+    it 'limits the number of records returned' do
+      create_list(:user, 3)
+
+      expect(User.at_most(2).count).to eq(2)
+    end
+  end
 end
......
@@ -61,7 +61,7 @@ RSpec.describe ResourceEvents::MergeIntoNotesService do
       event = create_event(created_at: 1.day.ago)
 
       notes = described_class.new(resource, user,
-                                  last_fetched_at: 2.days.ago.to_i).execute
+                                  last_fetched_at: 2.days.ago).execute
 
       expect(notes.count).to eq 1
       expect(notes.first.discussion_id).to eq event.discussion_id
......