Commit 1099804b authored by Nick Thomas's avatar Nick Thomas

Merge branch 'ajk-gql-lazy-collections' into 'master'

Lazy array resolver

See merge request gitlab-org/gitlab!45417
parents 1d71f8d8 90a345b7
# frozen_string_literal: true
# Concern that will eliminate N+1 queries for size-constrained
# collections of items, provided that:
#
# - the query can be uniquely determined by the object and the arguments
# - the model class includes FromUnion
# - the model class defines a scalar primary key
#
# This comes at the cost of returning arrays, not relations, so we don't get
# any keyset pagination goodness. Consequently, this is only suitable for small-ish
# result sets, as the full result set will be loaded into memory.
#
# To enforce this, the resolver will never load more items than
# `@field.max_page_size` if defined, falling back to
# `context.schema.default_max_page_size`.
#
# **important**: If the cardinality of your collection is likely to be greater than 100,
# then you will want to pass `max_page_size:` as part of the field definition
# or (ideally) as part of the resolver `field_options`.
#
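# For example (an illustrative sketch; the field and resolver names here are
# hypothetical, not part of this change):
#
#   field :big_things, [Types::BigThingType], null: true,
#         max_page_size: 500,
#         resolver: Resolvers::BigThingsResolver
#
# or, ideally, on the resolver class itself:
#
#   def self.field_options
#     super.merge(max_page_size: 500)
#   end
#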
# How to implement:
# --------------------
#
# Each including class operates on two generic parameters, A and R:
# - A is any Object that can be used as a Hash key. Instances of A
# are returned by `query_input` and then passed to `query_for`.
# - R is any subclass of ApplicationRecord that includes FromUnion.
# R must have a single scalar primary_key
#
# Classes must implement:
# - #model_class -> Class[R]. (Must respond to :primary_key, and :from_union)
# - #query_input(**kwargs) -> A (Must be hashable)
# - #query_for(A) -> ActiveRecord::Relation[R]
#
# Note the relationship between query_input and query_for: query_for
# consumes the value produced by query_input
# (i.e. `resolve(**args).sync == query_for(query_input(**args)).to_a`).
#
# Classes may implement:
# - #item_found(A, R) (return value is ignored)
# - #max_union_size -> Integer (the maximum number of queries to run in any one union)
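# Example (an illustrative, abbreviated sketch; the resolver name and argument
# are hypothetical, but the shape mirrors the contract above and the specs below):
#
#   class Resolvers::UsersByAdminStateResolver < Resolvers::BaseResolver
#     include ::CachingArrayResolver
#
#     # A is a Boolean-ish hash key; R is User (includes FromUnion, primary key :id)
#     def query_input(is_admin:)
#       is_admin
#     end
#
#     def query_for(is_admin)
#       is_admin.nil? ? User.all : User.where(admin: is_admin)
#     end
#
#     def model_class
#       User
#     end
#   end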
module CachingArrayResolver
  MAX_UNION_SIZE = 50
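
  # Resolves lazily: each distinct `query_input` becomes a BatchLoader key.
  # When the batch is evaluated, the keys' queries are combined with
  # `from_union`, each row is tagged with the index of the query that produced
  # it (`union_member_idx`), and the rows are distributed back to their keys.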
  def resolve(**args)
    key = query_input(**args)

    BatchLoader::GraphQL.for(key).batch(**batch) do |keys, loader|
      if keys.size == 1
        # We can avoid the union entirely.
        k = keys.first
        limit(query_for(k)).each { |item| found(loader, k, item) }
      else
        queries = keys.map { |key| query_for(key) }

        queries.in_groups_of(max_union_size, false).each do |group|
          by_id = model_class
            .from_union(tag(group), remove_duplicates: false)
            .group_by { |r| r[primary_key] }

          by_id.values.each do |item_group|
            item = item_group.first

            item_group.map(&:union_member_idx).each do |i|
              found(loader, keys[i], item)
            end
          end
        end
      end
    end
  end

  # Override this to intercept the items once they are found
  def item_found(query_input, item)
  end

  def max_union_size
    MAX_UNION_SIZE
  end

  private

  def primary_key
    @primary_key ||= (model_class.primary_key || raise("No primary key for #{model_class}"))
  end

  def batch
    { key: self.class, default_value: [] }
  end

  def found(loader, key, value)
    loader.call(key) do |vs|
      item_found(key, value)
      vs << value
    end
  end
  # Tag each row returned from each query with the index of the query in
  # the union it comes from. This lets us map the results back to the cache key.
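  #
  # Once combined via `from_union` in #resolve, two tagged queries produce,
  # roughly (illustrative SQL only, assuming the model is User; `n` is `query_limit`):
  #
  #   (SELECT "users".*, 0 AS union_member_idx FROM "users" WHERE ... LIMIT n)
  #   UNION ALL
  #   (SELECT "users".*, 1 AS union_member_idx FROM "users" WHERE ... LIMIT n)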
  def tag(queries)
    queries.each_with_index.map do |q, i|
      limit(q.select(all_fields, member_idx(i)))
    end
  end

  def limit(query)
    query.limit(query_limit) # rubocop: disable CodeReuse/ActiveRecord
  end

  def all_fields
    model_class.arel_table[Arel.star]
  end

  # rubocop: disable Graphql/Descriptions (false positive!)
  def query_limit
    field&.max_page_size.presence || context.schema.default_max_page_size
  end
  # rubocop: enable Graphql/Descriptions

  def member_idx(idx)
    ::Arel::Nodes::SqlLiteral.new(idx.to_s).as('union_member_idx')
  end
end
@@ -4,6 +4,8 @@ module Gitlab
   module Graphql
     module Present
       class Instrumentation
+        SAFE_CONTEXT_KEYS = %i[current_user].freeze
+
         def instrument(type, field)
           return field unless field.metadata[:type_class]
@@ -22,7 +24,8 @@ module Gitlab
               next old_resolver.call(presented_type, args, context)
             end

-            presenter = presented_in.presenter_class.new(object, **context.to_h)
+            attrs = safe_context_values(context)
+            presenter = presented_in.presenter_class.new(object, **attrs)

             # we have to use the new `authorized_new` method, as `new` is protected
             wrapped = presented_type.class.authorized_new(presenter, context)
@@ -34,6 +37,12 @@ module Gitlab
             resolve(resolve_with_presenter)
           end
         end
+
+        private
+
+        def safe_context_values(context)
+          context.to_h.slice(*SAFE_CONTEXT_KEYS)
+        end
       end
     end
   end
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe ::CachingArrayResolver do
  include GraphqlHelpers

  let_it_be(:non_admins) { create_list(:user, 4, admin: false) }
  let(:query_context) { {} }
  let(:max_page_size) { 10 }
  let(:field) { double('Field', max_page_size: max_page_size) }
  let(:schema) { double('Schema', default_max_page_size: 3) }

  let_it_be(:caching_resolver) do
    mod = described_class

    Class.new(::Resolvers::BaseResolver) do
      include mod

      def query_input(is_admin:)
        is_admin
      end

      def query_for(is_admin)
        if is_admin.nil?
          model_class.all
        else
          model_class.where(admin: is_admin)
        end
      end

      def model_class
        User # Happens to include FromUnion, and is cheap-ish to create
      end
    end
  end
  describe '#resolve' do
    context 'there are more than MAX_UNION_SIZE queries' do
      let_it_be(:max_union) { 3 }
      let_it_be(:resolver) do
        mod = described_class
        max = max_union

        Class.new(::Resolvers::BaseResolver) do
          include mod

          def query_input(username:)
            username
          end

          def query_for(username)
            if username.nil?
              model_class.all
            else
              model_class.where(username: username)
            end
          end

          def model_class
            User # Happens to include FromUnion, and is cheap-ish to create
          end

          define_method :max_union_size do
            max
          end
        end
      end

      it 'executes the queries in multiple batches' do
        users = create_list(:user, (max_union * 2) + 1)
        expect(User).to receive(:from_union).twice.and_call_original

        results = users.in_groups_of(2, false).map do |users|
          resolve(resolver, args: { username: users.map(&:username) }, field: field, schema: schema)
        end

        expect(results.flat_map(&method(:force))).to match_array(users)
      end
    end

    context 'all queries return results' do
      let_it_be(:admins) { create_list(:admin, 3) }

      it 'batches the queries' do
        expect do
          [resolve_users(true), resolve_users(false)].each(&method(:force))
        end.to issue_same_number_of_queries_as { force(resolve_users(nil)) }
      end

      it 'finds the correct values' do
        found_admins = resolve_users(true)
        found_others = resolve_users(false)
        admins_again = resolve_users(true)
        found_all = resolve_users(nil)

        expect(force(found_admins)).to match_array(admins)
        expect(force(found_others)).to match_array(non_admins)
        expect(force(admins_again)).to match_array(admins)
        expect(force(found_all)).to match_array(admins + non_admins)
      end
    end

    it 'does not perform a union of a query with itself' do
      expect(User).to receive(:where).once.and_call_original

      [resolve_users(false), resolve_users(false)].each(&method(:force))
    end
    context 'one of the queries returns no results' do
      it 'finds the correct values' do
        found_admins = resolve_users(true)
        found_others = resolve_users(false)
        found_all = resolve_users(nil)

        expect(force(found_admins)).to be_empty
        expect(force(found_others)).to match_array(non_admins)
        expect(force(found_all)).to match_array(non_admins)
      end
    end

    context 'one of the queries has already been cached' do
      before do
        force(resolve_users(nil))
      end

      it 'avoids further queries' do
        expect do
          repeated_find = resolve_users(nil)

          expect(force(repeated_find)).to match_array(non_admins)
        end.not_to exceed_query_limit(0)
      end
    end

    context 'the resolver overrides item_found' do
      let_it_be(:admins) { create_list(:admin, 2) }

      let(:query_context) do
        {
          found: { true => [], false => [], nil => [] }
        }
      end

      let_it_be(:with_item_found) do
        Class.new(caching_resolver) do
          def item_found(key, item)
            context[:found][key] << item
          end
        end
      end

      it 'receives item_found for each key the item mapped to' do
        found_admins = resolve_users(true, with_item_found)
        found_all = resolve_users(nil, with_item_found)

        [found_admins, found_all].each(&method(:force))

        expect(query_context[:found]).to match({
          false => be_empty,
          true => match_array(admins),
          nil => match_array(admins + non_admins)
        })
      end
    end

    context 'the max_page_size is lower than the total result size' do
      let(:max_page_size) { 2 }

      it 'respects the max_page_size, on a per subset basis' do
        found_all = resolve_users(nil)
        found_others = resolve_users(false)

        expect(force(found_all).size).to eq(2)
        expect(force(found_others).size).to eq(2)
      end
    end

    context 'the field does not declare max_page_size' do
      let(:max_page_size) { nil }

      it 'takes the page size from schema.default_max_page_size' do
        found_all = resolve_users(nil)
        found_others = resolve_users(false)

        expect(force(found_all).size).to eq(schema.default_max_page_size)
        expect(force(found_others).size).to eq(schema.default_max_page_size)
      end
    end

    specify 'force . resolve === to_a . query_for . query_input' do
      r = resolver_instance(caching_resolver)
      args = { is_admin: false }

      naive = r.query_for(r.query_input(**args)).to_a

      expect(force(r.resolve(**args))).to eq(naive)
    end
  end

  def resolve_users(is_admin, resolver = caching_resolver)
    args = { is_admin: is_admin }

    resolve(resolver, args: args, field: field, ctx: query_context, schema: schema)
  end

  def force(lazy)
    ::Gitlab::Graphql::Lazy.force(lazy)
  end
end
@@ -17,8 +17,8 @@ module GraphqlHelpers
   # ready, then the early return is returned instead.
   #
   # Then the resolve method is called.
-  def resolve(resolver_class, obj: nil, args: {}, ctx: {}, field: nil)
-    resolver = resolver_class.new(object: obj, context: ctx, field: field)
+  def resolve(resolver_class, args: {}, **resolver_args)
+    resolver = resolver_instance(resolver_class, **resolver_args)
     ready, early_return = sync_all { resolver.ready?(**args) }

     return early_return unless ready
@@ -26,6 +26,15 @@ module GraphqlHelpers
     resolver.resolve(**args)
   end

+  def resolver_instance(resolver_class, obj: nil, ctx: {}, field: nil, schema: GitlabSchema)
+    if ctx.is_a?(Hash)
+      q = double('Query', schema: schema)
+      ctx = GraphQL::Query::Context.new(query: q, object: obj, values: ctx)
+    end
+
+    resolver_class.new(object: obj, context: ctx, field: field)
+  end
+
   # Eagerly run a loader's named resolver
   # (syncs any lazy values returned by resolve)
   def eager_resolve(resolver_class, **opts)