Commit 9c7a8911 authored by Shinya Maeda's avatar Shinya Maeda

Optimize Deployment API that filters by updated_at

This commit optimizes some queries on Deployment API
that related to updated_at filter.
parent 694ce654
......@@ -16,14 +16,25 @@
class DeploymentsFinder
attr_reader :params
# Warning:
# These const are directly used in Deployment Rest API, thus
# modifying these values could implicity change the API interface or introduce a breaking change.
# Also, if you add a sort value, make sure that the new query will stay
# performant with the other filtering/sorting parameters.
# The composed query could be significantly slower when the filtering and sorting columns are different.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/325627 for example.
ALLOWED_SORT_VALUES = %w[id iid created_at updated_at ref finished_at].freeze
DEFAULT_SORT_VALUE = 'id'
ALLOWED_SORT_DIRECTIONS = %w[asc desc].freeze
DEFAULT_SORT_DIRECTION = 'asc'
InefficientQueryError = Class.new(StandardError)
def initialize(params = {})
@params = params
validate!
end
def execute
......@@ -38,6 +49,32 @@ class DeploymentsFinder
private
def validate!
if filter_by_updated_at? && filter_by_finished_at?
raise InefficientQueryError, 'Both `updated_at` filter and `finished_at` filter can not be specified'
end
# Currently, the inefficient parameters are allowed in order to avoid breaking changes in Deployment API.
# We'll switch to a hard error in https://gitlab.com/gitlab-org/gitlab/-/issues/328500.
if (filter_by_updated_at? && !order_by_updated_at?) || (!filter_by_updated_at? && order_by_updated_at?)
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(
InefficientQueryError.new('`updated_at` filter and `updated_at` sorting must be paired')
)
end
if (filter_by_finished_at? && !order_by_finished_at?) || (!filter_by_finished_at? && order_by_finished_at?)
raise InefficientQueryError, '`finished_at` filter and `finished_at` sorting must be paired'
end
if filter_by_finished_at? && !filter_by_successful_deployment?
raise InefficientQueryError, '`finished_at` filter must be combined with `success` status filter.'
end
if params[:environment].present? && !params[:project].present?
raise InefficientQueryError, '`environment` filter must be combined with `project` scope.'
end
end
def init_collection
if params[:project].present?
params[:project].deployments
......@@ -49,6 +86,8 @@ class DeploymentsFinder
end
def sort(items)
sort_params = build_sort_params
optimize_sort_params!(sort_params)
items.order(sort_params) # rubocop: disable CodeReuse/ActiveRecord
end
......@@ -67,8 +106,8 @@ class DeploymentsFinder
end
def by_environment(items)
if params[:environment].present?
items.for_environment_name(params[:environment])
if params[:project].present? && params[:environment].present?
items.for_environment_name(params[:project], params[:environment])
else
items
end
......@@ -84,14 +123,58 @@ class DeploymentsFinder
items.for_status(params[:status])
end
def sort_params
def build_sort_params
order_by = ALLOWED_SORT_VALUES.include?(params[:order_by]) ? params[:order_by] : DEFAULT_SORT_VALUE
order_direction = ALLOWED_SORT_DIRECTIONS.include?(params[:sort]) ? params[:sort] : DEFAULT_SORT_DIRECTION
{ order_by => order_direction }.tap do |sort_values|
sort_values['id'] = 'desc' if sort_values['updated_at']
sort_values['id'] = sort_values.delete('created_at') if sort_values['created_at'] # Sorting by `id` produces the same result as sorting by `created_at`
{ order_by => order_direction }
end
def optimize_sort_params!(sort_params)
# Implicitly enforce the ordering when filtered by `updated_at` column for performance optimization.
# See https://gitlab.com/gitlab-org/gitlab/-/issues/325627#note_552417509.
# We remove this in https://gitlab.com/gitlab-org/gitlab/-/issues/328500.
if filter_by_updated_at? && implicitly_enforce_ordering_for_updated_at_filter?
sort_params.replace('updated_at' => sort_params.each_value.first)
end
if sort_params['created_at'] || sort_params['iid']
# Sorting by `id` produces the same result as sorting by `created_at` or `iid`
sort_params.replace(id: sort_params.each_value.first)
elsif sort_params['updated_at']
# This adds the order as a tie-breaker when multiple rows have the same updated_at value.
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/20848.
sort_params.merge!(id: :desc)
end
end
def filter_by_updated_at?
params[:updated_before].present? || params[:updated_after].present?
end
def filter_by_finished_at?
params[:finished_before].present? || params[:finished_after].present?
end
def filter_by_successful_deployment?
params[:status].to_s == 'success'
end
def order_by_updated_at?
params[:order_by].to_s == 'updated_at'
end
def order_by_finished_at?
params[:order_by].to_s == 'finished_at'
end
def implicitly_enforce_ordering_for_updated_at_filter?
return false unless params[:project].present?
::Feature.enabled?(
:deployments_finder_implicitly_enforce_ordering_for_updated_at_filter,
params[:project],
default_enabled: :yaml)
end
# rubocop: disable CodeReuse/ActiveRecord
......
......@@ -32,8 +32,8 @@ class Deployment < ApplicationRecord
delegate :kubernetes_namespace, to: :deployment_cluster, allow_nil: true
scope :for_environment, -> (environment) { where(environment_id: environment) }
scope :for_environment_name, -> (name) do
joins(:environment).where(environments: { name: name })
scope :for_environment_name, -> (project, name) do
where(environment_id: Environment.select(:id).where(project: project, name: name))
end
scope :for_status, -> (status) { where(status: status) }
......
---
title: Recreate index for deployments updated_at and finished_at
merge_request: 59771
author:
type: performance
---
name: deployments_finder_implicitly_enforce_ordering_for_updated_at_filter
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/59771
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/329286
milestone: '13.12'
type: development
group: group::release
default_enabled: false
# frozen_string_literal: true
# This migration recreates the index that introduced in 20210326035553_add_index_for_project_deployments_with_environment_id_and_updated_at.rb.
class RecreateIndexForProjectDeploymentsWithEnvironmentIdAndDateAt < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
OLD_INDEX_NAME = 'index_deployments_on_project_and_environment_and_updated_at'
NEW_INDEX_NAME = 'index_deployments_on_project_and_environment_and_updated_at_id'
def up
add_concurrent_index :deployments, [:project_id, :environment_id, :updated_at, :id], name: NEW_INDEX_NAME
remove_concurrent_index_by_name :deployments, OLD_INDEX_NAME
end
def down
add_concurrent_index :deployments, [:project_id, :environment_id, :updated_at], name: OLD_INDEX_NAME
remove_concurrent_index_by_name :deployments, NEW_INDEX_NAME
end
end
b7f75e3b443bfcb1aea812ad1682a31a99021f41ef4d47bdf600437db6f4f2f3
\ No newline at end of file
......@@ -22718,7 +22718,7 @@ CREATE INDEX index_deployments_on_id_and_status_and_created_at ON deployments US
CREATE INDEX index_deployments_on_id_where_cluster_id_present ON deployments USING btree (id) WHERE (cluster_id IS NOT NULL);
CREATE INDEX index_deployments_on_project_and_environment_and_updated_at ON deployments USING btree (project_id, environment_id, updated_at);
CREATE INDEX index_deployments_on_project_and_environment_and_updated_at_id ON deployments USING btree (project_id, environment_id, updated_at, id);
CREATE INDEX index_deployments_on_project_and_finished ON deployments USING btree (project_id, finished_at) WHERE (status = 2);
......@@ -49,7 +49,9 @@ module API
project: user_project,
environment: environment_name,
finished_after: start_date,
finished_before: end_date
finished_before: end_date,
status: :success,
order_by: :finished_at
).execute
end
end
......
......@@ -29,7 +29,7 @@ module Gitlab
# rubocop: disable CodeReuse/ActiveRecord
def deployment_count_via_finder
deployments = DeploymentsFinder
.new(group: group, finished_after: options[:from], finished_before: options[:to], status: :success)
.new(group: group, finished_after: options[:from], finished_before: options[:to], status: :success, order_by: :finished_at)
.execute
deployments = deployments.where(project_id: options[:projects]) if options[:projects].present?
......
......@@ -41,6 +41,8 @@ module API
.execute.with_api_entity_associations
present paginate(deployments), with: Entities::Deployment
rescue DeploymentsFinder::InefficientQueryError => e
bad_request!(e.message)
end
desc 'Gets a specific deployment' do
......
......@@ -16,7 +16,7 @@ module Gitlab
def deployments_count
DeploymentsFinder
.new(project: @project, finished_after: @from, finished_before: @to, status: :success)
.new(project: @project, finished_after: @from, finished_before: @to, status: :success, order_by: :finished_at)
.execute
.count
end
......
This diff is collapsed.
......@@ -72,6 +72,35 @@ RSpec.describe Deployment do
end
end
describe '.for_environment_name' do
subject { described_class.for_environment_name(project, environment_name) }
let_it_be(:project) { create(:project, :repository) }
let_it_be(:production) { create(:environment, :production, project: project) }
let_it_be(:staging) { create(:environment, :staging, project: project) }
let_it_be(:other_project) { create(:project, :repository) }
let_it_be(:other_production) { create(:environment, :production, project: other_project) }
let(:environment_name) { production.name }
context 'when deployment belongs to the environment' do
let!(:deployment) { create(:deployment, project: project, environment: production) }
it { is_expected.to eq([deployment]) }
end
context 'when deployment belongs to the same project but different environment name' do
let!(:deployment) { create(:deployment, project: project, environment: staging) }
it { is_expected.to be_empty }
end
context 'when deployment belongs to the same environment name but different project' do
let!(:deployment) { create(:deployment, project: other_project, environment: other_production) }
it { is_expected.to be_empty }
end
end
describe '.success' do
subject { described_class.success }
......
......@@ -12,9 +12,11 @@ RSpec.describe API::Deployments do
describe 'GET /projects/:id/deployments' do
let_it_be(:project) { create(:project, :repository) }
let_it_be(:deployment_1) { create(:deployment, :success, project: project, iid: 11, ref: 'master', created_at: Time.now, updated_at: Time.now) }
let_it_be(:deployment_2) { create(:deployment, :success, project: project, iid: 12, ref: 'master', created_at: 1.day.ago, updated_at: 2.hours.ago) }
let_it_be(:deployment_3) { create(:deployment, :success, project: project, iid: 8, ref: 'master', created_at: 2.days.ago, updated_at: 1.hour.ago) }
let_it_be(:production) { create(:environment, :production, project: project) }
let_it_be(:staging) { create(:environment, :staging, project: project) }
let_it_be(:deployment_1) { create(:deployment, :success, project: project, environment: production, ref: 'master', created_at: Time.now, updated_at: Time.now) }
let_it_be(:deployment_2) { create(:deployment, :success, project: project, environment: staging, ref: 'master', created_at: 1.day.ago, updated_at: 2.hours.ago) }
let_it_be(:deployment_3) { create(:deployment, :success, project: project, environment: staging, ref: 'master', created_at: 2.days.ago, updated_at: 1.hour.ago) }
def perform_request(params = {})
get api("/projects/#{project.id}/deployments", user), params: params
......@@ -36,17 +38,26 @@ RSpec.describe API::Deployments do
context 'with updated_at filters specified' do
it 'returns projects deployments with last update in specified datetime range' do
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago })
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago, order_by: :updated_at })
expect(response).to have_gitlab_http_status(:ok)
expect(response).to include_pagination_headers
expect(json_response.first['id']).to eq(deployment_3.id)
end
context 'when forbidden order_by is specified' do
it 'returns projects deployments with last update in specified datetime range' do
perform_request({ updated_before: 30.minutes.ago, updated_after: 90.minutes.ago, order_by: :id })
expect(response).to have_gitlab_http_status(:bad_request)
expect(json_response['message']).to include('`updated_at` filter and `updated_at` sorting must be paired')
end
end
end
context 'with the environment filter specifed' do
it 'returns deployments for the environment' do
perform_request({ environment: deployment_1.environment.name })
perform_request({ environment: production.name })
expect(json_response.size).to eq(1)
expect(json_response.first['iid']).to eq(deployment_1.iid)
......@@ -68,7 +79,7 @@ RSpec.describe API::Deployments do
end
it 'returns ordered deployments' do
expect(json_response.map { |i| i['id'] }).to eq([deployment_2.id, deployment_1.id, deployment_3.id])
expect(json_response.map { |i| i['id'] }).to eq([deployment_3.id, deployment_2.id, deployment_1.id])
end
context 'with invalid order_by' do
......@@ -475,7 +486,7 @@ RSpec.describe API::Deployments do
let(:project) { create(:project, :repository) }
let!(:deployment) { create(:deployment, :success, project: project) }
subject { get api("/projects/#{project.id}/deployments?order_by=updated_at&sort=asc", user) }
subject { get api("/projects/#{project.id}/deployments?order_by=id&sort=asc", user) }
it 'succeeds', :aggregate_failures do
subject
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment