Commit 517a05c7 authored by Nick Thomas's avatar Nick Thomas

Recover gracefully when issuable counts are too expensive

On the MR list page, we like to display how many issues were found from
the filtered search query *in total*. However, especially when the
filter includes conditions on the MR title or description, this can be
very expensive to calculate, and involve reading gigabytes of text data
from the database.

As long as the data is already in the page cache, this usually finishes
within the 15-second timeout on GitLab.com, but if the database cache
is cold, a statement timeout is the usual occurrence.

More generally, it's not very clever to spend so much time calculating
a piece of information with marginal value.

This MR applies a shorter limit to the counting statements and provides
for graceful fallback to a '?' value, with a nice tooltip, if the query
times out. This means we're able to view the results in a reasonable
time, rather than the page taking a long time to load, or not loading
at all.
parent a355b01a
...@@ -66,6 +66,7 @@ module IssuableCollections ...@@ -66,6 +66,7 @@ module IssuableCollections
limit = relation.limit_value.to_f limit = relation.limit_value.to_f
return 1 if limit == 0 return 1 if limit == 0
return (params[:page] || 1).to_i + 1 if row_count == -1
(row_count.to_f / limit).ceil (row_count.to_f / limit).ceil
end end
......
...@@ -153,7 +153,9 @@ class IssuableFinder ...@@ -153,7 +153,9 @@ class IssuableFinder
end end
def row_count def row_count
Gitlab::IssuablesCountForState.new(self).for_state_or_opened(params[:state]) Gitlab::IssuablesCountForState
.new(self, nil, fast_fail: Feature.enabled?(:soft_fail_count_by_state, parent))
.for_state_or_opened(params[:state])
end end
# We often get counts for each state by running a query per state, and # We often get counts for each state by running a query per state, and
......
...@@ -250,7 +250,22 @@ module IssuablesHelper ...@@ -250,7 +250,22 @@ module IssuablesHelper
if display_count if display_count
count = issuables_count_for_state(issuable_type, state) count = issuables_count_for_state(issuable_type, state)
html << " " << content_tag(:span, number_with_delimiter(count), class: 'badge badge-pill') tag =
if count == -1
tooltip = _("Couldn't calculate number of %{issuables}.") % { issuables: issuable_type.to_s.humanize(capitalize: false) }
content_tag(
:span,
'?',
class: 'badge badge-pill has-tooltip',
aria: { label: tooltip },
title: tooltip
)
else
content_tag(:span, number_with_delimiter(count), class: 'badge badge-pill')
end
html << " " << tag
end end
html.html_safe html.html_safe
......
...@@ -52,6 +52,16 @@ class ApplicationRecord < ActiveRecord::Base ...@@ -52,6 +52,16 @@ class ApplicationRecord < ActiveRecord::Base
end end
end end
# Start a new transaction with a shorter-than-usual statement timeout. This is
# currently one third of the default 15-second timeout
def self.with_fast_statement_timeout
transaction(requires_new: true) do
connection.exec_query("SET LOCAL statement_timeout = 5000")
yield
end
end
def self.safe_find_or_create_by(*args, &block) def self.safe_find_or_create_by(*args, &block)
safe_ensure_unique(retries: 1) do safe_ensure_unique(retries: 1) do
find_or_create_by(*args, &block) find_or_create_by(*args, &block)
......
...@@ -10,10 +10,11 @@ ...@@ -10,10 +10,11 @@
%a.close{ href: '#', 'data-dismiss' => 'modal' } %a.close{ href: '#', 'data-dismiss' => 'modal' }
= sprite_icon('close', css_class: 'gl-icon') = sprite_icon('close', css_class: 'gl-icon')
.modal-body .modal-body
- issues_count = issuables_count_for_state(:issues, params[:state])
- unless issues_count == -1 # The count timed out
.modal-subheader .modal-subheader
= icon('check', { class: 'checkmark' }) = icon('check', { class: 'checkmark' })
%strong.gl-ml-3 %strong.gl-ml-3
- issues_count = issuables_count_for_state(:issues, params[:state])
= n_('%d issue selected', '%d issues selected', issues_count) % issues_count = n_('%d issue selected', '%d issues selected', issues_count) % issues_count
.modal-text .modal-text
= html_escape(_('The CSV export will be created in the background. Once finished, it will be sent to %{strong_open}%{email}%{strong_close} in an attachment.')) % { email: @current_user.notification_email, strong_open: '<strong>'.html_safe, strong_close: '</strong>'.html_safe } = html_escape(_('The CSV export will be created in the background. Once finished, it will be sent to %{strong_open}%{email}%{strong_close} in an attachment.')) % { email: @current_user.notification_email, strong_open: '<strong>'.html_safe, strong_close: '</strong>'.html_safe }
......
---
title: Recover gracefully when issuable counts are too expensive
merge_request: 44184
author:
type: fixed
---
name: soft_fail_count_by_state
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/44184
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/263222
type: development
group: group::source code
default_enabled: false
...@@ -113,6 +113,8 @@ The following metrics are available: ...@@ -113,6 +113,8 @@ The following metrics are available:
| `action_cable_pool_largest_size` | Gauge | 13.4 | Largest number of worker threads observed so far in ActionCable thread pool | `server_mode` | | `action_cable_pool_largest_size` | Gauge | 13.4 | Largest number of worker threads observed so far in ActionCable thread pool | `server_mode` |
| `action_cable_pool_pending_tasks` | Gauge | 13.4 | Number of tasks waiting to be executed in ActionCable thread pool | `server_mode` | | `action_cable_pool_pending_tasks` | Gauge | 13.4 | Number of tasks waiting to be executed in ActionCable thread pool | `server_mode` |
| `action_cable_pool_tasks_total` | Gauge | 13.4 | Total number of tasks executed in ActionCable thread pool | `server_mode` | | `action_cable_pool_tasks_total` | Gauge | 13.4 | Total number of tasks executed in ActionCable thread pool | `server_mode` |
| `gitlab_issuable_fast_count_by_state_total` | Counter | 13.5 | Total number of row count operations on issue/merge request list pages | |
| `gitlab_issuable_fast_count_by_state_failures_total` | Counter | 13.5 | Number of soft-failed row count operations on issue/merge request list pages | |
## Metrics controlled by a feature flag ## Metrics controlled by a feature flag
...@@ -122,6 +124,8 @@ The following metrics can be controlled by feature flags: ...@@ -122,6 +124,8 @@ The following metrics can be controlled by feature flags:
|:---------------------------------------------------------------|:-------------------------------------------------------------------| |:---------------------------------------------------------------|:-------------------------------------------------------------------|
| `gitlab_method_call_duration_seconds` | `prometheus_metrics_method_instrumentation` | | `gitlab_method_call_duration_seconds` | `prometheus_metrics_method_instrumentation` |
| `gitlab_view_rendering_duration_seconds` | `prometheus_metrics_view_instrumentation` | | `gitlab_view_rendering_duration_seconds` | `prometheus_metrics_view_instrumentation` |
| `gitlab_issuable_fast_count_by_state_total` | `soft_fail_count_by_state` |
| `gitlab_issuable_fast_count_by_state_failures_total` | `soft_fail_count_by_state` |
## Sidekiq metrics ## Sidekiq metrics
......
...@@ -664,6 +664,16 @@ RSpec.describe EpicsFinder do ...@@ -664,6 +664,16 @@ RSpec.describe EpicsFinder do
expect(results).to eq('opened' => 2, 'closed' => 1, 'all' => 3) expect(results).to eq('opened' => 2, 'closed' => 1, 'all' => 3)
end end
it 'returns -1 if the query times out' do
finder = described_class.new(search_user, group_id: group.id)
expect_next_instance_of(described_class) do |subfinder|
expect(subfinder).to receive(:execute).and_raise(ActiveRecord::QueryCanceled)
end
expect(finder.row_count).to eq(-1)
end
context 'when using group cte for search' do context 'when using group cte for search' do
before do before do
stub_feature_flags(use_subquery_for_group_issues_search: false) stub_feature_flags(use_subquery_for_group_issues_search: false)
......
...@@ -16,9 +16,12 @@ module Gitlab ...@@ -16,9 +16,12 @@ module Gitlab
end end
# finder - The finder class to use for retrieving the issuables. # finder - The finder class to use for retrieving the issuables.
def initialize(finder, project = nil) # fast_fail - restrict counting to a shorter period, degrading gracefully on
# failure
def initialize(finder, project = nil, fast_fail: false)
@finder = finder @finder = finder
@project = project @project = project
@fast_fail = fast_fail
@cache = Gitlab::SafeRequestStore[CACHE_KEY] ||= initialize_cache @cache = Gitlab::SafeRequestStore[CACHE_KEY] ||= initialize_cache
end end
...@@ -26,6 +29,10 @@ module Gitlab ...@@ -26,6 +29,10 @@ module Gitlab
self[state || :opened] self[state || :opened]
end end
def fast_fail?
!!@fast_fail
end
# Define method for each state # Define method for each state
STATES.each do |state| STATES.each do |state|
define_method(state) { self[state] } define_method(state) { self[state] }
...@@ -53,7 +60,53 @@ module Gitlab ...@@ -53,7 +60,53 @@ module Gitlab
end end
def initialize_cache def initialize_cache
Hash.new { |hash, finder| hash[finder] = finder.count_by_state } Hash.new { |hash, finder| hash[finder] = perform_count(finder) }
end
def perform_count(finder)
return finder.count_by_state unless fast_fail?
fast_count_by_state_attempt!
# Determining counts when referring to issuable titles or descriptions can
# be very expensive, and involve the database reading gigabytes of data
# for a relatively minor piece of functionality. This may slow index pages
# by seconds in the best case, or lead to a statement timeout in the worst
# case.
#
# In time, we may be able to use elasticsearch or postgresql tsv columns
# to perform the calculation more efficiently. Until then, use a shorter
# timeout and return -1 as a sentinel value if it is triggered
begin
ApplicationRecord.with_fast_statement_timeout do
finder.count_by_state
end
rescue ActiveRecord::QueryCanceled => err
fast_count_by_state_failure!
Gitlab::ErrorTracking.track_exception(
err,
params: finder.params,
current_user_id: finder.current_user&.id,
issue_url: 'https://gitlab.com/gitlab-org/gitlab/-/issues/249180'
)
Hash.new(-1)
end
end
def fast_count_by_state_attempt!
Gitlab::Metrics.counter(
:gitlab_issuable_fast_count_by_state_total,
"Count of total calls to IssuableFinder#count_by_state with fast failure"
).increment
end
def fast_count_by_state_failure!
Gitlab::Metrics.counter(
:gitlab_issuable_fast_count_by_state_failures_total,
"Count of failed calls to IssuableFinder#count_by_state with fast failure"
).increment
end end
end end
end end
...@@ -7448,6 +7448,9 @@ msgstr "" ...@@ -7448,6 +7448,9 @@ msgstr ""
msgid "Could not upload your designs as one or more files uploaded are not supported." msgid "Could not upload your designs as one or more files uploaded are not supported."
msgstr "" msgstr ""
msgid "Couldn't calculate number of %{issuables}."
msgstr ""
msgid "Country" msgid "Country"
msgstr "" msgstr ""
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe IssuableCollections do RSpec.describe IssuableCollections do
using RSpec::Parameterized::TableSyntax
let(:user) { create(:user) } let(:user) { create(:user) }
let(:controller) do let(:controller) do
...@@ -25,16 +27,38 @@ RSpec.describe IssuableCollections do ...@@ -25,16 +27,38 @@ RSpec.describe IssuableCollections do
end end
describe '#page_count_for_relation' do describe '#page_count_for_relation' do
let(:relation) { double(:relation, limit_value: 20) }
context 'row count is known' do
let(:params) { { state: 'opened' } } let(:params) { { state: 'opened' } }
it 'returns the number of pages' do it 'returns the number of pages' do
relation = double(:relation, limit_value: 20)
pages = controller.send(:page_count_for_relation, relation, 28) pages = controller.send(:page_count_for_relation, relation, 28)
expect(pages).to eq(2) expect(pages).to eq(2)
end end
end end
context 'row_count is unknown' do
where(:page_param, :expected) do
nil | 2
1 | 2
'1' | 2
2 | 3
end
with_them do
let(:params) { { state: 'opened', page: page_param } }
it 'returns current page + 1 if the row count is unknown' do
pages = controller.send(:page_count_for_relation, relation, -1)
expect(pages).to eq(expected)
end
end
end
end
describe '#finder_options' do describe '#finder_options' do
before do before do
allow(controller).to receive(:cookies).and_return({}) allow(controller).to receive(:cookies).and_return({})
......
...@@ -48,6 +48,14 @@ RSpec.describe 'issuable list', :js do ...@@ -48,6 +48,14 @@ RSpec.describe 'issuable list', :js do
end end
end end
it 'displays a warning if counting the number of issues times out' do
allow_any_instance_of(IssuesFinder).to receive(:count_by_state).and_raise(ActiveRecord::QueryCanceled)
visit_issuable_list(:issue)
expect(page).to have_text('Open ? Closed ? All ?')
end
it "counts merge requests closing issues icons for each issue" do it "counts merge requests closing issues icons for each issue" do
visit_issuable_list(:issue) visit_issuable_list(:issue)
......
...@@ -843,6 +843,16 @@ RSpec.describe IssuesFinder do ...@@ -843,6 +843,16 @@ RSpec.describe IssuesFinder do
expect(finder.row_count).to be_zero expect(finder.row_count).to be_zero
end end
it 'returns -1 if the query times out' do
finder = described_class.new(admin)
expect_next_instance_of(described_class) do |subfinder|
expect(subfinder).to receive(:execute).and_raise(ActiveRecord::QueryCanceled)
end
expect(finder.row_count).to eq(-1)
end
end end
describe '#with_confidentiality_access_check' do describe '#with_confidentiality_access_check' do
......
...@@ -500,6 +500,16 @@ RSpec.describe MergeRequestsFinder do ...@@ -500,6 +500,16 @@ RSpec.describe MergeRequestsFinder do
expect(finder.row_count).to eq(1) expect(finder.row_count).to eq(1)
end end
it 'returns -1 if the query times out' do
finder = described_class.new(user)
expect_next_instance_of(described_class) do |subfinder|
expect(subfinder).to receive(:execute).and_raise(ActiveRecord::QueryCanceled)
end
expect(finder.row_count).to eq(-1)
end
end end
context 'external authorization' do context 'external authorization' do
......
...@@ -4,14 +4,15 @@ require 'spec_helper' ...@@ -4,14 +4,15 @@ require 'spec_helper'
RSpec.describe Gitlab::IssuablesCountForState do RSpec.describe Gitlab::IssuablesCountForState do
let(:finder) do let(:finder) do
double(:finder, count_by_state: { opened: 2, closed: 1 }) double(:finder, current_user: nil, params: {}, count_by_state: { opened: 2, closed: 1 })
end end
let(:counter) { described_class.new(finder) } let(:project) { nil }
let(:fast_fail) { nil }
let(:counter) { described_class.new(finder, project, fast_fail: fast_fail) }
describe 'project given' do describe 'project given' do
let(:project) { build(:project) } let(:project) { build(:project) }
let(:counter) { described_class.new(finder, project) }
it 'provides the project' do it 'provides the project' do
expect(counter.project).to eq(project) expect(counter.project).to eq(project)
...@@ -50,5 +51,19 @@ RSpec.describe Gitlab::IssuablesCountForState do ...@@ -50,5 +51,19 @@ RSpec.describe Gitlab::IssuablesCountForState do
it 'returns 0 when using an invalid state name as a String' do it 'returns 0 when using an invalid state name as a String' do
expect(counter['kittens']).to be_zero expect(counter['kittens']).to be_zero
end end
context 'fast_fail enabled' do
let(:fast_fail) { true }
it 'returns the expected value' do
expect(counter[:closed]).to eq(1)
end
it 'returns -1 when the database times out' do
expect(finder).to receive(:count_by_state).and_raise(ActiveRecord::QueryCanceled)
expect(counter[:closed]).to eq(-1)
end
end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment