Commit 22ce4dff authored by Erick Bajao's avatar Erick Bajao

Track test failures on pipeline completion

We moved this to pipeline completion so that we have the
total count of failures across all builds. We need this count
to check if the limit is reached.
parent 5559a68e
......@@ -269,6 +269,12 @@ module Ci
end
end
after_transition any => ::Ci::Pipeline.completed_statuses do |pipeline|
pipeline.run_after_commit do
::Ci::TestFailureHistoryService.new(pipeline).async.perform_if_needed # rubocop: disable CodeReuse/ServiceClass
end
end
after_transition any => [:success, :failed] do |pipeline|
ref_status = pipeline.ci_ref&.update_status_by!(pipeline)
......@@ -938,10 +944,18 @@ module Ci
builds.latest.with_reports(reports_scope)
end
def latest_test_report_builds
latest_report_builds(Ci::JobArtifact.test_reports).preload(:project)
end
def builds_with_coverage
builds.latest.with_coverage
end
def builds_with_failed_tests(limit: nil)
latest_test_report_builds.failed.limit(limit)
end
def has_reports?(reports_scope)
complete? && latest_report_builds(reports_scope).exists?
end
......@@ -962,7 +976,7 @@ module Ci
def test_reports
Gitlab::Ci::Reports::TestReports.new.tap do |test_reports|
latest_report_builds(Ci::JobArtifact.test_reports).preload(:project).find_each do |build|
latest_test_report_builds.find_each do |build|
build.collect_test_reports!(test_reports)
end
end
......
# frozen_string_literal: true
module Ci
class TestCasesService
MAX_TRACKABLE_FAILURES = 200
def execute(build)
return unless Feature.enabled?(:test_failure_history, build.project)
return unless build.has_test_reports?
return unless build.project.default_branch_or_master == build.ref
test_suite = generate_test_suite_report(build)
track_failures(build, test_suite)
end
private
def generate_test_suite_report(build)
build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
end
def track_failures(build, test_suite)
return if test_suite.failed_count > MAX_TRACKABLE_FAILURES
test_suite.failed.keys.each_slice(100) do |keys|
Ci::TestCase.transaction do
test_cases = Ci::TestCase.find_or_create_by_batch(build.project, keys)
Ci::TestCaseFailure.insert_all(test_case_failures(test_cases, build))
end
end
end
def test_case_failures(test_cases, build)
test_cases.map do |test_case|
{
test_case_id: test_case.id,
build_id: build.id,
failed_at: build.finished_at
}
end
end
end
end
# frozen_string_literal: true
module Ci
class TestFailureHistoryService
class Async
attr_reader :service
def initialize(service)
@service = service
end
def perform_if_needed
TestFailureHistoryWorker.perform_async(service.pipeline.id) if service.should_track_failures?
end
end
MAX_TRACKABLE_FAILURES = 200
attr_reader :pipeline
delegate :project, to: :pipeline
def initialize(pipeline)
@pipeline = pipeline
end
def execute
return unless should_track_failures?
track_failures
end
def should_track_failures?
return false unless Feature.enabled?(:test_failure_history, project)
return false unless project.default_branch_or_master == pipeline.ref
# We fetch for up to MAX_TRACKABLE_FAILURES + 1 builds. So if ever we get
# 201 total number of builds with the assumption that each job has at least
# 1 failed test case, then we have at least 201 failed test cases which exceeds
# the MAX_TRACKABLE_FAILURES of 200. If this is the case, let's early exit so we
# don't have to parse each JUnit report of each of the 201 builds.
failed_builds.length <= MAX_TRACKABLE_FAILURES
end
def async
Async.new(self)
end
private
def failed_builds
@failed_builds ||= pipeline.builds_with_failed_tests(limit: MAX_TRACKABLE_FAILURES + 1)
end
def track_failures
failed_test_cases = gather_failed_test_cases(failed_builds)
return if failed_test_cases.size > MAX_TRACKABLE_FAILURES
failed_test_cases.keys.each_slice(100) do |key_hashes|
Ci::TestCase.transaction do
ci_test_cases = Ci::TestCase.find_or_create_by_batch(project, key_hashes)
failures = test_case_failures(ci_test_cases, failed_test_cases)
Ci::TestCaseFailure.insert_all(failures)
end
end
end
def gather_failed_test_cases(failed_builds)
failed_builds.each_with_object({}) do |build, failed_test_cases|
test_suite = generate_test_suite!(build)
test_suite.failed.keys.each do |key|
failed_test_cases[key] = build
end
end
end
def generate_test_suite!(build)
# Returns an instance of Gitlab::Ci::Reports::TestSuite
build.collect_test_reports!(Gitlab::Ci::Reports::TestReports.new)
end
def test_case_failures(ci_test_cases, failed_test_cases)
ci_test_cases.map do |test_case|
build = failed_test_cases[test_case.key_hash]
{
test_case_id: test_case.id,
build_id: build.id,
failed_at: build.finished_at
}
end
end
end
end
......@@ -1101,6 +1101,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: pipeline_background:ci_test_failure_history
:feature_category: :continuous_integration
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent: true
:tags: []
- :name: pipeline_cache:expire_job_cache
:feature_category: :continuous_integration
:has_external_dependencies:
......
......@@ -33,11 +33,6 @@ class BuildFinishedWorker # rubocop:disable Scalability/IdempotentWorker
BuildCoverageWorker.new.perform(build.id)
Ci::BuildReportResultWorker.new.perform(build.id)
# TODO: As per https://gitlab.com/groups/gitlab-com/gl-infra/-/epics/194, it may be
# best to avoid creating more workers that we have no intention of calling async.
# Change the previous worker calls on top to also just call the service directly.
Ci::TestCasesService.new.execute(build)
# We execute these async as these are independent operations.
BuildHooksWorker.perform_async(build.id)
ExpirePipelineCacheWorker.perform_async(build.pipeline_id) if build.pipeline.cacheable?
......
# frozen_string_literal: true
module Ci
class TestFailureHistoryWorker
include ApplicationWorker
include PipelineBackgroundQueue
idempotent!
def perform(pipeline_id)
Ci::Pipeline.find_by_id(pipeline_id).try do |pipeline|
Ci::TestFailureHistoryService.new(pipeline).execute
end
end
end
end
---
title: Track test failures on pipeline completion
merge_request: 48695
author:
type: changed
......@@ -126,10 +126,10 @@ FactoryBot.define do
end
trait :with_test_reports_with_three_failures do
status { :success }
status { :failed }
after(:build) do |pipeline, _evaluator|
pipeline.builds << build(:ci_build, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project)
pipeline.builds << build(:ci_build, :failed, :test_reports_with_three_failures, pipeline: pipeline, project: pipeline.project)
end
end
......
......@@ -4030,4 +4030,70 @@ RSpec.describe Ci::Pipeline, :mailer, factory_default: :keep do
bridge
end
end
describe 'test failure history processing' do
it 'performs the service asynchronously when the pipeline is completed' do
service = double
expect(Ci::TestFailureHistoryService).to receive(:new).with(pipeline).and_return(service)
expect(service).to receive_message_chain(:async, :perform_if_needed)
pipeline.succeed!
end
end
describe '#latest_test_report_builds' do
it 'returns pipeline builds with test report artifacts' do
test_build = create(:ci_build, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :artifacts, pipeline: pipeline, project: project)
expect(pipeline.latest_test_report_builds).to contain_exactly(test_build)
end
it 'preloads project on each build to avoid N+1 queries' do
create(:ci_build, :test_reports, pipeline: pipeline, project: project)
control_count = ActiveRecord::QueryRecorder.new do
pipeline.latest_test_report_builds.map(&:project).map(&:full_path)
end
multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
create(:ci_build, :test_reports, pipeline: multi_build_pipeline, project: project)
expect { multi_build_pipeline.latest_test_report_builds.map(&:project).map(&:full_path) }
.not_to exceed_query_limit(control_count)
end
end
describe '#builds_with_failed_tests' do
it 'returns pipeline builds with test report artifacts' do
failed_build = create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :success, :test_reports, pipeline: pipeline, project: project)
expect(pipeline.builds_with_failed_tests).to contain_exactly(failed_build)
end
it 'supports limiting the number of builds to fetch' do
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
expect(pipeline.builds_with_failed_tests(limit: 1).count).to eq(1)
end
it 'preloads project on each build to avoid N+1 queries' do
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
control_count = ActiveRecord::QueryRecorder.new do
pipeline.builds_with_failed_tests.map(&:project).map(&:full_path)
end
multi_build_pipeline = create(:ci_empty_pipeline, status: :created, project: project)
create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
create(:ci_build, :failed, :test_reports, pipeline: multi_build_pipeline, project: project)
expect { multi_build_pipeline.builds_with_failed_tests.map(&:project).map(&:full_path) }
.not_to exceed_query_limit(control_count)
end
end
end
......@@ -67,7 +67,7 @@ RSpec.describe Ci::CompareTestReportsService do
# The JUnit fixture for the given build has 3 failures.
# This service will create 1 test case failure record for each.
Ci::TestCasesService.new.execute(build)
Ci::TestFailureHistoryService.new(head_pipeline).execute
end
it 'loads recent failures on limited test cases to avoid building up a huge DB query', :aggregate_failures do
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::TestCasesService, :aggregate_failures do
describe '#execute' do
subject(:execute_service) { described_class.new.execute(build) }
context 'when build has test reports' do
let(:build) { create(:ci_build, :success, :test_reports) } # The test report has 2 test case failures
it 'creates test case failures records' do
execute_service
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
context 'when feature flag for test failure history is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when build is not for the default branch' do
before do
build.update_column(:ref, 'new-feature')
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when test failure data have already been persisted with the same exact attributes' do
before do
execute_service
end
it 'does not fail but does not persist new data' do
expect { described_class.new.execute(build) }.not_to raise_error
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
let(:build) { create(:ci_build, :success, :test_reports_with_duplicate_failed_test_names) } # The test report has 2 test case failures but with the same test case keys
it 'does not fail but does not persist duplicate data' do
expect { described_class.new.execute(build) }.not_to raise_error
expect(Ci::TestCase.count).to eq(1)
expect(Ci::TestCaseFailure.count).to eq(1)
end
end
context 'when number of failed test cases exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
context 'when build has no test reports' do
let(:build) { create(:ci_build, :running) }
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Ci::TestFailureHistoryService, :aggregate_failures do
describe '#execute' do
let(:project) { create(:project) }
let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project) }
subject(:execute_service) { described_class.new(pipeline).execute }
context 'when pipeline has failed builds with test reports' do
before do
# The test report has 2 test case failures
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: project)
end
it 'creates test case failures records' do
execute_service
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
context 'when feature flag for test failure history is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when pipeline is not for the default branch' do
before do
pipeline.update_column(:ref, 'new-feature')
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when test failure data have already been persisted with the same exact attributes' do
before do
execute_service
end
it 'does not fail but does not persist new data' do
expect { described_class.new(pipeline).execute }.not_to raise_error
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
context 'when number of failed test cases exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
context 'when number of failed test cases across multiple builds exceed the limit' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 2)
# This other test report has 1 unique test case failure which brings us to 3 total failures across all builds
# thus exceeding the limit of 2 for MAX_TRACKABLE_FAILURES
create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
context 'when test failure data have duplicates within the same payload (happens when the JUnit report has duplicate test case names but have different failures)' do
before do
# The test report has 2 test case failures but with the same test case keys
create(:ci_build, :failed, :test_reports_with_duplicate_failed_test_names, pipeline: pipeline, project: project)
end
it 'does not fail but does not persist duplicate data' do
expect { execute_service }.not_to raise_error
expect(Ci::TestCase.count).to eq(1)
expect(Ci::TestCaseFailure.count).to eq(1)
end
end
context 'when pipeline has no failed builds with test reports' do
before do
create(:ci_build, :test_reports, pipeline: pipeline, project: project)
create(:ci_build, :failed, pipeline: pipeline, project: project)
end
it 'does not persist data' do
execute_service
expect(Ci::TestCase.count).to eq(0)
expect(Ci::TestCaseFailure.count).to eq(0)
end
end
end
describe '#should_track_failures?' do
let(:project) { create(:project, :repository) }
let(:pipeline) { create(:ci_empty_pipeline, status: :created, project: project, ref: project.default_branch) }
subject { described_class.new(pipeline).should_track_failures? }
before do
create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
create(:ci_build, :test_reports, :failed, pipeline: pipeline, project: project)
end
context 'when feature flag is enabled and pipeline ref is the default branch' do
it { is_expected.to eq(true) }
end
context 'when feature flag is disabled' do
before do
stub_feature_flags(test_failure_history: false)
end
it { is_expected.to eq(false) }
end
context 'when pipeline is not equal to the project default branch' do
before do
pipeline.update_column(:ref, 'some-other-branch')
end
it { is_expected.to eq(false) }
end
context 'when total number of builds with failed tests exceeds the max number of trackable failures' do
before do
stub_const("#{described_class.name}::MAX_TRACKABLE_FAILURES", 1)
end
it { is_expected.to eq(false) }
end
end
describe '#async' do
let(:pipeline) { double(id: 1) }
let(:service) { described_class.new(pipeline) }
context 'when service should track failures' do
before do
allow(service).to receive(:should_track_failures?).and_return(true)
end
it 'enqueues the worker when #perform_if_needed is called' do
expect(Ci::TestFailureHistoryWorker).to receive(:perform_async).with(pipeline.id)
service.async.perform_if_needed
end
end
context 'when service should not track failures' do
before do
allow(service).to receive(:should_track_failures?).and_return(false)
end
it 'does not enqueue the worker when #perform_if_needed is called' do
expect(Ci::TestFailureHistoryWorker).not_to receive(:perform_async)
service.async.perform_if_needed
end
end
end
end
......@@ -26,9 +26,6 @@ RSpec.describe BuildFinishedWorker do
expect_next_instance_of(Ci::BuildReportResultWorker) do |instance|
expect(instance).to receive(:perform)
end
expect_next_instance_of(Ci::TestCasesService) do |instance|
expect(instance).to receive(:execute)
end
expect(BuildHooksWorker).to receive(:perform_async)
expect(ExpirePipelineCacheWorker).to receive(:perform_async)
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe ::Ci::TestFailureHistoryWorker do
describe '#perform' do
subject(:perform) { described_class.new.perform(pipeline_id) }
context 'when pipeline exists' do
let(:pipeline) { create(:ci_pipeline) }
let(:pipeline_id) { pipeline.id }
it 'executes test failure history service' do
expect_next_instance_of(::Ci::TestFailureHistoryService) do |service|
expect(service).to receive(:execute)
end
perform
end
end
context 'when pipeline does not exist' do
let(:pipeline_id) { non_existing_record_id }
it 'does not execute test failure history service' do
expect(Ci::TestFailureHistoryService).not_to receive(:new)
perform
end
end
end
include_examples 'an idempotent worker' do
let(:pipeline) { create(:ci_pipeline) }
let(:job_args) { [pipeline.id] }
it 'tracks test failures' do
# The test report has 2 test case failures
create(:ci_build, :failed, :test_reports, pipeline: pipeline, project: pipeline.project)
subject
expect(Ci::TestCase.count).to eq(2)
expect(Ci::TestCaseFailure.count).to eq(2)
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment