Commit e638ea73 authored by Rémy Coutable, committed by Lin Jen-Shin

Download Knapsack and flaky specs report from latest master artifacts

This dogfoods artifacts by downloading the Knapsack report, flaky specs
report, and Crystalball report from the latest successful master
pipeline triggered by @gitlab-bot (this should be a scheduled pipeline
for which we're sure update-tests-metadata was run).

This also removes the need to upload these reports to S3, since they're
already uploaded as artifacts.
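A sketch of the new retrieval flow (mirroring retrieve_tests_metadata in the
diff below; the project path, query flags, and job name are taken from this
commit, and the API token falls back to the scripts' environment default):

job_id=$(scripts/api/get_job_id --project "gitlab-org/gitlab" -q "status=success" -q "ref=master" -q "username=gitlab-bot" -Q "scope=success" --job-name "update-tests-metadata")
scripts/api/download_job_artifact --project "gitlab-org/gitlab" --job-id "${job_id}" --artifact-path "knapsack/report-master.json"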
Signed-off-by: Rémy Coutable <remy@rymai.me>
parent f5bd4c28
......@@ -38,7 +38,7 @@ review-build-cng:
- BUILD_TRIGGER_TOKEN=$REVIEW_APPS_BUILD_TRIGGER_TOKEN ./scripts/trigger-build cng
# When the job is manual, review-deploy is also manual and we don't want people
# to have to manually start the jobs in sequence, so we do it for them.
- '[ -z $CI_JOB_MANUAL ] || play_job "review-deploy"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-deploy"'
.review-workflow-base:
extends:
......@@ -78,8 +78,8 @@ review-deploy:
- disable_sign_ups || (delete_release && exit 1)
# When the job is manual, review-qa-smoke is also manual and we don't want people
# to have to manually start the jobs in sequence, so we do it for them.
- '[ -z $CI_JOB_MANUAL ] || play_job "review-qa-smoke"'
- '[ -z $CI_JOB_MANUAL ] || play_job "review-performance"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-qa-smoke"'
- '[ -z $CI_JOB_MANUAL ] || scripts/api/play_job --job-name "review-performance"'
after_script:
# Run seed-dast-test-data.sh only when DAST_RUN is set to true. This is to populate the review app with data for the DAST scan.
# Set DAST_RUN to true when jobs are manually scheduled.
......
.tests-metadata-state:
variables:
TESTS_METADATA_S3_BUCKET: "gitlab-ce-cache"
image: ruby:2.7
before_script:
- source scripts/utils.sh
artifacts:
......@@ -17,7 +16,8 @@ retrieve-tests-metadata:
- .test-metadata:rules:retrieve-tests-metadata
stage: prepare
script:
- source scripts/rspec_helpers.sh
- install_gitlab_gem
- source ./scripts/rspec_helpers.sh
- retrieve_tests_metadata
update-tests-metadata:
......
......@@ -12,8 +12,8 @@ Our current CI parallelization setup is as follows:
1. The `retrieve-tests-metadata` job in the `prepare` stage ensures we have a
`knapsack/report-master.json` file:
- The `knapsack/report-master.json` file is fetched from S3; if it's not present,
we initialize the file with `{}`.
- The `knapsack/report-master.json` file is fetched from the latest `master` pipeline which runs `update-tests-metadata`
(for now, that's the 2-hourly scheduled `master` pipeline); if it's not present, we initialize the file with `{}`.
1. Each `[rspec|rspec-ee] [unit|integration|system|geo] n m` job is run with
`knapsack rspec` and should have an evenly distributed share of tests:
- It works because the jobs have access to the `knapsack/report-master.json`
......@@ -25,7 +25,7 @@ Our current CI parallelization setup is as follows:
1. The `update-tests-metadata` job (which only runs on scheduled pipelines for
[the canonical project](https://gitlab.com/gitlab-org/gitlab)) takes all the
`knapsack/rspec*_pg_*.json` files and merges them into a single
`knapsack/report-master.json` file that is then uploaded to S3.
`knapsack/report-master.json` file that is saved as an artifact.
After that, the next pipeline will use the up-to-date `knapsack/report-master.json` file.
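Conceptually, a Knapsack report is a flat JSON map from spec file to run time,
so the merge is a key-wise union. A rough sketch of the result (this is jq, not
the actual `scripts/merge-reports` implementation):

jq -s 'add' knapsack/rspec*_pg_*.json > knapsack/report-master.json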
......
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
require_relative 'get_job_id'
class CancelPipeline
DEFAULT_OPTIONS = {
project: ENV['CI_PROJECT_ID'],
pipeline_id: ENV['CI_PIPELINE_ID'],
api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
}.freeze
def initialize(options)
@project = options.delete(:project)
@pipeline_id = options.delete(:pipeline_id)
Gitlab.configure do |config|
config.endpoint = 'https://gitlab.com/api/v4'
config.private_token = options.delete(:api_token)
end
end
def execute
Gitlab.cancel_pipeline(project, pipeline_id)
end
private
attr_reader :project, :pipeline_id
end
if $0 == __FILE__
options = CancelPipeline::DEFAULT_OPTIONS.dup
OptionParser.new do |opts|
opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
options[:project] = value
end
opts.on("-i", "--pipeline-id PIPELINE_ID", String, "A pipeline ID (defaults to $CI_PIPELINE_ID)") do |value|
options[:pipeline_id] = value
end
opts.on("-t", "--api-token API_TOKEN", String, "A value API token with the `read_api` scope") do |value|
options[:api_token] = value
end
opts.on("-h", "--help", "Prints this help") do
puts opts
exit
end
end.parse!
CancelPipeline.new(options).execute
end
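Usage sketch: with no flags, the script falls back to $CI_PROJECT_ID,
$CI_PIPELINE_ID, and $GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN, which is
how `fail_pipeline_early` below invokes it; the explicit form (with an
illustrative pipeline ID) would be:

scripts/api/cancel_pipeline --project gitlab-org/gitlab --pipeline-id 123456789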
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'optparse'
require 'fileutils'
require 'uri'
require 'cgi'
require 'net/http'
class ArtifactFinder
DEFAULT_OPTIONS = {
project: ENV['CI_PROJECT_ID'],
api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
}.freeze
def initialize(options)
@project = options.delete(:project)
@job_id = options.delete(:job_id)
@api_token = options.delete(:api_token)
@artifact_path = options.delete(:artifact_path)
end
def execute
url = "https://gitlab.com/api/v4/projects/#{CGI.escape(project)}/jobs/#{job_id}/artifacts"
if artifact_path
FileUtils.mkdir_p(File.dirname(artifact_path))
url += "/#{artifact_path}"
end
fetch(url)
end
private
attr_reader :project, :job_id, :api_token, :artifact_path
def fetch(uri_str, limit = 10)
raise 'Too many HTTP redirects' if limit == 0
uri = URI(uri_str)
request = Net::HTTP::Get.new(uri)
request['Private-Token'] = api_token
Net::HTTP.start(uri.host, uri.port, use_ssl: true) do |http|
http.request(request) do |response|
case response
when Net::HTTPSuccess then
File.open(artifact_path || 'artifacts.zip', 'wb') do |file| # binary mode: artifacts are often zip archives
response.read_body(&file.method(:write))
end
when Net::HTTPRedirection then
location = response['location']
warn "Redirected (#{limit - 1} redirections remaining)."
fetch(location, limit - 1)
else
raise "Unexpected response: #{response.value}"
end
end
end
end
end
if $0 == __FILE__
options = ArtifactFinder::DEFAULT_OPTIONS.dup
OptionParser.new do |opts|
opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
options[:project] = value
end
opts.on("-j", "--job-id JOB_ID", String, "A job ID") do |value|
options[:job_id] = value
end
opts.on("-a", "--artifact-path ARTIFACT_PATH", String, "A valid artifact path") do |value|
options[:artifact_path] = value
end
opts.on("-t", "--api-token API_TOKEN", String, "A value API token with the `read_api` scope") do |value|
options[:api_token] = value
end
opts.on("-h", "--help", "Prints this help") do
puts opts
exit
end
end.parse!
ArtifactFinder.new(options).execute
end
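Usage sketch (illustrative job ID; project and token fall back to the same
environment defaults as above):

# Fetch a single file out of the job's artifacts:
scripts/api/download_job_artifact --project gitlab-org/gitlab --job-id 123456789 --artifact-path knapsack/report-master.json
# Without --artifact-path, the full archive is written to artifacts.zip:
scripts/api/download_job_artifact --project gitlab-org/gitlab --job-id 123456789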
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
class JobFinder
DEFAULT_OPTIONS = {
project: ENV['CI_PROJECT_ID'],
pipeline_id: ENV['CI_PIPELINE_ID'],
pipeline_query: {},
job_query: {},
api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
}.freeze
def initialize(options)
@project = options.delete(:project)
@pipeline_query = options.delete(:pipeline_query)
@job_query = options.delete(:job_query)
@pipeline_id = options.delete(:pipeline_id)
@job_name = options.delete(:job_name)
Gitlab.configure do |config|
config.endpoint = 'https://gitlab.com/api/v4'
config.private_token = options.delete(:api_token)
end
end
def execute
find_job_with_filtered_pipelines || find_job_in_pipeline
end
private
attr_reader :project, :pipeline_query, :job_query, :pipeline_id, :job_name
def find_job_with_filtered_pipelines
return if pipeline_query.empty?
Gitlab.pipelines(project, pipeline_query_params).auto_paginate do |pipeline|
Gitlab.pipeline_jobs(project, pipeline.id, job_query_params).auto_paginate do |job|
return job if job.name == job_name # rubocop:disable Cop/AvoidReturnFromBlocks
end
end
raise 'Job not found!'
end
def find_job_in_pipeline
return unless pipeline_id
Gitlab.pipeline_jobs(project, pipeline_id, job_query_params).auto_paginate do |job|
return job if job.name == job_name # rubocop:disable Cop/AvoidReturnFromBlocks
end
raise 'Job not found!'
end
def pipeline_query_params
@pipeline_query_params ||= { per_page: 100, **pipeline_query }
end
def job_query_params
@job_query_params ||= { per_page: 100, **job_query }
end
end
if $0 == __FILE__
options = JobFinder::DEFAULT_OPTIONS.dup
OptionParser.new do |opts|
opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
options[:project] = value
end
opts.on("-i", "--pipeline-id pipeline_id", String, "A pipeline ID (defaults to $CI_PIPELINE_ID)") do |value|
options[:pipeline_id] = value
end
opts.on("-q", "--pipeline-query pipeline_query", String, "Query to pass to the Pipeline API request") do |value|
options[:pipeline_query].merge!(Hash[*value.split('=')])
end
opts.on("-Q", "--job-query job_query", String, "Query to pass to the Job API request") do |value|
options[:job_query].merge!(Hash[*value.split('=')])
end
opts.on("-j", "--job-name job_name", String, "A job name that needs to exist in the found pipeline") do |value|
options[:job_name] = value
end
opts.on("-t", "--api-token API_TOKEN", String, "A value API token with the `read_api` scope") do |value|
options[:api_token] = value
end
opts.on("-h", "--help", "Prints this help") do
puts opts
exit
end
end.parse!
job = JobFinder.new(options).execute
return if job.nil?
puts job.id
end
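Usage sketch, mirroring how `retrieve_tests_metadata` calls it below: filter
pipelines with repeated `-q key=value` pairs, jobs with `-Q`, and print the
matching job's ID:

scripts/api/get_job_id --project "gitlab-org/gitlab" -q "status=success" -q "ref=master" -q "username=gitlab-bot" -Q "scope=success" --job-name "update-tests-metadata"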
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'gitlab'
require 'optparse'
require_relative 'get_job_id'
class PlayJob
DEFAULT_OPTIONS = {
project: ENV['CI_PROJECT_ID'],
pipeline_id: ENV['CI_PIPELINE_ID'],
api_token: ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
}.freeze
def initialize(options)
@project = options.delete(:project)
@options = options
Gitlab.configure do |config|
config.endpoint = 'https://gitlab.com/api/v4'
config.private_token = options.fetch(:api_token)
end
end
def execute
# JobFinder expects a single options hash, so rebuild one on top of its
# defaults and restrict the search to manual jobs via job_query.
job = JobFinder.new(JobFinder::DEFAULT_OPTIONS.merge(options.slice(:api_token, :pipeline_id, :job_name), project: project, job_query: { scope: 'manual' })).execute
Gitlab.job_play(project, job.id)
end
private
attr_reader :project, :options
end
if $0 == __FILE__
options = PlayJob::DEFAULT_OPTIONS.dup
OptionParser.new do |opts|
opts.on("-p", "--project PROJECT", String, "Project where to find the job (defaults to $CI_PROJECT_ID)") do |value|
options[:project] = value
end
opts.on("-j", "--job-name JOB_NAME", String, "A job name that needs to exist in the found pipeline") do |value|
options[:job_name] = value
end
opts.on("-t", "--api-token API_TOKEN", String, "A value API token with the `read_api` scope") do |value|
options[:api_token] = value
end
opts.on("-h", "--help", "Prints this help") do
puts opts
exit
end
end.parse!
PlayJob.new(options).execute
end
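Usage sketch, as wired into the review-apps jobs above (project, pipeline ID,
and token come from the CI environment defaults):

scripts/api/play_job --job-name "review-deploy"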
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'gitlab'
require 'optparse'
#
# Configure credentials to be used with gitlab gem
#
Gitlab.configure do |config|
config.endpoint = 'https://gitlab.com/api/v4'
config.private_token = ENV['GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN']
end
options = {}
OptionParser.new do |opts|
opts.on("-s", "--scope=SCOPE", "Find job with matching scope") do |scope|
options[:scope] = scope
end
end.parse!
class PipelineJobFinder
def initialize(project_id, pipeline_id, job_name, options)
@project_id = project_id
@pipeline_id = pipeline_id
@job_name = job_name
@options = options
end
def execute
Gitlab.pipeline_jobs(@project_id, @pipeline_id, @options).auto_paginate do |job|
break job if job.name == @job_name
end
end
end
project_id, pipeline_id, job_name = ARGV
job = PipelineJobFinder.new(project_id, pipeline_id, job_name, options).execute
return if job.nil?
puts job.id
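For comparison, the legacy script above takes positional arguments after the
optional `--scope` flag (the invocation path is hypothetical, since the diff
doesn't show the file name):

./scripts/get_job_id --scope=manual "${CI_PROJECT_ID}" "${CI_PIPELINE_ID}" "review-deploy"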
#!/usr/bin/env bash
function retrieve_tests_metadata() {
mkdir -p knapsack/ rspec_flaky/ rspec_profiling/
mkdir -p crystalball/ knapsack/ rspec_flaky/ rspec_profiling/
local project_path="gitlab-org/gitlab"
local test_metadata_job_id
# Ruby
test_metadata_job_id=$(scripts/api/get_job_id --project "${project_path}" -q "status=success" -q "ref=master" -q "username=gitlab-bot" -Q "scope=success" --job-name "update-tests-metadata")
if [[ ! -f "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
fi
if [[ ! -f "${FLAKY_RSPEC_SUITE_REPORT_PATH}" ]]; then
wget -O "${FLAKY_RSPEC_SUITE_REPORT_PATH}" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${FLAKY_RSPEC_SUITE_REPORT_PATH}" || echo "{}" > "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
fi
# FIXME: We will need to find a pipeline where the $RSPEC_PACKED_TESTS_MAPPING_PATH.gz actually exists (Crystalball only runs every two hours, but `update-tests-metadata` runs for all `master` pipelines...).
# if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
# (scripts/api/download_job_artifact --project "${project_path}" --job-id "${test_metadata_job_id}" --artifact-path "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
# fi
#
# scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
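# retrieve_tests_metadata assumes the report paths are exported by the CI
# configuration; a minimal sketch of the expected environment (the
# FLAKY_RSPEC_SUITE_REPORT_PATH value is illustrative, not taken from this diff):
#   export KNAPSACK_RSPEC_SUITE_REPORT_PATH="knapsack/report-master.json"
#   export FLAKY_RSPEC_SUITE_REPORT_PATH="rspec_flaky/report-suite.json"
#   retrieve_tests_metadata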
function update_tests_metadata() {
echo "{}" > "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
scripts/merge-reports "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}" knapsack/rspec*.json
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${KNAPSACK_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uplaoding report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f knapsack/rspec*.json
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
export FLAKY_RSPEC_GENERATE_REPORT="true"
scripts/merge-reports "${FLAKY_RSPEC_SUITE_REPORT_PATH}" rspec_flaky/all_*.json
scripts/flaky_examples/prune-old-flaky-examples "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
if [[ -n ${TESTS_METADATA_S3_BUCKET} ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${FLAKY_RSPEC_SUITE_REPORT_PATH}"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f rspec_flaky/all_*.json rspec_flaky/new_*.json
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
......@@ -48,16 +43,6 @@ function update_tests_metadata() {
fi
}
function retrieve_tests_mapping() {
mkdir -p crystalball/
if [[ ! -f "${RSPEC_PACKED_TESTS_MAPPING_PATH}" ]]; then
(wget -O "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" "http://${TESTS_METADATA_S3_BUCKET}.s3.amazonaws.com/${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz" && gzip -d "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz") || echo "{}" > "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
fi
scripts/unpack-test-mapping "${RSPEC_PACKED_TESTS_MAPPING_PATH}" "${RSPEC_TESTS_MAPPING_PATH}"
}
function update_tests_mapping() {
if ! crystalball_rspec_data_exists; then
echo "No crystalball rspec data found."
......@@ -65,20 +50,9 @@ function update_tests_mapping() {
fi
scripts/generate-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" crystalball/rspec*.yml
scripts/pack-test-mapping "${RSPEC_TESTS_MAPPING_PATH}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
gzip "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
if [[ -n "${TESTS_METADATA_S3_BUCKET}" ]]; then
if [[ "$CI_PIPELINE_SOURCE" == "schedule" ]]; then
scripts/sync-reports put "${TESTS_METADATA_S3_BUCKET}" "${RSPEC_PACKED_TESTS_MAPPING_PATH}.gz"
else
echo "Not uploading report to S3 as the pipeline is not a scheduled one."
fi
fi
rm -f crystalball/rspec*.yml
rm -f crystalball/rspec*.yml "${RSPEC_PACKED_TESTS_MAPPING_PATH}"
}
function crystalball_rspec_data_exists() {
......
......@@ -87,65 +87,14 @@ function echosuccess() {
fi
}
function get_job_id() {
local job_name="${1}"
local query_string="${2:+&${2}}"
local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
if [ -z "${api_token}" ]; then
echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
return
fi
local max_page=3
local page=1
while true; do
local url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/jobs?per_page=100&page=${page}${query_string}"
echoinfo "GET ${url}"
local job_id
job_id=$(curl --silent --show-error --header "PRIVATE-TOKEN: ${api_token}" "${url}" | jq "map(select(.name == \"${job_name}\")) | map(.id) | last")
[[ "${job_id}" == "null" && "${page}" -lt "$max_page" ]] || break
let "page++"
done
if [[ "${job_id}" == "null" ]]; then # jq prints "null" for non-existent attribute
echoerr "The '${job_name}' job ID couldn't be retrieved!"
else
echoinfo "The '${job_name}' job ID is ${job_id}"
echo "${job_id}"
fi
}
function play_job() {
local job_name="${1}"
local job_id
job_id=$(get_job_id "${job_name}" "scope=manual");
if [ -z "${job_id}" ]; then return; fi
local api_token="${API_TOKEN-${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}}"
if [ -z "${api_token}" ]; then
echoerr "Please provide an API token with \$API_TOKEN or \$GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN."
return
fi
local url="https://gitlab.com/api/v4/projects/${CI_PROJECT_ID}/jobs/${job_id}/play"
echoinfo "POST ${url}"
local job_url
job_url=$(curl --silent --show-error --request POST --header "PRIVATE-TOKEN: ${api_token}" "${url}" | jq ".web_url")
echoinfo "Manual job '${job_name}' started at: ${job_url}"
}
function fail_pipeline_early() {
local dont_interrupt_me_job_id
dont_interrupt_me_job_id=$(get_job_id 'dont-interrupt-me' 'scope=success')
dont_interrupt_me_job_id=$(scripts/api/get_job_id --job-query "scope=success" --job-name "dont-interrupt-me")
if [[ -n "${dont_interrupt_me_job_id}" ]]; then
echoinfo "This pipeline cannot be interrupted due to \`dont-interrupt-me\` job ${dont_interrupt_me_job_id}"
else
echoinfo "Failing pipeline early for fast feedback due to test failures in rspec fail-fast."
curl --request POST --header "PRIVATE-TOKEN: ${GITLAB_BOT_MULTI_PROJECT_PIPELINE_POLLING_TOKEN}" "https://${CI_SERVER_HOST}/api/v4/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/cancel"
scripts/api/cancel_pipeline
fi
}