Commit d4b10599 authored by Rémy Coutable

Improve and simplify the distribution of static analysis tasks

Signed-off-by: Rémy Coutable <remy@rymai.me>
parent 0bdd20eb
......@@ -14,38 +14,49 @@ class StaticAnalysis
"Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`"
].freeze
# One static-analysis command paired with a duration figure for it.
#
# command  - Array of Strings: the executable followed by its arguments.
# duration - Number compared against the measured run time in log output
#            (reported there as the expected number of seconds).
Task = Struct.new(:command, :duration) do
  # Returns the command as a single String, words separated by one space.
  def cmd
    command.join(' ')
  end
end
# The set of tasks handed to one node.
#
# index          - identifier of the node (callers in this file pass 1-based values).
# tasks          - Array of objects responding to #duration.
# total_duration - kept as a struct member so three-argument construction
#                  works, but the reader below replaces the generated one,
#                  so the stored value is never what #total_duration returns.
NodeAssignment = Struct.new(:index, :tasks, :total_duration) do
  # Returns the sum of the durations of the tasks currently assigned to this
  # node. An empty task list sums to 0.
  def total_duration
    tasks.sum { |task| task.duration }
  end
end
# `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations
# (e.g. gitlab-org/gitlab-foss) since each of them tests against a single
# file that is generated by an EE installation, which can
# contain values that a FOSS installation won't find. To work
# around this we only enable these tasks on EE installations.
TASKS_WITH_DURATIONS_SECONDS = {
%w[bin/rake lint:haml] => 800,
TASKS_WITH_DURATIONS_SECONDS = [
Task.new(%w[bin/rake lint:haml], 562),
# We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used,
# the cache would prevent these files from being created.
%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false] => 600,
(Gitlab.ee? ? %w[bin/rake gettext:updated_check] : nil) => 360,
%w[yarn run lint:eslint:all] => 312,
%w[bundle exec rubocop --parallel] => 300,
%w[yarn run lint:prettier] => 162,
%w[bin/rake gettext:lint] => 65,
%w[bundle exec license_finder] => 61,
%w[bin/rake lint:static_verification] => 45,
%w[bin/rake config_lint] => 26,
%w[bin/rake gitlab:sidekiq:all_queues_yml:check] => 15,
(Gitlab.ee? ? %w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check] : nil) => 11,
%w[yarn run internal:stylelint] => 8,
%w[scripts/lint-conflicts.sh] => 1,
%w[yarn run block-dependencies] => 1,
%w[scripts/lint-rugged] => 1,
%w[scripts/gemfile_lock_changed.sh] => 1,
%w[scripts/frontend/check_no_partial_karma_jest.sh] => 1
}.reject { |k| k.nil? }.freeze
StaticAnalysisTasks = Struct.new(:tasks, :duration)
Task.new(%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false], 800),
(Gitlab.ee? ? Task.new(%w[bin/rake gettext:updated_check], 360) : nil),
Task.new(%w[yarn run lint:eslint:all], 312),
Task.new(%w[bundle exec rubocop --parallel], 60),
Task.new(%w[yarn run lint:prettier], 160),
Task.new(%w[bin/rake gettext:lint], 85),
Task.new(%w[bundle exec license_finder], 20),
Task.new(%w[bin/rake lint:static_verification], 35),
Task.new(%w[bin/rake config_lint], 10),
Task.new(%w[bin/rake gitlab:sidekiq:all_queues_yml:check], 15),
(Gitlab.ee? ? Task.new(%w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check], 11) : nil),
Task.new(%w[yarn run internal:stylelint], 8),
Task.new(%w[scripts/lint-conflicts.sh], 1),
Task.new(%w[yarn run block-dependencies], 1),
Task.new(%w[scripts/lint-rugged], 1),
Task.new(%w[scripts/gemfile_lock_changed.sh], 1),
Task.new(%w[scripts/frontend/check_no_partial_karma_jest.sh], 1)
].reject { |t| t.nil? }.freeze
def run_tasks!(options = {})
node_tasks = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i, debug: options[:debug])[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]
node_assignment = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i)[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]
if options[:dry_run]
puts "Dry-run mode!"
......@@ -53,19 +64,21 @@ class StaticAnalysis
end
static_analysis = Gitlab::Popen::Runner.new
static_analysis.run(node_tasks.tasks) do |cmd, &run|
start_time = Time.now
static_analysis.run(node_assignment.tasks.map(&:command)) do |command, &run|
task = node_assignment.tasks.find { |task| task.command == command }
puts
puts "$ #{cmd.join(' ')}"
puts "$ #{task.cmd}"
result = run.call
puts "==> Finished in #{result.duration} seconds"
puts "==> Finished in #{result.duration} seconds (expected #{task.duration} seconds)"
puts
end
puts
puts '==================================================='
puts "Node finished running all tasks in #{Time.now - start_time} seconds (expected #{node_assignment.total_duration})"
puts
puts
......@@ -114,49 +127,57 @@ class StaticAnalysis
.count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) }
end
# Splits TASKS_WITH_DURATIONS_SECONDS across `node_total` nodes and prints the
# resulting plan to stdout.
#
# Tasks are handled from longest to shortest. A first pass calls
# distribute_tasks with the ideal per-node time (total duration divided by
# the node count) as the budget. Whatever is left afterwards goes through a
# second, unbudgeted pass that receives the nodes sorted by ascending total
# duration.
#
# node_total - Integer number of nodes to spread the tasks over.
# debug      - accepted only so call sites that still pass the keyword keep
#              working; it has no effect.
#
# Returns an Array of NodeAssignment, one per node, with indexes starting at 1.
# Raises RuntimeError if any task ends up on no node (e.g. node_total is 0).
def tasks_to_run(node_total, debug: false)
  total_time = TASKS_WITH_DURATIONS_SECONDS.sum(&:duration).to_f
  ideal_time_per_job = total_time / node_total
  tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |task| -task.duration }
  nodes = Array.new(node_total) { |i| NodeAssignment.new(i + 1, [], 0) }

  puts "Total expected time: #{total_time}; ideal time per job: #{ideal_time_per_job}.\n\n"
  puts "Tasks to distribute:"
  tasks_by_duration_desc.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }

  # Distribute tasks optimally first
  puts "\nAssigning tasks optimally."
  distribute_tasks(tasks_by_duration_desc, nodes, ideal_time_per_job: ideal_time_per_job)

  # Distribute the tasks that did not fit within the ideal time, offering
  # the nodes in ascending order of their current total duration.
  leftover_tasks = tasks_by_duration_desc - nodes.flat_map(&:tasks)

  if leftover_tasks.any?
    puts "\n\nAssigning remaining tasks: #{leftover_tasks.map(&:cmd)}"
    distribute_tasks(leftover_tasks, nodes.sort_by(&:total_duration))
  end

  nodes.each do |node|
    puts "\nExpected duration for node #{node.index}: #{node.total_duration} seconds"
    node.tasks.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
  end

  # Assignment does not remove tasks from the sorted list, so compare against
  # what the nodes actually hold instead of testing the list for emptiness.
  unassigned = tasks_by_duration_desc - nodes.flat_map(&:tasks)
  raise "There are unassigned tasks: #{unassigned.map(&:cmd)}" unless unassigned.empty?

  nodes
end
tasks_per_node.each_with_index do |node, i|
puts "\nExpected duration for node #{i + 1}: #{node.duration}"
node.tasks.each { |task| puts "- #{task.join(' ')}" }
# Hands each task to at most one node through assign_task_to_node.
#
# tasks              - Array of tasks responding to #duration, visited in order.
# nodes              - Array of nodes responding to #total_duration.
# ideal_time_per_job - optional time budget per node.
#
# With a budget, a task goes to the first node (in the order given) whose
# total duration plus the task's duration stays within the budget; a task
# that fits on no node is left unassigned for the caller to deal with.
#
# Without a budget, a task goes to the node whose total duration is smallest
# at that moment (the first such node on ties), so consecutive tasks spread
# over the nodes instead of all landing on the first one.
#
# Returns the `tasks` Array.
def distribute_tasks(tasks, nodes, ideal_time_per_job: nil)
  tasks.each do |task|
    node =
      if ideal_time_per_job
        nodes.find { |candidate| task.duration + candidate.total_duration <= ideal_time_per_job }
      else
        nodes.min_by(&:total_duration)
      end

    # node is nil when nothing fits the budget or when there are no nodes.
    assign_task_to_node(tasks, node, task) if node
  end
end
def assign_task_to_node(remaining_tasks, node, task_name, duration)
node.tasks << task_name
node.duration += duration
remaining_tasks.delete(task_name)
# Appends `task` to `node.tasks` and logs the assignment to stdout.
#
# remaining_tasks - not used by this method; kept so existing three-argument
#                   calls keep working.
# node            - object exposing #tasks, #index and #total_duration.
# task            - object exposing #cmd and #duration.
#
# The log line prints the task through #cmd, matching the form used by the
# other task listings in this file rather than the inspected command Array.
#
# Returns nil (the result of puts).
def assign_task_to_node(remaining_tasks, node, task)
  node.tasks << task
  puts "Assigning #{task.cmd} (#{task.duration}s) to node ##{node.index}. Node total duration: #{node.total_duration}s."
end
end
......@@ -167,9 +188,5 @@ if $0 == __FILE__
options[:dry_run] = true
end
if ARGV.include?('--debug')
options[:debug] = true
end
StaticAnalysis.new.run_tasks!(options)
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment