Commit b434ca4f authored by Vasilii Iakliushin

Add HAML linter for documentation links

Contributes to https://gitlab.com/gitlab-org/gitlab/-/issues/208354

We want to detect broken links and dead anchors that point to
documentation pages.

Extra:
* Extract anchor generation logic into Gitlab::Utils::Markdown module
parent f35d6e43
......@@ -8,6 +8,7 @@ exclude:
- 'spec/**/*'
require:
- './haml_lint/linter/no_plain_nodes.rb'
- './haml_lint/linter/documentation_links.rb'
linters:
AltText:
......@@ -26,6 +27,12 @@ linters:
enabled: false
max_consecutive: 2
DocumentationLinks:
enabled: true
include:
- 'app/views/**/*.haml'
- 'ee/app/views/**/*.haml'
EmptyObjectReference:
enabled: true
......
# frozen_string_literal: true
require_relative '../../lib/gitlab/utils/markdown'
module HamlLint
  class Linter
    # Detects `help_page_path` helper calls whose link or anchor does not
    # resolve to an existing documentation file or to a header inside it.
    #
    # Only literal arguments are inspected: a link that is not a string
    # literal, or an anchor that is not a string/symbol literal, is skipped
    # because its value cannot be determined from the syntax tree.
    class DocumentationLinks < Linter
      include ::HamlLint::LinterRegistry
      include ::Gitlab::Utils::Markdown

      # Absolute path of the `doc` directory located two levels above this
      # file's directory.
      DOCS_DIRECTORY = File.join(File.expand_path('../..', __dir__), 'doc')

      # Node pattern matching a receiver-less `help_page_path` call anywhere
      # in the tree and capturing its arguments.
      HELP_PATH_LINK_PATTERN = <<~PATTERN
        `(send nil? :help_page_path $...)
      PATTERN

      # A Markdown ATX header line: one to six `#` characters, whitespace,
      # then the header text.
      MARKDOWN_HEADER = %r{\A\#{1,6}\s+(?<header>.+)\Z}.freeze

      def visit_script(node)
        check(node)
      end

      def visit_silent_script(node)
        check(node)
      end

      def visit_tag(node)
        check(node)
      end

      private

      # Records a lint on +node+ when it contains a `help_page_path` call
      # pointing to a missing documentation file or to a missing anchor.
      def check(node)
        match = extract_link_and_anchor(node)

        # Without a literal link there is nothing to verify, even when an
        # anchor was extracted: joining a nil link onto the docs directory
        # would raise a TypeError.
        return unless match[:link]

        path_to_file = detect_path_to_file(match[:link])

        unless File.file?(path_to_file)
          record_lint(node, "help_page_path points to the unknown location: #{path_to_file}")
          return
        end

        return if correct_anchor?(path_to_file, match[:anchor])

        record_lint(node, "anchor (#{match[:anchor]}) is missing in: #{path_to_file}")
      end

      # Returns a hash with the optional keys :link and :anchor extracted
      # from the first `help_page_path` call found in +node+. The hash is
      # empty when the node could not be parsed or contains no such call.
      def extract_link_and_anchor(node)
        ast_tree = fetch_ast_tree(node)
        return {} unless ast_tree

        link_match, attributes_match = ::RuboCop::NodePattern.new(HELP_PATH_LINK_PATTERN).match(ast_tree)

        { link: fetch_link(link_match), anchor: fetch_anchor(attributes_match) }.compact
      end

      def fetch_ast_tree(node)
        # Sometimes links are provided via data attributes in html tag
        return node.parsed_attributes.syntax_tree if node.type == :tag

        node.parsed_script.syntax_tree
      end

      # Maps a documentation link to a file path under DOCS_DIRECTORY,
      # appending the `.md` extension when the link omits it.
      def detect_path_to_file(link)
        path = File.join(DOCS_DIRECTORY, link)
        path += '.md' unless path.end_with?('.md')
        path
      end

      # Returns the link text, or nil when the first argument is absent or is
      # not a string literal.
      def fetch_link(link_match)
        return unless link_match&.str_type?

        link_match.value
      end

      # Returns the literal value of the `anchor:` option as a String, or nil
      # when the second argument is not a hash literal, has no `anchor` key,
      # or the anchor value is not a string/symbol literal.
      def fetch_anchor(attributes_match)
        return unless attributes_match&.hash_type?

        attributes_match.each_pair do |pkey, pvalue|
          next unless pkey.sym_type? && pkey.value == :anchor
          next unless pvalue.str_type? || pvalue.sym_type?

          return pvalue.value.to_s
        end

        # Be explicit: the return value of each_pair itself is not an anchor
        # and must not leak to the caller when no `anchor` key was found.
        nil
      end

      # Returns true when +anchor+ is nil or matches the anchor generated
      # from one of the Markdown headers in +path_to_file+.
      def correct_anchor?(path_to_file, anchor)
        return true unless anchor

        # File.foreach closes the file when iteration ends (including an
        # early exit from any?), unlike a block-less File.open.
        File.foreach(path_to_file).any? do |line|
          result = line.match(MARKDOWN_HEADER)
          string_to_anchor(result[:header]) == anchor if result
        end
      end
    end
  end
end
......@@ -17,7 +17,7 @@ module Banzai
# :toc - String containing Table of Contents data as a `ul` element with
# `li` child elements.
class TableOfContentsFilter < HTML::Pipeline::Filter
PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u.freeze
include Gitlab::Utils::Markdown
def call
return doc if context[:no_header_anchors]
......@@ -29,14 +29,7 @@ module Banzai
doc.css('h1, h2, h3, h4, h5, h6').each do |node|
if header_content = node.children.first
id = node
.text
.strip
.downcase
.gsub(PUNCTUATION_REGEXP, '') # remove punctuation
.tr(' ', '-') # replace spaces with dash
.squeeze('-') # replace multiple dashes with one
.gsub(/\A(\d+)\z/, 'anchor-\1') # digits-only hrefs conflict with issue refs
id = string_to_anchor(node.text)
uniq = headers[id] > 0 ? "-#{headers[id]}" : ''
headers[id] += 1
......
# frozen_string_literal: true
module Gitlab
  module Utils
    # Markdown-related string helpers meant to be mixed into other classes.
    module Markdown
      # Matches any character that is not a word character, a dash or a space.
      PUNCTUATION_REGEXP = /[^\p{Word}\- ]/u.freeze

      # Converts a header text into its anchor form: surrounding whitespace
      # trimmed, lowercased, punctuation removed, spaces turned into dashes
      # and runs of dashes collapsed into one.
      #
      # @param string [String] header text
      # @return [String] the generated anchor
      def string_to_anchor(string)
        lowered = string.strip.downcase
        without_punctuation = lowered.gsub(PUNCTUATION_REGEXP, '')
        dashed = without_punctuation.tr(' ', '-').squeeze('-')

        # digits-only hrefs conflict with issue refs
        dashed.gsub(/\A(\d+)\z/, 'anchor-\1')
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
require 'haml_lint'
require 'haml_lint/spec'
require Rails.root.join('haml_lint/linter/documentation_links')
# Specs for the DocumentationLinks HAML linter. Each example feeds a HAML
# snippet to the linter (through the 'linter' shared context) and checks
# whether a lint is reported.
RSpec.describe HamlLint::Linter::DocumentationLinks do
  include_context 'linter'

  context 'when link_to points to the existing file path' do
    let(:haml) { "= link_to 'Description', help_page_path('README.md')" }

    it { is_expected.not_to report_lint }
  end

  context 'when link_to points to the existing file with valid anchor' do
    let(:haml) { "= link_to 'Description', help_page_path('README.md', anchor: 'overview'), target: '_blank'" }

    it { is_expected.not_to report_lint }
  end

  context 'when link_to points to the existing file path without .md extension' do
    let(:haml) { "= link_to 'Description', help_page_path('README')" }

    it { is_expected.not_to report_lint }
  end

  context 'when anchor is not correct' do
    let(:haml) { "= link_to 'Description', help_page_path('README.md', anchor: 'wrong')" }

    it { is_expected.to report_lint }

    context 'when help_page_path has multiple options' do
      let(:haml) { "= link_to 'Description', help_page_path('README.md', key: :value, anchor: 'wrong')" }

      it { is_expected.to report_lint }
    end
  end

  context 'when file path is wrong' do
    let(:haml) { "= link_to 'Description', help_page_path('wrong.md'), target: '_blank'" }

    it { is_expected.to report_lint }
  end

  context 'when link with wrong file path is assigned to a variable' do
    let(:haml) { "- my_link = link_to 'Description', help_page_path('wrong.md')" }

    it { is_expected.to report_lint }
  end

  context 'when it is a broken code' do
    let(:haml) { "= I am broken! ]]]]" }

    it { is_expected.not_to report_lint }
  end

  context 'when anchor belongs to a different element' do
    # The snippet must be valid Ruby: with unparsable code the example would
    # pass through the "broken code" path and never exercise an `anchor` key
    # that lives outside of the help_page_path arguments.
    let(:haml) { "= link_to 'Description', help_page_path('README.md'), target: { anchor: 'blank' }" }

    it { is_expected.not_to report_lint }
  end

  context 'when a simple help_page_path' do
    let(:haml) { "- url = help_page_path('wrong.md')" }

    it { is_expected.to report_lint }
  end

  context 'when link is not a string' do
    let(:haml) { "- url = help_page_path(help_url)" }

    it { is_expected.not_to report_lint }
  end

  context 'when link is a part of the tag' do
    let(:haml) { ".data-form{ data: { url: help_page_path('wrong.md') } }" }

    it { is_expected.to report_lint }
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for the anchor generation helper provided by Gitlab::Utils::Markdown.
RSpec.describe Gitlab::Utils::Markdown do
  # Anonymous host class that mixes in the module under test.
  let(:host_class) { Class.new { include Gitlab::Utils::Markdown } }
  let(:object) { host_class.new }

  describe '#string_to_anchor' do
    subject(:anchor) { object.string_to_anchor(string) }

    let(:string) { 'My Header' }

    it 'converts string to anchor' do
      expect(anchor).to eq 'my-header'
    end

    context 'when string has punctuation' do
      let(:string) { 'My, Header!' }

      it 'removes punctuation' do
        expect(anchor).to eq 'my-header'
      end
    end

    context 'when string starts and ends with spaces' do
      let(:string) { ' My Header ' }

      it 'removes extra spaces' do
        expect(anchor).to eq 'my-header'
      end
    end

    context 'when string has multiple spaces and dashes in the middle' do
      let(:string) { 'My - - - Header' }

      it 'removes consecutive dashes' do
        expect(anchor).to eq 'my-header'
      end
    end

    context 'when string contains only digits' do
      let(:string) { '123' }

      it 'adds anchor prefix' do
        expect(anchor).to eq 'anchor-123'
      end
    end

    context 'when string is empty' do
      let(:string) { '' }

      it 'returns an empty string' do
        expect(anchor).to eq ''
      end
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment