Commit 58c84f49 authored by Douglas Barbosa Alexandre

Merge branch '16950_word_diff_parser' into 'master'

Introduce word-diff parser

See merge request gitlab-org/gitlab!55105
parents 2e02846d 4d3964ec
# frozen_string_literal: true
module Gitlab
  module WordDiff
    # Accumulates chunks in insertion order and exposes them as one
    # concatenated string.
    class ChunkCollection
      def initialize
        reset
      end

      # Appends +chunk+ to the end of the collection.
      def add(chunk)
        @chunks.push(chunk)
      end

      # Returns every collected chunk joined together with no separator
      # (an empty string when nothing has been added).
      def content
        @chunks.join('')
      end

      # Discards everything collected so far by starting over with a new array.
      def reset
        @chunks = []
      end
    end
  end
end
# frozen_string_literal: true
# Converts a line from `git diff --word-diff=porcelain` output into a segment
#
# Possible options:
# 1. Diff hunk
# 2. Chunk
# 3. Newline
module Gitlab
  module WordDiff
    # Classifies a single raw line and wraps it in the matching segment object.
    class LineProcessor
      def initialize(line)
        @line = line
      end

      # Returns:
      # * nil                 - when the line (newlines removed) is a single space
      # * Segments::DiffHunk  - when the line matches /^@@ -/
      # * Segments::Newline   - when the line (newlines removed) is exactly "~"
      # * Segments::Chunk     - for everything else
      def extract
        if empty_line?
          nil
        elsif diff_hunk?
          Segments::DiffHunk.new(full_line)
        elsif newline_delimiter?
          Segments::Newline.new
        else
          Segments::Chunk.new(full_line)
        end
      end

      private

      attr_reader :line

      def diff_hunk?
        line.match?(/^@@ -/)
      end

      def empty_line?
        full_line == ' '
      end

      def newline_delimiter?
        full_line == '~'
      end

      # The raw line with every "\n" character removed, computed once.
      def full_line
        @full_line ||= line.delete("\n")
      end
    end
  end
end
# frozen_string_literal: true
# Converts git diff --word-diff=porcelain output to Gitlab::Diff::Line objects
# see: https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-porcelain
module Gitlab
  module WordDiff
    # Turns word-diff lines into a stream of Gitlab::Diff::Line objects.
    class Parser
      # NOTE(review): Enumerable is mixed in but no #each is defined in the
      # code visible here; kept as-is so the class's ancestry does not change.
      include Enumerable

      # lines     - a collection of raw diff lines responding to #each
      # diff_file - forwarded to every built line as the +parent_file+ option
      #
      # Returns [] when +lines+ is blank (presumably ActiveSupport's #blank?),
      # otherwise an Enumerator. Returning an Enumerator makes it possible to
      # search for a single line (with #find) without having to instantiate
      # all the others that come after it.
      def parse(lines, diff_file: nil)
        return [] if lines.blank?

        Enumerator.new do |yielder|
          # Both collaborators are local to this enumeration on purpose: when
          # they lived in instance variables, two enumerators obtained from the
          # same parser and consumed interleaved/nested overwrote each other's
          # positions and pending chunks.
          chunks = ChunkCollection.new
          counter = PositionsCounter.new

          lines.each do |line|
            segment = LineProcessor.new(line).extract

            case segment
            when Segments::DiffHunk
              # A hunk reported as being on the first line emits no 'match'
              # line and leaves the counter at its initial positions.
              next if segment.first_line?

              counter.set_pos_num(old: segment.pos_old, new: segment.pos_new)
              yielder << build_line(segment.to_s, 'match', counter, parent_file: diff_file)
            when Segments::Chunk
              chunks.add(segment)
            when Segments::Newline
              # A newline segment closes the current line: emit what has been
              # collected, then start a new line at the next position.
              yielder << build_line(chunks.content, nil, counter, parent_file: diff_file)
              chunks.reset
              counter.increase_pos_num
            end
          end
        end
      end

      private

      # Builds a Gitlab::Diff::Line from the counter's current index and
      # positions, then advances the counter's object index.
      def build_line(content, type, counter, **options)
        diff_line = Gitlab::Diff::Line.new(
          content, type,
          counter.line_obj_index, counter.pos_old, counter.pos_new,
          **options)
        counter.increase_obj_index
        diff_line
      end
    end
  end
end
# frozen_string_literal: true
# Responsible for keeping track of line numbers and created Gitlab::Diff::Line objects
module Gitlab
  module WordDiff
    # Holds the current old/new positions together with the index of the next
    # line object. Positions start at 1 and the index starts at 0.
    class PositionsCounter
      attr_reader :pos_old, :pos_new, :line_obj_index

      def initialize
        @line_obj_index = 0
        @pos_old = 1
        @pos_new = 1
      end

      # Moves both the old and the new position forward by one.
      def increase_pos_num
        @pos_old = pos_old + 1
        @pos_new = pos_new + 1
      end

      # Moves the line object index forward by one.
      def increase_obj_index
        @line_obj_index = line_obj_index + 1
      end

      # Overwrites both positions with the given values.
      def set_pos_num(old:, new:)
        @pos_old = old
        @pos_new = new
      end
    end
  end
end
# frozen_string_literal: true
# Chunk is a part of the line that starts with ` `, `-`, `+`
# Consecutive chunks build a line. Line that starts with `~` is an identifier of
# end of the line.
module Gitlab
  module WordDiff
    module Segments
      # Wraps one raw chunk line; the first character is treated as the
      # marker and the remainder as the chunk's text.
      class Chunk
        def initialize(line)
          @line = line
        end

        # True when the marker character is "-".
        def removed?
          line.start_with?('-')
        end

        # True when the marker character is "+".
        def added?
          line.start_with?('+')
        end

        # The line without its first character; '' when the line is empty.
        def to_s
          line[1..-1] || ''
        end

        # Number of characters in the text returned by #to_s.
        def length
          to_s.size
        end

        private

        attr_reader :line
      end
    end
  end
end
# frozen_string_literal: true
# A diff hunk is a line that starts with @@
# It contains information about start line numbers
#
# Example:
# @@ -1,4 +1,5 @@
#
# See more: https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
module Gitlab
  module WordDiff
    module Segments
      # Wraps a hunk header line such as "@@ -3,14 +4,13 @@" and reads the
      # starting positions out of it.
      class DiffHunk
        # First "-" / "+" in the line followed by any (possibly zero) digits.
        OLD_POSITION_PATTERN = /-[0-9]*/.freeze
        NEW_POSITION_PATTERN = /\+[0-9]*/.freeze

        def initialize(line)
          @line = line
        end

        # Number following the first "-" in the line; 0 when there is no "-"
        # or no digits follow it.
        def pos_old
          position_for(OLD_POSITION_PATTERN)
        end

        # Number following the first "+" in the line; 0 when there is no "+"
        # or no digits follow it.
        def pos_new
          position_for(NEW_POSITION_PATTERN)
        end

        # True when both positions are 1 or lower.
        def first_line?
          pos_old <= 1 && pos_new <= 1
        end

        def to_s
          line
        end

        private

        attr_reader :line

        # Extracts the position without a blanket `rescue`: a missing match
        # yields nil, and nil.to_i is 0, so malformed headers still give 0
        # while unrelated errors are no longer silently swallowed.
        def position_for(pattern)
          line.to_s[pattern].to_i.abs
        end
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module WordDiff
    module Segments
      # Marker segment that carries no text of its own.
      class Newline
        # Always an empty string.
        def to_s
          ''
        end
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::ChunkCollection do
  subject(:collection) { described_class.new }

  describe '#add' do
    it 'adds elements to the chunk collection' do
      ['Hello', ' World'].each { |part| collection.add(part) }

      expect(collection.content).to eq('Hello World')
    end
  end

  describe '#content' do
    subject { collection.content }

    context 'when no elements in the collection' do
      it { is_expected.to eq('') }
    end

    context 'when elements exist' do
      before do
        ['Hi', ' GitLab!'].each { |part| collection.add(part) }
      end

      it { is_expected.to eq('Hi GitLab!') }
    end
  end

  describe '#reset' do
    it 'clears the collection' do
      %w[1 2].each { |part| collection.add(part) }

      collection.reset

      expect(collection.content).to eq('')
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::LineProcessor do
  subject(:line_processor) { described_class.new(line) }

  describe '#extract' do
    subject(:segment) { line_processor.extract }

    context 'when line is a diff hunk' do
      let(:line) { "@@ -1,14 +1,13 @@\n" }

      it 'returns DiffHunk segment' do
        expect(segment).to be_a_kind_of(Gitlab::WordDiff::Segments::DiffHunk)
        # The trailing newline must not be part of the segment's text.
        expect(segment.to_s).to eq('@@ -1,14 +1,13 @@')
      end
    end

    context 'when line has a newline delimiter' do
      let(:line) { "~\n" }

      it 'returns Newline segment' do
        expect(segment).to be_a_kind_of(Gitlab::WordDiff::Segments::Newline)
        expect(segment.to_s).to eq('')
      end
    end

    context 'when line has only space' do
      let(:line) { " \n" }

      it 'returns nil' do
        expect(segment).to be_nil
      end
    end

    context 'when line has content' do
      let(:line) { "+New addition\n" }

      it 'returns Chunk segment' do
        expect(segment).to be_a_kind_of(Gitlab::WordDiff::Segments::Chunk)
        expect(segment.to_s).to eq('New addition')
      end
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::Parser do
  subject(:parser) { described_class.new }

  describe '#parse' do
    subject { parser.parse(diff.lines).to_a }

    # Word-diff fixture, one entry per line. The first character of every
    # chunk line is its marker (' ', '-' or '+') and is stripped by the
    # parser, so the leading spaces below are significant. They are written
    # as explicit string literals rather than heredoc indentation so that
    # re-indenting this file cannot silently drop them.
    let(:diff) do
      [
        '@@ -1,14 +1,13 @@',
        '~',
        ' Unchanged line',
        '~',
        '~',
        '-Old change',
        '+New addition',
        '  unchanged content',
        '~',
        '@@ -50,14 +50,13 @@',
        '+First change',
        '  same same same_',
        '-removed_',
        '+added_',
        ' end of the line',
        '~',
        '~'
      ].map { |row| "#{row}\n" }.join
    end

    it 'returns a collection of lines' do
      diff_lines = subject

      aggregate_failures do
        expect(diff_lines.count).to eq(7)

        expect(diff_lines.map(&:to_hash)).to match_array(
          [
            a_hash_including(index: 0, old_pos: 1, new_pos: 1, text: '', type: nil),
            a_hash_including(index: 1, old_pos: 2, new_pos: 2, text: 'Unchanged line', type: nil),
            a_hash_including(index: 2, old_pos: 3, new_pos: 3, text: '', type: nil),
            a_hash_including(index: 3, old_pos: 4, new_pos: 4, text: 'Old changeNew addition unchanged content', type: nil),
            a_hash_including(index: 4, old_pos: 50, new_pos: 50, text: '@@ -50,14 +50,13 @@', type: 'match'),
            a_hash_including(index: 5, old_pos: 50, new_pos: 50, text: 'First change same same same_removed_added_end of the line', type: nil),
            a_hash_including(index: 6, old_pos: 51, new_pos: 51, text: '', type: nil)
          ]
        )
      end
    end

    it 'restarts object index after several calls to Enumerator' do
      enumerator = parser.parse(diff.lines)

      # Each call to #first starts a new enumeration, which must begin at index 0.
      2.times do
        expect(enumerator.first.index).to eq(0)
      end
    end

    context 'when diff is empty' do
      let(:diff) { '' }

      it { is_expected.to eq([]) }
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::PositionsCounter do
  subject(:counter) { described_class.new }

  describe 'Initial state' do
    it 'starts with predefined values' do
      expect(counter).to have_attributes(pos_old: 1, pos_new: 1, line_obj_index: 0)
    end
  end

  describe '#increase_pos_num' do
    it 'increases old and new positions' do
      expect { counter.increase_pos_num }
        .to change { counter.pos_old }.from(1).to(2)
        .and change { counter.pos_new }.from(1).to(2)
    end
  end

  describe '#increase_obj_index' do
    it 'increases object index' do
      expect { counter.increase_obj_index }
        .to change { counter.line_obj_index }.from(0).to(1)
    end
  end

  describe '#set_pos_num' do
    it 'sets old and new positions' do
      expect { counter.set_pos_num(old: 10, new: 12) }
        .to change { counter.pos_old }.from(1).to(10)
        .and change { counter.pos_new }.from(1).to(12)
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::Segments::Chunk do
  subject(:chunk) { described_class.new(line) }

  # Default input: a chunk whose marker character is a space.
  let(:line) { ' Hello' }

  describe '#removed?' do
    subject { chunk.removed? }

    it { is_expected.to be_falsey }

    context 'when line starts with "-"' do
      let(:line) { '-Removed' }

      it { is_expected.to be_truthy }
    end
  end

  describe '#added?' do
    subject { chunk.added? }

    it { is_expected.to be_falsey }

    context 'when line starts with "+"' do
      let(:line) { '+Added' }

      it { is_expected.to be_truthy }
    end
  end

  describe '#to_s' do
    subject { chunk.to_s }

    it 'removes lead string modifier' do
      expect(chunk.to_s).to eq('Hello')
    end

    context 'when chunk is empty' do
      let(:line) { '' }

      it { is_expected.to eq('') }
    end
  end

  describe '#length' do
    subject { chunk.length }

    it { is_expected.to eq('Hello'.length) }
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::Segments::DiffHunk do
  subject(:diff_hunk) { described_class.new(line) }

  # Default input: old and new positions differ, so each reader is
  # checked against its own value.
  let(:line) { '@@ -3,14 +4,13 @@' }

  describe '#pos_old' do
    subject { diff_hunk.pos_old }

    it { is_expected.to eq 3 }

    context 'when diff hunk is broken' do
      let(:line) { '@@ ??? @@' }

      it { is_expected.to eq 0 }
    end
  end

  describe '#pos_new' do
    subject { diff_hunk.pos_new }

    it { is_expected.to eq 4 }

    context 'when diff hunk is broken' do
      let(:line) { '@@ ??? @@' }

      it { is_expected.to eq 0 }
    end
  end

  describe '#first_line?' do
    subject { diff_hunk.first_line? }

    it { is_expected.to be_falsey }

    context 'when diff hunk located on the first line' do
      let(:line) { '@@ -1,14 +1,13 @@' }

      it { is_expected.to be_truthy }
    end
  end

  describe '#to_s' do
    subject { diff_hunk.to_s }

    it { is_expected.to eq(line) }
  end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::WordDiff::Segments::Newline do
  subject(:newline) { described_class.new }

  describe '#to_s' do
    subject { newline.to_s }

    # A newline segment contributes no text.
    it { is_expected.to eq '' }
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment