Commit 43af4e55 authored by Yorick Peterse's avatar Yorick Peterse

Encode when migrating ProcessCommitWorker jobs

If the source encoding is not UTF-8 we need to encode the data as
`JSON.dump` may throw an error if the input can not be converted to
UTF-8. We only encode when necessary to reduce the overhead.

Fixes gitlab-org/gitlab-ce#25489
parent db9e1635
---
title: Encode input when migrating ProcessCommitWorker jobs to prevent migration errors
merge_request:
author:
...@@ -47,14 +47,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration ...@@ -47,14 +47,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration
hash = { hash = {
id: commit.oid, id: commit.oid,
message: commit.message, message: encode(commit.message),
parent_ids: commit.parent_ids, parent_ids: commit.parent_ids,
authored_date: commit.author[:time], authored_date: commit.author[:time],
author_name: commit.author[:name], author_name: encode(commit.author[:name]),
author_email: commit.author[:email], author_email: encode(commit.author[:email]),
committed_date: commit.committer[:time], committed_date: commit.committer[:time],
committer_email: commit.committer[:email], committer_email: encode(commit.committer[:email]),
committer_name: commit.committer[:name] committer_name: encode(commit.committer[:name])
} }
payload['args'][2] = hash payload['args'][2] = hash
...@@ -89,4 +89,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration ...@@ -89,4 +89,14 @@ class MigrateProcessCommitWorkerJobs < ActiveRecord::Migration
end end
end end
end end
def encode(data)
encoding = Encoding::UTF_8
if data.encoding == encoding
data
else
data.encode(encoding, invalid: :replace, undef: :replace)
end
end
end end
# encoding: utf-8
require 'spec_helper' require 'spec_helper'
require Rails.root.join('db', 'migrate', '20161124141322_migrate_process_commit_worker_jobs.rb') require Rails.root.join('db', 'migrate', '20161124141322_migrate_process_commit_worker_jobs.rb')
...@@ -59,6 +61,10 @@ describe MigrateProcessCommitWorkerJobs do ...@@ -59,6 +61,10 @@ describe MigrateProcessCommitWorkerJobs do
Sidekiq.redis { |r| r.llen('queue:process_commit') } Sidekiq.redis { |r| r.llen('queue:process_commit') }
end end
def pop_job
JSON.load(Sidekiq.redis { |r| r.lpop('queue:process_commit') })
end
before do before do
Sidekiq.redis do |redis| Sidekiq.redis do |redis|
job = JSON.dump(args: [project.id, user.id, commit.oid]) job = JSON.dump(args: [project.id, user.id, commit.oid])
...@@ -92,11 +98,28 @@ describe MigrateProcessCommitWorkerJobs do ...@@ -92,11 +98,28 @@ describe MigrateProcessCommitWorkerJobs do
expect(job_count).to eq(1) expect(job_count).to eq(1)
end end
it 'encodes data to UTF-8' do
allow_any_instance_of(Rugged::Repository).to receive(:lookup).
with(commit.oid).
and_return(commit)
allow(commit).to receive(:message).
and_return('김치'.force_encoding('BINARY'))
migration.up
job = pop_job
# We don't care so much about what is being stored, instead we just want
# to make sure the encoding is right so that JSON encoding the data
# doesn't produce any errors.
expect(job['args'][2]['message'].encoding).to eq(Encoding::UTF_8)
end
context 'a migrated job' do context 'a migrated job' do
let(:job) do let(:job) do
migration.up migration.up
pop_job
JSON.load(Sidekiq.redis { |r| r.lpop('queue:process_commit') })
end end
let(:commit_hash) do let(:commit_hash) do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment