Commit 0670055f authored by Nick Thomas's avatar Nick Thomas

Import the gitlab-elasticsearch-git gem unmodified

parent d2cf8335
...@@ -113,7 +113,6 @@ gem 'seed-fu', '~> 2.3.5' ...@@ -113,7 +113,6 @@ gem 'seed-fu', '~> 2.3.5'
gem 'elasticsearch-model', '~> 0.1.9' gem 'elasticsearch-model', '~> 0.1.9'
gem 'elasticsearch-rails', '~> 0.1.9' gem 'elasticsearch-rails', '~> 0.1.9'
gem 'elasticsearch-api', '5.0.3' gem 'elasticsearch-api', '5.0.3'
gem 'gitlab-elasticsearch-git', '1.2.0', require: "elasticsearch/git"
gem 'aws-sdk' gem 'aws-sdk'
gem 'faraday_middleware-aws-signers-v4' gem 'faraday_middleware-aws-signers-v4'
......
...@@ -287,14 +287,6 @@ GEM ...@@ -287,14 +287,6 @@ GEM
mime-types (>= 1.19) mime-types (>= 1.19)
rugged (>= 0.23.0b) rugged (>= 0.23.0b)
github-markup (1.4.0) github-markup (1.4.0)
gitlab-elasticsearch-git (1.2.0)
activemodel (~> 4.2)
activesupport (~> 4.2)
charlock_holmes (~> 0.7)
elasticsearch-api
elasticsearch-model (~> 0.1.9)
github-linguist (~> 4.7)
rugged (~> 0.24)
gitlab-flowdock-git-hook (1.0.1) gitlab-flowdock-git-hook (1.0.1)
flowdock (~> 0.7) flowdock (~> 0.7)
gitlab-grit (>= 2.4.1) gitlab-grit (>= 2.4.1)
...@@ -949,7 +941,6 @@ DEPENDENCIES ...@@ -949,7 +941,6 @@ DEPENDENCIES
gemojione (~> 3.0) gemojione (~> 3.0)
gitaly (~> 0.5.0) gitaly (~> 0.5.0)
github-linguist (~> 4.7.0) github-linguist (~> 4.7.0)
gitlab-elasticsearch-git (= 1.2.0)
gitlab-flowdock-git-hook (~> 1.0.1) gitlab-flowdock-git-hook (~> 1.0.1)
gitlab-license (~> 1.0) gitlab-license (~> 1.0)
gitlab-markup (~> 1.5.1) gitlab-markup (~> 1.5.1)
......
require "elasticsearch/git/version"
require "elasticsearch/git/model"
require "elasticsearch/git/repository"
module Elasticsearch
module Git
end
end
require 'active_support/concern'
require 'charlock_holmes'
module Elasticsearch
module Git
module EncoderHelper
extend ActiveSupport::Concern
included do
def encode!(message)
return nil unless message.respond_to? :force_encoding
# if message is utf-8 encoding, just return it
message.force_encoding("UTF-8")
return message if message.valid_encoding?
# return message if message type is binary
detect = CharlockHolmes::EncodingDetector.detect(message)
return message.force_encoding("BINARY") if detect && detect[:type] == :binary
# encoding message to detect encoding
if detect && detect[:encoding]
message.force_encoding(detect[:encoding])
end
# encode and clean the bad chars
message.replace clean(message)
rescue
encoding = detect ? detect[:encoding] : "unknown"
"--broken encoding: #{encoding}"
end
private
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "")
.encode("UTF-8")
.gsub("\0".encode("UTF-8"), "")
end
end
end
end
end
require 'linguist'
require 'elasticsearch/git/encoder_helper'
module Elasticsearch
module Git
class LiteBlob
include Linguist::BlobHelper
include Elasticsearch::Git::EncoderHelper
attr_accessor :id, :name, :path, :size, :mode, :commit_id
attr_writer :data
def initialize(repo, raw_blob_hash)
@id = raw_blob_hash[:oid]
@blob = repo.lookup(@id)
@mode = raw_blob_hash[:mode].to_s(8)
@size = @blob.size
@path = encode!(raw_blob_hash[:path])
@name = @path.split('/').last
end
def data
@data ||= encode!(@blob.content)
end
end
end
end
require 'active_support/concern'
require 'active_model'
require 'elasticsearch/model'
module Elasticsearch
module Git
module Model
extend ActiveSupport::Concern
included do
extend ActiveModel::Naming
include ActiveModel::Model
include Elasticsearch::Model
env = if defined?(::Rails)
::Rails.env.to_s
else
nil
end
index_name [self.name.downcase, 'index', env].compact.join('-')
settings \
index: {
analysis: {
analyzer: {
path_analyzer: {
type: 'custom',
tokenizer: 'path_tokenizer',
filter: %w(lowercase asciifolding)
},
sha_analyzer: {
type: 'custom',
tokenizer: 'sha_tokenizer',
filter: %w(lowercase asciifolding)
},
code_analyzer: {
type: 'custom',
tokenizer: 'standard',
filter: %w(code lowercase asciifolding),
char_filter: ["code_mapping"]
},
code_search_analyzer: {
type: 'custom',
tokenizer: 'standard',
filter: %w(lowercase asciifolding),
char_filter: ["code_mapping"]
}
},
tokenizer: {
sha_tokenizer: {
type: "edgeNGram",
min_gram: 5,
max_gram: 40,
token_chars: %w(letter digit)
},
path_tokenizer: {
type: 'path_hierarchy',
reverse: true
},
},
filter: {
code: {
type: "pattern_capture",
preserve_original: 1,
patterns: [
"(\\p{Ll}+|\\p{Lu}\\p{Ll}+|\\p{Lu}+)",
"(\\d+)"
]
}
},
char_filter: {
code_mapping: {
type: "mapping",
mappings: [
". => ' '"
]
}
},
}
}
end
end
end
end
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment