about summary refs log tree commit diff
diff options
context:
space:
mode:
author Matt Jankowski <mjankowski@thoughtbot.com> 2017-05-03 10:59:31 -0400
committer Eugen Rochko <eugen@zeonfederated.com> 2017-05-03 16:59:31 +0200
commit 8c5ad23b24b17efc0660928d0b02ddbebc6f2939 (patch)
tree 71177649c7f13ce16b80397f987a2ff9cfb97d65
parent 53384b0ffe4fd8cfcced4fe2945acd814c676088 (diff)
Language improvements, replace whatlanguage with CLD (#2753)
* add failing en specs

* add cld2 gem

* Replace WhatLanguage with CLD
-rw-r--r-- Gemfile 2
-rw-r--r-- Gemfile.lock 6
-rw-r--r-- app/lib/language_detector.rb 14
-rw-r--r-- spec/lib/language_detector_spec.rb 24
4 files changed, 33 insertions, 13 deletions
diff --git a/Gemfile b/Gemfile
index 1287afe44..d84597a78 100644
--- a/Gemfile
+++ b/Gemfile
@@ -20,6 +20,7 @@ gem 'paperclip', '~> 5.1'
 gem 'paperclip-av-transcoder'
 
 gem 'addressable'
+gem 'cld2', require: 'cld'
 gem 'devise'
 gem 'devise-two-factor'
 gem 'doorkeeper'
@@ -56,7 +57,6 @@ gem 'statsd-instrument'
 gem 'twitter-text'
 gem 'tzinfo-data'
 gem 'webpacker', '~>1.2'
-gem 'whatlanguage'
 
 # For some reason the view specs start failing without this
 gem 'react-rails'
diff --git a/Gemfile.lock b/Gemfile.lock
index 218e17237..f4b307cec 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -102,6 +102,8 @@ GEM
       rack-test (>= 0.5.4)
       xpath (~> 2.0)
     chunky_png (1.3.8)
+    cld2 (1.0.3)
+      ffi (~> 1.9.3)
     climate_control (0.1.0)
     cocaine (0.5.8)
       climate_control (>= 0.0.3, < 1.0)
@@ -153,6 +155,7 @@ GEM
     faker (1.7.3)
       i18n (~> 0.5)
     fast_blank (1.0.0)
+    ffi (1.9.18)
     fuubar (2.2.0)
       rspec-core (~> 3.0)
       ruby-progressbar (~> 1.4)
@@ -463,7 +466,6 @@ GEM
     websocket-driver (0.6.5)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.2)
-    whatlanguage (1.0.6)
     xpath (2.0.0)
       nokogiri (~> 1.3)
 
@@ -484,6 +486,7 @@ DEPENDENCIES
   capistrano-rbenv
   capistrano-yarn
   capybara
+  cld2
   devise
   devise-two-factor
   doorkeeper
@@ -549,7 +552,6 @@ DEPENDENCIES
   uglifier (>= 1.3.0)
   webmock
   webpacker (~> 1.2)
-  whatlanguage
 
 RUBY VERSION
    ruby 2.4.1p111
diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
index 9a32d6a64..8c1751beb 100644
--- a/app/lib/language_detector.rb
+++ b/app/lib/language_detector.rb
@@ -9,11 +9,23 @@ class LanguageDetector
   end
 
   def to_iso_s
-    WhatLanguage.new(:all).language_iso(text_without_urls) || default_locale.to_sym
+    detected_language_code || default_locale.to_sym
   end
 
   private
 
+  def detected_language_code
+    detected_language[:code].to_sym if detected_language_reliable?
+  end
+
+  def detected_language
+    @_detected_language ||= CLD.detect_language(text_without_urls)
+  end
+
+  def detected_language_reliable?
+    detected_language[:reliable]
+  end
+
   def text_without_urls
     text.dup.tap do |new_text|
       URI.extract(new_text).each do |url|
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
index 5fb19a1e7..bd4e65ef8 100644
--- a/spec/lib/language_detector_spec.rb
+++ b/spec/lib/language_detector_spec.rb
@@ -3,11 +3,17 @@ require 'rails_helper'
 
 describe LanguageDetector do
   describe 'to_iso_s' do
-    it 'detects english language' do
-      string = 'Hello and welcome to mastodon'
-      result = described_class.new(string).to_iso_s
-
-      expect(result).to eq :en
+    it 'detects english language for basic strings' do
+      strings = [
+        "Hello and welcome to mastodon",
+        "I'd rather not!",
+        "a lot of people just want to feel righteous all the time and that's all that matters",
+      ]
+      strings.each do |string|
+        result = described_class.new(string).to_iso_s
+
+        expect(result).to eq(:en), string
+      end
     end
 
     it 'detects spanish language' do
@@ -19,15 +25,15 @@ describe LanguageDetector do
 
     describe 'when language can\'t be detected' do
       it 'confirm language engine cant detect' do
-        result = WhatLanguage.new(:all).language_iso('')
-        expect(result).to be_nil
+        result = CLD.detect_language('')
+        expect(result[:reliable]).to be false
       end
 
       describe 'because of a URL' do
         it 'uses default locale when sent just a URL' do
           string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
-          wl_result = WhatLanguage.new(:all).language_iso(string)
-          expect(wl_result).not_to eq :en
+          cld_result = CLD.detect_language(string)[:code]
+          expect(cld_result).not_to eq :en
 
           result = described_class.new(string).to_iso_s