about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--Gemfile2
-rw-r--r--Gemfile.lock4
-rw-r--r--app/helpers/settings_helper.rb2
-rw-r--r--app/lib/language_detector.rb39
-rw-r--r--app/services/post_status_service.rb6
-rw-r--r--spec/lib/language_detector_spec.rb34
-rw-r--r--spec/services/post_status_service_spec.rb9
7 files changed, 43 insertions, 53 deletions
diff --git a/Gemfile b/Gemfile
index 051f09f78..4f4861913 100644
--- a/Gemfile
+++ b/Gemfile
@@ -25,7 +25,7 @@ gem 'bootsnap'
 gem 'browser'
 gem 'charlock_holmes', '~> 0.7.5'
 gem 'iso-639'
-gem 'cld3', '~> 3.1'
+gem 'cld3', '~> 3.2.0'
 gem 'devise', '~> 4.2'
 gem 'devise-two-factor', '~> 3.0'
 gem 'doorkeeper', '~> 4.2'
diff --git a/Gemfile.lock b/Gemfile.lock
index 31893dc3f..f080f15e5 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -110,7 +110,7 @@ GEM
       activesupport
     charlock_holmes (0.7.5)
     chunky_png (1.3.8)
-    cld3 (3.1.3)
+    cld3 (3.2.0)
       ffi (>= 1.1.0, < 1.10.0)
     climate_control (0.2.0)
     cocaine (0.5.8)
@@ -541,7 +541,7 @@ DEPENDENCIES
   capistrano-yarn (~> 2.0)
   capybara (~> 2.14)
   charlock_holmes (~> 0.7.5)
-  cld3 (~> 3.1)
+  cld3 (~> 3.2.0)
   climate_control (~> 0.2)
   devise (~> 4.2)
   devise-two-factor (~> 3.0)
diff --git a/app/helpers/settings_helper.rb b/app/helpers/settings_helper.rb
index 369a45680..14776b354 100644
--- a/app/helpers/settings_helper.rb
+++ b/app/helpers/settings_helper.rb
@@ -41,7 +41,7 @@ module SettingsHelper
   end
 
   def filterable_languages
-    I18n.available_locales.map { |locale| locale.to_s.split('-').first.to_sym }.uniq
+    LanguageDetector.instance.language_names.select(&HUMAN_LOCALES.method(:key?))
   end
 
   def hash_to_object(hash)
diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
index 1d9932b52..a42460e10 100644
--- a/app/lib/language_detector.rb
+++ b/app/lib/language_detector.rb
@@ -1,26 +1,31 @@
 # frozen_string_literal: true
 
 class LanguageDetector
-  attr_reader :text, :account
+  include Singleton
 
-  def initialize(text, account = nil)
-    @text = text
-    @account = account
+  def initialize
     @identifier = CLD3::NNetLanguageIdentifier.new(1, 2048)
   end
 
-  def to_iso_s
-    detected_language_code || default_locale
+  def detect(text, account)
+    detect_language_code(text) || default_locale(account)
   end
 
-  def prepared_text
-    simplified_text.strip
+  def language_names
+    @language_names =
+      CLD3::TaskContextParams::LANGUAGE_NAMES.map { |name| iso6391(name.to_s).to_sym }
+                                             .uniq
   end
 
   private
 
-  def detected_language_code
-    iso6391(result.language).to_sym if detected_language_reliable?
+  def prepare_text(text)
+    simplify_text(text).strip
+  end
+
+  def detect_language_code(text)
+    result = @identifier.find_language(prepare_text(text))
+    iso6391(result.language.to_s).to_sym if result.reliable?
   end
 
   def iso6391(bcp47)
@@ -32,15 +37,7 @@ class LanguageDetector
     ISO_639.find(iso639).alpha2
   end
 
-  def result
-    @result ||= @identifier.find_language(prepared_text)
-  end
-
-  def detected_language_reliable?
-    result.reliable?
-  end
-
-  def simplified_text
+  def simplify_text(text)
     text.dup.tap do |new_text|
       new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
       new_text.gsub!(Account::MENTION_RE, '')
@@ -49,7 +46,7 @@ class LanguageDetector
     end
   end
 
-  def default_locale
-    account&.user_locale&.to_sym || nil
+  def default_locale(account)
+    account.user_locale&.to_sym
   end
 end
diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb
index 97c55c4b2..e37cd94df 100644
--- a/app/services/post_status_service.rb
+++ b/app/services/post_status_service.rb
@@ -28,7 +28,7 @@ class PostStatusService < BaseService
                                         sensitive: options[:sensitive],
                                         spoiler_text: options[:spoiler_text] || '',
                                         visibility: options[:visibility] || account.user&.setting_default_privacy,
-                                        language: detect_language_for(text, account),
+                                        language: LanguageDetector.instance.detect(text, account),
                                         application: options[:application])
 
       attach_media(status, media)
@@ -69,10 +69,6 @@ class PostStatusService < BaseService
     media.update(status_id: status.id)
   end
 
-  def detect_language_for(text, account)
-    LanguageDetector.new(text, account).to_iso_s
-  end
-
   def process_mentions_service
     @process_mentions_service ||= ProcessMentionsService.new
   end
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
index ec39cb6a0..d17026511 100644
--- a/spec/lib/language_detector_spec.rb
+++ b/spec/lib/language_detector_spec.rb
@@ -3,10 +3,10 @@
 require 'rails_helper'
 
 describe LanguageDetector do
-  describe 'prepared_text' do
+  describe 'prepare_text' do
     it 'returns unmodified string without special cases' do
       string = 'just a regular string'
-      result = described_class.new(string).prepared_text
+      result = described_class.instance.send(:prepare_text, string)
 
       expect(result).to eq string
     end
@@ -14,33 +14,35 @@ describe LanguageDetector do
     it 'collapses spacing in strings' do
       string = 'The formatting   in    this is very        odd'
 
-      result = described_class.new(string).prepared_text
+      result = described_class.instance.send(:prepare_text, string)
       expect(result).to eq 'The formatting in this is very odd'
     end
 
     it 'strips usernames from strings before detection' do
       string = '@username Yeah, very surreal...! also @friend'
 
-      result = described_class.new(string).prepared_text
+      result = described_class.instance.send(:prepare_text, string)
       expect(result).to eq 'Yeah, very surreal...! also'
     end
 
     it 'strips URLs from strings before detection' do
       string = 'Our website is https://example.com and also http://localhost.dev'
 
-      result = described_class.new(string).prepared_text
+      result = described_class.instance.send(:prepare_text, string)
       expect(result).to eq 'Our website is and also'
     end
 
     it 'strips #hashtags from strings before detection' do
       string = 'Hey look at all the #animals and #fish'
 
-      result = described_class.new(string).prepared_text
+      result = described_class.instance.send(:prepare_text, string)
       expect(result).to eq 'Hey look at all the and'
     end
   end
 
-  describe 'to_iso_s' do
+  describe 'detect' do
+    let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
+
     it 'detects english language for basic strings' do
       strings = [
         "Hello and welcome to mastodon how are you today?",
@@ -48,7 +50,7 @@ describe LanguageDetector do
         "a lot of people just want to feel righteous all the time and that's all that matters",
       ]
       strings.each do |string|
-        result = described_class.new(string).to_iso_s
+        result = described_class.instance.detect(string, account_without_user_locale)
 
         expect(result).to eq(:en), string
       end
@@ -56,14 +58,14 @@ describe LanguageDetector do
 
     it 'detects spanish language' do
       string = 'Obtener un Hola y bienvenidos a Mastodon'
-      result = described_class.new(string).to_iso_s
+      result = described_class.instance.detect(string, account_without_user_locale)
 
       expect(result).to eq :es
     end
 
     describe 'when language can\'t be detected' do
       it 'uses nil when sent an empty document' do
-        result = described_class.new('').to_iso_s
+        result = described_class.instance.detect('', account_without_user_locale)
         expect(result).to eq nil
       end
 
@@ -73,7 +75,7 @@ describe LanguageDetector do
           cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
           expect(cld_result).not_to eq :en
 
-          result = described_class.new(string).to_iso_s
+          result = described_class.instance.detect(string, account_without_user_locale)
 
           expect(result).to eq nil
         end
@@ -82,14 +84,13 @@ describe LanguageDetector do
       describe 'with an account' do
         it 'uses the account locale when present' do
           account = double(user_locale: 'fr')
-          result  = described_class.new('', account).to_iso_s
+          result  = described_class.instance.detect('', account)
 
           expect(result).to eq :fr
         end
 
         it 'uses nil when account is present but has no locale' do
-          account = double(user_locale: nil)
-          result  = described_class.new('', account).to_iso_s
+          result  = described_class.instance.detect('', account_without_user_locale)
 
           expect(result).to eq nil
         end
@@ -97,8 +98,7 @@ describe LanguageDetector do
 
       describe 'with an `en` default locale' do
         it 'uses nil for undetectable string' do
-          string = ''
-          result = described_class.new(string).to_iso_s
+          result = described_class.instance.detect('', account_without_user_locale)
 
           expect(result).to eq nil
         end
@@ -114,7 +114,7 @@ describe LanguageDetector do
 
         it 'uses nil for undetectable string' do
           string = ''
-          result = described_class.new(string).to_iso_s
+          result = described_class.instance.detect(string, account_without_user_locale)
 
           expect(result).to eq nil
         end
diff --git a/spec/services/post_status_service_spec.rb b/spec/services/post_status_service_spec.rb
index 4182c4e1f..91902ff69 100644
--- a/spec/services/post_status_service_spec.rb
+++ b/spec/services/post_status_service_spec.rb
@@ -65,15 +65,12 @@ RSpec.describe PostStatusService do
   end
 
   it 'creates a status with a language set' do
-    detector = double(to_iso_s: :en)
-    allow(LanguageDetector).to receive(:new).and_return(detector)
-
     account = Fabricate(:account)
-    text = 'test status text'
+    text = 'This is an English text.'
 
-    subject.call(account, text)
+    status = subject.call(account, text)
 
-    expect(LanguageDetector).to have_received(:new).with(text, account)
+    expect(status.language).to eq 'en'
   end
 
   it 'processes mentions' do