From e6cfa7ab897ac4fd6bf9bbcafe09fc42c4cc2c5d Mon Sep 17 00:00:00 2001
From: Eugen Rochko
Date: Tue, 26 Mar 2019 01:23:59 +0100
Subject: Change language detector threshold from 140 characters to 4 words (#10376)

Add `lang` attribute to statuses in web UI
---
 app/lib/language_detector.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'app/lib/language_detector.rb')

diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
index 70a9084d1..1e90af42d 100644
--- a/app/lib/language_detector.rb
+++ b/app/lib/language_detector.rb
@@ -3,7 +3,7 @@
 class LanguageDetector
   include Singleton
 
-  CHARACTER_THRESHOLD = 140
+  WORDS_THRESHOLD = 4
   RELIABLE_CHARACTERS_RE = /[\p{Hebrew}\p{Arabic}\p{Syriac}\p{Thaana}\p{Nko}\p{Han}\p{Katakana}\p{Hiragana}\p{Hangul}]+/m
 
   def initialize
@@ -37,7 +37,7 @@ class LanguageDetector
   end
 
   def sufficient_text_length?(text)
-    text.size >= CHARACTER_THRESHOLD
+    text.split(/\s+/).size >= WORDS_THRESHOLD
   end
 
   def language_specific_character_set?(text)
--
cgit