diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2019-07-18 03:02:15 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-07-18 03:02:15 +0200 |
commit | 5bfe1e1f0517a23637a1a132dbf0b62fd29982bc (patch) | |
tree | 1381c422424fe4cbf2b68839d2dcacc654796bb6 | |
parent | 3a6fe657ba30c56677c271432338a7002191d772 (diff) |
Change language detection to include hashtags as words (#11341)
-rw-r--r-- | app/lib/language_detector.rb | 2 | ||||
-rw-r--r-- | spec/lib/language_detector_spec.rb | 6 |
2 files changed, 4 insertions, 4 deletions
```diff
diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
index 1e90af42d..6f9511a54 100644
--- a/app/lib/language_detector.rb
+++ b/app/lib/language_detector.rb
@@ -69,7 +69,7 @@ class LanguageDetector
     new_text = remove_html(text)
     new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
     new_text.gsub!(Account::MENTION_RE, '')
-    new_text.gsub!(Tag::HASHTAG_RE, '')
+    new_text.gsub!(Tag::HASHTAG_RE) { |string| string.gsub(/[#_]/, '#' => '', '_' => ' ').gsub(/[a-z][A-Z]|[a-zA-Z][\d]/) { |s| s.insert(1, ' ') }.downcase }
     new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
     new_text.gsub!(/\s+/, ' ')
     new_text
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
index 0cb70605a..b7ba0f6c4 100644
--- a/spec/lib/language_detector_spec.rb
+++ b/spec/lib/language_detector_spec.rb
@@ -32,11 +32,11 @@ describe LanguageDetector do
       expect(result).to eq 'Our website is and also'
     end
 
-    it 'strips #hashtags from strings before detection' do
-      string = 'Hey look at all the #animals and #fish'
+    it 'converts #hashtags back to normal text before detection' do
+      string = 'Hey look at all the #animals and #FishAndChips'
 
       result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Hey look at all the and'
+      expect(result).to eq 'Hey look at all the animals and fish and chips'
     end
   end
 
```