diff options
author | Matt Jankowski <mjankowski@thoughtbot.com> | 2017-06-01 09:29:14 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-06-01 09:29:14 -0400 |
commit | d010e270e613f6299397601289158bd2acedbe8e (patch) | |
tree | 6bc639705b9e53be8efd6ff3b7fe791b5a9e13ad /spec | |
parent | d1e08bd38c029f0b47dfd2f3ba61ca5bb3e414b8 (diff) |
Remove usernames and hashtags from language detection (#3503)
* Add failing specs for hashtag and username extraction in language detector
* Remove usernames and hashtags from text before language detection
* Handle multiple instances of special case, and reduce whitespace
Diffstat (limited to 'spec')
-rw-r--r-- | spec/lib/language_detector_spec.rb | 38 |
1 files changed, 38 insertions, 0 deletions
```diff
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
index e543edd49..ace7a326a 100644
--- a/spec/lib/language_detector_spec.rb
+++ b/spec/lib/language_detector_spec.rb
@@ -1,7 +1,45 @@
 # frozen_string_literal: true
+
 require 'rails_helper'
 
 describe LanguageDetector do
+  describe 'prepared_text' do
+    it 'returns unmodified string without special cases' do
+      string = 'just a regular string'
+      result = described_class.new(string).prepared_text
+
+      expect(result).to eq string
+    end
+
+    it 'collapses spacing in strings' do
+      string = 'The formatting   in     this is very        odd'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'The formatting in this is very odd'
+    end
+
+    it 'strips usernames from strings before detection' do
+      string = '@username Yeah, very surreal...! also @friend'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Yeah, very surreal...! also'
+    end
+
+    it 'strips URLs from strings before detection' do
+      string = 'Our website is https://example.com and also http://localhost.dev'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Our website is and also'
+    end
+
+    it 'strips #hashtags from strings before detection' do
+      string = 'Hey look at all the #animals and #fish'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Hey look at all the and'
+    end
+  end
+
   describe 'to_iso_s' do
     it 'detects english language for basic strings' do
       strings = [
```