about summary refs log tree commit diff
path: root/spec
diff options
context:
space:
mode:
author Matt Jankowski <mjankowski@thoughtbot.com> 2017-06-01 09:29:14 -0400
committer GitHub <noreply@github.com> 2017-06-01 09:29:14 -0400
commit d010e270e613f6299397601289158bd2acedbe8e (patch)
tree 6bc639705b9e53be8efd6ff3b7fe791b5a9e13ad /spec
parent d1e08bd38c029f0b47dfd2f3ba61ca5bb3e414b8 (diff)
Remove usernames and hashtags from language detection (#3503)
* Add failing specs for hashtag and username extraction in language detector

* Remove usernames and hashtags from text before language detection

* Handle multiple instances of special case, and reduce whitespace
Diffstat (limited to 'spec')
-rw-r--r-- spec/lib/language_detector_spec.rb 38
1 files changed, 38 insertions, 0 deletions
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
index e543edd49..ace7a326a 100644
--- a/spec/lib/language_detector_spec.rb
+++ b/spec/lib/language_detector_spec.rb
@@ -1,7 +1,45 @@
 # frozen_string_literal: true
+
 require 'rails_helper'
 
 describe LanguageDetector do
+  describe 'prepared_text' do
+    it 'returns unmodified string without special cases' do
+      string = 'just a regular string'
+      result = described_class.new(string).prepared_text
+
+      expect(result).to eq string
+    end
+
+    it 'collapses spacing in strings' do
+      string = 'The formatting   in    this is very        odd'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'The formatting in this is very odd'
+    end
+
+    it 'strips usernames from strings before detection' do
+      string = '@username Yeah, very surreal...! also @friend'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Yeah, very surreal...! also'
+    end
+
+    it 'strips URLs from strings before detection' do
+      string = 'Our website is https://example.com and also http://localhost.dev'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Our website is and also'
+    end
+
+    it 'strips #hashtags from strings before detection' do
+      string = 'Hey look at all the #animals and #fish'
+
+      result = described_class.new(string).prepared_text
+      expect(result).to eq 'Hey look at all the and'
+    end
+  end
+
   describe 'to_iso_s' do
     it 'detects english language for basic strings' do
       strings = [