about summary refs log tree commit diff
path: root/app/lib
diff options
context:
space:
mode:
Diffstat (limited to 'app/lib')
-rw-r--r-- app/lib/activitypub/activity/create.rb | 2
-rw-r--r-- app/lib/extractor.rb | 3
-rw-r--r-- app/lib/language_detector.rb | 31
3 files changed, 28 insertions, 8 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index 376684c00..66e4f7c5e 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -173,7 +173,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
   end
 
   def language_from_content
-    return nil unless language_map?
+    return LanguageDetector.instance.detect(text_from_content, @account) unless language_map?
     @object['contentMap'].keys.first
   end
 
diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
index 957364293..738ec89a0 100644
--- a/app/lib/extractor.rb
+++ b/app/lib/extractor.rb
@@ -5,7 +5,8 @@ module Extractor
 
   module_function
 
-  def extract_mentions_or_lists_with_indices(text) # :yields: username, list_slug, start, end
+  # :yields: username, list_slug, start, end
+  def extract_mentions_or_lists_with_indices(text)
     return [] unless text =~ Twitter::Regex[:at_signs]
 
     possible_entries = []
diff --git a/app/lib/language_detector.rb b/app/lib/language_detector.rb
index a42460e10..c6f52f0c7 100644
--- a/app/lib/language_detector.rb
+++ b/app/lib/language_detector.rb
@@ -38,12 +38,31 @@ class LanguageDetector
   end
 
   def simplify_text(text)
-    text.dup.tap do |new_text|
-      new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
-      new_text.gsub!(Account::MENTION_RE, '')
-      new_text.gsub!(Tag::HASHTAG_RE, '')
-      new_text.gsub!(/\s+/, ' ')
-    end
+    new_text = remove_html(text)
+    new_text.gsub!(FetchLinkCardService::URL_PATTERN, '')
+    new_text.gsub!(Account::MENTION_RE, '')
+    new_text.gsub!(Tag::HASHTAG_RE, '')
+    new_text.gsub!(/:#{CustomEmoji::SHORTCODE_RE_FRAGMENT}:/, '')
+    new_text.gsub!(/\s+/, ' ')
+    new_text
+  end
+
+  def new_scrubber
+    scrubber = Rails::Html::PermitScrubber.new
+    scrubber.tags = %w(br p)
+    scrubber
+  end
+
+  def scrubber
+    @scrubber ||= new_scrubber
+  end
+
+  def remove_html(text)
+    text = Loofah.fragment(text).scrub!(scrubber).to_s
+    text.gsub!('<br>', "\n")
+    text.gsub!('</p><p>', "\n\n")
+    text.gsub!(/(^<p>|<\/p>$)/, '')
+    text
   end
 
   def default_locale(account)