about summary refs log tree commit diff
diff options
context:
space:
mode:
author multiple creatures <dev@multiple-creature.party> 2019-12-13 00:43:20 -0600
committer multiple creatures <dev@multiple-creature.party> 2019-12-13 00:43:20 -0600
commit 4b0612c11db1ff2f2206b23d16586f6c3c31e2af (patch)
tree 9240fa54cd581e6ae7856040bbf8466447937d25
parent a547f442e02562f33b26a4a473bcc847bde4e48e (diff)
improve normalization
-rw-r--r-- app/helpers/search_helper.rb 3
-rw-r--r-- app/helpers/text_helper.rb 21
2 files changed, 12 insertions, 12 deletions
diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb
index 9510abe99..8bddbe187 100644
--- a/app/helpers/search_helper.rb
+++ b/app/helpers/search_helper.rb
@@ -3,7 +3,8 @@ require 'sixarm_ruby_unaccent'
 module SearchHelper
 	def expand_search_query(query)
     return '' if query.blank?
-    query = query.strip.downcase.unaccent
+    query = query.downcase.unaccent.gsub(/[^\p{Word} [:punct:]]/, '').gsub(/  +/, ' ').strip
+    return '' if query.blank?
 
     if query.include?(':')
       query_parts = query.split(':', 2)
diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb
index b60eee22d..16bb3f66e 100644
--- a/app/helpers/text_helper.rb
+++ b/app/helpers/text_helper.rb
@@ -13,20 +13,19 @@ module TextHelper
     HTMLEntities.new.decode(html)
   end
 
-  def normalize_text(html)
-    html.downcase
+  def normalize_text(text)
+    text.downcase
       .gsub(Account::MENTION_RE, '')
-      .gsub(/[ \t]*\302\240+[ \t]*/, ' ')
-      .gsub(/  +/, ' ')
+      .gsub(/^(?:#[\w:._·\-]+\s*)+|(?:#[\w:._·\-]+\s*)+$/, '')
+      .gsub(/\s*\302\240+\s*/, ' ')
+      .gsub(/\n\s+|\s+\n/, "\n")
       .gsub(/\r\n?/, "\n")
-      .gsub(/\n[ \t]+/, "\n")
-      .gsub(/[ \t]+\n/, "\n")
       .gsub(/\n\n+/, "\n")
-      .gsub(/^(?:#[\w:._·\-]+\s*)+/, '')
-      .gsub(/(?:#[\w:._·\-]+\s*)+$/, '')
-      .delete('#')
-      .strip
       .unaccent_via_split_map
+      .gsub(/(?:htt|ft)ps?:\/\//, '')
+      .gsub(/[^\n\p{Word} [:punct:]]/, '')
+      .gsub(/  +/, ' ')
+      .strip
   end
 
   def normalize_status(status)
@@ -53,7 +52,7 @@ module TextHelper
 
   def _format_desc(status)
     return unless status.media_attachments.present?
-    text = status.media_attachments.pluck(:description).join("\ndesc ")
+    text = status.media_attachments.pluck(:description).compact.join("\ndesc ")
     "desc #{normalize_text(text)}"
   end
 end