about summary refs log tree commit diff
path: root/app/helpers
diff options
context:
space:
mode:
author multiple creatures <dev@multiple-creature.party> 2019-11-16 21:01:07 -0600
committer multiple creatures <dev@multiple-creature.party> 2019-11-16 21:01:07 -0600
commit 1132af15151713f52d8d1e320271185865a79633 (patch)
tree c7d5dd6afb7a60c840a0d3238124f01c747eada8 /app/helpers
parent 487c945d160e9349579bf541147c949f8bca3c46 (diff)
Moved to using a normalized text column for searches. Admins using an FTS-enabled version of Monsterfork will need to apply the migration from `dist/search.sql` then run `bundle exec rails monsterfork:index_statuses`.
Diffstat (limited to 'app/helpers')
-rw-r--r-- app/helpers/filter_helper.rb 2
-rw-r--r-- app/helpers/search_helper.rb 5
-rw-r--r-- app/helpers/text_helper.rb 31
3 files changed, 35 insertions, 3 deletions
diff --git a/app/helpers/filter_helper.rb b/app/helpers/filter_helper.rb
index a4a86496f..77ffa98be 100644
--- a/app/helpers/filter_helper.rb
+++ b/app/helpers/filter_helper.rb
@@ -21,7 +21,7 @@ module FilterHelper
     return false if filters.empty?
 
     status = status.reblog if status.reblog?
-    status_text = Formatter.instance.plaintext(status)
+    status_text = status.normalized_text
     spoiler_text = status.spoiler_text
     tags = status.tags.pluck(:name).join("\n")
     descs = status.media_attachments.map { |a| a.description }.join("\n").strip
diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb
index 96da161f1..0f3d09c36 100644
--- a/app/helpers/search_helper.rb
+++ b/app/helpers/search_helper.rb
@@ -1,6 +1,7 @@
-module SearchHelper
+require 'sixarm_ruby_unaccent'
 
+module SearchHelper
 	def expand_search_query(query)
-    query.gsub(/"(.*)"/, '\\y\1\\y')
+    query.downcase.unaccent.gsub(/"(.*)"/, '\\y\1\\y')
   end
 end
diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb
new file mode 100644
index 000000000..c042c0342
--- /dev/null
+++ b/app/helpers/text_helper.rb
@@ -0,0 +1,31 @@
+# coding: utf-8
+require 'htmlentities'
+require 'sixarm_ruby_unaccent'
+
+module TextHelper
+
+  def normalize_text(html)
+    t = html.downcase
+
+    t.gsub!(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n")
+    t.gsub!(/<[bh]r[\/ ]*>/, "\n")
+    t.gsub!(/<\/?[^>]*>/, '')
+
+    t = HTMLEntities.new.decode(t)
+
+    t.gsub!(/[ \t]*\302\240+[ \t]*/, ' ')
+    t.gsub!(/  +/, ' ')
+
+    t.gsub!(/\r\n?/, "\n")
+    t.gsub!(/\n[ \t]+/, "\n")
+    t.gsub!(/[ \t]+\n/, "\n")
+    t.gsub!(/\n\n+/, "\n")
+
+    t.unaccent_via_split_map.strip
+  end
+
+  def normalize_status(status)
+    return normalize_text("#{status.spoiler_text}\n#{status.text}") unless status.local?
+    normalize_text("#{status.spoiler_text}\n#{Formatter.instance.format(status)}")
+  end
+end