about summary refs log tree commit diff
diff options
context:
space:
mode:
author multiple creatures <dev@multiple-creature.party> 2020-01-10 04:14:37 -0600
committer multiple creatures <dev@multiple-creature.party> 2020-01-10 04:14:37 -0600
commit a29fb04e7c1c7a719a29f40da275d4981ef2ebb5 (patch)
tree 87eaab291a77a3056938bdc5410bd90818044e08
parent f03960382bd05b8570e0e3b1066545831c59138a (diff)
bon voyage to that shitty text normalization code
-rw-r--r-- app/helpers/text_helper.rb 58
-rw-r--r-- app/models/status.rb 12
-rw-r--r-- lib/tasks/monsterfork.rake 42
3 files changed, 0 insertions, 112 deletions
diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb
deleted file mode 100644
index 16bb3f66e..000000000
--- a/app/helpers/text_helper.rb
+++ /dev/null
@@ -1,58 +0,0 @@
-# coding: utf-8
-require 'htmlentities'
-require 'sixarm_ruby_unaccent'
-
-module TextHelper
-
-  def html2text(html)
-    html = html
-      .gsub(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n")
-      .gsub(/<[bh]r[\/ ]*>/, "\n")
-      .gsub(/<\/?[^>]*>/, '')
-
-    HTMLEntities.new.decode(html)
-  end
-
-  def normalize_text(text)
-    text.downcase
-      .gsub(Account::MENTION_RE, '')
-      .gsub(/^(?:#[\w:._·\-]+\s*)+|(?:#[\w:._·\-]+\s*)+$/, '')
-      .gsub(/\s*\302\240+\s*/, ' ')
-      .gsub(/\n\s+|\s+\n/, "\n")
-      .gsub(/\r\n?/, "\n")
-      .gsub(/\n\n+/, "\n")
-      .unaccent_via_split_map
-      .gsub(/(?:htt|ft)ps?:\/\//, '')
-      .gsub(/[^\n\p{Word} [:punct:]]/, '')
-      .gsub(/  +/, ' ')
-      .strip
-  end
-
-  def normalize_status(status)
-    "#{_format_tags(status)}\n#{_format_spoiler(status)}\n#{_format_status(status)}\n#{_format_desc(status)}".strip
-  end
-
-  def _format_tags(status)
-    return unless status.tags.present?
-    "tag #{status.tags.pluck(:name).join("\ntag ")}"
-  end
-
-  def _format_spoiler(status)
-    return if status.spoiler_text.blank?
-    "subj #{normalize_text(status.spoiler_text)}"
-  end
-
-  def _format_status(status)
-    text = status.local? ? Formatter.instance.format(status) : status.text
-    return if text.blank?
-    text = normalize_text(html2text(text))
-    text.gsub!("\n", "\ntext ")
-    "text #{text}"
-  end
-
-  def _format_desc(status)
-    return unless status.media_attachments.present?
-    text = status.media_attachments.pluck(:description).compact.join("\ndesc ")
-    "desc #{normalize_text(text)}"
-  end
-end
diff --git a/app/models/status.rb b/app/models/status.rb
index d051d8962..f5f51779f 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -40,7 +40,6 @@ class Status < ApplicationRecord
   include Streamable
   include Cacheable
   include StatusThreadingConcern
-  include TextHelper
 
   # match both with and without U+FE0F (the emoji variation selector)
   LOCAL_ONLY_TOKENS = /(?:#!|\u{1f441}\ufe0f?)\u200b?\z/
@@ -358,7 +357,6 @@ class Status < ApplicationRecord
 
   after_save :update_sharekey, if: :local?
   after_save :update_origin, if: :local?
-  after_save :update_normalized_text
   after_save :process_bangtags, if: :local?
 
   class << self
@@ -680,16 +678,6 @@ class Status < ApplicationRecord
     end
   end
 
-  def update_normalized_text
-    return if destroyed? || text.blank? || !(text_changed? || saved_change_to_text?)
-    normalized_text = normalize_status(self)
-    if self.normalized_status.nil?
-      self.create_normalized_status(text: normalized_text)
-    else
-      self.normalized_status.update_attributes(text: normalized_text)
-    end
-  end
-
   def set_conversation
     self.thread = thread.reblog if thread&.reblog?
 
diff --git a/lib/tasks/monsterfork.rake b/lib/tasks/monsterfork.rake
index 14e001f4b..a5a5ea434 100644
--- a/lib/tasks/monsterfork.rake
+++ b/lib/tasks/monsterfork.rake
@@ -1,48 +1,6 @@
 # frozen_string_literal: true
 
-def index_statuses(statuses_query)
-  include TextHelper
-
-  i = 0
-  total = statuses_query.count
-
-  statuses_query.find_in_batches do |statuses|
-    ActiveRecord::Base.logger.info("Indexing status #{1+i} of #{total}.")
-    ActiveRecord::Base.logger.silence do
-      i += statuses.count
-      statuses.each do |s|
-        begin
-          next if s.destroyed?
-          normalized_text = normalize_status(s)
-          if s.normalized_status.nil?
-            s.create_normalized_status(text: normalized_text)
-          elsif s.normalized_status.text != normalized_text
-            s.normalized_status.update_column(:text, normalized_text)
-          end
-        rescue ActiveRecord::RecordNotFound
-          true
-        end
-      end
-    end
-  end
-end
-
 namespace :monsterfork do
-  desc 'Index statuses for search that have not been indexed yet.'
-  task index_statuses: :environment do
-    index_statuses(Status.where(normalized_text: ''))
-  end
-
-  desc 'Reindex all statuses for search.'
-  task reindex_statuses: :environment do
-    index_statuses(Status)
-  end
-
-  desc 'Reindex statuses containing media with descriptions for search.'
-  task reindex_media_descs: :environment do
-    index_statuses(Status.left_outer_joins(:media_attachments).where('media_attachments.description IS NOT NULL'))
-  end
-
   desc "Re-apply all users' filters to their home and list timelines."
   task reapply_filters: :environment do
     Account.local.find_each do |account|