diff options
author | multiple creatures <dev@multiple-creature.party> | 2019-11-16 21:01:07 -0600 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2019-11-16 21:01:07 -0600 |
commit | 1132af15151713f52d8d1e320271185865a79633 (patch) | |
tree | c7d5dd6afb7a60c840a0d3238124f01c747eada8 /app | |
parent | 487c945d160e9349579bf541147c949f8bca3c46 (diff) |
Moved to using a normalized text column for searches. Admins using an FTS-enabled version of Monsterfork will need to apply the migration from `dist/search.sql`, then run `bundle exec rails monsterfork:index_statuses`.
Diffstat (limited to 'app')
-rw-r--r-- | app/helpers/filter_helper.rb | 2 | ||||
-rw-r--r-- | app/helpers/search_helper.rb | 5 | ||||
-rw-r--r-- | app/helpers/text_helper.rb | 31 | ||||
-rw-r--r-- | app/lib/bangtags.rb | 2 | ||||
-rw-r--r-- | app/models/status.rb | 15 |
5 files changed, 49 insertions, 6 deletions
diff --git a/app/helpers/filter_helper.rb b/app/helpers/filter_helper.rb index a4a86496f..77ffa98be 100644 --- a/app/helpers/filter_helper.rb +++ b/app/helpers/filter_helper.rb @@ -21,7 +21,7 @@ module FilterHelper return false if filters.empty? status = status.reblog if status.reblog? - status_text = Formatter.instance.plaintext(status) + status_text = status.normalized_text spoiler_text = status.spoiler_text tags = status.tags.pluck(:name).join("\n") descs = status.media_attachments.map { |a| a.description }.join("\n").strip diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb index 96da161f1..0f3d09c36 100644 --- a/app/helpers/search_helper.rb +++ b/app/helpers/search_helper.rb @@ -1,6 +1,7 @@ -module SearchHelper +require 'sixarm_ruby_unaccent' +module SearchHelper def expand_search_query(query) - query.gsub(/"(.*)"/, '\\y\1\\y') + query.downcase.unaccent.gsub(/"(.*)"/, '\\y\1\\y') end end diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb new file mode 100644 index 000000000..c042c0342 --- /dev/null +++ b/app/helpers/text_helper.rb @@ -0,0 +1,31 @@ +# coding: utf-8 +require 'htmlentities' +require 'sixarm_ruby_unaccent' + +module TextHelper + + def normalize_text(html) + t = html.downcase + + t.gsub!(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n") + t.gsub!(/<[bh]r[\/ ]*>/, "\n") + t.gsub!(/<\/?[^>]*>/, '') + + t = HTMLEntities.new.decode(t) + + t.gsub!(/[ \t]*\302\240+[ \t]*/, ' ') + t.gsub!(/ +/, ' ') + + t.gsub!(/\r\n?/, "\n") + t.gsub!(/\n[ \t]+/, "\n") + t.gsub!(/[ \t]+\n/, "\n") + t.gsub!(/\n\n+/, "\n") + + t.unaccent_via_split_map.strip + end + + def normalize_status(status) + return normalize_text("#{status.spoiler_text}\n#{status.text}") unless status.local? 
+ normalize_text("#{status.spoiler_text}\n#{Formatter.instance.format(status)}") + end +end diff --git a/app/lib/bangtags.rb b/app/lib/bangtags.rb index 60fb426b3..d708683cb 100644 --- a/app/lib/bangtags.rb +++ b/app/lib/bangtags.rb @@ -720,7 +720,7 @@ class Bangtags q = cmd[1..-1].join.strip next if q.blank? begin - data = @account.statuses.where('text ~* ?', expand_search_query(q)) + data = @account.statuses.where('normalized_text ~ ?', expand_search_query(q)) .reorder(:created_at) .pluck(:created_at) .map { |d| d.strftime('%Y-%m') } diff --git a/app/models/status.rb b/app/models/status.rb index 29c4f6bd1..a2d2a8f28 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -31,9 +31,9 @@ # edited :boolean # imported :boolean # origin :string -# tsv :tsvector # boostable :boolean # reject_replies :boolean +# normalized_text :text default(""), not null # class Status < ApplicationRecord @@ -43,6 +43,7 @@ class Status < ApplicationRecord include Streamable include Cacheable include StatusThreadingConcern + include TextHelper # match both with and without U+FE0F (the emoji variation selector) LOCAL_ONLY_TOKENS = /(?:#!|\u{1f441}\ufe0f?)\u200b?\z/ @@ -324,6 +325,7 @@ class Status < ApplicationRecord around_create Mastodon::Snowflake::Callbacks before_create :set_locality + before_create :update_normalized_text before_validation :prepare_contents, if: :local? before_validation :set_reblog @@ -334,6 +336,9 @@ class Status < ApplicationRecord after_create :set_poll_id after_create :process_bangtags, if: :local? + after_create :update_normalized_text + + after_update :update_normalized_text class << self include SearchHelper @@ -350,7 +355,7 @@ class Status < ApplicationRecord end return none if term.blank? 
|| term.length < 3 query = query.without_reblogs - .where('text ~* ?', expand_search_query(term)) + .where('normalized_text ~ ?', expand_search_query(term)) .offset(offset).limit(limit) apply_timeline_filters(query, account, true) rescue ActiveRecord::StatementInvalid @@ -618,6 +623,12 @@ class Status < ApplicationRecord Bangtags.new(self).process end + def update_normalized_text + return unless (normalized_text.blank? && !text.blank?) || saved_change_to_text? + Rails.cache.delete("formatted_status:#{status.id}") + self.normalized_text = normalize_status(self) + end + def set_conversation self.thread = thread.reblog if thread&.reblog? |