diff options
author | multiple creatures <dev@multiple-creature.party> | 2019-11-16 21:01:07 -0600 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2019-11-16 21:01:07 -0600 |
commit | 1132af15151713f52d8d1e320271185865a79633 (patch) | |
tree | c7d5dd6afb7a60c840a0d3238124f01c747eada8 /app/helpers | |
parent | 487c945d160e9349579bf541147c949f8bca3c46 (diff) |
Moved to using a normalized text column for searches. Admins using an FTS-enabled version of Monsterfork will need to apply the migration from `dist/search.sql` and then run `bundle exec rails monsterfork:index_statuses`.
Diffstat (limited to 'app/helpers')
-rw-r--r-- | app/helpers/filter_helper.rb | 2 | ||||
-rw-r--r-- | app/helpers/search_helper.rb | 5 | ||||
-rw-r--r-- | app/helpers/text_helper.rb | 31 |
3 files changed, 35 insertions, 3 deletions
diff --git a/app/helpers/filter_helper.rb b/app/helpers/filter_helper.rb index a4a86496f..77ffa98be 100644 --- a/app/helpers/filter_helper.rb +++ b/app/helpers/filter_helper.rb @@ -21,7 +21,7 @@ module FilterHelper return false if filters.empty? status = status.reblog if status.reblog? - status_text = Formatter.instance.plaintext(status) + status_text = status.normalized_text spoiler_text = status.spoiler_text tags = status.tags.pluck(:name).join("\n") descs = status.media_attachments.map { |a| a.description }.join("\n").strip diff --git a/app/helpers/search_helper.rb b/app/helpers/search_helper.rb index 96da161f1..0f3d09c36 100644 --- a/app/helpers/search_helper.rb +++ b/app/helpers/search_helper.rb @@ -1,6 +1,7 @@ -module SearchHelper +require 'sixarm_ruby_unaccent' +module SearchHelper def expand_search_query(query) - query.gsub(/"(.*)"/, '\\y\1\\y') + query.downcase.unaccent.gsub(/"(.*)"/, '\\y\1\\y') end end diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb new file mode 100644 index 000000000..c042c0342 --- /dev/null +++ b/app/helpers/text_helper.rb @@ -0,0 +1,31 @@ +# coding: utf-8 +require 'htmlentities' +require 'sixarm_ruby_unaccent' + +module TextHelper + + def normalize_text(html) + t = html.downcase + + t.gsub!(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n") + t.gsub!(/<[bh]r[\/ ]*>/, "\n") + t.gsub!(/<\/?[^>]*>/, '') + + t = HTMLEntities.new.decode(t) + + t.gsub!(/[ \t]*\302\240+[ \t]*/, ' ') + t.gsub!(/ +/, ' ') + + t.gsub!(/\r\n?/, "\n") + t.gsub!(/\n[ \t]+/, "\n") + t.gsub!(/[ \t]+\n/, "\n") + t.gsub!(/\n\n+/, "\n") + + t.unaccent_via_split_map.strip + end + + def normalize_status(status) + return normalize_text("#{status.spoiler_text}\n#{status.text}") unless status.local? + normalize_text("#{status.spoiler_text}\n#{Formatter.instance.format(status)}") + end +end |