diff options
author | multiple creatures <dev@multiple-creature.party> | 2020-01-10 04:14:37 -0600 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2020-01-10 04:14:37 -0600 |
commit | a29fb04e7c1c7a719a29f40da275d4981ef2ebb5 (patch) | |
tree | 87eaab291a77a3056938bdc5410bd90818044e08 /app/helpers | |
parent | f03960382bd05b8570e0e3b1066545831c59138a (diff) |
bon voyage to that shitty text normalization code
Diffstat (limited to 'app/helpers')
-rw-r--r-- | app/helpers/text_helper.rb | 58 |
1 files changed, 0 insertions, 58 deletions
```diff
diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb
deleted file mode 100644
index 16bb3f66e..000000000
--- a/app/helpers/text_helper.rb
+++ /dev/null
@@ -1,58 +0,0 @@
-# coding: utf-8
-require 'htmlentities'
-require 'sixarm_ruby_unaccent'
-
-module TextHelper
-
-  def html2text(html)
-    html = html
-      .gsub(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/, "\n")
-      .gsub(/<[bh]r[\/ ]*>/, "\n")
-      .gsub(/<\/?[^>]*>/, '')
-
-    HTMLEntities.new.decode(html)
-  end
-
-  def normalize_text(text)
-    text.downcase
-      .gsub(Account::MENTION_RE, '')
-      .gsub(/^(?:#[\w:._·\-]+\s*)+|(?:#[\w:._·\-]+\s*)+$/, '')
-      .gsub(/\s*\302\240+\s*/, ' ')
-      .gsub(/\n\s+|\s+\n/, "\n")
-      .gsub(/\r\n?/, "\n")
-      .gsub(/\n\n+/, "\n")
-      .unaccent_via_split_map
-      .gsub(/(?:htt|ft)ps?:\/\//, '')
-      .gsub(/[^\n\p{Word} [:punct:]]/, '')
-      .gsub(/ +/, ' ')
-      .strip
-  end
-
-  def normalize_status(status)
-    "#{_format_tags(status)}\n#{_format_spoiler(status)}\n#{_format_status(status)}\n#{_format_desc(status)}".strip
-  end
-
-  def _format_tags(status)
-    return unless status.tags.present?
-    "tag #{status.tags.pluck(:name).join("\ntag ")}"
-  end
-
-  def _format_spoiler(status)
-    return if status.spoiler_text.blank?
-    "subj #{normalize_text(status.spoiler_text)}"
-  end
-
-  def _format_status(status)
-    text = status.local? ? Formatter.instance.format(status) : status.text
-    return if text.blank?
-    text = normalize_text(html2text(text))
-    text.gsub!("\n", "\ntext ")
-    "text #{text}"
-  end
-
-  def _format_desc(status)
-    return unless status.media_attachments.present?
-    text = status.media_attachments.pluck(:description).compact.join("\ndesc ")
-    "desc #{normalize_text(text)}"
-  end
-end
```