about summary refs log tree commit diff
path: root/app/helpers/text_helper.rb
blob: 16bb3f66e1ff139a11b7a21b8179359a0c0a5013 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# coding: utf-8
require 'htmlentities'
require 'sixarm_ruby_unaccent'

# Helpers that flatten HTML / status content into a normalized, line-oriented
# plain-text form in which every line carries a short label
# ("tag", "subj", "text" or "desc").
module TextHelper
  # Converts an HTML fragment to plain text.
  #
  # Opening block-level tags (p, pre, blockquote, code, h1-h6, li) and br/hr
  # tags become newlines, every remaining tag is dropped, and HTML entities
  # are decoded with HTMLEntities. Tag matching is case-insensitive and
  # tolerates attributes (e.g. <BR> or <br class="x">).
  #
  # @param html [String] HTML markup
  # @return [String] the text content with entities decoded
  def html2text(html)
    text = html
      .gsub(/<(?:p|pre|blockquote|code|h[1-6]|li)\b[^>]*>/i, "\n")
      .gsub(/<[bh]r\b[^>]*>/i, "\n")
      .gsub(/<\/?[^>]*>/, '')

    HTMLEntities.new.decode(text)
  end

  # Reduces free text to a canonical lowercase form.
  #
  # The result has no leading/trailing whitespace, no blank lines, no
  # whitespace next to a newline and no runs of spaces.
  #
  # @param text [String] plain text
  # @return [String] normalized text (possibly empty)
  def normalize_text(text)
    text.downcase
      # Drop whatever Account::MENTION_RE matches (presumably @mentions;
      # the pattern is defined outside this file).
      .gsub(Account::MENTION_RE, '')
      # Drop runs of #hashtags at the start or end of a line.
      .gsub(/^(?:#[\w:._·\-]+\s*)+|(?:#[\w:._·\-]+\s*)+$/, '')
      # \302\240 is the UTF-8 encoding of U+00A0 (no-break space).
      .gsub(/\s*\302\240+\s*/, ' ')
      # Unify line endings before the character filter below, which would
      # otherwise delete a bare CR and fuse the two lines it separated.
      .gsub(/\r\n?/, "\n")
      # From sixarm_ruby_unaccent; presumably folds accented characters.
      .unaccent_via_split_map
      # Strip URL schemes (http, https, ftp, ftps).
      .gsub(/(?:htt|ft)ps?:\/\//, '')
      # Keep only newlines, word characters, spaces and punctuation.
      .gsub(/[^\n\p{Word} [:punct:]]/, '')
      # Collapse whitespace around newlines (and blank lines) last, so that
      # gaps opened by the removals above are cleaned up as well.
      .gsub(/\s*\n\s*/, "\n")
      .gsub(/  +/, ' ')
      .strip
  end

  # Builds the labelled plain-text representation of a status: its tags,
  # spoiler text, body and media descriptions, in that order. Sections with
  # no content are omitted entirely, so the result contains no blank lines.
  #
  # @param status [#tags, #spoiler_text, #text, #local?, #media_attachments]
  # @return [String] newline-separated labelled lines ('' when nothing applies)
  def normalize_status(status)
    [
      _format_tags(status),
      _format_spoiler(status),
      _format_status(status),
      _format_desc(status),
    ].compact.join("\n").strip
  end

  # @return [String, nil] one "tag <name>" line per tag, or nil without tags
  def _format_tags(status)
    return unless status.tags.present?
    status.tags.pluck(:name).map { |name| "tag #{name}" }.join("\n")
  end

  # @return [String, nil] the normalized spoiler text with every line
  #   labelled "subj", or nil when nothing remains after normalization
  def _format_spoiler(status)
    return if status.spoiler_text.blank?
    subject = normalize_text(status.spoiler_text)
    return if subject.empty?
    _prefix_lines('subj', subject)
  end

  # @return [String, nil] the normalized status body with every line
  #   labelled "text", or nil when nothing remains after normalization
  def _format_status(status)
    # Local statuses go through Formatter first (presumably yielding HTML);
    # for other statuses the stored text is used directly.
    raw = status.local? ? Formatter.instance.format(status) : status.text
    return if raw.blank?
    body = normalize_text(html2text(raw))
    return if body.empty?
    _prefix_lines('text', body)
  end

  # @return [String, nil] the normalized media descriptions with every line
  #   labelled "desc", or nil when no attachment has a usable description
  def _format_desc(status)
    return unless status.media_attachments.present?
    # Normalize each description on its own so the "desc" labels are never
    # fed through the normalizer and hashtag stripping applies to all of them.
    descriptions = status.media_attachments.pluck(:description).compact
      .map { |description| normalize_text(description) }
      .reject(&:empty?)
    return if descriptions.empty?
    descriptions.map { |description| _prefix_lines('desc', description) }.join("\n")
  end

  # Prepends "<label> " to every line of +text+.
  #
  # @param label [String] line label such as 'text' or 'desc'
  # @param text [String] non-empty, newline-separated text
  # @return [String]
  def _prefix_lines(label, text)
    "#{label} #{text.gsub("\n", "\n#{label} ")}"
  end
end