diff options
author | multiple creatures <dev@multiple-creature.party> | 2019-12-13 00:43:20 -0600 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2019-12-13 00:43:20 -0600 |
commit | 4b0612c11db1ff2f2206b23d16586f6c3c31e2af (patch) | |
tree | 9240fa54cd581e6ae7856040bbf8466447937d25 /app/helpers/text_helper.rb | |
parent | a547f442e02562f33b26a4a473bcc847bde4e48e (diff) |
improve normalization
Diffstat (limited to 'app/helpers/text_helper.rb')
-rw-r--r-- | app/helpers/text_helper.rb | 21 |
1 files changed, 10 insertions, 11 deletions
```diff
diff --git a/app/helpers/text_helper.rb b/app/helpers/text_helper.rb
index b60eee22d..16bb3f66e 100644
--- a/app/helpers/text_helper.rb
+++ b/app/helpers/text_helper.rb
@@ -13,20 +13,19 @@ module TextHelper
     HTMLEntities.new.decode(html)
   end
 
-  def normalize_text(html)
-    html.downcase
+  def normalize_text(text)
+    text.downcase
       .gsub(Account::MENTION_RE, '')
-      .gsub(/[ \t]*\302\240+[ \t]*/, ' ')
-      .gsub(/ +/, ' ')
+      .gsub(/^(?:#[\w:._·\-]+\s*)+|(?:#[\w:._·\-]+\s*)+$/, '')
+      .gsub(/\s*\302\240+\s*/, ' ')
+      .gsub(/\n\s+|\s+\n/, "\n")
       .gsub(/\r\n?/, "\n")
-      .gsub(/\n[ \t]+/, "\n")
-      .gsub(/[ \t]+\n/, "\n")
       .gsub(/\n\n+/, "\n")
-      .gsub(/^(?:#[\w:._·\-]+\s*)+/, '')
-      .gsub(/(?:#[\w:._·\-]+\s*)+$/, '')
-      .delete('#')
-      .strip
       .unaccent_via_split_map
+      .gsub(/(?:htt|ft)ps?:\/\//, '')
+      .gsub(/[^\n\p{Word} [:punct:]]/, '')
+      .gsub(/ +/, ' ')
+      .strip
   end
 
   def normalize_status(status)
@@ -53,7 +52,7 @@ module TextHelper
   def _format_desc(status)
     return unless status.media_attachments.present?
 
-    text = status.media_attachments.pluck(:description).join("\ndesc ")
+    text = status.media_attachments.pluck(:description).compact.join("\ndesc ")
     "desc #{normalize_text(text)}"
   end
 end
```