diff options
author | multiple creatures <dev@multiple-creature.party> | 2019-07-22 11:39:27 -0500 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2019-07-22 11:41:02 -0500 |
commit | e14d543eddd5ac78f84dbc791ed3768024c33c9c (patch) | |
tree | be79263bbbb337e194060a0879336c558b589802 /app/lib | |
parent | e3ecc0871cc848648131f69758a0cc9220f9d77d (diff) |
handle more edge cases in anchor tagger
Diffstat (limited to 'app/lib')
-rw-r--r-- | app/lib/sanitize_config.rb | 25 |
1 files changed, 14 insertions, 11 deletions
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb index e7b70f078..bc37cd59a 100644 --- a/app/lib/sanitize_config.rb +++ b/app/lib/sanitize_config.rb @@ -36,18 +36,21 @@ class Sanitize href = node['href'] return if href == node.text.strip - # remove query string from link text - node.inner_html = node.inner_html.sub(/\?\S+=\S+/, '') - - # href matches link text without query string? - text = node.text.strip - return if href == text + # href matches link text with sanitized query string? + text = Sanitize::sanitize_query_string(node.text.strip) + if href == text + node.inner_html = "\u2728 #{node.inner_html}" + return + end # strip ellipse & replace keyword search obscuring text = text.sub(/ *(?:\u2026|\.\.\.)\Z/, '').gsub(/ dot /i, '.').gsub(/[\u200b-\u200d\ufeff\u200e\u200f]/, '') # href now matches text without obscuring? - return if href == text + if href == text + node.inner_html = "\u2728 #{node.inner_html}" + return + end # try to detect filenames href_filename = '/'.in?(href) ? href.rpartition('/')[2] : nil @@ -57,10 +60,10 @@ class Sanitize return end - # many fedi servers obfuscate media filenames - ext = text.rpartition('.')[-1] - if ext.downcase.in?(MEDIA_EXTENSIONS) && ext == href_filename.rpartition('.')[2] - node.inner_html = "\xf0\x9f\x93\x8e #{node.inner_html}" + # possibly linked media? + ext = href_filename.rpartition('.')[2] + if ext.downcase.in?(MEDIA_EXTENSIONS) + node.inner_html = "\xf0\x9f\x96\xbc\xef\xb8\x8f #{node.inner_html}" return end end |