Replace incoming remote img tags with a link

author: Thibaut Girka <thib@sitedethib.com> 2019-05-26 15:10:33 +0200
committer: multiple creatures <dev@multiple-creature.party> 2019-11-19 15:42:59 -0600
commit: c0a29cdb71536d64691fbaca0fbd03b5643dfbf7 (patch)
tree: 4e63d40d915939cd507b0e5a66652ada40f11e88 /app/lib/sanitize_config.rb
parent: a78abedd3df32a513c0e0c47c850e769e019adae (diff)
1 file changed, 6 insertions, 77 deletions
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index 76931e86a..1d336fcc3 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -24,86 +24,15 @@ class Sanitize
       node['class'] = class_list.join(' ')
     end
 
-    ANCHOR_SANITIZER = lambda do |env|
-      return unless env[:node_name] == 'a'
+    IMG_TAG_TRANSFORMER = lambda do |env|
       node = env[:node]
-      return if node['href'].blank? || node.text.blank?
 
-      class_list = node['class']&.split(/[\t\n\f\r ]/)
-      return if class_list && (class_list.include?('mention') || class_list.include?('hashtag'))
-
-      # href matches link text verbatim?
-      href = node['href']
-      return if href == node.text.strip
-
-      # href matches link text with sanitized query string?
-      text = Sanitize::sanitize_query_string(node.text.strip)
-      return if text.blank?
-      if href == text
-        node.inner_html = "\u2728 #{node.inner_html}"
-        return
-      end
-
-      # strip ellipse & replace keyword search obscuring
-      text = text.sub(/ *(?:\u2026|\.\.\.)\Z/, '').gsub(/ dot /i, '.').gsub(/[\u200b-\u200d\ufeff\u200e\u200f]/, '')
-
-      # href now matches text without obscuring?
-      if href == text
-        node.inner_html = "\u2728 #{node.inner_html}"
-        return
-      end
-
-      # try to detect pseudomentions
-      if text.start_with?('@') && text.match?(Account::MENTION_RE)
-        username, domain = text[1..-1].split('@', 2)
-        return if href == "https://#{domain}/@#{username}"
-        return if href == "https://#{domain}/#{username}"
-        return if href == "https://#{username}.#{domain}"
-        return if href == "https://#{domain}/users/#{username}"
-        return if href == "https://#{domain}/user/#{username}"
-      end
-
-      # try to detect filenames
-      href_filename = '/'.in?(href) ? href.rpartition('/')[2] : nil
-      unless href_filename.blank? || !('.'.in?(href_filename))
-        # possibly linked media?
-        ext = href_filename.rpartition('.')[2]
-        if ext.downcase.in?(MEDIA_EXTENSIONS)
-          node.inner_html = "\xf0\x9f\x96\xbc\xef\xb8\x8f #{node.inner_html}"
-          return
-        end
-      end
-
-      # grab first url from link text
-      first_url = text.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*\S*/).first
-
-      return if first_url.nil?
-
-      # strip trailing punctuation
-      text.sub!(/\p{Punct}+\Z/, '')
+      return unless env[:node_name] == 'img'
 
-      # href starts with link text?
-      return if href.start_with?(text)
+      node.name = 'a'
 
-      # split href into parts & grab shortened href
-      uri = Addressable::URI.parse(href)
-      short_href = "#{uri.host}#{uri.path}"
-      normalized_short_href = "#{uri.normalized_host}#{uri.normalized_path}"
-
-      # shortened href starts with link text?
-      return if short_href.start_with?(text) || normalized_short_href.start_with?(text)
-
-      # first domain in link text (if there is one) matches href domain?
-      return if short_href == first_url || normalized_short_href == first_url
-
-      # possibly misleading link text
-      node.inner_html = "\u26a0\ufe0f #{node.inner_html}"
-    rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError
-      # strip malformed links
-      node = env[:node]
-      node['href'] = '#'
-      node.children.remove
-      node.inner_html = "\u274c #{node.inner_html}"
+      node['href'] = node['src']
+      node.content = "[🖼 #{node['alt'] || node['href']}]"
     end
 
     QUERY_STRING_SANITIZER = lambda do |env|
@@ -144,7 +73,7 @@ class Sanitize
       transformers: [
         CLASS_WHITELIST_TRANSFORMER,
         QUERY_STRING_SANITIZER,
-        ANCHOR_SANITIZER
+        IMG_TAG_TRANSFORMER,
       ]
     )
author	Thibaut Girka <thib@sitedethib.com>	2019-05-26 15:10:33 +0200
committer	multiple creatures <dev@multiple-creature.party>	2019-11-19 15:42:59 -0600
commit	c0a29cdb71536d64691fbaca0fbd03b5643dfbf7 (patch)
tree	4e63d40d915939cd507b0e5a66652ada40f11e88 /app/lib/sanitize_config.rb
parent	a78abedd3df32a513c0e0c47c850e769e019adae (diff)