about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--app/lib/sanitize_config.rb45
-rw-r--r--spec/lib/sanitize_config_spec.rb16
2 files changed, 55 insertions, 6 deletions
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index a82411127..4ad1199a6 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -2,7 +2,23 @@
 
 class Sanitize
   module Config
-    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
+    HTTP_PROTOCOLS = %w(
+      http
+      https
+    ).freeze
+
+    LINK_PROTOCOLS = %w(
+      http
+      https
+      dat
+      dweb
+      ipfs
+      ipns
+      ssb
+      gopher
+      xmpp
+      magnet
+    ).freeze
 
     CLASS_WHITELIST_TRANSFORMER = lambda do |env|
       node = env[:node]
@@ -19,19 +35,37 @@ class Sanitize
       node['class'] = class_list.join(' ')
     end
 
+    UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
+      return unless env[:node_name] == 'a'
+
+      current_node = env[:node]
+
+      scheme = begin
+        if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
+          Regexp.last_match(1).downcase
+        else
+          :relative
+        end
+      end
+
+      current_node.replace(current_node.text) unless LINK_PROTOCOLS.include?(scheme)
+    end
+
     UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
       return unless %w(h1 h2 h3 h4 h5 h6 blockquote pre ul ol li).include?(env[:node_name])
 
+      current_node = env[:node]
+
       case env[:node_name]
       when 'li'
-        env[:node].traverse do |node|
+        current_node.traverse do |node|
           next unless %w(p ul ol li).include?(node.name)
 
           node.add_next_sibling('<br>') if node.next_sibling
           node.replace(node.children) unless node.text?
         end
       else
-        env[:node].name = 'p'
+        current_node.name = 'p'
       end
     end
 
@@ -50,13 +84,12 @@ class Sanitize
         },
       },
 
-      protocols: {
-        'a' => { 'href' => HTTP_PROTOCOLS },
-      },
+      protocols: {},
 
       transformers: [
         CLASS_WHITELIST_TRANSFORMER,
         UNSUPPORTED_ELEMENTS_TRANSFORMER,
+        UNSUPPORTED_HREF_TRANSFORMER,
       ]
     )
 
diff --git a/spec/lib/sanitize_config_spec.rb b/spec/lib/sanitize_config_spec.rb
index feb86af35..d66302e64 100644
--- a/spec/lib/sanitize_config_spec.rb
+++ b/spec/lib/sanitize_config_spec.rb
@@ -26,5 +26,21 @@ describe Sanitize::Config do
     it 'keep links in lists' do
       expect(Sanitize.fragment('<p>Check out:</p><ul><li><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a></li><li>Bar</li></ul>', subject)).to eq '<p>Check out:</p><p><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a><br>Bar</p>'
     end
+
+    it 'removes a without href' do
+      expect(Sanitize.fragment('<a>Test</a>', subject)).to eq 'Test'
+    end
+
+    it 'removes a without href and only keeps text content' do
+      expect(Sanitize.fragment('<a><span class="invisible">foo&amp;</span><span>Test</span></a>', subject)).to eq 'foo&amp;Test'
+    end
+
+    it 'removes a with unsupported scheme in href' do
+      expect(Sanitize.fragment('<a href="foo://bar">Test</a>', subject)).to eq 'Test'
+    end
+
+    it 'keeps a with href' do
+      expect(Sanitize.fragment('<a href="http://example.com">Test</a>', subject)).to eq '<a href="http://example.com" rel="nofollow noopener noreferrer" target="_blank">Test</a>'
+    end
   end
 end