about summary refs log tree commit diff
diff options
context:
space:
mode:
author ThibG <thib@sitedethib.com> 2020-01-23 21:27:26 +0100
committer Eugen Rochko <eugen@zeonfederated.com> 2020-01-23 21:27:26 +0100
commit a8e46cf7a16857d6983c5c1878ab9914d3203f2b (patch)
tree d7c2af2a7e05ac69aadb3256732f7c7c0f531cc7
parent c0006a004d0e58bb3ad356759c17e60f28975b61 (diff)
Add support for magnet: URIs (#12905)
-rw-r--r-- app/lib/formatter.rb 4
-rw-r--r-- app/lib/sanitize_config.rb 2
-rw-r--r-- config/initializers/twitter_regex.rb 35
-rw-r--r-- spec/lib/formatter_spec.rb 8
4 files changed, 32 insertions, 17 deletions
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index c771dcaaa..2c5674869 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -245,9 +245,9 @@ class Formatter
     end
 
     standard = Extractor.extract_entities_with_indices(text, options)
-    xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
+    extra = Extractor.extract_extra_uris_with_indices(text, options)
 
-    Extractor.remove_overlapping_entities(special + standard + xmpp)
+    Extractor.remove_overlapping_entities(special + standard + extra)
   end
 
   def link_to_url(entity, options = {})
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index e2480376e..a82411127 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -2,7 +2,7 @@
 
 class Sanitize
   module Config
-    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
+    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
 
     CLASS_WHITELIST_TRANSFORMER = lambda do |env|
       node = env[:node]
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
index 87815d458..f84f7c0cb 100644
--- a/config/initializers/twitter_regex.rb
+++ b/config/initializers/twitter_regex.rb
@@ -47,32 +47,39 @@ module Twitter
       #{REGEXEN[:validate_url_pct_encoded]}|
       #{REGEXEN[:validate_url_sub_delims]}
     )/iox
-    REGEXEN[:valid_xmpp_uri] = %r{
-      (                                                                                     #   $1 total match
-        (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceding character
-        (                                                                                   #   $3 URL
-          ((?:xmpp):)                                                                       #   $4 Protocol
-          (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     #   $5 Authority (optional)
-          (#{REGEXEN[:validate_nodeid]}+@)?                                                 #   $6 Username in path (optional)
-          (#{REGEXEN[:valid_domain]})                                                       #   $7 Domain in path
-          (/#{REGEXEN[:validate_resid]}+)?                                                  #   $8 Resource in path (optional)
-          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $9 Query String
+    REGEXEN[:xmpp_uri] = %r{
+      (xmpp:)                                                                           # Protocol
+      (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     # Authority (optional)
+      (#{REGEXEN[:validate_nodeid]}+@)?                                                 # Username in path (optional)
+      (#{REGEXEN[:valid_domain]})                                                       # Domain in path
+      (/#{REGEXEN[:validate_resid]}+)?                                                  # Resource in path (optional)
+      (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
+    }iox
+    REGEXEN[:magnet_uri] = %r{
+      (magnet:)                                                                         # Protocol
+      (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})  # Query String
+    }iox
+    REGEXEN[:valid_extended_uri] = %r{
+      (                                                                                 #   $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]})                                        #   $2 Preceding character
+        (                                                                               #   $3 URL
+          (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
         )
       )
     }iox
   end
 
   module Extractor
-    # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
+    # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
     # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
-    # XMPP URIs an empty array will be returned.
+    # XMPP or magnet URIs an empty array will be returned.
     #
     # If a block is given then it will be called for each XMPP URI.
-    def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+    def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
       return [] unless text && text.index(":")
       urls = []
 
-      text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+      text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
         valid_uri_match_data = $~
 
         start_position = valid_uri_match_data.char_begin(3)
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index 83be0a588..633d59c2a 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -258,6 +258,14 @@ RSpec.describe Formatter do
         is_expected.to include 'href="xmpp:muc@instance.com?join"'
       end
     end
+
+    context 'given text containing a magnet: URI' do
+      let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
+      end
+    end
   end
 
   describe '#format_spoiler' do