From ea436b355bd844c86a4f4ddfd204b9bf15a1db6c Mon Sep 17 00:00:00 2001
From: ThibG <thib@sitedethib.com>
Date: Sat, 11 Jan 2020 02:15:25 +0100
Subject: Add support for linking XMPP URIs in toots (#12709)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Fix wrong grouping in Twitter valid_url regex

* Add support for xmpp URIs

Fixes #9776

The difficult part is autolinking, because Twitter-text's extractor does
some pretty ad-hoc stuff to find things that “look like” URLs, and XMPP
URIs do not really match the assumptions of that lib, so it doesn't sound
wise to try to shoehorn it into the existing regex.

This is why I used a specific regex (very close to the RFC, although slightly
more permissive), and a specific scan function (a simplified version of the
generalized one from Twitter).

* Remove leading “xmpp:” from auto-linked text
---
 spec/lib/formatter_spec.rb | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'spec/lib')
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index b8108a247..83be0a588 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -242,6 +242,22 @@ RSpec.describe Formatter do
         is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>'
       end
     end
+
+    context 'given a stand-alone xmpp: URI' do
+      let(:text) { 'xmpp:user@instance.com' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="xmpp:user@instance.com"'
+      end
+    end
+
+    context 'given an xmpp: URI with a query-string' do
+      let(:text) { 'please join xmpp:muc@instance.com?join right now' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="xmpp:muc@instance.com?join"'
+      end
+    end
   end
 
   describe '#format_spoiler' do
-- 
cgit 


From a8e46cf7a16857d6983c5c1878ab9914d3203f2b Mon Sep 17 00:00:00 2001
From: ThibG <thib@sitedethib.com>
Date: Thu, 23 Jan 2020 21:27:26 +0100
Subject: Add support for magnet: URIs (#12905)

---
 app/lib/formatter.rb                 |  4 ++--
 app/lib/sanitize_config.rb           |  2 +-
 config/initializers/twitter_regex.rb | 35 +++++++++++++++++++++--------------
 spec/lib/formatter_spec.rb           |  8 ++++++++
 4 files changed, 32 insertions(+), 17 deletions(-)

(limited to 'spec/lib')

diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index c771dcaaa..2c5674869 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -245,9 +245,9 @@ class Formatter
     end
 
     standard = Extractor.extract_entities_with_indices(text, options)
-    xmpp = Extractor.extract_xmpp_uris_with_indices(text, options)
+    extra = Extractor.extract_extra_uris_with_indices(text, options)
 
-    Extractor.remove_overlapping_entities(special + standard + xmpp)
+    Extractor.remove_overlapping_entities(special + standard + extra)
   end
 
   def link_to_url(entity, options = {})
diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb
index e2480376e..a82411127 100644
--- a/app/lib/sanitize_config.rb
+++ b/app/lib/sanitize_config.rb
@@ -2,7 +2,7 @@
 
 class Sanitize
   module Config
-    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze
+    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
 
     CLASS_WHITELIST_TRANSFORMER = lambda do |env|
       node = env[:node]
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
index 87815d458..f84f7c0cb 100644
--- a/config/initializers/twitter_regex.rb
+++ b/config/initializers/twitter_regex.rb
@@ -47,32 +47,39 @@ module Twitter
       #{REGEXEN[:validate_url_pct_encoded]}|
       #{REGEXEN[:validate_url_sub_delims]}
     )/iox
-    REGEXEN[:valid_xmpp_uri] = %r{
-      (                                                                                     #   $1 total match
-        (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceding character
-        (                                                                                   #   $3 URL
-          ((?:xmpp):)                                                                       #   $4 Protocol
-          (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     #   $5 Authority (optional)
-          (#{REGEXEN[:validate_nodeid]}+@)?                                                 #   $6 Username in path (optional)
-          (#{REGEXEN[:valid_domain]})                                                       #   $7 Domain in path
-          (/#{REGEXEN[:validate_resid]}+)?                                                  #   $8 Resource in path (optional)
-          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $9 Query String
+    REGEXEN[:xmpp_uri] = %r{
+      (xmpp:)                                                                           # Protocol
+      (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)?                     # Authority (optional)
+      (#{REGEXEN[:validate_nodeid]}+@)?                                                 # Username in path (optional)
+      (#{REGEXEN[:valid_domain]})                                                       # Domain in path
+      (/#{REGEXEN[:validate_resid]}+)?                                                  # Resource in path (optional)
+      (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
+    }iox
+    REGEXEN[:magnet_uri] = %r{
+      (magnet:)                                                                         # Protocol
+      (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})  # Query String
+    }iox
+    REGEXEN[:valid_extended_uri] = %r{
+      (                                                                                 #   $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]})                                        #   $2 Preceding character
+        (                                                                               #   $3 URL
+          (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
         )
       )
     }iox
   end
 
   module Extractor
-    # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
+    # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
     # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
-    # XMPP URIs an empty array will be returned.
+    # XMPP or magnet URIs an empty array will be returned.
     #
     # If a block is given then it will be called for each XMPP URI.
-    def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+    def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
       return [] unless text && text.index(":")
       urls = []
 
-      text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+      text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
         valid_uri_match_data = $~
 
         start_position = valid_uri_match_data.char_begin(3)
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index 83be0a588..633d59c2a 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -258,6 +258,14 @@ RSpec.describe Formatter do
         is_expected.to include 'href="xmpp:muc@instance.com?join"'
       end
     end
+
+    context 'given text containing a magnet: URI' do
+      let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
+
+      it 'matches the full URI' do
+        is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
+      end
+    end
   end
 
   describe '#format_spoiler' do
-- 
cgit