about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--app/lib/extractor.rb33
-rw-r--r--app/lib/formatter.rb97
-rw-r--r--spec/lib/formatter_spec.rb16
-rw-r--r--spec/models/account_spec.rb4
4 files changed, 107 insertions, 43 deletions
diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
new file mode 100644
index 000000000..3d88b01cd
--- /dev/null
+++ b/app/lib/extractor.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Extractor
+  extend Twitter::Extractor
+
+  module_function
+
+  def extract_mentions_or_lists_with_indices(text) # :yields: screen_name, start, end
+    return [] unless text =~ Twitter::Regex[:at_signs]
+
+    possible_entries = []
+
+    text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
+      match_data = $LAST_MATCH_INFO
+      after = $'
+      unless after =~ Twitter::Regex[:end_mention_match]
+        start_position = match_data.char_begin(1) - 1
+        end_position = match_data.char_end(1)
+        possible_entries << {
+          screen_name: screen_name,
+          indices: [start_position, end_position],
+        }
+      end
+    end
+
+    if block_given?
+      possible_entries.each do |mention|
+        yield mention[:screen_name], mention[:indices].first, mention[:indices].last
+      end
+    end
+    possible_entries
+  end
+end
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index 6d0828a8d..12b030e11 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -13,11 +13,10 @@ class Formatter
     return reformat(status.content) unless status.local?
 
     html = status.text
-    html = encode_and_link_urls(html)
+    html = encode_and_link_urls(html, status.mentions)
+
     html = simple_format(html, {}, sanitize: false)
     html = html.delete("\n")
-    html = link_mentions(html, status.mentions)
-    html = link_hashtags(html)
 
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
@@ -37,8 +36,6 @@ class Formatter
     html = encode_and_link_urls(account.note)
     html = simple_format(html, {}, sanitize: false)
     html = html.delete("\n")
-    html = link_accounts(html)
-    html = link_hashtags(html)
 
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
@@ -53,51 +50,66 @@ class Formatter
     HTMLEntities.new.encode(html)
   end
 
-  def encode_and_link_urls(html)
-    entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false)
-    entities = entities.sort_by { |entity| entity[:indices].first }
+  def encode_and_link_urls(html, mentions = nil)
+    entities = Extractor.extract_entities_with_indices(html, extract_url_without_protocol: false)
+
+    rewrite(html.dup, entities) do |entity|
+      if entity[:url]
+        link_to_url(entity)
+      elsif entity[:hashtag]
+        link_to_hashtag(entity)
+      elsif entity[:screen_name]
+        link_to_mention(entity, mentions)
+      end
+    end
+  end
 
-    chars = html.to_s.to_char_a
-    html_attrs = {
-      target: '_blank',
-      rel: 'nofollow noopener',
-    }
-    result = ''
+  def rewrite(text, entities)
+    chars = text.to_s.to_char_a
 
+    # sort by start index
+    entities = entities.sort_by do |entity|
+      indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+      indices.first
+    end
+
+    result = []
     last_index = entities.reduce(0) do |index, entity|
-      normalized_url = Addressable::URI.parse(entity[:url]).normalize
-      indices = entity[:indices]
-      result += encode(chars[index...indices.first].join)
-      result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs)
+      indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices]
+      result << encode(chars[index...indices.first].join)
+      result << yield(entity)
       indices.last
     end
-    result += encode(chars[last_index..-1].join)
-  end
+    result << encode(chars[last_index..-1].join)
 
-  def link_mentions(html, mentions)
-    html.gsub(Account::MENTION_RE) do |match|
-      acct    = Account::MENTION_RE.match(match)[1]
-      mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) }
+    result.flatten.join
+  end
 
-      mention.nil? ? match : mention_html(match, mention.account)
-    end
+  def link_to_url(entity)
+    normalized_url = Addressable::URI.parse(entity[:url]).normalize
+    html_attrs = {
+      target: '_blank',
+      rel: 'nofollow noopener',
+    }
+    Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), normalized_url, html_attrs)
   end
 
-  def link_accounts(html)
-    html.gsub(Account::MENTION_RE) do |match|
-      acct = Account::MENTION_RE.match(match)[1]
-      username, domain = acct.split('@')
-      domain = nil if TagManager.instance.local_domain?(domain)
-      account = Account.find_remote(username, domain)
+  def link_to_mention(entity, mentions)
+    acct = entity[:screen_name]
+    return link_to_account(acct) unless mentions
+    mention = mentions.find { |item| TagManager.instance.same_acct?(item.account.acct, acct) }
+    mention ? mention_html(mention.account) : "@#{acct}"
+  end
 
-      account.nil? ? match : mention_html(match, account)
-    end
+  def link_to_account(acct)
+    username, domain = acct.split('@')
+    domain = nil if TagManager.instance.local_domain?(domain)
+    account = Account.find_remote(username, domain)
+    account ? mention_html(account) : "@#{acct}"
   end
 
-  def link_hashtags(html)
-    html.gsub(Tag::HASHTAG_RE) do |match|
-      hashtag_html(match)
-    end
+  def link_to_hashtag(entity)
+    hashtag_html(entity[:hashtag])
   end
 
   def link_html(url)
@@ -110,12 +122,11 @@ class Formatter
     "<span class=\"invisible\">#{prefix}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{text}</span><span class=\"invisible\">#{suffix}</span>"
   end
 
-  def hashtag_html(match)
-    prefix, _, affix = match.rpartition('#')
-    "#{prefix}<a href=\"#{tag_url(affix.downcase)}\" class=\"mention hashtag\">#<span>#{affix}</span></a>"
+  def hashtag_html(tag)
+    "<a href=\"#{tag_url(tag.downcase)}\" class=\"mention hashtag\">#<span>#{tag}</span></a>"
   end
 
-  def mention_html(match, account)
-    "#{match.split('@').first}<span class=\"h-card\"><a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>"
+  def mention_html(account)
+    "<span class=\"h-card\"><a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>"
   end
 end
diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb
index b762907b2..81eaf00e8 100644
--- a/spec/lib/formatter_spec.rb
+++ b/spec/lib/formatter_spec.rb
@@ -6,6 +6,10 @@ RSpec.describe Formatter do
   let(:local_status)  { Fabricate(:status, text: local_text, account: account) }
   let(:remote_status) { Fabricate(:status, text: '<script>alert("Hello")</script> Beep boop', uri: 'beepboop', account: account) }
 
+  let(:local_text_with_mention) { "@#{account.username} @#{account.username}@example.com #{local_text}?x=@#{account.username} #hashtag" }
+  let(:local_status_with_mention) { Fabricate(:status, text: local_text_with_mention,
+                                              account: account, mentions: [Fabricate(:mention, account: account)]) }
+
   describe '#format' do
     subject { Formatter.instance.format(local_status) }
 
@@ -21,6 +25,18 @@ RSpec.describe Formatter do
       expect(subject).to match('<a href="http://google.com/" rel="nofollow noopener" target="_blank"><span class="invisible">http://</span><span class="">google.com/</span><span class="invisible"></span></a>')
     end
 
+    it 'contains a mention' do
+      result = Formatter.instance.format(local_status_with_mention)
+      expect(result).to match "<a href=\"#{TagManager.instance.url_for(account)}\" class=\"u-url mention\">@<span>#{account.username}</span></a></span>"
+      expect(result).to match %r{href=\"http://google.com/\?x=@#{account.username}}
+      expect(result).not_to match "href=\"https://example.com/@#{account.username}"
+    end
+
+    it 'contains a hashtag' do
+      result = Formatter.instance.format(local_status_with_mention)
+      expect(result).to match("/tags/hashtag\" class=\"mention hashtag\">#<span>hashtag</span></a>")
+    end
+
     context 'matches a stand-alone medium URL' do
       let(:local_text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' }
       it 'has valid url' do
diff --git a/spec/models/account_spec.rb b/spec/models/account_spec.rb
index 157db633a..efd87e871 100644
--- a/spec/models/account_spec.rb
+++ b/spec/models/account_spec.rb
@@ -379,6 +379,10 @@ RSpec.describe Account, type: :model do
     it 'does not match URLs' do
       expect(subject.match('Check this out https://medium.com/@alice/some-article#.abcdef123')).to be_nil
     end
+
+    xit 'does not match URL querystring' do
+      expect(subject.match('https://example.com/?x=@alice')).to be_nil
+    end
   end
 
   describe 'validations' do