From 70891a99a97bc1ca14a8ded13a5cd45b648b92b3 Mon Sep 17 00:00:00 2001
From: abcang
Date: Wed, 19 Apr 2017 21:52:18 +0900
Subject: Fix html escape characters in the URL (#2138)

* fix character escaping in URL
* add tests
* put a comma after the last item
* add HTML escape test
---
 app/lib/formatter.rb | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'app/lib')

diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index a44e5ed3e..43893915d 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -13,10 +13,9 @@ class Formatter
     return reformat(status.content) unless status.local?
 
     html = status.text
-    html = encode(html)
+    html = encode_and_link_urls(html)
     html = simple_format(html, {}, sanitize: false)
     html = html.delete("\n")
-    html = link_urls(html)
     html = link_mentions(html, status.mentions)
     html = link_hashtags(html)
 
@@ -35,8 +34,7 @@ class Formatter
   def simplified_format(account)
     return reformat(account.note) unless account.local?
 
-    html = encode(account.note)
-    html = link_urls(html)
+    html = encode_and_link_urls(account.note)
     html = link_accounts(html)
     html = link_hashtags(html)
 
@@ -49,6 +47,26 @@ class Formatter
     HTMLEntities.new.encode(html)
   end
 
+  def encode_and_link_urls(html)
+    entities = Twitter::Extractor.extract_urls_with_indices(html, extract_url_without_protocol: false)
+    entities = entities.sort_by { |entity| entity[:indices].first }
+
+    chars = html.to_s.to_char_a
+    html_attrs = {
+      target: '_blank',
+      rel: 'nofollow noopener',
+    }
+    result = ''
+
+    last_index = entities.reduce(0) do |index, entity|
+      indices = entity[:indices]
+      result += encode(chars[index...indices.first].join)
+      result += Twitter::Autolink.send(:link_to_text, entity, link_html(entity[:url]), entity[:url], html_attrs)
+      indices.last
+    end
+    result += encode(chars[last_index..-1].join)
+  end
+
   def link_urls(html)
     Twitter::Autolink.auto_link_urls(html, url_target: '_blank',
                                            link_attribute_block: lambda { |_, a| a[:rel] << ' noopener' },
--
cgit