about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Thibaut Girka <thib@sitedethib.com>, 2019-05-12 22:13:36 +0200
committer: ThibG <thib@sitedethib.com>, 2019-05-17 23:51:14 +0200
commit: 0be93820f344fafc8618febde149e9c63d1ba5d4 (patch)
tree: dca0590f0ca43347fa35cea6ca2e8a3eba542d0b
parent: 9df1ef87c20380634c9ba2dff6f04725d7355c22 (diff)
Fix autolinking, and newlines in code blocks
Autolinking is now performed *after* the Markdown pass, by replacing HTML
tags with zero-width spaces and running the twitter-text extractor as usual,
except it does not auto-link URLs to avoid links in links…
-rw-r--r-- app/lib/formatter.rb 65
1 files changed, 50 insertions, 15 deletions
diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb
index eaece8797..2c509ef19 100644
--- a/app/lib/formatter.rb
+++ b/app/lib/formatter.rb
@@ -3,6 +3,17 @@
 require 'singleton'
 require_relative './sanitize_config'
 
+class HTMLRenderer < Redcarpet::Render::HTML
+  def block_code(code, language)
+    "<pre><code>#{code.gsub("\n", "<br/>")}</code></pre>"
+  end
+
+  def autolink(link, link_type)
+    return link if link_type == :email
+    Formatter.instance.link_url(link)
+  end
+end
+
 class Formatter
   include Singleton
   include RoutingHelper
@@ -39,15 +50,18 @@ class Formatter
     html = format_markdown(html) if status.content_type == 'text/markdown'
     html = encode_and_link_urls(html, linkable_accounts, keep_html: %w(text/markdown text/html).include?(status.content_type))
     html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify]
-    html = simple_format(html, {}, sanitize: false) unless %w(text/markdown text/html).include?(status.content_type)
-    html = html.delete("\n")
+
+    unless %w(text/markdown text/html).include?(status.content_type)
+      html = simple_format(html, {}, sanitize: false)
+      html = html.delete("\n")
+    end
 
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
 
   def format_markdown(html)
     extensions = {
-      autolink: false,
+      autolink: true,
       no_intra_emphasis: true,
       fenced_code_blocks: true,
       disable_indented_code_blocks: true,
@@ -57,11 +71,12 @@ class Formatter
       superscript: true,
       underline: true,
       highlight: true,
-      footnotes: true
+      footnotes: false,
     }
 
-    renderer = Redcarpet::Render::HTML.new({
+    renderer = HTMLRenderer.new({
       filter_html: false,
+      escape_html: false,
       no_images: true,
       no_styles: true,
       safe_links_only: true,
@@ -72,14 +87,7 @@ class Formatter
     markdown = Redcarpet::Markdown.new(renderer, extensions)
 
     html = reformat(markdown.render(html))
-    html = html.gsub("\r\n", "\n").gsub("\r", "\n")
-    code_safe_strip(html)
-  end
-
-  def code_safe_strip(html, char="\n")
-    html = html.split(/(<code[ >].*?\/code>)/m)
-    html.each_slice(2) { |part| part[0].delete!(char) }
-    html.join
+    html.delete("\r").delete("\n")
   end
 
   def reformat(html)
@@ -136,6 +144,10 @@ class Formatter
     html.html_safe # rubocop:disable Rails/OutputSafety
   end
 
+  def link_url(url)
+    "<a href=\"#{encode(url)}\" target=\"blank\" rel=\"nofollow noopener\">#{link_html(url)}</a>"
+  end
+
   private
 
   def html_entities
@@ -147,13 +159,13 @@ class Formatter
   end
 
   def encode_and_link_urls(html, accounts = nil, options = {})
-    entities = utf8_friendly_extractor(html, extract_url_without_protocol: false)
-
     if accounts.is_a?(Hash)
       options  = accounts
       accounts = nil
     end
 
+    entities = options[:keep_html] ? html_friendly_extractor(html) : utf8_friendly_extractor(html, extract_url_without_protocol: false)
+
     rewrite(html.dup, entities, options[:keep_html]) do |entity|
       if entity[:url]
         link_to_url(entity, options)
@@ -285,6 +297,29 @@ class Formatter
     Extractor.remove_overlapping_entities(special + standard)
   end
 
+  def html_friendly_extractor(html, options = {}) # Extracts hashtag and mention/list entities from HTML text, with indices mapped back onto `html`; `options` is not read.
+    gaps = [] # entries are [position in `escaped` just after a replaced tag, characters removed up to that point]
+    total_offset = 0 # running count of characters removed by the tag replacement
+
+    escaped = html.gsub(/<[^>]*>/) do |match| # every <...> tag is replaced by a single zero-width space
+      total_offset += match.length - 1 # the tag shrinks from match.length characters to 1
+      end_offset = Regexp.last_match.end(0) # end of this tag in the original `html`
+      gaps << [end_offset - total_offset, total_offset]
+      "\u200b"
+    end
+
+    entities = Extractor.extract_hashtags_with_indices(escaped, :check_url_overlap => false) +
+               Extractor.extract_mentions_or_lists_with_indices(escaped)
+    Extractor.remove_overlapping_entities(entities).map do |extract| # shift each entity's indices back into `html` coordinates
+      pos = extract[:indices].first
+      offset_idx = gaps.rindex { |gap| gap.first <= pos } # last replaced tag that ends at or before the entity start
+      offset = offset_idx.nil? ? 0 : gaps[offset_idx].last # no tag before the entity: indices stay as they are
+      next extract.merge(
+        :indices => [extract[:indices].first + offset, extract[:indices].last + offset]
+      )
+    end
+  end
+
   def link_to_url(entity, options = {})
     url        = Addressable::URI.parse(entity[:url])
     html_attrs = { target: '_blank', rel: 'nofollow noopener' }