diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2022-03-26 02:53:34 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-26 02:53:34 +0100 |
commit | cefa526c6d3a45df2d0fcb7643ced828e2e87dea (patch) | |
tree | e3d19aa7d0881ae59d25692fb0c16b781c691b40 | |
parent | 2dd30804b62f750c2780b7043318cbe00d137429 (diff) |
Refactor formatter (#17828)
* Refactor formatter
* Move custom emoji pre-rendering logic to view helpers
* Move more methods out of Formatter
* Fix code style issues
* Remove Formatter
* Add inline poll options to RSS feeds
* Remove unused helper method
* Fix code style issues
* Various fixes and improvements
* Fix test
44 files changed, 932 insertions, 1024 deletions
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 65cbb6fcd..d119f7cac 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -57,7 +57,7 @@ class StatusesIndex < Chewy::Index field :id, type: 'long' field :account_id, type: 'long' - field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do + field :text, type: 'text', value: ->(status) { [status.spoiler_text, PlainTextFormatter.new(status.text, status.local?).to_s].concat(status.ordered_media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do field :stemmed, type: 'text', analyzer: 'content' end diff --git a/app/controllers/api/web/embeds_controller.rb b/app/controllers/api/web/embeds_controller.rb index 741ba910f..58f6345e6 100644 --- a/app/controllers/api/web/embeds_controller.rb +++ b/app/controllers/api/web/embeds_controller.rb @@ -15,7 +15,7 @@ class Api::Web::EmbedsController < Api::Web::BaseController return not_found if oembed.nil? 
begin - oembed[:html] = Formatter.instance.sanitize(oembed[:html], Sanitize::Config::MASTODON_OEMBED) + oembed[:html] = Sanitize.fragment(oembed[:html], Sanitize::Config::MASTODON_OEMBED) rescue ArgumentError return not_found end diff --git a/app/helpers/accounts_helper.rb b/app/helpers/accounts_helper.rb index a33961724..557f60f26 100644 --- a/app/helpers/accounts_helper.rb +++ b/app/helpers/accounts_helper.rb @@ -2,10 +2,12 @@ module AccountsHelper def display_name(account, **options) + str = account.display_name.presence || account.username + if options[:custom_emojify] - Formatter.instance.format_display_name(account, **options) + prerender_custom_emojis(h(str), account.emojis) else - account.display_name.presence || account.username + str end end diff --git a/app/helpers/admin/trends/statuses_helper.rb b/app/helpers/admin/trends/statuses_helper.rb index d16e3dd12..214c1e2a6 100644 --- a/app/helpers/admin/trends/statuses_helper.rb +++ b/app/helpers/admin/trends/statuses_helper.rb @@ -12,9 +12,6 @@ module Admin::Trends::StatusesHelper return '' if text.blank? - html = Formatter.instance.send(:encode, text) - html = Formatter.instance.send(:encode_custom_emojis, html, status.emojis, prefers_autoplay?) 
- - html.html_safe # rubocop:disable Rails/OutputSafety + prerender_custom_emojis(h(text), status.emojis) end end diff --git a/app/helpers/application_helper.rb b/app/helpers/application_helper.rb index e997570b5..651a98a85 100644 --- a/app/helpers/application_helper.rb +++ b/app/helpers/application_helper.rb @@ -239,4 +239,8 @@ module ApplicationHelper end end.values end + + def prerender_custom_emojis(html, custom_emojis) + EmojiFormatter.new(html, custom_emojis, animate: prefers_autoplay?).to_s + end end diff --git a/app/helpers/formatting_helper.rb b/app/helpers/formatting_helper.rb new file mode 100644 index 000000000..66e9e1e91 --- /dev/null +++ b/app/helpers/formatting_helper.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +module FormattingHelper + def html_aware_format(text, local, options = {}) + HtmlAwareFormatter.new(text, local, options).to_s + end + + def linkify(text, options = {}) + TextFormatter.new(text, options).to_s + end + + def extract_plain_text(text, local) + PlainTextFormatter.new(text, local).to_s + end + + def status_content_format(status) + html_aware_format(status.text, status.local?, preloaded_accounts: [status.account] + (status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : [])) + end +end diff --git a/app/helpers/routing_helper.rb b/app/helpers/routing_helper.rb index fb24a1b28..f95f46a56 100644 --- a/app/helpers/routing_helper.rb +++ b/app/helpers/routing_helper.rb @@ -2,6 +2,7 @@ module RoutingHelper extend ActiveSupport::Concern + include Rails.application.routes.url_helpers include ActionView::Helpers::AssetTagHelper include Webpacker::Helper @@ -22,8 +23,6 @@ module RoutingHelper full_asset_url(asset_pack_path(source, **options)) end - private - def use_storage? 
Rails.configuration.x.use_s3 || Rails.configuration.x.use_swift end diff --git a/app/helpers/statuses_helper.rb b/app/helpers/statuses_helper.rb index d328f89b7..e92b4c839 100644 --- a/app/helpers/statuses_helper.rb +++ b/app/helpers/statuses_helper.rb @@ -113,20 +113,6 @@ module StatusesHelper end end - private - - def simplified_text(text) - text.dup.tap do |new_text| - URI.extract(new_text).each do |url| - new_text.gsub!(url, '') - end - - new_text.gsub!(Account::MENTION_RE, '') - new_text.gsub!(Tag::HASHTAG_RE, '') - new_text.gsub!(/\s+/, '') - end - end - def embedded_view? params[:controller] == EMBEDDED_CONTROLLER && params[:action] == EMBEDDED_ACTION end diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index ea8d146d4..f4f98e29c 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class ActivityPub::Activity::Create < ActivityPub::Activity + include FormattingHelper + def perform dereference_object! 
@@ -367,7 +369,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity end def converted_text - Formatter.instance.linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) + linkify([@status_parser.title.presence, @status_parser.spoiler_text.presence, @status_parser.url || @status_parser.uri].compact.join("\n\n")) end def unsupported_media_type?(mime_type) diff --git a/app/lib/emoji_formatter.rb b/app/lib/emoji_formatter.rb new file mode 100644 index 000000000..f808f3a22 --- /dev/null +++ b/app/lib/emoji_formatter.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +class EmojiFormatter + include RoutingHelper + + DISALLOWED_BOUNDING_REGEX = /[[:alnum:]:]/.freeze + + attr_reader :html, :custom_emojis, :options + + # @param [ActiveSupport::SafeBuffer] html + # @param [Array<CustomEmoji>] custom_emojis + # @param [Hash] options + # @option options [Boolean] :animate + def initialize(html, custom_emojis, options = {}) + raise ArgumentError unless html.html_safe? + + @html = html + @custom_emojis = custom_emojis + @options = options + end + + def to_s + return html if custom_emojis.empty? || html.blank? + + i = -1 + tag_open_index = nil + inside_shortname = false + shortname_start_index = -1 + invisible_depth = 0 + last_index = 0 + result = ''.dup + + while i + 1 < html.size + i += 1 + + if invisible_depth.zero? && inside_shortname && html[i] == ':' + inside_shortname = false + shortcode = html[shortname_start_index + 1..i - 1] + char_after = html[i + 1] + + next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode]) + + result << html[last_index..shortname_start_index - 1] if shortname_start_index.positive? + result << image_for_emoji(shortcode, emoji) + last_index = i + 1 + elsif tag_open_index && html[i] == '>' + tag = html[tag_open_index..i] + tag_open_index = nil + + if invisible_depth.positive? 
+ invisible_depth += count_tag_nesting(tag) + elsif tag == '<span class="invisible">' + invisible_depth = 1 + end + elsif html[i] == '<' + tag_open_index = i + inside_shortname = false + elsif !tag_open_index && html[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(html[i - 1])) + inside_shortname = true + shortname_start_index = i + end + end + + result << html[last_index..-1] + + result.html_safe # rubocop:disable Rails/OutputSafety + end + + private + + def emoji_map + @emoji_map ||= custom_emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] } + end + + def count_tag_nesting(tag) + if tag[1] == '/' + -1 + elsif tag[-2] == '/' + 0 + else + 1 + end + end + + def image_for_emoji(shortcode, emoji) + original_url, static_url = emoji + + if animate? + image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:") + else + image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url }) + end + end + + def animate? + @options[:animate] + end +end diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb index 8020aa916..ef9407864 100644 --- a/app/lib/extractor.rb +++ b/app/lib/extractor.rb @@ -5,18 +5,34 @@ module Extractor module_function - # :yields: username, list_slug, start, end + def extract_entities_with_indices(text, options = {}, &block) + entities = begin + extract_urls_with_indices(text, options) + + extract_hashtags_with_indices(text, check_url_overlap: false) + + extract_mentions_or_lists_with_indices(text) + + extract_extra_uris_with_indices(text) + end + + return [] if entities.empty? + + entities = remove_overlapping_entities(entities) + entities.each(&block) if block_given? 
+ entities + end + def extract_mentions_or_lists_with_indices(text) - return [] unless Twitter::TwitterText::Regex[:at_signs].match?(text) + return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text) possible_entries = [] - text.to_s.scan(Account::MENTION_RE) do |screen_name, _| + text.scan(Account::MENTION_RE) do |screen_name, _| match_data = $LAST_MATCH_INFO - after = $' + after = $' + unless Twitter::TwitterText::Regex[:end_mention_match].match?(after) start_position = match_data.char_begin(1) - 1 - end_position = match_data.char_end(1) + end_position = match_data.char_end(1) + possible_entries << { screen_name: screen_name, indices: [start_position, end_position], @@ -29,36 +45,70 @@ module Extractor yield mention[:screen_name], mention[:indices].first, mention[:indices].last end end + possible_entries end - def extract_hashtags_with_indices(text, **) - return [] unless /#/.match?(text) + def extract_hashtags_with_indices(text, _options = {}) + return [] unless text&.index('#') + + possible_entries = [] - tags = [] text.scan(Tag::HASHTAG_RE) do |hash_text, _| - match_data = $LAST_MATCH_INFO + match_data = $LAST_MATCH_INFO start_position = match_data.char_begin(1) - 1 - end_position = match_data.char_end(1) - after = $' + end_position = match_data.char_end(1) + after = $' + if %r{\A://}.match?(after) hash_text.match(/(.+)(https?\Z)/) do |matched| - hash_text = matched[1] + hash_text = matched[1] end_position -= matched[2].codepoint_length end end - tags << { + possible_entries << { hashtag: hash_text, indices: [start_position, end_position], } end - tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given? - tags + if block_given? 
+ possible_entries.each do |tag| + yield tag[:hashtag], tag[:indices].first, tag[:indices].last + end + end + + possible_entries end def extract_cashtags_with_indices(_text) - [] # always returns empty array + [] + end + + def extract_extra_uris_with_indices(text) + return [] unless text&.index(':') + + possible_entries = [] + + text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do + valid_uri_match_data = $LAST_MATCH_INFO + + start_position = valid_uri_match_data.char_begin(3) + end_position = valid_uri_match_data.char_end(3) + + possible_entries << { + url: valid_uri_match_data[3], + indices: [start_position, end_position], + } + end + + if block_given? + possible_entries.each do |url| + yield url[:url], url[:indices].first, url[:indices].last + end + end + + possible_entries end end diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 46a55c7a4..53d1390d4 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -5,6 +5,7 @@ require 'singleton' class FeedManager include Singleton include Redisable + include FormattingHelper # Maximum number of items stored in a single feed MAX_ITEMS = 400 @@ -445,7 +446,7 @@ class FeedManager status = status.reblog if status.reblog? combined_text = [ - Formatter.instance.plaintext(status), + extract_plain_text(status.text, status.local?), status.spoiler_text, status.preloadable_poll ? status.preloadable_poll.options.join("\n\n") : nil, status.ordered_media_attachments.map(&:description).join("\n\n"), diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb deleted file mode 100644 index b6a13163d..000000000 --- a/app/lib/formatter.rb +++ /dev/null @@ -1,294 +0,0 @@ -# frozen_string_literal: true - -require 'singleton' - -class Formatter - include Singleton - include RoutingHelper - - include ActionView::Helpers::TextHelper - - def format(status, **options) - if status.respond_to?(:reblog?) && status.reblog? 
- prepend_reblog = status.reblog.account.acct - status = status.proper - else - prepend_reblog = false - end - - raw_content = status.text - - if options[:inline_poll_options] && status.preloadable_poll - raw_content = raw_content + "\n\n" + status.preloadable_poll.options.map { |title| "[ ] #{title}" }.join("\n") - end - - return '' if raw_content.blank? - - unless status.local? - html = reformat(raw_content) - html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify] - return html.html_safe # rubocop:disable Rails/OutputSafety - end - - linkable_accounts = status.respond_to?(:active_mentions) ? status.active_mentions.map(&:account) : [] - linkable_accounts << status.account - - html = raw_content - html = "RT @#{prepend_reblog} #{html}" if prepend_reblog - html = encode_and_link_urls(html, linkable_accounts) - html = encode_custom_emojis(html, status.emojis, options[:autoplay]) if options[:custom_emojify] - html = simple_format(html, {}, sanitize: false) - html = html.delete("\n") - - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def reformat(html) - sanitize(html, Sanitize::Config::MASTODON_STRICT) - rescue ArgumentError - '' - end - - def plaintext(status) - return status.text if status.local? - - text = status.text.gsub(/(<br \/>|<br>|<\/p>)+/) { |match| "#{match}\n" } - strip_tags(text) - end - - def simplified_format(account, **options) - return '' if account.note.blank? - - html = account.local? ? 
linkify(account.note) : reformat(account.note) - html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify] - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def sanitize(html, config) - Sanitize.fragment(html, config) - end - - def format_spoiler(status, **options) - html = encode(status.spoiler_text) - html = encode_custom_emojis(html, status.emojis, options[:autoplay]) - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def format_poll_option(status, option, **options) - html = encode(option.title) - html = encode_custom_emojis(html, status.emojis, options[:autoplay]) - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def format_display_name(account, **options) - html = encode(account.display_name.presence || account.username) - html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify] - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def format_field(account, str, **options) - html = account.local? ? 
encode_and_link_urls(str, me: true, with_domain: true) : reformat(str) - html = encode_custom_emojis(html, account.emojis, options[:autoplay]) if options[:custom_emojify] - html.html_safe # rubocop:disable Rails/OutputSafety - end - - def linkify(text) - html = encode_and_link_urls(text) - html = simple_format(html, {}, sanitize: false) - html = html.delete("\n") - - html.html_safe # rubocop:disable Rails/OutputSafety - end - - private - - def html_entities - @html_entities ||= HTMLEntities.new - end - - def encode(html) - html_entities.encode(html) - end - - def encode_and_link_urls(html, accounts = nil, options = {}) - entities = utf8_friendly_extractor(html, extract_url_without_protocol: false) - - if accounts.is_a?(Hash) - options = accounts - accounts = nil - end - - rewrite(html.dup, entities) do |entity| - if entity[:url] - link_to_url(entity, options) - elsif entity[:hashtag] - link_to_hashtag(entity) - elsif entity[:screen_name] - link_to_mention(entity, accounts, options) - end - end - end - - def count_tag_nesting(tag) - if tag[1] == '/' then -1 - elsif tag[-2] == '/' then 0 - else 1 - end - end - - # rubocop:disable Metrics/BlockNesting - def encode_custom_emojis(html, emojis, animate = false) - return html if emojis.empty? - - emoji_map = emojis.each_with_object({}) { |e, h| h[e.shortcode] = [full_asset_url(e.image.url), full_asset_url(e.image.url(:static))] } - - i = -1 - tag_open_index = nil - inside_shortname = false - shortname_start_index = -1 - invisible_depth = 0 - - while i + 1 < html.size - i += 1 - - if invisible_depth.zero? 
&& inside_shortname && html[i] == ':' - shortcode = html[shortname_start_index + 1..i - 1] - emoji = emoji_map[shortcode] - - if emoji - original_url, static_url = emoji - replacement = begin - if animate - image_tag(original_url, draggable: false, class: 'emojione', alt: ":#{shortcode}:", title: ":#{shortcode}:") - else - image_tag(original_url, draggable: false, class: 'emojione custom-emoji', alt: ":#{shortcode}:", title: ":#{shortcode}:", data: { original: original_url, static: static_url }) - end - end - before_html = shortname_start_index.positive? ? html[0..shortname_start_index - 1] : '' - html = before_html + replacement + html[i + 1..-1] - i += replacement.size - (shortcode.size + 2) - 1 - else - i -= 1 - end - - inside_shortname = false - elsif tag_open_index && html[i] == '>' - tag = html[tag_open_index..i] - tag_open_index = nil - if invisible_depth.positive? - invisible_depth += count_tag_nesting(tag) - elsif tag == '<span class="invisible">' - invisible_depth = 1 - end - elsif html[i] == '<' - tag_open_index = i - inside_shortname = false - elsif !tag_open_index && html[i] == ':' - inside_shortname = true - shortname_start_index = i - end - end - - html - end - # rubocop:enable Metrics/BlockNesting - - def rewrite(text, entities) - text = text.to_s - - # Sort by start index - entities = entities.sort_by do |entity| - indices = entity.respond_to?(:indices) ? entity.indices : entity[:indices] - indices.first - end - - result = [] - - last_index = entities.reduce(0) do |index, entity| - indices = entity.respond_to?(:indices) ? 
entity.indices : entity[:indices] - result << encode(text[index...indices.first]) - result << yield(entity) - indices.last - end - - result << encode(text[last_index..-1]) - - result.flatten.join - end - - def utf8_friendly_extractor(text, options = {}) - # Note: I couldn't obtain list_slug with @user/list-name format - # for mention so this requires additional check - special = Extractor.extract_urls_with_indices(text, options) - standard = Extractor.extract_entities_with_indices(text, options) - extra = Extractor.extract_extra_uris_with_indices(text, options) - - Extractor.remove_overlapping_entities(special + standard + extra) - end - - def link_to_url(entity, options = {}) - url = Addressable::URI.parse(entity[:url]) - html_attrs = { target: '_blank', rel: 'nofollow noopener noreferrer' } - - html_attrs[:rel] = "me #{html_attrs[:rel]}" if options[:me] - - Twitter::TwitterText::Autolink.send(:link_to_text, entity, link_html(entity[:url]), url, html_attrs) - rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError - encode(entity[:url]) - end - - def link_to_mention(entity, linkable_accounts, options = {}) - acct = entity[:screen_name] - - return link_to_account(acct, options) unless linkable_accounts - - same_username_hits = 0 - account = nil - username, domain = acct.split('@') - domain = nil if TagManager.instance.local_domain?(domain) - - linkable_accounts.each do |item| - same_username = item.username.casecmp(username).zero? - same_domain = item.domain.nil? ? domain.nil? : item.domain.casecmp(domain)&.zero? - - if same_username && !same_domain - same_username_hits += 1 - elsif same_username && same_domain - account = item - end - end - - account ? mention_html(account, with_domain: same_username_hits.positive? 
|| options[:with_domain]) : "@#{encode(acct)}" - end - - def link_to_account(acct, options = {}) - username, domain = acct.split('@') - - domain = nil if TagManager.instance.local_domain?(domain) - account = EntityCache.instance.mention(username, domain) - - account ? mention_html(account, with_domain: options[:with_domain]) : "@#{encode(acct)}" - end - - def link_to_hashtag(entity) - hashtag_html(entity[:hashtag]) - end - - def link_html(url) - url = Addressable::URI.parse(url).to_s - prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s - text = url[prefix.length, 30] - suffix = url[prefix.length + 30..-1] - cutoff = url[prefix.length..-1].length > 30 - - "<span class=\"invisible\">#{encode(prefix)}</span><span class=\"#{cutoff ? 'ellipsis' : ''}\">#{encode(text)}</span><span class=\"invisible\">#{encode(suffix)}</span>" - end - - def hashtag_html(tag) - "<a href=\"#{encode(tag_url(tag))}\" class=\"mention hashtag\" rel=\"tag\">#<span>#{encode(tag)}</span></a>" - end - - def mention_html(account, with_domain: false) - "<span class=\"h-card\"><a href=\"#{encode(ActivityPub::TagManager.instance.url_for(account))}\" class=\"u-url mention\">@<span>#{encode(with_domain ? account.pretty_acct : account.username)}</span></a></span>" - end -end diff --git a/app/lib/html_aware_formatter.rb b/app/lib/html_aware_formatter.rb new file mode 100644 index 000000000..64edba09b --- /dev/null +++ b/app/lib/html_aware_formatter.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +class HtmlAwareFormatter + attr_reader :text, :local, :options + + alias local? local + + # @param [String] text + # @param [Boolean] local + # @param [Hash] options + def initialize(text, local, options = {}) + @text = text + @local = local + @options = options + end + + def to_s + return ''.html_safe if text.blank? + + if local? 
+ linkify + else + reformat.html_safe # rubocop:disable Rails/OutputSafety + end + rescue ArgumentError + ''.html_safe + end + + private + + def reformat + Sanitize.fragment(text, Sanitize::Config::MASTODON_STRICT) + end + + def linkify + TextFormatter.new(text, options).to_s + end +end diff --git a/app/lib/plain_text_formatter.rb b/app/lib/plain_text_formatter.rb new file mode 100644 index 000000000..08aa29696 --- /dev/null +++ b/app/lib/plain_text_formatter.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +class PlainTextFormatter + include ActionView::Helpers::TextHelper + + NEWLINE_TAGS_RE = /(<br \/>|<br>|<\/p>)+/.freeze + + attr_reader :text, :local + + alias local? local + + def initialize(text, local) + @text = text + @local = local + end + + def to_s + if local? + text + else + strip_tags(insert_newlines).chomp + end + end + + private + + def insert_newlines + text.gsub(NEWLINE_TAGS_RE) { |match| "#{match}\n" } + end +end diff --git a/app/lib/rss/serializer.rb b/app/lib/rss/serializer.rb index 7e3ed1f17..d44e94221 100644 --- a/app/lib/rss/serializer.rb +++ b/app/lib/rss/serializer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class RSS::Serializer + include FormattingHelper + private def render_statuses(builder, statuses) @@ -9,7 +11,7 @@ class RSS::Serializer item.title(status_title(status)) .link(ActivityPub::TagManager.instance.url_for(status)) .pub_date(status.created_at) - .description(status.spoiler_text.presence || Formatter.instance.format(status, inline_poll_options: true).to_str) + .description(status_description(status)) status.ordered_media_attachments.each do |media| item.enclosure(full_asset_url(media.file.url(:original, false)), media.file.content_type, media.file.size) @@ -19,9 +21,8 @@ class RSS::Serializer end def status_title(status) - return "#{status.account.acct} deleted status" if status.destroyed? 
- preview = status.proper.spoiler_text.presence || status.proper.text + if preview.length > 30 || preview[0, 30].include?("\n") preview = preview[0, 30] preview = preview[0, preview.index("\n").presence || 30] + '…' @@ -35,4 +36,20 @@ class RSS::Serializer "#{status.account.acct}: #{preview}" end end + + def status_description(status) + if status.proper.spoiler_text? + status.proper.spoiler_text + else + html = status_content_format(status.proper).to_str + after_html = '' + + if status.proper.preloadable_poll + poll_options_html = status.proper.preloadable_poll.options.map { |o| "[ ] #{o}" }.join('<br />') + after_html = "<p>#{poll_options_html}</p>" + end + + "#{html}#{after_html}" + end + end end diff --git a/app/lib/text_formatter.rb b/app/lib/text_formatter.rb new file mode 100644 index 000000000..48e2fc233 --- /dev/null +++ b/app/lib/text_formatter.rb @@ -0,0 +1,158 @@ +# frozen_string_literal: true + +class TextFormatter + include ActionView::Helpers::TextHelper + include ERB::Util + include RoutingHelper + + URL_PREFIX_REGEX = /\A(https?:\/\/(www\.)?|xmpp:)/.freeze + + DEFAULT_REL = %w(nofollow noopener noreferrer).freeze + + DEFAULT_OPTIONS = { + multiline: true, + }.freeze + + attr_reader :text, :options + + # @param [String] text + # @param [Hash] options + # @option options [Boolean] :multiline + # @option options [Boolean] :with_domains + # @option options [Boolean] :with_rel_me + # @option options [Array<Account>] :preloaded_accounts + def initialize(text, options = {}) + @text = text + @options = DEFAULT_OPTIONS.merge(options) + end + + def entities + @entities ||= Extractor.extract_entities_with_indices(text, extract_url_without_protocol: false) + end + + def to_s + return ''.html_safe if text.blank? 
+ + html = rewrite do |entity| + if entity[:url] + link_to_url(entity) + elsif entity[:hashtag] + link_to_hashtag(entity) + elsif entity[:screen_name] + link_to_mention(entity) + end + end + + html = simple_format(html, {}, sanitize: false).delete("\n") if multiline? + + html.html_safe # rubocop:disable Rails/OutputSafety + end + + private + + def rewrite + entities.sort_by! do |entity| + entity[:indices].first + end + + result = ''.dup + + last_index = entities.reduce(0) do |index, entity| + indices = entity[:indices] + result << h(text[index...indices.first]) + result << yield(entity) + indices.last + end + + result << h(text[last_index..-1]) + + result + end + + def link_to_url(entity) + url = Addressable::URI.parse(entity[:url]).to_s + rel = with_rel_me? ? (DEFAULT_REL + %w(me)) : DEFAULT_REL + + prefix = url.match(URL_PREFIX_REGEX).to_s + display_url = url[prefix.length, 30] + suffix = url[prefix.length + 30..-1] + cutoff = url[prefix.length..-1].length > 30 + + <<~HTML.squish + <a href="#{h(url)}" target="_blank" rel="#{rel.join(' ')}"><span class="invisible">#{h(prefix)}</span><span class="#{cutoff ? 'ellipsis' : ''}">#{h(display_url)}</span><span class="invisible">#{h(suffix)}</span></a> + HTML + rescue Addressable::URI::InvalidURIError, IDN::Idna::IdnaError + h(entity[:url]) + end + + def link_to_hashtag(entity) + hashtag = entity[:hashtag] + url = tag_url(hashtag) + + <<~HTML.squish + <a href="#{h(url)}" class="mention hashtag" rel="tag">#<span>#{h(hashtag)}</span></a> + HTML + end + + def link_to_mention(entity) + username, domain = entity[:screen_name].split('@') + domain = nil if local_domain?(domain) + account = nil + + if preloaded_accounts? + same_username_hits = 0 + + preloaded_accounts.each do |other_account| + same_username = other_account.username.casecmp(username).zero? + same_domain = other_account.domain.nil? ? domain.nil? : other_account.domain.casecmp(domain)&.zero? 
+ + if same_username && !same_domain + same_username_hits += 1 + elsif same_username && same_domain + account = other_account + end + end + else + account = entity_cache.mention(username, domain) + end + + return "@#{h(entity[:screen_name])}" if account.nil? + + url = ActivityPub::TagManager.instance.url_for(account) + display_username = same_username_hits&.positive? || with_domains? ? account.pretty_acct : account.username + + <<~HTML.squish + <span class="h-card"><a href="#{h(url)}" class="u-url mention">@<span>#{h(display_username)}</span></a></span> + HTML + end + + def entity_cache + @entity_cache ||= EntityCache.instance + end + + def tag_manager + @tag_manager ||= TagManager.instance + end + + delegate :local_domain?, to: :tag_manager + + def multiline? + options[:multiline] + end + + def with_domains? + options[:with_domains] + end + + def with_rel_me? + options[:with_rel_me] + end + + def preloaded_accounts + options[:preloaded_accounts] + end + + def preloaded_accounts? + preloaded_accounts.present? + end +end diff --git a/app/mailers/application_mailer.rb b/app/mailers/application_mailer.rb index cc585c3b7..a37682eca 100644 --- a/app/mailers/application_mailer.rb +++ b/app/mailers/application_mailer.rb @@ -5,6 +5,7 @@ class ApplicationMailer < ActionMailer::Base helper :application helper :instance + helper :formatting protected diff --git a/app/serializers/activitypub/actor_serializer.rb b/app/serializers/activitypub/actor_serializer.rb index 48707aa16..bd1648348 100644 --- a/app/serializers/activitypub/actor_serializer.rb +++ b/app/serializers/activitypub/actor_serializer.rb @@ -2,6 +2,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer include RoutingHelper + include FormattingHelper context :security @@ -102,7 +103,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer end def summary - object.suspended? ? '' : Formatter.instance.simplified_format(object) + object.suspended? ? 
'' : html_aware_format(object.note, object.local?) end def icon @@ -185,6 +186,8 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer end class Account::FieldSerializer < ActivityPub::Serializer + include FormattingHelper + attributes :type, :name, :value def type @@ -192,7 +195,7 @@ class ActivityPub::ActorSerializer < ActivityPub::Serializer end def value - Formatter.instance.format_field(object.account, object.value) + html_aware_format(object.value, object.account.value?, with_rel_me: true, with_domains: true, multiline: false) end end diff --git a/app/serializers/activitypub/note_serializer.rb b/app/serializers/activitypub/note_serializer.rb index 7be2e2647..27e058199 100644 --- a/app/serializers/activitypub/note_serializer.rb +++ b/app/serializers/activitypub/note_serializer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class ActivityPub::NoteSerializer < ActivityPub::Serializer + include FormattingHelper + context_extensions :atom_uri, :conversation, :sensitive, :voters_count attributes :id, :type, :summary, @@ -39,11 +41,11 @@ class ActivityPub::NoteSerializer < ActivityPub::Serializer end def content - Formatter.instance.format(object) + status_content_format(object) end def content_map - { object.language => Formatter.instance.format(object) } + { object.language => content } end def replies diff --git a/app/serializers/rest/account_serializer.rb b/app/serializers/rest/account_serializer.rb index a78ec4507..2f67e06b2 100644 --- a/app/serializers/rest/account_serializer.rb +++ b/app/serializers/rest/account_serializer.rb @@ -2,6 +2,7 @@ class REST::AccountSerializer < ActiveModel::Serializer include RoutingHelper + include FormattingHelper attributes :id, :username, :acct, :display_name, :locked, :bot, :discoverable, :group, :created_at, :note, :url, :avatar, :avatar_static, :header, :header_static, @@ -14,10 +15,12 @@ class REST::AccountSerializer < ActiveModel::Serializer attribute :suspended, if: :suspended? 
class FieldSerializer < ActiveModel::Serializer + include FormattingHelper + attributes :name, :value, :verified_at def value - Formatter.instance.format_field(object.account, object.value) + html_aware_format(object.value, object.account.local?, with_rel_me: true, with_domains: true, multiline: false) end end @@ -32,7 +35,7 @@ class REST::AccountSerializer < ActiveModel::Serializer end def note - object.suspended? ? '' : Formatter.instance.simplified_format(object) + object.suspended? ? '' : html_aware_format(object.note, object.local?) end def url diff --git a/app/serializers/rest/announcement_serializer.rb b/app/serializers/rest/announcement_serializer.rb index 9343b97d2..23b2fa514 100644 --- a/app/serializers/rest/announcement_serializer.rb +++ b/app/serializers/rest/announcement_serializer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class REST::AnnouncementSerializer < ActiveModel::Serializer + include FormattingHelper + attributes :id, :content, :starts_at, :ends_at, :all_day, :published_at, :updated_at @@ -25,7 +27,7 @@ class REST::AnnouncementSerializer < ActiveModel::Serializer end def content - Formatter.instance.linkify(object.text) + linkify(object.text) end def reactions diff --git a/app/serializers/rest/status_edit_serializer.rb b/app/serializers/rest/status_edit_serializer.rb index 05ccd5e94..f7a48797d 100644 --- a/app/serializers/rest/status_edit_serializer.rb +++ b/app/serializers/rest/status_edit_serializer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class REST::StatusEditSerializer < ActiveModel::Serializer + include FormattingHelper + has_one :account, serializer: REST::AccountSerializer attributes :content, :spoiler_text, :sensitive, :created_at @@ -11,7 +13,7 @@ class REST::StatusEditSerializer < ActiveModel::Serializer attribute :poll, if: -> { object.poll_options.present? 
} def content - Formatter.instance.format(object) + status_content_format(object) end def poll diff --git a/app/serializers/rest/status_serializer.rb b/app/serializers/rest/status_serializer.rb index 7c3dd673e..32c4e405e 100644 --- a/app/serializers/rest/status_serializer.rb +++ b/app/serializers/rest/status_serializer.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class REST::StatusSerializer < ActiveModel::Serializer + include FormattingHelper + attributes :id, :created_at, :in_reply_to_id, :in_reply_to_account_id, :sensitive, :spoiler_text, :visibility, :language, :uri, :url, :replies_count, :reblogs_count, @@ -71,7 +73,7 @@ class REST::StatusSerializer < ActiveModel::Serializer end def content - Formatter.instance.format(object) + status_content_format(object) end def url diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 239ab9b93..9c8b5ea20 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -134,7 +134,7 @@ class FetchLinkCardService < BaseService when 'video' @card.width = embed[:width].presence || 0 @card.height = embed[:height].presence || 0 - @card.html = Formatter.instance.sanitize(embed[:html], Sanitize::Config::MASTODON_OEMBED) + @card.html = Sanitize.fragment(embed[:html], Sanitize::Config::MASTODON_OEMBED) @card.image_remote_url = (url + embed[:thumbnail_url]).to_s if embed[:thumbnail_url].present? 
when 'rich' # Most providers rely on <script> tags, which is a no-no diff --git a/app/views/accounts/_bio.html.haml b/app/views/accounts/_bio.html.haml index e8a49a1aa..df4f9bdb8 100644 --- a/app/views/accounts/_bio.html.haml +++ b/app/views/accounts/_bio.html.haml @@ -5,17 +5,17 @@ .account__header__fields - fields.each do |field| %dl - %dt.emojify{ title: field.name }= Formatter.instance.format_field(account, field.name, custom_emojify: true) + %dt.emojify{ title: field.name }= prerender_custom_emojis(h(field.name), account.emojis) %dd{ title: field.value, class: custom_field_classes(field) } - if field.verified? %span.verified__mark{ title: t('accounts.link_verified_on', date: l(field.verified_at)) } = fa_icon 'check' - = Formatter.instance.format_field(account, field.value, custom_emojify: true) + = prerender_custom_emojis(html_aware_format(field.value, account.local?, with_rel_me: true, with_domains: true, multiline: false), account.emojis) = account_badge(account) - if account.note.present? - .account__header__content.emojify= Formatter.instance.simplified_format(account, custom_emojify: true) + .account__header__content.emojify= prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis) .public-account-bio__extra = t 'accounts.joined', date: l(account.created_at, format: :month) diff --git a/app/views/admin/accounts/show.html.haml b/app/views/admin/accounts/show.html.haml index 805cf8a4f..b252f3eac 100644 --- a/app/views/admin/accounts/show.html.haml +++ b/app/views/admin/accounts/show.html.haml @@ -16,16 +16,16 @@ .account__header__fields - fields.each do |field| %dl - %dt.emojify{ title: field.name }= Formatter.instance.format_field(account, field.name, custom_emojify: true) + %dt.emojify{ title: field.name }= prerender_custom_emojis(h(field.name), account.emojis) %dd{ title: field.value, class: custom_field_classes(field) } - if field.verified? 
%span.verified__mark{ title: t('accounts.link_verified_on', date: l(field.verified_at)) } = fa_icon 'check' - = Formatter.instance.format_field(account, field.value, custom_emojify: true) + = prerender_custom_emojis(html_aware_format(field.value, account.local?, with_rel_me: true, with_domains: true, multiline: false), account.emojis) - if account.note.present? %div - .account__header__content.emojify= Formatter.instance.simplified_format(account, custom_emojify: true) + .account__header__content.emojify= prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis) .dashboard__counters.admin-account-counters %div diff --git a/app/views/admin/reports/_status.html.haml b/app/views/admin/reports/_status.html.haml index 7538cfd54..392fc8f81 100644 --- a/app/views/admin/reports/_status.html.haml +++ b/app/views/admin/reports/_status.html.haml @@ -4,12 +4,12 @@ .batch-table__row__content .status__content>< - if status.proper.spoiler_text.blank? - = Formatter.instance.format(status.proper, custom_emojify: true) + = prerender_custom_emojis(status_content_format(status.proper), status.proper.emojis) - else %details< %summary>< - %strong> Content warning: #{Formatter.instance.format_spoiler(status.proper)} - = Formatter.instance.format(status.proper, custom_emojify: true) + %strong> Content warning: #{prerender_custom_emojis(h(status.proper.spoiler_text), status.proper.emojis)} + = prerender_custom_emojis(status_content_format(status.proper), status.proper.emojis) - unless status.proper.ordered_media_attachments.empty? - if status.proper.ordered_media_attachments.first.video? diff --git a/app/views/admin/reports/show.html.haml b/app/views/admin/reports/show.html.haml index 25b751335..41fed2efb 100644 --- a/app/views/admin/reports/show.html.haml +++ b/app/views/admin/reports/show.html.haml @@ -27,7 +27,7 @@ = fa_icon('lock') if @report.target_account.locked? - if @report.target_account.note.present? 
.account-card__bio.emojify - = Formatter.instance.simplified_format(@report.target_account, custom_emojify: true) + = prerender_custom_emojis(html_aware_format(@report.target_account.note, @report.target_account.local?), @report.target_account.emojis) .account-card__actions .account-card__counters .account-card__counters__item diff --git a/app/views/directories/index.html.haml b/app/views/directories/index.html.haml index 2b338115b..a032ddb8d 100644 --- a/app/views/directories/index.html.haml +++ b/app/views/directories/index.html.haml @@ -34,7 +34,7 @@ = fa_icon('lock') if account.locked? - if account.note.present? .account-card__bio.emojify - = Formatter.instance.simplified_format(account, custom_emojify: true) + = prerender_custom_emojis(html_aware_format(account.note, account.local?), account.emojis) - else .flex-spacer .account-card__actions diff --git a/app/views/disputes/strikes/show.html.haml b/app/views/disputes/strikes/show.html.haml index 0fc32b918..0b71e14a3 100644 --- a/app/views/disputes/strikes/show.html.haml +++ b/app/views/disputes/strikes/show.html.haml @@ -26,7 +26,7 @@ %p= t "user_mailer.warning.explanation.#{@strike.action}", instance: Rails.configuration.x.local_domain - unless @strike.text.blank? - = Formatter.instance.linkify(@strike.text) + = linkify(@strike.text) - if @strike.report && !@strike.report.other? %p diff --git a/app/views/notification_mailer/_status.html.haml b/app/views/notification_mailer/_status.html.haml index 219e7e667..444b06fe6 100644 --- a/app/views/notification_mailer/_status.html.haml +++ b/app/views/notification_mailer/_status.html.haml @@ -28,10 +28,10 @@ - if status.spoiler_text? 
%div.auto-dir %p - = Formatter.instance.format_spoiler(status) + = status.spoiler_text %div.auto-dir - = Formatter.instance.format(status) + = status_content_format(status) - if status.ordered_media_attachments.size > 0 %p diff --git a/app/views/notification_mailer/_status.text.erb b/app/views/notification_mailer/_status.text.erb index c43f32d9f..bf6d2b620 100644 --- a/app/views/notification_mailer/_status.text.erb +++ b/app/views/notification_mailer/_status.text.erb @@ -3,6 +3,6 @@ > ---- > <% end %> -> <%= raw word_wrap(Formatter.instance.plaintext(status), break_sequence: "\n> ") %> +> <%= raw word_wrap(extract_plain_text(status.text, status.local?), break_sequence: "\n> ") %> <%= raw t('application_mailer.view')%> <%= web_url("statuses/#{status.id}") %> diff --git a/app/views/notification_mailer/digest.text.erb b/app/views/notification_mailer/digest.text.erb index 4cd4190c1..b767eb9c4 100644 --- a/app/views/notification_mailer/digest.text.erb +++ b/app/views/notification_mailer/digest.text.erb @@ -5,7 +5,7 @@ * <%= raw t('notification_mailer.digest.mention', name: notification.from_account.pretty_acct) %> - <%= raw Formatter.instance.plaintext(notification.target_status) %> + <%= raw extract_plain_text(notification.target_status.text, notification.target_status.local?) %> <%= raw t('application_mailer.view')%> <%= web_url("statuses/#{notification.target_status.id}") %> <% end %> diff --git a/app/views/statuses/_detailed_status.html.haml b/app/views/statuses/_detailed_status.html.haml index fd7e034b1..1d0e5a38c 100644 --- a/app/views/statuses/_detailed_status.html.haml +++ b/app/views/statuses/_detailed_status.html.haml @@ -18,10 +18,11 @@ .status__content.emojify{ :data => ({ spoiler: current_account&.user&.setting_expand_spoilers ? 'expanded' : 'folded' } if status.spoiler_text?) }< - if status.spoiler_text? 
%p< - %span.p-summary> #{Formatter.instance.format_spoiler(status, autoplay: prefers_autoplay?)} + %span.p-summary> #{prerender_custom_emojis(h(status.spoiler_text), status.emojis)} %button.status__content__spoiler-link= t('statuses.show_more') .e-content - = Formatter.instance.format(status, custom_emojify: true, autoplay: prefers_autoplay?) + = prerender_custom_emojis(status_content_format(status), status.emojis) + - if status.preloadable_poll = render_poll_component(status) diff --git a/app/views/statuses/_poll.html.haml b/app/views/statuses/_poll.html.haml index 3546a923e..d0f264095 100644 --- a/app/views/statuses/_poll.html.haml +++ b/app/views/statuses/_poll.html.haml @@ -12,7 +12,7 @@ %span.poll__number>< = "#{percent.round}%" %span.poll__option__text - = Formatter.instance.format_poll_option(status, option, autoplay: prefers_autoplay?) + = prerender_custom_emojis(h(option.title), status.emojis) - if own_votes.include?(index) %span.poll__voted %i.poll__voted__mark.fa.fa-check @@ -23,7 +23,7 @@ %label.poll__option>< %span.poll__input{ class: poll.multiple? ? 'checkbox' : nil}>< %span.poll__option__text - = Formatter.instance.format_poll_option(status, option, autoplay: prefers_autoplay?) + = prerender_custom_emojis(h(option.title), status.emojis) .poll__footer - unless show_results %button.button.button-secondary{ disabled: true } diff --git a/app/views/statuses/_simple_status.html.haml b/app/views/statuses/_simple_status.html.haml index 8ffd3cb74..13b6613ce 100644 --- a/app/views/statuses/_simple_status.html.haml +++ b/app/views/statuses/_simple_status.html.haml @@ -30,10 +30,11 @@ .status__content.emojify{ :data => ({ spoiler: current_account&.user&.setting_expand_spoilers ? 'expanded' : 'folded' } if status.spoiler_text?) }< - if status.spoiler_text? 
%p< - %span.p-summary> #{Formatter.instance.format_spoiler(status, autoplay: prefers_autoplay?)} + %span.p-summary> #{prerender_custom_emojis(h(status.spoiler_text), status.emojis)} %button.status__content__spoiler-link= t('statuses.show_more') .e-content - = Formatter.instance.format(status, custom_emojify: true, autoplay: prefers_autoplay?) + = prerender_custom_emojis(status_content_format(status), status.emojis) + - if status.preloadable_poll = render_poll_component(status) diff --git a/app/views/user_mailer/warning.html.haml b/app/views/user_mailer/warning.html.haml index b308e18f7..fff61fa90 100644 --- a/app/views/user_mailer/warning.html.haml +++ b/app/views/user_mailer/warning.html.haml @@ -40,7 +40,7 @@ %p= t "user_mailer.warning.explanation.#{@warning.action}", instance: @instance - unless @warning.text.blank? - = Formatter.instance.linkify(@warning.text) + = linkify(@warning.text) - if @warning.report && !@warning.report.other? %p diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb index d2ea5f974..6a7723fd2 100644 --- a/config/initializers/twitter_regex.rb +++ b/config/initializers/twitter_regex.rb @@ -75,30 +75,4 @@ module Twitter::TwitterText ) }iox end - - module Extractor - # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along - # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no - # XMPP or magnet URIs an empty array will be returned. - # - # If a block is given then it will be called for each XMPP URI. 
- def extract_extra_uris_with_indices(text, _options = {}) # :yields: uri, start, end - return [] unless text && text.index(":") - urls = [] - - text.to_s.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do - valid_uri_match_data = $~ - - start_position = valid_uri_match_data.char_begin(3) - end_position = valid_uri_match_data.char_end(3) - - urls << { - :url => valid_uri_match_data[3], - :indices => [start_position, end_position] - } - end - urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given? - urls - end - end end diff --git a/spec/lib/emoji_formatter_spec.rb b/spec/lib/emoji_formatter_spec.rb new file mode 100644 index 000000000..129445aa5 --- /dev/null +++ b/spec/lib/emoji_formatter_spec.rb @@ -0,0 +1,55 @@ +require 'rails_helper' + +RSpec.describe EmojiFormatter do + let!(:emoji) { Fabricate(:custom_emoji, shortcode: 'coolcat') } + + def preformat_text(str) + TextFormatter.new(str).to_s + end + + describe '#to_s' do + subject { described_class.new(text, emojis).to_s } + + let(:emojis) { [emoji] } + + context 'given text that is not marked as html-safe' do + let(:text) { 'Foo' } + + it 'raises an argument error' do + expect { subject }.to raise_error ArgumentError + end + end + + context 'given text with an emoji shortcode at the start' do + let(:text) { preformat_text(':coolcat: Beep boop') } + + it 'converts the shortcode to an image tag' do + is_expected.to match(/<img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) + end + end + + context 'given text with an emoji shortcode in the middle' do + let(:text) { preformat_text('Beep :coolcat: boop') } + + it 'converts the shortcode to an image tag' do + is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) + end + end + + context 'given text with concatenated emoji shortcodes' do + let(:text) { preformat_text(':coolcat::coolcat:') } + + it 'does not touch the shortcodes' do + is_expected.to 
match(/:coolcat::coolcat:/) + end + end + + context 'given text with an emoji shortcode at the end' do + let(:text) { preformat_text('Beep boop :coolcat:') } + + it 'converts the shortcode to an image tag' do + is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) + end + end + end +end diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb deleted file mode 100644 index 5c88a2569..000000000 --- a/spec/lib/formatter_spec.rb +++ /dev/null @@ -1,626 +0,0 @@ -require 'rails_helper' - -RSpec.describe Formatter do - let(:local_account) { Fabricate(:account, domain: nil, username: 'alice') } - let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') } - - shared_examples 'encode and link URLs' do - context 'given a stand-alone medium URL' do - let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } - - it 'matches the full URL' do - is_expected.to include 'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"' - end - end - - context 'given a stand-alone google URL' do - let(:text) { 'http://google.com' } - - it 'matches the full URL' do - is_expected.to include 'href="http://google.com"' - end - end - - context 'given a stand-alone URL with a newer TLD' do - let(:text) { 'http://example.gay' } - - it 'matches the full URL' do - is_expected.to include 'href="http://example.gay"' - end - end - - context 'given a stand-alone IDN URL' do - let(:text) { 'https://nic.みんな/' } - - it 'matches the full URL' do - is_expected.to include 'href="https://nic.みんな/"' - end - - it 'has display URL' do - is_expected.to include '<span class="">nic.みんな/</span>' - end - end - - context 'given a URL with a trailing period' do - let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. 
' } - - it 'matches the full URL but not the period' do - is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"' - end - end - - context 'given a URL enclosed with parentheses' do - let(:text) { '(http://google.com/)' } - - it 'matches the full URL but not the parentheses' do - is_expected.to include 'href="http://google.com/"' - end - end - - context 'given a URL with a trailing exclamation point' do - let(:text) { 'http://www.google.com!' } - - it 'matches the full URL but not the exclamation point' do - is_expected.to include 'href="http://www.google.com"' - end - end - - context 'given a URL with a trailing single quote' do - let(:text) { "http://www.google.com'" } - - it 'matches the full URL but not the single quote' do - is_expected.to include 'href="http://www.google.com"' - end - end - - context 'given a URL with a trailing angle bracket' do - let(:text) { 'http://www.google.com>' } - - it 'matches the full URL but not the angle bracket' do - is_expected.to include 'href="http://www.google.com"' - end - end - - context 'given a URL with a query string' do - context 'with escaped unicode character' do - let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' } - - it 'matches the full URL' do - is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"' - end - end - - context 'with unicode character' do - let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' } - - it 'matches the full URL' do - is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&q=autolink"' - end - end - - context 'with unicode character at the end' do - let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' } - - it 'matches the full URL' do - is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"' - end - end - - context 'with escaped and not escaped unicode characters' do - let(:text) { 
'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' } - - it 'preserves escaped unicode characters' do - is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink"' - end - end - end - - context 'given a URL with parentheses in it' do - let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' } - - it 'matches the full URL' do - is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"' - end - end - - context 'given a URL in quotation marks' do - let(:text) { '"https://example.com/"' } - - it 'does not match the quotation marks' do - is_expected.to include 'href="https://example.com/"' - end - end - - context 'given a URL in angle brackets' do - let(:text) { '<https://example.com/>' } - - it 'does not match the angle brackets' do - is_expected.to include 'href="https://example.com/"' - end - end - - context 'given a URL with Japanese path string' do - let(:text) { 'https://ja.wikipedia.org/wiki/日本' } - - it 'matches the full URL' do - is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"' - end - end - - context 'given a URL with Korean path string' do - let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' } - - it 'matches the full URL' do - is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"' - end - end - - context 'given a URL with a full-width space' do - let(:text) { 'https://example.com/ abc123' } - - it 'does not match the full-width space' do - is_expected.to include 'href="https://example.com/"' - end - end - - context 'given a URL in Japanese quotation marks' do - let(:text) { '「[https://example.org/」' } - - it 'does not match the quotation marks' do - is_expected.to include 'href="https://example.org/"' - end - end - - context 'given a URL with Simplified Chinese path string' do - let(:text) { 'https://baike.baidu.com/item/中华人民共和国' } - - it 'matches the full URL' do - is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"' - 
end - end - - context 'given a URL with Traditional Chinese path string' do - let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' } - - it 'matches the full URL' do - is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"' - end - end - - context 'given a URL containing unsafe code (XSS attack, visible part)' do - let(:text) { %q{http://example.com/b<del>b</del>} } - - it 'does not include the HTML in the URL' do - is_expected.to include '"http://example.com/b"' - end - - it 'escapes the HTML' do - is_expected.to include '<del>b</del>' - end - end - - context 'given a URL containing unsafe code (XSS attack, invisible part)' do - let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} } - - it 'does not include the HTML in the URL' do - is_expected.to include '"http://example.com/blahblahblahblah/a"' - end - - it 'escapes the HTML' do - is_expected.to include '<script>alert("Hello")</script>' - end - end - - context 'given text containing HTML code (script tag)' do - let(:text) { '<script>alert("Hello")</script>' } - - it 'escapes the HTML' do - is_expected.to include '<p><script>alert("Hello")</script></p>' - end - end - - context 'given text containing HTML (XSS attack)' do - let(:text) { %q{<img src="javascript:alert('XSS');">} } - - it 'escapes the HTML' do - is_expected.to include '<p><img src="javascript:alert('XSS');"></p>' - end - end - - context 'given an invalid URL' do - let(:text) { 'http://www\.google\.com' } - - it 'outputs the raw URL' do - is_expected.to eq '<p>http://www\.google\.com</p>' - end - end - - context 'given text containing a hashtag' do - let(:text) { '#hashtag' } - - it 'creates a hashtag link' do - is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#<span>hashtag</span></a>' - end - end - - context 'given text containing a hashtag with Unicode chars' do - let(:text) { '#hashtagタグ' } - - it 'creates a hashtag link' do - is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" 
class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>' - end - end - - context 'given a stand-alone xmpp: URI' do - let(:text) { 'xmpp:user@instance.com' } - - it 'matches the full URI' do - is_expected.to include 'href="xmpp:user@instance.com"' - end - end - - context 'given a an xmpp: URI with a query-string' do - let(:text) { 'please join xmpp:muc@instance.com?join right now' } - - it 'matches the full URI' do - is_expected.to include 'href="xmpp:muc@instance.com?join"' - end - end - - context 'given text containing a magnet: URI' do - let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' } - - it 'matches the full URI' do - is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"' - end - end - end - - describe '#format_spoiler' do - subject { Formatter.instance.format_spoiler(status) } - - context 'given a post containing plain text' do - let(:status) { Fabricate(:status, text: 'text', spoiler_text: 'Secret!', uri: nil) } - - it 'Returns the spoiler text' do - is_expected.to eq 'Secret!' 
- end - end - - context 'given a post with an emoji shortcode at the start' do - let!(:emoji) { Fabricate(:custom_emoji) } - let(:status) { Fabricate(:status, text: 'text', spoiler_text: ':coolcat: Secret!', uri: nil) } - let(:text) { ':coolcat: Beep boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/<img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - end - - describe '#format' do - subject { Formatter.instance.format(status) } - - context 'given a post with local status' do - context 'given a reblogged post' do - let(:reblog) { Fabricate(:status, account: local_account, text: 'Hello world', uri: nil) } - let(:status) { Fabricate(:status, reblog: reblog) } - - it 'returns original status with credit to its author' do - is_expected.to include 'RT <span class="h-card"><a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span> Hello world' - end - end - - context 'given a post containing plain text' do - let(:status) { Fabricate(:status, text: 'text', uri: nil) } - - it 'paragraphizes the text' do - is_expected.to eq '<p>text</p>' - end - end - - context 'given a post containing line feeds' do - let(:status) { Fabricate(:status, text: "line\nfeed", uri: nil) } - - it 'removes line feeds' do - is_expected.not_to include "\n" - end - end - - context 'given a post containing linkable mentions' do - let(:status) { Fabricate(:status, mentions: [ Fabricate(:mention, account: local_account) ], text: '@alice') } - - it 'creates a mention link' do - is_expected.to include '<a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span>' - end - end - - context 'given a post containing unlinkable mentions' do - let(:status) { Fabricate(:status, text: '@alice', uri: nil) } - - it 'does not create a mention link' do - is_expected.to include '@alice' - end - end - - context do - subject do - status = Fabricate(:status, text: text, uri: nil) - 
Formatter.instance.format(status) - end - - include_examples 'encode and link URLs' - end - - context 'given a post with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji) } - let(:status) { Fabricate(:status, account: local_account, text: text) } - - subject { Formatter.instance.format(status, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { ':coolcat: Beep boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with an emoji shortcode in the middle' do - let(:text) { 'Beep :coolcat: boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with concatenated emoji shortcodes' do - let(:text) { ':coolcat::coolcat:' } - - it 'does not touch the shortcodes' do - is_expected.to match(/:coolcat::coolcat:/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { 'Beep boop :coolcat:' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - end - end - - context 'given a post with remote status' do - let(:status) { Fabricate(:status, account: remote_account, text: 'Beep boop') } - - it 'reformats the post' do - is_expected.to eq 'Beep boop' - end - - context 'given a post with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) } - let(:status) { Fabricate(:status, account: remote_account, text: text) } - - subject { Formatter.instance.format(status, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { '<p>:coolcat: Beep boop<br />' } - - it 'converts the shortcode to an image tag' do 
- is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with an emoji shortcode in the middle' do - let(:text) { '<p>Beep :coolcat: boop</p>' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with concatenated emoji' do - let(:text) { '<p>:coolcat::coolcat:</p>' } - - it 'does not touch the shortcodes' do - is_expected.to match(/<p>:coolcat::coolcat:<\/p>/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { '<p>Beep boop<br />:coolcat:</p>' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/<br><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - end - end - end - - describe '#reformat' do - subject { Formatter.instance.reformat(text) } - - context 'given a post containing plain text' do - let(:text) { 'Beep boop' } - - it 'keeps the plain text' do - is_expected.to include 'Beep boop' - end - end - - context 'given a post containing script tags' do - let(:text) { '<script>alert("Hello")</script>' } - - it 'strips the scripts' do - is_expected.to_not include '<script>alert("Hello")</script>' - end - end - - context 'given a post containing malicious classes' do - let(:text) { '<span class="mention status__content__spoiler-link">Show more</span>' } - - it 'strips the malicious classes' do - is_expected.to_not include 'status__content__spoiler-link' - end - end - end - - describe '#plaintext' do - subject { Formatter.instance.plaintext(status) } - - context 'given a post with local status' do - let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML tag in text</p>', uri: nil) } - - it 'returns the raw text' do - is_expected.to eq '<p>a text by a nerd who uses an HTML tag in text</p>' - end - end - - context 'given a post with 
remote status' do - let(:status) { Fabricate(:status, account: remote_account, text: '<script>alert("Hello")</script>') } - - it 'returns tag-stripped text' do - is_expected.to eq '' - end - end - end - - describe '#simplified_format' do - subject { Formatter.instance.simplified_format(account) } - - context 'given a post with local status' do - let(:account) { Fabricate(:account, domain: nil, note: text) } - - context 'given a post containing linkable mentions for local accounts' do - let(:text) { '@alice' } - - before { local_account } - - it 'creates a mention link' do - is_expected.to eq '<p><span class="h-card"><a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span></p>' - end - end - - context 'given a post containing linkable mentions for remote accounts' do - let(:text) { '@bob@remote.test' } - - before { remote_account } - - it 'creates a mention link' do - is_expected.to eq '<p><span class="h-card"><a href="https://remote.test/" class="u-url mention">@<span>bob</span></a></span></p>' - end - end - - context 'given a post containing unlinkable mentions' do - let(:text) { '@alice' } - - it 'does not create a mention link' do - is_expected.to eq '<p>@alice</p>' - end - end - - context 'given a post with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji) } - - before { account.note = text } - subject { Formatter.instance.simplified_format(account, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { ':coolcat: Beep boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with an emoji shortcode in the middle' do - let(:text) { 'Beep :coolcat: boop' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - 
context 'given a post with concatenated emoji shortcodes' do - let(:text) { ':coolcat::coolcat:' } - - it 'does not touch the shortcodes' do - is_expected.to match(/:coolcat::coolcat:/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { 'Beep boop :coolcat:' } - - it 'converts the shortcode to an image tag' do - is_expected.to match(/boop <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - end - - include_examples 'encode and link URLs' - end - - context 'given a post with remote status' do - let(:text) { '<script>alert("Hello")</script>' } - let(:account) { Fabricate(:account, domain: 'remote', note: text) } - - it 'reformats' do - is_expected.to_not include '<script>alert("Hello")</script>' - end - - context 'with custom_emojify option' do - let!(:emoji) { Fabricate(:custom_emoji, domain: remote_account.domain) } - - before { remote_account.note = text } - - subject { Formatter.instance.simplified_format(remote_account, custom_emojify: true) } - - context 'given a post with an emoji shortcode at the start' do - let(:text) { '<p>:coolcat: Beep boop<br />' } - - it 'converts shortcode to image tag' do - is_expected.to match(/<p><img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with an emoji shortcode in the middle' do - let(:text) { '<p>Beep :coolcat: boop</p>' } - - it 'converts shortcode to image tag' do - is_expected.to match(/Beep <img draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - - context 'given a post with concatenated emoji shortcodes' do - let(:text) { '<p>:coolcat::coolcat:</p>' } - - it 'does not touch the shortcodes' do - is_expected.to match(/<p>:coolcat::coolcat:<\/p>/) - end - end - - context 'given a post with an emoji shortcode at the end' do - let(:text) { '<p>Beep boop<br />:coolcat:</p>' } - - it 'converts shortcode to image tag' do - is_expected.to match(/<br><img 
draggable="false" class="emojione custom-emoji" alt=":coolcat:"/) - end - end - end - end - end - - describe '#sanitize' do - let(:html) { '<script>alert("Hello")</script>' } - - subject { Formatter.instance.sanitize(html, Sanitize::Config::MASTODON_STRICT) } - - it 'sanitizes' do - is_expected.to eq '' - end - end -end diff --git a/spec/lib/html_aware_formatter.rb b/spec/lib/html_aware_formatter.rb new file mode 100644 index 000000000..18d23abf5 --- /dev/null +++ b/spec/lib/html_aware_formatter.rb @@ -0,0 +1,44 @@ +require 'rails_helper' + +RSpec.describe HtmlAwareFormatter do + describe '#to_s' do + subject { described_class.new(text, local).to_s } + + context 'when local' do + let(:local) { true } + let(:text) { 'Foo bar' } + + it 'returns formatted text' do + is_expected.to eq '<p>Foo bar</p>' + end + end + + context 'when remote' do + let(:local) { false } + + context 'given plain text' do + let(:text) { 'Beep boop' } + + it 'keeps the plain text' do + is_expected.to include 'Beep boop' + end + end + + context 'given text containing script tags' do + let(:text) { '<script>alert("Hello")</script>' } + + it 'strips the scripts' do + is_expected.to_not include '<script>alert("Hello")</script>' + end + end + + context 'given text containing malicious classes' do + let(:text) { '<span class="mention status__content__spoiler-link">Show more</span>' } + + it 'strips the malicious classes' do + is_expected.to_not include 'status__content__spoiler-link' + end + end + end + end +end diff --git a/spec/lib/plain_text_formatter_spec.rb b/spec/lib/plain_text_formatter_spec.rb new file mode 100644 index 000000000..c3d0ee630 --- /dev/null +++ b/spec/lib/plain_text_formatter_spec.rb @@ -0,0 +1,24 @@ +require 'rails_helper' + +RSpec.describe PlainTextFormatter do + describe '#to_s' do + subject { described_class.new(status.text, status.local?).to_s } + + context 'given a post with local status' do + let(:status) { Fabricate(:status, text: '<p>a text by a nerd who uses an HTML 
tag in text</p>', uri: nil) } + + it 'returns the raw text' do + is_expected.to eq '<p>a text by a nerd who uses an HTML tag in text</p>' + end + end + + context 'given a post with remote status' do + let(:remote_account) { Fabricate(:account, domain: 'remote.test', username: 'bob', url: 'https://remote.test/') } + let(:status) { Fabricate(:status, account: remote_account, text: '<p>Hello</p><script>alert("Hello")</script>') } + + it 'returns tag-stripped text' do + is_expected.to eq 'Hello' + end + end + end +end diff --git a/spec/lib/text_formatter_spec.rb b/spec/lib/text_formatter_spec.rb new file mode 100644 index 000000000..52a9d2498 --- /dev/null +++ b/spec/lib/text_formatter_spec.rb @@ -0,0 +1,313 @@ +require 'rails_helper' + +RSpec.describe TextFormatter do + describe '#to_s' do + let(:preloaded_accounts) { nil } + + subject { described_class.new(text, preloaded_accounts: preloaded_accounts).to_s } + + context 'given text containing plain text' do + let(:text) { 'text' } + + it 'paragraphizes the text' do + is_expected.to eq '<p>text</p>' + end + end + + context 'given text containing line feeds' do + let(:text) { "line\nfeed" } + + it 'removes line feeds' do + is_expected.not_to include "\n" + end + end + + context 'given text containing linkable mentions' do + let(:preloaded_accounts) { [Fabricate(:account, username: 'alice')] } + let(:text) { '@alice' } + + it 'creates a mention link' do + is_expected.to include '<a href="https://cb6e6126.ngrok.io/@alice" class="u-url mention">@<span>alice</span></a></span>' + end + end + + context 'given text containing unlinkable mentions' do + let(:preloaded_accounts) { [] } + let(:text) { '@alice' } + + it 'does not create a mention link' do + is_expected.to include '@alice' + end + end + + context 'given a stand-alone medium URL' do + let(:text) { 'https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4' } + + it 'matches the full URL' do + is_expected.to include 
'href="https://hackernoon.com/the-power-to-build-communities-a-response-to-mark-zuckerberg-3f2cac9148a4"' + end + end + + context 'given a stand-alone google URL' do + let(:text) { 'http://google.com' } + + it 'matches the full URL' do + is_expected.to include 'href="http://google.com"' + end + end + + context 'given a stand-alone URL with a newer TLD' do + let(:text) { 'http://example.gay' } + + it 'matches the full URL' do + is_expected.to include 'href="http://example.gay"' + end + end + + context 'given a stand-alone IDN URL' do + let(:text) { 'https://nic.みんな/' } + + it 'matches the full URL' do + is_expected.to include 'href="https://nic.みんな/"' + end + + it 'has display URL' do + is_expected.to include '<span class="">nic.みんな/</span>' + end + end + + context 'given a URL with a trailing period' do + let(:text) { 'http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona. ' } + + it 'matches the full URL but not the period' do + is_expected.to include 'href="http://www.mcmansionhell.com/post/156408871451/50-states-of-mcmansion-hell-scottsdale-arizona"' + end + end + + context 'given a URL enclosed with parentheses' do + let(:text) { '(http://google.com/)' } + + it 'matches the full URL but not the parentheses' do + is_expected.to include 'href="http://google.com/"' + end + end + + context 'given a URL with a trailing exclamation point' do + let(:text) { 'http://www.google.com!' 
} + + it 'matches the full URL but not the exclamation point' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a trailing single quote' do + let(:text) { "http://www.google.com'" } + + it 'matches the full URL but not the single quote' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a trailing angle bracket' do + let(:text) { 'http://www.google.com>' } + + it 'matches the full URL but not the angle bracket' do + is_expected.to include 'href="http://www.google.com"' + end + end + + context 'given a URL with a query string' do + context 'with escaped unicode character' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&q=autolink"' + end + end + + context 'with unicode character' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓&q=autolink' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓&q=autolink"' + end + end + + context 'with unicode character at the end' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=✓' } + + it 'matches the full URL' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=✓"' + end + end + + context 'with escaped and not escaped unicode characters' do + let(:text) { 'https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink' } + + it 'preserves escaped unicode characters' do + is_expected.to include 'href="https://www.ruby-toolbox.com/search?utf8=%E2%9C%93&utf81=✓&q=autolink"' + end + end + end + + context 'given a URL with parentheses in it' do + let(:text) { 'https://en.wikipedia.org/wiki/Diaspora_(software)' } + + it 'matches the full URL' do + is_expected.to include 'href="https://en.wikipedia.org/wiki/Diaspora_(software)"' + end + end + + context 'given a URL in quotation 
marks' do + let(:text) { '"https://example.com/"' } + + it 'does not match the quotation marks' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL in angle brackets' do + let(:text) { '<https://example.com/>' } + + it 'does not match the angle brackets' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL with Japanese path string' do + let(:text) { 'https://ja.wikipedia.org/wiki/日本' } + + it 'matches the full URL' do + is_expected.to include 'href="https://ja.wikipedia.org/wiki/日本"' + end + end + + context 'given a URL with Korean path string' do + let(:text) { 'https://ko.wikipedia.org/wiki/대한민국' } + + it 'matches the full URL' do + is_expected.to include 'href="https://ko.wikipedia.org/wiki/대한민국"' + end + end + + context 'given a URL with a full-width space' do + let(:text) { 'https://example.com/　abc123' } + + it 'does not match the full-width space' do + is_expected.to include 'href="https://example.com/"' + end + end + + context 'given a URL in Japanese quotation marks' do + let(:text) { '「[https://example.org/」' } + + it 'does not match the quotation marks' do + is_expected.to include 'href="https://example.org/"' + end + end + + context 'given a URL with Simplified Chinese path string' do + let(:text) { 'https://baike.baidu.com/item/中华人民共和国' } + + it 'matches the full URL' do + is_expected.to include 'href="https://baike.baidu.com/item/中华人民共和国"' + end + end + + context 'given a URL with Traditional Chinese path string' do + let(:text) { 'https://zh.wikipedia.org/wiki/臺灣' } + + it 'matches the full URL' do + is_expected.to include 'href="https://zh.wikipedia.org/wiki/臺灣"' + end + end + + context 'given a URL containing unsafe code (XSS attack, visible part)' do + let(:text) { %q{http://example.com/b<del>b</del>} } + + it 'does not include the HTML in the URL' do + is_expected.to include '"http://example.com/b"' + end + + it 'escapes the HTML' do + is_expected.to include 
'&lt;del&gt;b&lt;/del&gt;' + end + end + + context 'given a URL containing unsafe code (XSS attack, invisible part)' do + let(:text) { %q{http://example.com/blahblahblahblah/a<script>alert("Hello")</script>} } + + it 'does not include the HTML in the URL' do + is_expected.to include '"http://example.com/blahblahblahblah/a"' + end + + it 'escapes the HTML' do + is_expected.to include '&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;' + end + end + + context 'given text containing HTML code (script tag)' do + let(:text) { '<script>alert("Hello")</script>' } + + it 'escapes the HTML' do + is_expected.to include '<p>&lt;script&gt;alert(&quot;Hello&quot;)&lt;/script&gt;</p>' + end + end + + context 'given text containing HTML (XSS attack)' do + let(:text) { %q{<img src="javascript:alert('XSS');">} } + + it 'escapes the HTML' do + is_expected.to include '<p>&lt;img src=&quot;javascript:alert(&#39;XSS&#39;);&quot;&gt;</p>' + end + end + + context 'given an invalid URL' do + let(:text) { 'http://www\.google\.com' } + + it 'outputs the raw URL' do + is_expected.to eq '<p>http://www\.google\.com</p>' + end + end + + context 'given text containing a hashtag' do + let(:text) { '#hashtag' } + + it 'creates a hashtag link' do + is_expected.to include '/tags/hashtag" class="mention hashtag" rel="tag">#<span>hashtag</span></a>' + end + end + + context 'given text containing a hashtag with Unicode chars' do + let(:text) { '#hashtagタグ' } + + it 'creates a hashtag link' do + is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#<span>hashtagタグ</span></a>' + end + end + + context 'given text with a stand-alone xmpp: URI' do + let(:text) { 'xmpp:user@instance.com' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:user@instance.com"' + end + end + + context 'given text with an xmpp: URI with a query-string' do + let(:text) { 'please join xmpp:muc@instance.com?join right now' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:muc@instance.com?join"' + end + end + + context 'given 
text containing a magnet: URI' do + let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' } + + it 'matches the full URI' do + is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"' + end + end + end +end |