From 6378feffa8935238bdb5f1f1c01fcb102440fe30 Mon Sep 17 00:00:00 2001 From: Darius Kazemi Date: Tue, 30 Apr 2019 15:29:28 -0700 Subject: [Feature, Federation, Port: hometown@b3e6597] Support locally cached inline images [+ Monsterfork additions] Changes added by Monsterfork: - Do not limit to only Articles - Reuse existing media; retroactively using more-detailed descriptions - Also scrub carriage returns between tags - Handle download failures - Attach to statuses and keep track of inlined media - Handle local edits Co-authored-by: Fire Demon --- app/helpers/img_proxy_helper.rb | 128 +++++++++++++++++++++ .../styles/monsterfork/components/status.scss | 10 ++ app/lib/activitypub/activity.rb | 4 +- app/lib/activitypub/activity/create.rb | 6 +- app/lib/command_tag/processor.rb | 2 +- app/lib/formatter.rb | 9 +- app/lib/img_tag_handler.rb | 30 +++++ app/lib/sanitize_config.rb | 23 +--- app/models/inline_media_attachment.rb | 14 +++ app/models/media_attachment.rb | 12 +- app/models/status.rb | 1 + app/services/post_status_service.rb | 1 + app/services/update_status_service.rb | 14 ++- app/workers/redownload_media_worker.rb | 19 ++- 14 files changed, 241 insertions(+), 32 deletions(-) create mode 100644 app/helpers/img_proxy_helper.rb create mode 100644 app/lib/img_tag_handler.rb create mode 100644 app/models/inline_media_attachment.rb (limited to 'app') diff --git a/app/helpers/img_proxy_helper.rb b/app/helpers/img_proxy_helper.rb new file mode 100644 index 000000000..6ef3fe84d --- /dev/null +++ b/app/helpers/img_proxy_helper.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +# .~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~. # +################### Cthulhu Code! ################### +# `~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~` # +# - Has a high complexity level and needs tests. # +# - Makes many assumptions about the environment it's included into. # +# - Incurs a high performance penalty. 
# +# # +############################################################################### + +module ImgProxyHelper + def process_inline_images! + raise NameError('@status must be defined by the instance this method is being called from.') unless defined?(@status) + return if @status.text&.strip.blank? || @status.content_type == 'text/plain' + + replace_markdown_images_with_html! + + handler = ImgTagHandler.new + Ox.sax_parse(handler, StringIO.new(@status.text, 'r')) + return if handler.srcs.blank? + + @skip_download_from = { @status.account.domain => DomainBlock.reject_media?(@status.account.domain) } + @redownload_attachment_ids = Set[] + + handler.srcs.each do |src| + alt = handler.alts[src] + normalized_src_parts = begin + Addressable::URI.parse(src&.strip).normalize + rescue Addressable::URI::InvalidURIError + nil + end + normalized_src = normalized_src_parts.to_s + + next replace_text!(src) if normalized_src.blank? || skip_download_from?(normalized_src_parts.host) + + file_name = normalized_src_parts.path.split('/').last + media_attachment = find_media_attachment(normalized_src, file_name) + + if media_attachment.present? + media_attachment.update(description: alt) if alt_more_descriptive?(alt, media_attachment.description) + elsif normalized_src_parts.scheme.blank? || !file_name.match?(/\S\.\w{3,}/) + next replace_text!(src) + else + media_attachment = create_media_attachment!(normalized_src) + end + + next replace_text!(src) if media_attachment.blank? || media_attachment.destroyed? + + if media_attachment.needs_redownload? 
+ replace_text!(src, "#{media_attachment.file.url(:small)}##{media_attachment.id}") + else + replace_text!(src, media_attachment.file.url(:small)) + end + end + end + + private + + def skip_download_from?(domain) + return true if @skip_download_from[@status.account.domain] + return @skip_download_from[domain] if @skip_download_from[domain] + + @skip_download_from[domain] = DomainBlock.reject_media?(domain) + end + + def unsupported_media_type?(mime_type) + mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type) + end + + def html_entities + @html_entities ||= HTMLEntities.new + end + + def replace_markdown_images_with_html! + return unless @status.content_type == 'text/markdown' + + @status.text.gsub!(/!\[(\S+)\]\(\s*(\S+)\s*\)/) do + begin + alt = html_entities.encode(Regexp.last_match(1).strip) + url = Addressable::URI.parse(Regexp.last_match(2)).normalize.to_s + "\"#{alt}\"" + rescue Addressable::URI::InvalidURIError + '' + end + end + end + + def replace_text!(text, replacement = '') + @status.text.gsub!(text, replacement) + end + + def alt_more_descriptive?(alt, description) + return false unless alt.present? && description != alt + return true if description.blank? || alt.split(/[\s\n\r]+/).count > description.split(/[\s\n\r]+/).count + end + + def find_media_attachment(src, file_name) + media_attachment = src.start_with?('http') ? MediaAttachment.find_by(account: @account, remote_url: src, inline: true) : nil + return media_attachment if media_attachment.present? 
+ + MediaAttachment.where(account: @status.account, file_file_name: file_name, inline: true) + .find { |m| [m.file.url(:small), m.file.url(:original)].include?(src) || m.status_id == @status.id } + end + + def create_media_attachment!(src, alt) + media_attachment = MediaAttachment.create!(account: @status.account, remote_url: src, description: alt, focus: nil, inline: true) + media_attachment = process_media_attachment!(media_attachment) + return if media_attachment.destroyed? + + @status.inlined_attachments.first_or_create!(media_attachment: media_attachment) + media_attachment + end + + def process_media_attachment!(media_attachment) + media_attachment.download_file! + media_attachment.download_thumbnail! + media_attachment.save! + media_attachment.destroy! if unsupported_media_type?(media_attachment.file.content_type) + media_attachment + rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError + return if @redownload_attachment_ids.include?(media_attachment.id) + + RedownloadMediaWorker.perform_in(rand(30..60).seconds, media_attachment.id) + @redownload_attachment_ids << media_attachment.id + media_attachment + end +end diff --git a/app/javascript/flavours/glitch/styles/monsterfork/components/status.scss b/app/javascript/flavours/glitch/styles/monsterfork/components/status.scss index 33601b8bf..2df7f1aac 100644 --- a/app/javascript/flavours/glitch/styles/monsterfork/components/status.scss +++ b/app/javascript/flavours/glitch/styles/monsterfork/components/status.scss @@ -35,3 +35,13 @@ div[data-nest-deep="true"] { border-left: 75px dashed darken($ui-base-color, 8%); } + +.status__content { + .status__content__text, + .e-content { + img { + max-width: 100%; + margin: 1em auto; + } + } +} diff --git a/app/lib/activitypub/activity.rb b/app/lib/activitypub/activity.rb index cff6b569a..9b58fabed 100644 --- a/app/lib/activitypub/activity.rb +++ b/app/lib/activitypub/activity.rb @@ -4,8 +4,8 @@ class 
ActivityPub::Activity include JsonLdHelper include Redisable - SUPPORTED_TYPES = %w(Note Question).freeze - CONVERTED_TYPES = %w(Image Audio Video Article Page Event).freeze + SUPPORTED_TYPES = %w(Note Question Article).freeze + CONVERTED_TYPES = %w(Image Audio Video Page Event).freeze def initialize(json, account, **options) @json = json diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index fbb4624e3..10a0a9498 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class ActivityPub::Activity::Create < ActivityPub::Activity + include ImgProxyHelper + def perform dereference_object! @@ -94,6 +96,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity ApplicationRecord.transaction do @status = Status.create!(@params) + process_inline_images! attach_tags(@status) end @@ -138,6 +141,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity text: text_from_content || '', language: detected_language, spoiler_text: converted_object_type? ? '' : (text_from_summary || ''), + title: text_from_title, sensitive: @object['sensitive'] || false, visibility: visibility_from_audience, media_attachment_ids: process_attachments.take(4).map(&:id), @@ -433,7 +437,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity return Formatter.instance.linkify([[text_from_name, text_from_summary.presence].compact.join("\n\n"), object_url || @object['id']].join(' ')) if converted_object_type? if @object['content'].present? - @object['content'] + @object['type'] == 'Article' ? Formatter.instance.format_article(@object['content']) : @object['content'] elsif content_language_map? 
@object['contentMap'].values.first end diff --git a/app/lib/command_tag/processor.rb b/app/lib/command_tag/processor.rb index d9e49c84a..118affee4 100644 --- a/app/lib/command_tag/processor.rb +++ b/app/lib/command_tag/processor.rb @@ -39,7 +39,7 @@ class CommandTag::Processor end elsif @status.destroyed? %w(after_destroy once_after_destroy).each { |suffix| execute_statements(suffix) } - elsif @status.update(text: @text) + elsif @status.update(text: process_inline_images(@text)) %w(after_save once_after_save).each { |suffix| execute_statements(suffix) } else %w(after_save_fail once_after_save_fail).each { |suffix| execute_statements(suffix) } diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index d5408a30b..39c42c8db 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -24,6 +24,7 @@ class HTMLRenderer < Redcarpet::Render::HTML end end +# rubocop:disable Metrics/ClassLength class Formatter include Singleton include RoutingHelper @@ -75,6 +76,11 @@ class Formatter html.delete("\r").delete("\n") end + def format_article(text) + text = text.gsub(/>[\r\n]+<") + text.html_safe # rubocop:disable Rails/OutputSafety + end + def reformat(html, outgoing = false) sanitize(html, Sanitize::Config::MASTODON_STRICT.merge(outgoing: outgoing)) rescue ArgumentError @@ -154,7 +160,7 @@ class Formatter renderer = HTMLRenderer.new({ filter_html: false, escape_html: false, - no_images: true, + no_images: false, no_styles: true, safe_links_only: true, hard_wrap: true, @@ -389,3 +395,4 @@ class Formatter "@#{encode(account.username)}" end end +# rubocop:enable Metrics/ClassLength diff --git a/app/lib/img_tag_handler.rb b/app/lib/img_tag_handler.rb new file mode 100644 index 000000000..0263e1cbd --- /dev/null +++ b/app/lib/img_tag_handler.rb @@ -0,0 +1,30 @@ +# frozen_string_literal: true + +class ImgTagHandler < ::Ox::Sax + attr_reader :srcs + attr_reader :alts + + def initialize + @stack = [] + @srcs = [] + @alts = {} + end + + def start_element(element_name) + @stack 
<< [element_name, {}] + end + + def end_element(_) + self_name, self_attributes = @stack[-1] + if self_name == :img && !self_attributes[:src].nil? + @srcs << self_attributes[:src] + @alts[self_attributes[:src]] = self_attributes[:alt]&.strip + end + @stack.pop + end + + def attr(attribute_name, attribute_value) + _name, attributes = @stack.last + attributes[attribute_name] = attribute_value&.strip + end +end diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb index ccc3f4642..05131b674 100644 --- a/app/lib/sanitize_config.rb +++ b/app/lib/sanitize_config.rb @@ -35,25 +35,6 @@ class Sanitize node['class'] = class_list.join(' ') end - IMG_TAG_TRANSFORMER = lambda do |env| - node = env[:node] - - return unless env[:node_name] == 'img' - - node.name = 'a' - - node['href'] = node['src'] - if node['alt'].present? - node.content = "[🖼 #{node['alt']}]" - else - url = node['href'] - prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s - text = url[prefix.length, 30] - text = text + "…" if url[prefix.length..-1].length > 30 - node.content = "[🖼 #{text}]" - end - end - LINK_REL_TRANSFORMER = lambda do |env| return unless env[:node_name] == 'a' and env[:node]['href'] @@ -83,7 +64,7 @@ class Sanitize end MASTODON_STRICT ||= freeze_config( - elements: %w(p br span a abbr del pre blockquote code b strong u sub sup i em h1 h2 h3 h4 h5 ul ol li), + elements: %w(p br span a abbr del pre blockquote code b strong u sub sup i em h1 h2 h3 h4 h5 ul ol li img), attributes: { 'a' => %w(href rel class title), @@ -92,6 +73,7 @@ class Sanitize 'blockquote' => %w(cite), 'ol' => %w(start reversed), 'li' => %w(value), + 'img' => %w(src alt title), }, add_attributes: { @@ -107,7 +89,6 @@ class Sanitize transformers: [ CLASS_WHITELIST_TRANSFORMER, - IMG_TAG_TRANSFORMER, UNSUPPORTED_HREF_TRANSFORMER, LINK_REL_TRANSFORMER, ] diff --git a/app/models/inline_media_attachment.rb b/app/models/inline_media_attachment.rb new file mode 100644 index 000000000..ac4388a6d --- /dev/null +++ 
b/app/models/inline_media_attachment.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: inline_media_attachments +# +# id :bigint(8) not null, primary key +# status_id :bigint(8) +# media_attachment_id :bigint(8) +# + +class InlineMediaAttachment < ApplicationRecord + belongs_to :status, inverse_of: :inlined_attachments + belongs_to :media_attachment, inverse_of: :inlines +end diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb index cfdd95b22..324bd0305 100644 --- a/app/models/media_attachment.rb +++ b/app/models/media_attachment.rb @@ -26,6 +26,7 @@ # thumbnail_file_size :integer # thumbnail_updated_at :datetime # thumbnail_remote_url :string +# inline :boolean default(FALSE), not null # class MediaAttachment < ApplicationRecord @@ -160,6 +161,8 @@ class MediaAttachment < ApplicationRecord belongs_to :status, inverse_of: :media_attachments, optional: true belongs_to :scheduled_status, inverse_of: :media_attachments, optional: true + has_many :inlines, class_name: 'InlineMediaAttachment', inverse_of: :media_attachment, dependent: :destroy + has_attached_file :file, styles: ->(f) { file_styles f }, processors: ->(f) { file_processors f }, @@ -189,13 +192,16 @@ class MediaAttachment < ApplicationRecord validates :file, presence: true, if: :local? validates :thumbnail, absence: true, if: -> { local? && !audio_or_video? 
} - scope :attached, -> { where.not(status_id: nil).or(where.not(scheduled_status_id: nil)) } - scope :unattached, -> { where(status_id: nil, scheduled_status_id: nil) } + scope :attached, -> { all_media.where.not(status_id: nil).or(all_media.where.not(scheduled_status_id: nil)) } + scope :unattached, -> { all_media.where(status_id: nil, scheduled_status_id: nil) } + scope :uninlined, -> { where(inline: false) } + scope :inlined, -> { rewhere(inline: true) } + scope :all_media, -> { unscope(where: :inline) } scope :local, -> { where(remote_url: '') } scope :remote, -> { where.not(remote_url: '') } scope :cached, -> { remote.where.not(file_file_name: nil) } - default_scope { order(id: :asc) } + default_scope { uninlined.order(id: :asc) } def local? remote_url.blank? diff --git a/app/models/status.rb b/app/models/status.rb index 164c6cb8e..46361280d 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -70,6 +70,7 @@ class Status < ApplicationRecord has_many :active_mentions, -> { active }, class_name: 'Mention', inverse_of: :status has_many :media_attachments, dependent: :nullify + has_many :inlined_attachments, class_name: 'InlineMediaAttachment', inverse_of: :status, dependent: :destroy has_many :mutes, class_name: 'StatusMute', inverse_of: :status, dependent: :destroy belongs_to :conversation_mute, primary_key: 'conversation_id', foreign_key: 'conversation_id', inverse_of: :conversation, dependent: :destroy, optional: true diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index eb60c1022..aef630f71 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -2,6 +2,7 @@ class PostStatusService < BaseService include Redisable + include ImgProxyHelper MIN_SCHEDULE_OFFSET = 5.minutes.freeze diff --git a/app/services/update_status_service.rb b/app/services/update_status_service.rb index 795e43d01..f59f26a25 100644 --- a/app/services/update_status_service.rb +++ 
b/app/services/update_status_service.rb @@ -2,6 +2,7 @@ class UpdateStatusService < BaseService include Redisable + include ImgProxyHelper ALLOWED_ATTRIBUTES = %i( spoiler_text @@ -42,11 +43,19 @@ class UpdateStatusService < BaseService @deleted_tag_ids = @status.tag_ids - @tags.pluck(:id) @deleted_tag_names = @status.tags.pluck(:name) - @tags.pluck(:name) @deleted_attachment_ids = @status.media_attachment_ids - (@params[:media_attachment_ids] || @params[:media_attachments]&.pluck(:id) || []) - @new_mention_ids = @mentions.pluck(:id) - @status.mention_ids + + @new_mention_ids = @mentions.pluck(:id) - @status.mention_ids ApplicationRecord.transaction do @status.update!(@params) - ProcessCommandTagsService.new.call(@account, @status) if @account.local? + + if @account.local? + ProcessCommandTagsService.new.call(@account, @status) + else + process_inline_images! + @status.save! + end + detach_deleted_tags attach_updated_tags end @@ -64,6 +73,7 @@ class UpdateStatusService < BaseService private def prune_attachments + @new_inline_ids = @status.inlined_attachments.pluck(:media_attachment_id) RemoveMediaAttachmentsWorker.perform_async(@deleted_attachment_ids) if @deleted_attachment_ids.present? end diff --git a/app/workers/redownload_media_worker.rb b/app/workers/redownload_media_worker.rb index 0638cd0f0..0ead9a7a8 100644 --- a/app/workers/redownload_media_worker.rb +++ b/app/workers/redownload_media_worker.rb @@ -11,10 +11,27 @@ class RedownloadMediaWorker return if media_attachment.remote_url.blank? + orig_small_url = media_attachment.file.url(:small) + media_attachment.download_file! media_attachment.download_thumbnail! - media_attachment.save + + if media_attachment.save && media_attachment.inline? && media_attachment.status.present? 
+ if unsupported_media_type?(media_attachment.file.content_type) + media_attachment.destroy + true + else + media_attachment.status.text.gsub!("#{orig_small_url}##{media_attachment.id}", media_attachment.file.url(:small)) + media_attachment.status.save + end + end rescue ActiveRecord::RecordNotFound true end + + private + + def unsupported_media_type?(mime_type) + mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type) + end end -- cgit