diff options
author | Darius Kazemi <darius.kazemi@gmail.com> | 2019-04-30 15:29:28 -0700 |
---|---|---|
committer | Fire Demon <firedemon@creature.cafe> | 2020-08-30 05:45:15 -0500 |
commit | 6378feffa8935238bdb5f1f1c01fcb102440fe30 (patch) | |
tree | b1eea210ace7eb72a00ab4604c3ec10c5ae4db4b /app/helpers | |
parent | 4eb49257fc618219709b357fb68f4d6156cab249 (diff) |
[Feature, Federation, Port: hometown@b3e6597] Support locally cached inline images [+ Monsterfork additions]
Changes added by Monsterfork: - Do not limit to only Articles - Reuse existing media; retroactively using more-detailed descriptions - Also scrub carrige returns between tags - Handle download failures - Attach to statuses and keep track of inlined media - Handle local edits Co-authored-by: Fire Demon <firedemon@creature.cafe>
Diffstat (limited to 'app/helpers')
-rw-r--r-- | app/helpers/img_proxy_helper.rb | 128 |
1 files changed, 128 insertions, 0 deletions
diff --git a/app/helpers/img_proxy_helper.rb b/app/helpers/img_proxy_helper.rb new file mode 100644 index 000000000..6ef3fe84d --- /dev/null +++ b/app/helpers/img_proxy_helper.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +# .~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~. # +################### Cthulhu Code! ################### +# `~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~` # +# - Has a high complexity level and needs tests. # +# - Makes many assumptions the environment it's included into. # +# - Incurs a high performance penalty. # +# # +############################################################################### + +module ImgProxyHelper + def process_inline_images! + raise NameError('@status must be defined by the instance this method is being called from.') unless defined?(@status) + return if @status.text&.strip.blank? || @status.content_type == 'text/plain' + + replace_markdown_images_with_html! + + handler = ImgTagHandler.new + Ox.sax_parse(handler, StringIO.new(@status.text, 'r')) + return if handler.srcs.blank? + + @skip_download_from = { @status.account.domain => DomainBlock.reject_media?(@status.account.domain) } + @redownload_attachment_ids = Set[] + + handler.srcs.each do |src| + alt = handler.alts[src] + normalized_src_parts = begin + Addressable::URI.parse(src&.strip).normalize + rescue Addressable::URI::InvalidURIError + nil + end + normalized_src = normalized_src_parts.to_s + + next replace_text!(src) if normalized_src.blank? || skip_download_from?(normalized_src_parts.host) + + file_name = normalized_src_parts.path.split('/').last + media_attachment = find_media_attachment(normalized_src, file_name) + + if media_attachment.present? + media_attachment.update(description: alt) if alt_more_descriptive?(alt, media_attachment.description) + elsif normalized_src_parts.scheme.blank? || !file_name.match?(/\S\.\w{3,}/) + next replace_text!(src) + else + media_attachment = create_media_attachment!(normalized_src) + end + + next replace_text!(src) if media_attachment.blank? || media_attachment.destroyed? + + if media_attachment.needs_redownload? + replace_text!(src, "#{media_attachment.file.url(:small)}##{media_attachment.id}") + else + replace_text!(src, media_attachment.file.url(:small)) + end + end + end + + private + + def skip_download_from?(domain) + return true if @skip_download_from[@status.account.domain] + return @skip_download_from[domain] if @skip_download_from[domain] + + @skip_download_from[domain] = DomainBlock.reject_media?(domain) + end + + def unsupported_media_type?(mime_type) + mime_type.present? && !MediaAttachment.supported_mime_types.include?(mime_type) + end + + def html_entities + @html_entities ||= HTMLEntities.new + end + + def replace_markdown_images_with_html! + return unless @status.content_type == 'text/markdown' + + @status.text.gsub!(/!\[(\S+)\]\(\s*(\S+)\s*\)/) do + begin + alt = html_entities.encode(Regexp.last_match(1).strip) + url = Addressable::URI.parse(Regexp.last_match(2)).normalize.to_s + "<img title=\"#{alt}\" alt=\"#{alt}\" src=\"#{url}\" />" + rescue Addressable::URI::InvalidURIError + '' + end + end + end + + def replace_text!(text, replacement = '') + @status.text.gsub!(text, replacement) + end + + def alt_more_descriptive?(alt, description) + return false unless alt.present? && description != alt + return true if description.blank? || alt.split(/[\s\n\r]+/).count > description.split(/[\s\n\r]+/).count + end + + def find_media_attachment(src, file_name) + media_attachment = src.start_with?('http') ? MediaAttachment.find_by(account: @account, remote_url: src, inline: true) : nil + return media_attachment if media_attachment.present? + + MediaAttachment.where(account: @status.account, file_file_name: file_name, inline: true) + .find { |m| [m.file.url(:small), m.file.url(:original)].include?(src) || m.status_id == @status.id } + end + + def create_media_attachment!(src, alt) + media_attachment = MediaAttachment.create!(account: @status.account, remote_url: src, description: alt, focus: nil, inline: true) + media_attachment = process_media_attachment!(media_attachment) + return if media_attachment.destroyed? + + @status.inlined_attachments.first_or_create!(media_attachment: media_attachment) + media_attachment + end + + def process_media_attachment!(media_attachment) + media_attachment.download_file! + media_attachment.download_thumbnail! + media_attachment.save! + media_attachment.destroy! if unsupported_media_type?(media_attachment.file.content_type) + media_attachment + rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError + return if @redownload_attachment_ids.include?(media_attachment.id) + + RedownloadMediaWorker.perform_in(rand(30..60).seconds, media_attachment.id) + @redownload_attachment_ids << media_attachment.id + media_attachment + end +end |