about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-09-10 15:29:12 +0200
committerGitHub <noreply@github.com>2019-09-10 15:29:12 +0200
commit031ca25014e0ba88d3dcc3086947b41449a672e2 (patch)
tree405ee6477c1406034faf4dbdfb6139d4090e9005
parent86748148256b504c0411119628435b1445959309 (diff)
Add retry for failed media downloads and `tootctl media refresh` (#11775)
-rw-r--r--app/lib/activitypub/activity/create.rb21
-rw-r--r--app/models/concerns/remotable.rb12
-rw-r--r--app/models/media_attachment.rb2
-rw-r--r--app/workers/redownload_media_worker.rb19
-rw-r--r--lib/mastodon/media_cli.rb54
5 files changed, 92 insertions, 16 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index 000b77df5..dea7fd43c 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -189,22 +189,25 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
     media_attachments = []
 
     as_array(@object['attachment']).each do |attachment|
-      next if attachment['url'].blank?
+      next if attachment['url'].blank? || media_attachments.size >= 4
 
-      href             = Addressable::URI.parse(attachment['url']).normalize.to_s
-      media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil)
-      media_attachments << media_attachment
+      begin
+        href             = Addressable::URI.parse(attachment['url']).normalize.to_s
+        media_attachment = MediaAttachment.create(account: @account, remote_url: href, description: attachment['name'].presence, focus: attachment['focalPoint'], blurhash: supported_blurhash?(attachment['blurhash']) ? attachment['blurhash'] : nil)
+        media_attachments << media_attachment
 
-      next if unsupported_media_type?(attachment['mediaType']) || skip_download?
+        next if unsupported_media_type?(attachment['mediaType']) || skip_download?
 
-      media_attachment.file_remote_url = href
-      media_attachment.save
+        media_attachment.file_remote_url = href
+        media_attachment.save
+      rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError
+        RedownloadMediaWorker.perform_in(rand(30..600).seconds, media_attachment.id)
+      end
     end
 
     media_attachments
   rescue Addressable::URI::InvalidURIError => e
-    Rails.logger.debug e
-
+    Rails.logger.debug "Invalid URL in attachment: #{e}"
     media_attachments
   end
 
diff --git a/app/models/concerns/remotable.rb b/app/models/concerns/remotable.rb
index 9372a963b..082302619 100644
--- a/app/models/concerns/remotable.rb
+++ b/app/models/concerns/remotable.rb
@@ -4,7 +4,7 @@ module Remotable
   extend ActiveSupport::Concern
 
   class_methods do
-    def remotable_attachment(attachment_name, limit)
+    def remotable_attachment(attachment_name, limit, suppress_errors: true)
       attribute_name  = "#{attachment_name}_remote_url".to_sym
       method_name     = "#{attribute_name}=".to_sym
       alt_method_name = "reset_#{attachment_name}!".to_sym
@@ -22,7 +22,7 @@ module Remotable
 
         begin
           Request.new(:get, url).perform do |response|
-            next if response.code != 200
+            raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code)
 
             content_type = parse_content_type(response.headers.get('content-type').last)
             extname      = detect_extname_from_content_type(content_type)
@@ -41,11 +41,11 @@ module Remotable
 
             self[attribute_name] = url if has_attribute?(attribute_name)
           end
-        rescue HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError, Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e
+        rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e
+          Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
+          raise e unless suppress_errors
+        rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e
           Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
-          nil
-        rescue Paperclip::Error, Mastodon::DimensionsValidationError => e
-          Rails.logger.debug "Error processing remote #{attachment_name}: #{e}"
           nil
         end
       end
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index b58025015..a2b73f150 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -118,7 +118,7 @@ class MediaAttachment < ApplicationRecord
   validates_attachment_content_type :file, content_type: IMAGE_MIME_TYPES + VIDEO_MIME_TYPES + AUDIO_MIME_TYPES
   validates_attachment_size :file, less_than: IMAGE_LIMIT, unless: :larger_media_format?
   validates_attachment_size :file, less_than: VIDEO_LIMIT, if: :larger_media_format?
-  remotable_attachment :file, VIDEO_LIMIT
+  remotable_attachment :file, VIDEO_LIMIT, suppress_errors: false
 
   include Attachmentable
 
diff --git a/app/workers/redownload_media_worker.rb b/app/workers/redownload_media_worker.rb
new file mode 100644
index 000000000..98e995918
--- /dev/null
+++ b/app/workers/redownload_media_worker.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+class RedownloadMediaWorker
+  include Sidekiq::Worker
+  include ExponentialBackoff
+
+  sidekiq_options queue: 'pull', retry: 3
+
+  def perform(id)
+    media_attachment = MediaAttachment.find(id)
+
+    return if media_attachment.remote_url.blank?
+
+    media_attachment.reset_file!
+    media_attachment.save
+  rescue ActiveRecord::RecordNotFound
+    true
+  end
+end
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 0659b6b65..ec2f36c30 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -43,5 +43,59 @@ module Mastodon
 
       say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
     end
+
+    option :account, type: :string
+    option :domain, type: :string
+    option :status, type: :numeric
+    option :concurrency, type: :numeric, default: 5, aliases: [:c]
+    option :verbose, type: :boolean, default: false, aliases: [:v]
+    option :dry_run, type: :boolean, default: false
+    desc 'refresh', 'Fetch remote media files'
+    long_desc <<-DESC
+      Re-downloads media attachments from other servers. You must specify the
+      source of media attachments with one of the following options:
+
+      Use the --status option to download attachments from a specific status,
+      using the status local numeric ID.
+
+      Use the --account option to download attachments from a specific account,
+      using username@domain handle of the account.
+
+      Use the --domain option to download attachments from a specific domain.
+    DESC
+    def refresh
+      dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      if options[:status]
+        scope = MediaAttachment.where(status_id: options[:status])
+      elsif options[:account]
+        username, domain = username.split('@')
+        account = Account.find_remote(username, domain)
+
+        if account.nil?
+          say('No such account', :red)
+          exit(1)
+        end
+
+        scope = MediaAttachment.where(account_id: account.id)
+      elsif options[:domain]
+        scope = MediaAttachment.joins(:account).merge(Account.by_domain_and_subdomains(options[:domain]))
+      else
+        exit(1)
+      end
+
+      processed, aggregate = parallelize_with_progress(scope) do |media_attachment|
+        next if media_attachment.remote_url.blank?
+
+        unless options[:dry_run]
+          media_attachment.reset_file!
+          media_attachment.save
+        end
+
+        media_attachment.file_file_size
+      end
+
+      say("Downloaded #{processed} media attachments (approx. #{number_to_human_size(aggregate)})#{dry_run}", :green, true)
+    end
   end
 end