about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--app/models/media_attachment.rb12
-rw-r--r--config/initializers/paperclip.rb2
-rw-r--r--lib/mastodon/media_cli.rb112
3 files changed, 125 insertions, 1 deletions
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index 2c195c523..4f0a09784 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -149,6 +149,18 @@ class MediaAttachment < ApplicationRecord
     domains.any? { |domain| blocks.where(domain: domain).or(blocks.where('domain LIKE ?', "%.#{domain}")).exists? }
   end
 
+  def variant?(other_file_name) # true if other_file_name is this attachment's file or the same basename in a style format
+    return true if file_file_name == other_file_name
+    return false if file_file_name.nil? # nothing to compare against; File.extname(nil) below would raise TypeError
+
+    formats = file.styles.values.map(&:format).compact
+    return false if formats.empty? # no style declares a format, so only the exact name could have matched
+
+    extension = File.extname(other_file_name)
+
+    formats.include?(extension.delete('.')) && File.basename(other_file_name, extension) == File.basename(file_file_name, File.extname(file_file_name))
+  end
+
   def to_param
     shortcode
   end
diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb
index 5eec70d62..096f7a11e 100644
--- a/config/initializers/paperclip.rb
+++ b/config/initializers/paperclip.rb
@@ -89,7 +89,7 @@ else
   Paperclip::Attachment.default_options.merge!(
     storage: :filesystem,
     use_timestamp: true,
-    path: ENV.fetch('PAPERCLIP_ROOT_PATH', ':rails_root/public/system') + '/:class/:attachment/:id_partition/:style/:filename',
+    path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
     url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
   )
 end
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 6152d5a09..08e646d4a 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -22,6 +22,118 @@ module Mastodon
 
       The --days option specifies how old media attachments have to be before
       they are removed. It defaults to 7 days.
+    DESC
+    def remove # destroys the stored files of old cached remote attachments; the records themselves are saved, not deleted
+      cutoff = options[:days].days.ago
+      suffix = options[:dry_run] ? '(DRY RUN)' : ''
+      scope  = MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', cutoff)
+
+      processed, aggregate = parallelize_with_progress(scope) do |attachment|
+        next if attachment.file.blank?
+
+        bytes = attachment.file_file_size # captured before the file is destroyed; returned as the block value
+        unless options[:dry_run]
+          attachment.file.destroy
+          attachment.save
+        end
+
+        bytes
+      end
+
+      say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{suffix}", :green, true)
+    end
+
+    option :start_after
+    option :dry_run, type: :boolean, default: false
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    def remove_orphans # deletes stored files whose MediaAttachment is missing or does not recognise the file name
+      progress        = create_progress_bar(nil)
+      reclaimed_bytes = 0
+      removed         = 0
+      dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      case Paperclip::Attachment.default_options[:storage]
+      when :s3
+        paperclip_instance = MediaAttachment.new.file
+        s3_interface       = paperclip_instance.s3_interface
+        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key           = options[:start_after]
+
+        loop do # walk the listing at most 1000 objects per iteration, resuming after last_key
+          objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x }
+
+          break if objects.empty?
+
+          last_key        = objects.last.key
+          attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }
+
+          objects.each do |object|
+            attachment_id = object.key.split('/')[2..-2].join.to_i # assumes <id partition>/<style>/<file> keys; to_i stops at the non-numeric style
+            filename      = object.key.split('/').last
+
+            progress.increment
+
+            next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename)
+
+            reclaimed_bytes += object.size
+            removed += 1
+            object.delete unless options[:dry_run]
+            progress.log("Found and removed orphan: #{object.key}")
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1)
+      when :filesystem
+        require 'find'
+
+        root_path = ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s) # same variable the Paperclip path is built from
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          next if File.directory?(path)
+
+          key           = path.gsub("#{root_path}#{File::SEPARATOR}", '')
+          attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i # same id extraction as the S3 branch
+          filename      = key.split(File::SEPARATOR).last
+          attachment    = MediaAttachment.find_by(id: attachment_id)
+
+          progress.increment
+
+          next unless attachment.nil? || !attachment.variant?(filename)
+
+          reclaimed_bytes += File.size(path)
+          removed += 1
+          File.delete(path) unless options[:dry_run]
+          progress.log("Found and removed orphan: #{key}")
+        end
+      end
+
+      progress.total = progress.progress
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
+
+    option :account, type: :string
+    option :domain, type: :string
+    option :status, type: :numeric
+    option :concurrency, type: :numeric, default: 5, aliases: [:c]
+    option :verbose, type: :boolean, default: false, aliases: [:v]
+    option :dry_run, type: :boolean, default: false
+    option :force, type: :boolean, default: false
+    desc 'refresh', 'Fetch remote media files'
+    long_desc <<-DESC
+      Re-downloads media attachments from other servers. You must specify the
+      source of media attachments with one of the following options:
+
+      Use the --status option to download attachments from a specific status,
+      using the status local numeric ID.
 
       With the --background option, instead of deleting the files sequentially,
       they will be queued into Sidekiq and the command will exit as soon as