about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-12-08 15:37:12 +0100
committerGitHub <noreply@github.com>2019-12-08 15:37:12 +0100
commitf3d232381d60cbc93cb7a35285eb24c30cd0aba0 (patch)
tree8265da7bac3c3dee64d577d4d1c148f75d8f594e
parent6d7daf6154b1edbfe9a0c0b297baab8cd45658f3 (diff)
Add `tootctl media remove-orphans` (#12568)
-rw-r--r--app/models/media_attachment.rb12
-rw-r--r--config/initializers/paperclip.rb2
-rw-r--r--lib/mastodon/media_cli.rb77
3 files changed, 90 insertions, 1 deletions
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index 5d5034a52..573ef5dfc 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -167,6 +167,18 @@ class MediaAttachment < ApplicationRecord
     audio? || video?
   end
 
+  def variant?(other_file_name)
+    return true if file_file_name == other_file_name
+
+    formats = file.styles.values.map(&:format).compact
+
+    return false if formats.empty?
+
+    extension = File.extname(other_file_name)
+
+    formats.include?(extension.delete('.')) && File.basename(other_file_name, extension) == File.basename(file_file_name, File.extname(file_file_name))
+  end
+
   def to_param
     shortcode
   end
diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb
index 96607b7ce..dadc492a0 100644
--- a/config/initializers/paperclip.rb
+++ b/config/initializers/paperclip.rb
@@ -89,7 +89,7 @@ else
   Paperclip::Attachment.default_options.merge!(
     storage: :filesystem,
     use_timestamp: true,
-    path: ENV.fetch('PAPERCLIP_ROOT_PATH', ':rails_root/public/system') + '/:class/:attachment/:id_partition/:style/:filename',
+    path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
     url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
   )
 end
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 3b702f155..96ad8556a 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -44,6 +44,83 @@ module Mastodon
       say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
     end
 
+    option :start_after
+    option :dry_run, type: :boolean, default: false
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    def remove_orphans
+      progress        = create_progress_bar(nil)
+      reclaimed_bytes = 0
+      removed         = 0
+      dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      case Paperclip::Attachment.default_options[:storage]
+      when :s3
+        paperclip_instance = MediaAttachment.new.file
+        s3_interface       = paperclip_instance.s3_interface
+        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key           = options[:start_after]
+
+        loop do
+          objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x }
+
+          break if objects.empty?
+
+          last_key        = objects.last.key
+          attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }
+
+          objects.each do |object|
+            attachment_id = object.key.split('/')[2..-2].join.to_i
+            filename      = object.key.split('/').last
+
+            progress.increment
+
+            next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename)
+
+            reclaimed_bytes += object.size
+            removed += 1
+            object.delete unless options[:dry_run]
+            progress.log("Found and removed orphan: #{object.key}")
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1)
+      when :filesystem
+        require 'find'
+
+        root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          next if File.directory?(path)
+
+          key           = path.gsub("#{root_path}#{File::SEPARATOR}", '')
+          attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i
+          filename      = key.split(File::SEPARATOR).last
+          attachment    = MediaAttachment.find_by(id: attachment_id)
+
+          progress.increment
+
+          next unless attachment.nil? || !attachment.variant?(filename)
+
+          reclaimed_bytes += File.size(path)
+          removed += 1
+          File.delete(path) unless options[:dry_run]
+          progress.log("Found and removed orphan: #{key}")
+        end
+      end
+
+      progress.total = progress.progress
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
+
     option :account, type: :string
     option :domain, type: :string
     option :status, type: :numeric