about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2019-12-08 15:37:12 +0100
committer multiple creatures <dev@multiple-creature.party> 2020-02-21 04:07:31 -0600
commit caeca95f389dc65b0c1719fece7f119f164aa401 (patch)
tree 6c3c0d3a2799aa96a5e99a20f214cb0bc8a5e23f
parent 28073bbcd6010de077e528f0447b35e51905ed23 (diff)
port tootsuite#12568 to monsterfork: Add `tootctl media remove-orphans`
-rw-r--r-- app/models/media_attachment.rb 7
-rw-r--r-- lib/mastodon/media_cli.rb 77
2 files changed, 77 insertions, 7 deletions
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index 36282d010..d4fd7eeb8 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -183,13 +183,6 @@ class MediaAttachment < ApplicationRecord
     audio? || video?
   end
 
-  def blocked?
-    domains = Set[self.account.domain]
-    domains.add(remote_url.scan(/[\w\-]+\.[\w\-]+(?:\.[\w\-]+)*/).first) if remote_url.present?
-    blocks = DomainBlock.suspend.or(DomainBlock.where(reject_media: true))
-    domains.any? { |domain| blocks.where(domain: domain).or(blocks.where('domain LIKE ?', "%.#{domain}")).exists? }
-  end
-
   def variant?(other_file_name)
     return true if file_file_name == other_file_name
 
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index ec2f36c30..493ba680e 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -44,6 +44,83 @@ module Mastodon
       say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
     end
 
+    option :start_after # Only read by the S3 branch below: object key to start listing after
+    option :dry_run, type: :boolean, default: false # When set, files are counted and logged but never deleted
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    def remove_orphans # Walks stored media files and deletes those no MediaAttachment record claims as a variant
+      progress        = create_progress_bar(nil) # Total is unknown up front; it is set after the scan
+      reclaimed_bytes = 0
+      removed         = 0
+      dry_run         = options[:dry_run] ? ' (DRY RUN)' : '' # Suffix appended to the final summary line
+
+      case Paperclip::Attachment.default_options[:storage] # One scan strategy per configured storage backend
+      when :s3
+        paperclip_instance = MediaAttachment.new.file # Unsaved record, used only to reach the S3 interface
+        s3_interface       = paperclip_instance.s3_interface
+        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key           = options[:start_after] # Listing cursor; nil unless --start-after was given
+
+        loop do
+          objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x } # One page of at most 1000 objects; map presumably materializes the collection into an Array
+
+          break if objects.empty? # Listing exhausted
+
+          last_key        = objects.last.key # Next page resumes after the last key seen
+          attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment } # One query per page, indexed by attachment id
+
+          objects.each do |object|
+            attachment_id = object.key.split('/')[2..-2].join.to_i # Joins the segments after the two-part prefix; to_i stops at the first non-digit. Assumes keys are <prefix>/<id digits split in segments>/<style>/<file> - TODO confirm
+            filename      = object.key.split('/').last
+
+            progress.increment
+
+            next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename) # Keep the file when a record exists and recognizes this filename
+
+            reclaimed_bytes += object.size
+            removed += 1
+            object.delete unless options[:dry_run]
+            progress.log("Found and removed orphan: #{object.key}") # NOTE(review): says "removed" even on a dry run
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1) # Abort with a non-zero status; nothing has been scanned
+      when :filesystem
+        require 'find' # Loaded here because only this branch uses Find
+
+        root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s) # NOTE(review): confirm RAILS_ROOT_PATH is the variable the attachment storage root is actually configured from
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          next if File.directory?(path) # Only regular entries are candidates
+
+          key           = path.gsub("#{root_path}#{File::SEPARATOR}", '') # Path relative to root_path. NOTE(review): gsub strips every occurrence, not just the leading prefix
+          attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i # Same id extraction as the S3 branch
+          filename      = key.split(File::SEPARATOR).last
+          attachment    = MediaAttachment.find_by(id: attachment_id) # One lookup per file, unlike the per-page query in the S3 branch
+
+          progress.increment
+
+          next unless attachment.nil? || !attachment.variant?(filename) # Keep the file when a record exists and recognizes this filename
+
+          reclaimed_bytes += File.size(path)
+          removed += 1
+          File.delete(path) unless options[:dry_run]
+          progress.log("Found and removed orphan: #{key}") # NOTE(review): says "removed" even on a dry run
+        end
+      end
+
+      progress.total = progress.progress # Total becomes the number of files actually visited
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
+
+
     option :account, type: :string
     option :domain, type: :string
     option :status, type: :numeric