about summary refs log tree commit diff
path: root/lib
diff options
context:
space:
mode:
authorStarfall <root@starfall.blue>2019-12-29 17:53:54 -0600
committerStarfall <root@starfall.blue>2019-12-29 17:53:54 -0600
commitc0c9529df269816f52915a9802e5e30fbce9576b (patch)
tree077a7a12c69b18dafd3db3226f4beac477d0f49f /lib
parent9ee65e89d547ae4a5bc0a5bdb59316ba5c061cfd (diff)
parent22daf24600d8e99e4569740ee5836d25c70c1e8b (diff)
Merge branch 'glitch'
Diffstat (limited to 'lib')
-rw-r--r--lib/mastodon/media_cli.rb99
-rw-r--r--lib/mastodon/statuses_cli.rb37
2 files changed, 123 insertions, 13 deletions
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 3b702f155..d842b986f 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -44,6 +44,105 @@ module Mastodon
       say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
     end
 
+    option :start_after
+    option :dry_run, type: :boolean, default: false
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    def remove_orphans
+      progress        = create_progress_bar(nil)
+      reclaimed_bytes = 0
+      removed         = 0
+      dry_run         = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      case Paperclip::Attachment.default_options[:storage]
+      when :s3
+        paperclip_instance = MediaAttachment.new.file
+        s3_interface       = paperclip_instance.s3_interface
+        bucket             = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key           = options[:start_after]
+
+        loop do
+          objects = begin
+            begin
+              bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x }
+            rescue => e
+              progress.log(pastel.red("Error fetching list of files: #{e}"))
+              progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key
+              break
+            end
+          end
+
+          break if objects.empty?
+
+          last_key        = objects.last.key
+          attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }
+
+          objects.each do |object|
+            attachment_id = object.key.split('/')[2..-2].join.to_i
+            filename      = object.key.split('/').last
+
+            progress.increment
+
+            next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename)
+
+            begin
+              object.delete unless options[:dry_run]
+
+              reclaimed_bytes += object.size
+              removed += 1
+
+              progress.log("Found and removed orphan: #{object.key}")
+            rescue => e
+              progress.log(pastel.red("Error processing #{object.key}: #{e}"))
+            end
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1)
+      when :filesystem
+        require 'find'
+
+        root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          next if File.directory?(path)
+
+          key           = path.gsub("#{root_path}#{File::SEPARATOR}", '')
+          attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i
+          filename      = key.split(File::SEPARATOR).last
+          attachment    = MediaAttachment.find_by(id: attachment_id)
+
+          progress.increment
+
+          next unless attachment.nil? || !attachment.variant?(filename)
+
+          begin
+            size = File.size(path)
+
+            File.delete(path) unless options[:dry_run]
+
+            reclaimed_bytes += size
+            removed += 1
+
+            progress.log("Found and removed orphan: #{key}")
+          rescue => e
+            progress.log(pastel.red("Error processing #{key}: #{e}"))
+          end
+        end
+      end
+
+      progress.total = progress.progress
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
+
     option :account, type: :string
     option :domain, type: :string
     option :status, type: :numeric
diff --git a/lib/mastodon/statuses_cli.rb b/lib/mastodon/statuses_cli.rb
index eeedc026c..875183372 100644
--- a/lib/mastodon/statuses_cli.rb
+++ b/lib/mastodon/statuses_cli.rb
@@ -13,6 +13,7 @@ module Mastodon
     end
 
     option :days, type: :numeric, default: 90
+    option :clean_followed, type: :boolean
     desc 'remove', 'Remove unreferenced statuses'
     long_desc <<~LONG_DESC
       Remove statuses that are not referenced by local user activity, such as
@@ -20,7 +21,7 @@ module Mastodon
       by someone locally but no longer are.
 
       This is a computationally heavy procedure that creates extra database
-      indicides before commencing, and removes them afterward.
+      indices before commencing, and removes them afterward.
     LONG_DESC
     def remove
       say('Creating temporary database indices...')
@@ -34,18 +35,28 @@ module Mastodon
 
       say('Beginning removal... This might take a while...')
 
-      Status.remote
-            .where('id < ?', max_id)
-            .where(reblog_of_id: nil)                                                                                                                                                                                              # Skip reblogs
-            .where(in_reply_to_id: nil)                                                                                                                                                                                            # Skip replies
-            .where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)')                                                                                                                      # Skip statuses that are pinned on profiles
-            .where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')                                # Skip statuses that mention local accounts
-            .where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)')                                                                                          # Skip statuses favourited by local accounts
-            .where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id)') # Skip statuses bookmarked by local users
-            .where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND statuses1.account_id IN (SELECT accounts.id FROM accounts WHERE accounts.domain IS NULL))') # Skip statuses reblogged by local accounts
-            .where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)')                                                                                      # Skip accounts followed by local accounts
-            .in_batches
-            .delete_all
+      scope = Status.remote.where('id < ?', max_id)
+      # Skip reblogs of local statuses
+      scope = scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))')
+      # Skip statuses that are pinned on profiles
+      scope = scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)')
+      # Skip statuses that mention local accounts
+      scope = scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
+      # Skip statuses which have replies
+      scope = scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)')
+      # Skip statuses reblogged by local accounts or with recent boosts
+      scope = scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))', max_id)
+      # Skip statuses favourited by local users
+      scope = scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))')
+      # Skip statuses bookmarked by local users
+      scope = scope.where('id NOT IN (SELECT bookmarks.status_id FROM bookmarks WHERE statuses.id = bookmarks.status_id)')
+
+      unless options[:clean_followed]
+        # Skip accounts followed by local accounts
+        scope = scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)')
+      end
+
+      scope.in_batches.delete_all
 
       say('Beginning removal of now-orphaned media attachments to free up disk space...')