From f3d232381d60cbc93cb7a35285eb24c30cd0aba0 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 8 Dec 2019 15:37:12 +0100 Subject: Add `tootctl media remove-orphans` (#12568) --- lib/mastodon/media_cli.rb | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'lib') diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb index 3b702f155..96ad8556a 100644 --- a/lib/mastodon/media_cli.rb +++ b/lib/mastodon/media_cli.rb @@ -44,6 +44,83 @@ module Mastodon say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true) end + option :start_after + option :dry_run, type: :boolean, default: false + desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments' + long_desc <<~LONG_DESC + Scans file storage for files that do not belong to existing media attachments. Because this operation + requires iterating over every single file individually, it will be slow. + + Please mind that some storage providers charge for the necessary API requests to list objects. + LONG_DESC + def remove_orphans + progress = create_progress_bar(nil) + reclaimed_bytes = 0 + removed = 0 + dry_run = options[:dry_run] ? ' (DRY RUN)' : '' + + case Paperclip::Attachment.default_options[:storage] + when :s3 + paperclip_instance = MediaAttachment.new.file + s3_interface = paperclip_instance.s3_interface + bucket = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket]) + last_key = options[:start_after] + + loop do + objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x } + + break if objects.empty? + + last_key = objects.last.key + attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment } + + objects.each do |object| + attachment_id = object.key.split('/')[2..-2].join.to_i + filename = object.key.split('/').last + + progress.increment + + next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename) + + reclaimed_bytes += object.size + removed += 1 + object.delete unless options[:dry_run] + progress.log("Found and removed orphan: #{object.key}") + end + end + when :fog + say('The fog storage driver is not supported for this operation at this time', :red) + exit(1) + when :filesystem + require 'find' + + root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s) + + Find.find(File.join(root_path, 'media_attachments', 'files')) do |path| + next if File.directory?(path) + + key = path.gsub("#{root_path}#{File::SEPARATOR}", '') + attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i + filename = key.split(File::SEPARATOR).last + attachment = MediaAttachment.find_by(id: attachment_id) + + progress.increment + + next unless attachment.nil? || !attachment.variant?(filename) + + reclaimed_bytes += File.size(path) + removed += 1 + File.delete(path) unless options[:dry_run] + progress.log("Found and removed orphan: #{key}") + end + end + + progress.total = progress.progress + progress.finish + + say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true) + end + option :account, type: :string option :domain, type: :string option :status, type: :numeric -- cgit From d7bcc0930cea66c646b56db3587426880b4368df Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 9 Dec 2019 04:26:00 +0100 Subject: Fix error handling in `tootctl media remove-orphans` (#12571) --- lib/mastodon/media_cli.rb | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb index 96ad8556a..d842b986f 100644 --- a/lib/mastodon/media_cli.rb +++ b/lib/mastodon/media_cli.rb @@ -67,7 +67,15 @@ module Mastodon last_key = options[:start_after] loop do - objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x } + objects = begin + begin + bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x } + rescue => e + progress.log(pastel.red("Error fetching list of files: #{e}")) + progress.log("If you want to continue from this point, add --start-after=#{last_key} to your command") if last_key + break + end + end break if objects.empty? @@ -82,10 +90,16 @@ module Mastodon next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename) - reclaimed_bytes += object.size - removed += 1 - object.delete unless options[:dry_run] - progress.log("Found and removed orphan: #{object.key}") + begin + object.delete unless options[:dry_run] + + reclaimed_bytes += object.size + removed += 1 + + progress.log("Found and removed orphan: #{object.key}") + rescue => e + progress.log(pastel.red("Error processing #{object.key}: #{e}")) + end end end when :fog @@ -108,10 +122,18 @@ module Mastodon next unless attachment.nil? || !attachment.variant?(filename) - reclaimed_bytes += File.size(path) - removed += 1 - File.delete(path) unless options[:dry_run] - progress.log("Found and removed orphan: #{key}") + begin + size = File.size(path) + + File.delete(path) unless options[:dry_run] + + reclaimed_bytes += size + removed += 1 + + progress.log("Found and removed orphan: #{key}") + rescue => e + progress.log(pastel.red("Error processing #{key}: #{e}")) + end end end -- cgit From 1c1028321795461cb72a60bb7bc1e3479bb21bb9 Mon Sep 17 00:00:00 2001 From: trwnh Date: Thu, 12 Dec 2019 12:50:08 -0600 Subject: Fix typo in tootctl statuses remove (#12603) --- lib/mastodon/statuses_cli.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mastodon/statuses_cli.rb b/lib/mastodon/statuses_cli.rb index 7f2fbfa85..4d9af0a54 100644 --- a/lib/mastodon/statuses_cli.rb +++ b/lib/mastodon/statuses_cli.rb @@ -20,7 +20,7 @@ module Mastodon by someone locally but no longer are. This is a computationally heavy procedure that creates extra database - indicides before commencing, and removes them afterward. + indices before commencing, and removes them afterward. LONG_DESC def remove say('Creating temporary database indices...') -- cgit From 7fe45e7c0317a896f3edfa1473489696dc7f0cf1 Mon Sep 17 00:00:00 2001 From: ThibG Date: Tue, 17 Dec 2019 13:33:36 +0100 Subject: Changes to tootctl statuses remove (#11267) * Fix comments in statuses_cli The comment for the part of query keeping statuses which have replies was incorrect, explaining the part of the query kept favourited statuses instead * Keep statuses favourited by local users in tootctl statuses remove * Do delete old replies that are not direct interactions with local users * Skip reblogs of local statuses instead of all reblogs * Optimize some queries * Keep old statuses with recent reblogs * Add option to clear toots from followed accounts too --- lib/mastodon/statuses_cli.rb | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/mastodon/statuses_cli.rb b/lib/mastodon/statuses_cli.rb index 4d9af0a54..74f15de5f 100644 --- a/lib/mastodon/statuses_cli.rb +++ b/lib/mastodon/statuses_cli.rb @@ -13,6 +13,7 @@ module Mastodon end option :days, type: :numeric, default: 90 + option :clean_followed, type: :boolean desc 'remove', 'Remove unreferenced statuses' long_desc <<~LONG_DESC Remove statuses that are not referenced by local user activity, such as @@ -34,17 +35,26 @@ module Mastodon say('Beginning removal... This might take a while...') - Status.remote - .where('id < ?', max_id) - .where(reblog_of_id: nil) # Skip reblogs - .where(in_reply_to_id: nil) # Skip replies - .where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)') # Skip statuses that are pinned on profiles - .where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') # Skip statuses that mention local accounts - .where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)') # Skip statuses favourited by local accounts - .where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND statuses1.account_id IN (SELECT accounts.id FROM accounts WHERE accounts.domain IS NULL))') # Skip statuses reblogged by local accounts - .where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)') # Skip accounts followed by local accounts - .in_batches - .delete_all + scope = Status.remote.where('id < ?', max_id) + # Skip reblogs of local statuses + scope = scope.where('reblog_of_id NOT IN (SELECT statuses1.id FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))') + # Skip statuses that are pinned on profiles + scope = scope.where('id NOT IN (SELECT status_pins.status_id FROM status_pins WHERE statuses.id = status_id)') + # Skip statuses that mention local accounts + scope = scope.where('id NOT IN (SELECT mentions.status_id FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') + # Skip statuses which have replies + scope = scope.where('id NOT IN (SELECT statuses1.in_reply_to_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)') + # Skip statuses reblogged by local accounts or with recent boosts + scope = scope.where('id NOT IN (SELECT statuses1.reblog_of_id FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= ?))', max_id) + # Skip statuses favourited by local users + scope = scope.where('id NOT IN (SELECT favourites.status_id FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))') + + unless options[:clean_followed] + # Skip accounts followed by local accounts + scope = scope.where('account_id NOT IN (SELECT follows.target_account_id FROM follows WHERE statuses.account_id = follows.target_account_id)') + end + + scope.in_batches.delete_all say('Beginning removal of now-orphaned media attachments to free up disk space...') -- cgit