diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2019-12-08 15:37:12 +0100 |
---|---|---|
committer | multiple creatures <dev@multiple-creature.party> | 2020-02-21 02:24:06 -0600 |
commit | 90f2752375145432747c559e0d44e0c86b2eef53 (patch) | |
tree | f1071b24156b9d3613047ca99b2f0e463a9e84a2 | |
parent | 8caf9e4ede78b434443cc2cd65aeaae4412b288f (diff) |
port tootsuite#12568 to monsterfork: Add `tootctl media remove-orphans`
-rw-r--r-- | app/models/media_attachment.rb | 12 | ||||
-rw-r--r-- | config/initializers/paperclip.rb | 2 | ||||
-rw-r--r-- | lib/mastodon/media_cli.rb | 112 |
3 files changed, 125 insertions, 1 deletions
diff --git a/app/models/media_attachment.rb b/app/models/media_attachment.rb
index 2c195c523..4f0a09784 100644
--- a/app/models/media_attachment.rb
+++ b/app/models/media_attachment.rb
@@ -149,6 +149,18 @@ class MediaAttachment < ApplicationRecord
     domains.any? { |domain| blocks.where(domain: domain).or(blocks.where('domain LIKE ?', "%.#{domain}")).exists? }
   end
 
+  def variant?(other_file_name)
+    return true if file_file_name == other_file_name
+
+    formats = file.styles.values.map(&:format).compact
+
+    return false if formats.empty?
+
+    extension = File.extname(other_file_name)
+
+    formats.include?(extension.delete('.')) && File.basename(other_file_name, extension) == File.basename(file_file_name, File.extname(file_file_name))
+  end
+
   def to_param
     shortcode
   end
diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb
index 5eec70d62..096f7a11e 100644
--- a/config/initializers/paperclip.rb
+++ b/config/initializers/paperclip.rb
@@ -89,7 +89,7 @@ else
   Paperclip::Attachment.default_options.merge!(
     storage: :filesystem,
     use_timestamp: true,
-    path: ENV.fetch('PAPERCLIP_ROOT_PATH', ':rails_root/public/system') + '/:class/:attachment/:id_partition/:style/:filename',
+    path: File.join(ENV.fetch('PAPERCLIP_ROOT_PATH', File.join(':rails_root', 'public', 'system')), ':class', ':attachment', ':id_partition', ':style', ':filename'),
     url: ENV.fetch('PAPERCLIP_ROOT_URL', '/system') + '/:class/:attachment/:id_partition/:style/:filename',
   )
 end
diff --git a/lib/mastodon/media_cli.rb b/lib/mastodon/media_cli.rb
index 6152d5a09..08e646d4a 100644
--- a/lib/mastodon/media_cli.rb
+++ b/lib/mastodon/media_cli.rb
@@ -22,6 +22,118 @@ module Mastodon
 
       The --days option specifies how old media attachments have to be before
       they are removed. It defaults to 7 days.
+    DESC
+    def remove
+      time_ago = options[:days].days.ago
+      dry_run = options[:dry_run] ? '(DRY RUN)' : ''
+
+      processed, aggregate = parallelize_with_progress(MediaAttachment.cached.where.not(remote_url: '').where('created_at < ?', time_ago)) do |media_attachment|
+        next if media_attachment.file.blank?
+
+        size = media_attachment.file_file_size
+
+        unless options[:dry_run]
+          media_attachment.file.destroy
+          media_attachment.save
+        end
+
+        size
+      end
+
+      say("Removed #{processed} media attachments (approx. #{number_to_human_size(aggregate)}) #{dry_run}", :green, true)
+    end
+
+    option :start_after
+    option :dry_run, type: :boolean, default: false
+    desc 'remove-orphans', 'Scan storage and check for files that do not belong to existing media attachments'
+    long_desc <<~LONG_DESC
+      Scans file storage for files that do not belong to existing media attachments. Because this operation
+      requires iterating over every single file individually, it will be slow.
+
+      Please mind that some storage providers charge for the necessary API requests to list objects.
+    LONG_DESC
+    def remove_orphans
+      progress = create_progress_bar(nil)
+      reclaimed_bytes = 0
+      removed = 0
+      dry_run = options[:dry_run] ? ' (DRY RUN)' : ''
+
+      case Paperclip::Attachment.default_options[:storage]
+      when :s3
+        paperclip_instance = MediaAttachment.new.file
+        s3_interface = paperclip_instance.s3_interface
+        bucket = s3_interface.bucket(Paperclip::Attachment.default_options[:s3_credentials][:bucket])
+        last_key = options[:start_after]
+
+        loop do
+          objects = bucket.objects(start_after: last_key, prefix: 'media_attachments/files/').limit(1000).map { |x| x }
+
+          break if objects.empty?
+
+          last_key = objects.last.key
+          attachments_map = MediaAttachment.where(id: objects.map { |object| object.key.split('/')[2..-2].join.to_i }).each_with_object({}) { |attachment, map| map[attachment.id] = attachment }
+
+          objects.each do |object|
+            attachment_id = object.key.split('/')[2..-2].join.to_i
+            filename = object.key.split('/').last
+
+            progress.increment
+
+            next unless attachments_map[attachment_id].nil? || !attachments_map[attachment_id].variant?(filename)
+
+            reclaimed_bytes += object.size
+            removed += 1
+            object.delete unless options[:dry_run]
+            progress.log("Found and removed orphan: #{object.key}")
+          end
+        end
+      when :fog
+        say('The fog storage driver is not supported for this operation at this time', :red)
+        exit(1)
+      when :filesystem
+        require 'find'
+
+        root_path = ENV.fetch('RAILS_ROOT_PATH', File.join(':rails_root', 'public', 'system')).gsub(':rails_root', Rails.root.to_s)
+
+        Find.find(File.join(root_path, 'media_attachments', 'files')) do |path|
+          next if File.directory?(path)
+
+          key = path.gsub("#{root_path}#{File::SEPARATOR}", '')
+          attachment_id = key.split(File::SEPARATOR)[2..-2].join.to_i
+          filename = key.split(File::SEPARATOR).last
+          attachment = MediaAttachment.find_by(id: attachment_id)
+
+          progress.increment
+
+          next unless attachment.nil? || !attachment.variant?(filename)
+
+          reclaimed_bytes += File.size(path)
+          removed += 1
+          File.delete(path) unless options[:dry_run]
+          progress.log("Found and removed orphan: #{key}")
+        end
+      end
+
+      progress.total = progress.progress
+      progress.finish
+
+      say("Removed #{removed} orphans (approx. #{number_to_human_size(reclaimed_bytes)})#{dry_run}", :green, true)
+    end
+
+    option :account, type: :string
+    option :domain, type: :string
+    option :status, type: :numeric
+    option :concurrency, type: :numeric, default: 5, aliases: [:c]
+    option :verbose, type: :boolean, default: false, aliases: [:v]
+    option :dry_run, type: :boolean, default: false
+    option :force, type: :boolean, default: false
+    desc 'refresh', 'Fetch remote media files'
+    long_desc <<-DESC
+      Re-downloads media attachments from other servers. You must specify the
+      source of media attachments with one of the following options:
+
+      Use the --status option to download attachments from a specific status,
+      using the status local numeric ID.
 
       With the --background option, instead of deleting the files sequentially,
       they will be queued into Sidekiq and the command will exit as soon as