about summary refs log tree commit diff
path: root/lib/mastodon/statuses_cli.rb
blob: d4c2e6cf2a7e477162c57f5bbcea467f53a238ea (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
# frozen_string_literal: true

require_relative '../../config/boot'
require_relative '../../config/environment'
require_relative 'cli_helper'

module Mastodon
  class StatusesCLI < Thor
    include CLIHelper
    include ActionView::Helpers::NumberHelper

    def self.exit_on_failure?
      true
    end

    option :days, type: :numeric, default: 90
    option :batch_size, type: :numeric, default: 1_000, aliases: [:b], desc: 'Number of records in each batch'
    option :continue, type: :boolean, default: false, desc: 'If remove is not completed, execute from the previous continuation'
    option :clean_followed, type: :boolean, default: false, desc: 'Include the status of remote accounts that are followed by local accounts as candidates for remove'
    option :skip_status_remove, type: :boolean, default: false, desc: 'Skip status remove (run only cleanup tasks)'
    option :skip_media_remove, type: :boolean, default: false, desc: 'Skip remove orphaned media attachments'
    option :compress_database, type: :boolean, default: false, desc: 'Compress database and update the statistics. This option locks the table for a long time, so run it offline'
    desc 'remove', 'Remove unreferenced statuses'
    long_desc <<~LONG_DESC
      Remove statuses that are not referenced by local user activity, such as
      ones that came from relays, or belonging to users that were once followed
      by someone locally but no longer are.

      It also removes orphaned records and performs additional cleanup tasks
      such as updating statistics and recovering disk space.

      This is a computationally heavy procedure that creates extra database
      indices before commencing, and removes them afterward.
    LONG_DESC
    def remove
      if options[:batch_size] < 1
        say('Cannot run with this batch_size setting, must be at least 1', :red)
        exit(1)
      end

      remove_statuses
      vacuum_and_analyze_statuses
      remove_orphans_media_attachments
      remove_orphans_conversations
      vacuum_and_analyze_conversations
    end

    private

    def remove_statuses
      return if options[:skip_status_remove]

      say('Creating temporary database indices...')

      ActiveRecord::Base.connection.add_index(:media_attachments, :remote_url, name: :index_media_attachments_remote_url, where: 'remote_url is not null', algorithm: :concurrently, if_not_exists: true)

      max_id   = Mastodon::Snowflake.id_at(options[:days].days.ago, with_random: false)
      start_at = Time.now.to_f

      unless options[:continue] && ActiveRecord::Base.connection.table_exists?('statuses_to_be_deleted')
        ActiveRecord::Base.connection.add_index(:accounts, :id, name: :index_accounts_local, where: 'domain is null', algorithm: :concurrently, if_not_exists: true)
        ActiveRecord::Base.connection.add_index(:status_pins, :status_id, name: :index_status_pins_status_id, algorithm: :concurrently, if_not_exists: true)

        say('Extract the deletion target from statuses... This might take a while...')

        ActiveRecord::Base.connection.create_table('statuses_to_be_deleted', force: true)

        # Skip accounts followed by local accounts
        clean_followed_sql = 'AND NOT EXISTS (SELECT 1 FROM follows WHERE statuses.account_id = follows.target_account_id)' unless options[:clean_followed]

        ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL', [[nil, max_id]])
          INSERT INTO statuses_to_be_deleted (id)
          SELECT statuses.id FROM statuses WHERE deleted_at IS NULL AND NOT local AND uri IS NOT NULL AND (id < $1)
          AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.in_reply_to_id)
          AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses1.id = statuses.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local))
          AND NOT EXISTS (SELECT 1 FROM statuses AS statuses1 WHERE statuses.id = statuses1.reblog_of_id AND (statuses1.uri IS NULL OR statuses1.local OR statuses1.id >= $1))
          AND NOT EXISTS (SELECT 1 FROM status_pins WHERE statuses.id = status_id)
          AND NOT EXISTS (SELECT 1 FROM mentions WHERE statuses.id = mentions.status_id AND mentions.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
          AND NOT EXISTS (SELECT 1 FROM favourites WHERE statuses.id = favourites.status_id AND favourites.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
          AND NOT EXISTS (SELECT 1 FROM bookmarks WHERE statuses.id = bookmarks.status_id AND bookmarks.account_id IN (SELECT accounts.id FROM accounts WHERE domain IS NULL))
          #{clean_followed_sql}
        SQL

        say('Removing temporary database indices to restore write performance...')

        ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
        ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
      end

      say('Beginning statuses removal... This might take a while...')

      klass = Class.new(ApplicationRecord) do |c|
        c.table_name = 'statuses_to_be_deleted'
      end

      Object.const_set('StatusToBeDeleted', klass)

      scope     = StatusToBeDeleted
      processed = 0
      removed   = 0
      progress  = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)

      scope.reorder(nil).in_batches(of: options[:batch_size]) do |relation|
        ids        = relation.pluck(:id)
        processed += ids.count
        removed   += Status.unscoped.where(id: ids).delete_all
        progress.increment
      end

      progress.stop

      ActiveRecord::Base.connection.drop_table('statuses_to_be_deleted')

      say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} statuses.", :green)
    ensure
      say('Removing temporary database indices to restore write performance...')

      ActiveRecord::Base.connection.remove_index(:accounts, name: :index_accounts_local, if_exists: true)
      ActiveRecord::Base.connection.remove_index(:status_pins, name: :index_status_pins_status_id, if_exists: true)
      ActiveRecord::Base.connection.remove_index(:media_attachments, name: :index_media_attachments_remote_url, if_exists: true)
    end

    def remove_orphans_media_attachments
      return if options[:skip_media_remove]

      start_at = Time.now.to_f

      say('Beginning removal of now-orphaned media attachments to free up disk space...')

      scope     = MediaAttachment.reorder(nil).unattached.where('created_at < ?', options[:days].pred.days.ago)
      processed = 0
      removed   = 0
      progress  = create_progress_bar(scope.count)

      scope.find_each do |media_attachment|
        media_attachment.destroy!

        removed += 1
      rescue => e
        progress.log pastel.red("Error processing #{media_attachment.id}: #{e}")
      ensure
        progress.increment
        processed += 1
      end

      progress.stop

      say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} media_attachments.", :green)
    end

    def remove_orphans_conversations
      start_at = Time.now.to_f

      unless options[:continue] && ActiveRecord::Base.connection.table_exists?('conversations_to_be_deleted')
        say('Creating temporary database indices...')

        ActiveRecord::Base.connection.add_index(:statuses, :conversation_id, name: :index_statuses_conversation_id, algorithm: :concurrently, if_not_exists: true)

        say('Extract the deletion target from conversations... This might take a while...')

        ActiveRecord::Base.connection.create_table('conversations_to_be_deleted', force: true)

        ActiveRecord::Base.connection.exec_insert(<<-SQL.squish, 'SQL')
          INSERT INTO conversations_to_be_deleted (id)
          SELECT id FROM conversations WHERE NOT EXISTS (SELECT 1 FROM statuses WHERE statuses.conversation_id = conversations.id)
        SQL

        say('Removing temporary database indices to restore write performance...')
        ActiveRecord::Base.connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
      end

      say('Beginning orphans removal... This might take a while...')

      klass = Class.new(ApplicationRecord) do |c|
        c.table_name = 'conversations_to_be_deleted'
      end

      Object.const_set('ConversationsToBeDeleted', klass)

      scope     = ConversationsToBeDeleted
      processed = 0
      removed   = 0
      progress  = create_progress_bar(scope.count.fdiv(options[:batch_size]).ceil)

      scope.in_batches(of: options[:batch_size]) do |relation|
        ids        = relation.pluck(:id)
        processed += ids.count
        removed   += Conversation.unscoped.where(id: ids).delete_all
        progress.increment
      end

      progress.stop

      ActiveRecord::Base.connection.drop_table('conversations_to_be_deleted')

      say("Done after #{Time.now.to_f - start_at}s, removed #{removed} out of #{processed} conversations.", :green)
    ensure
      say('Removing temporary database indices to restore write performance...')
      ActiveRecord::Base.connection.remove_index(:statuses, name: :index_statuses_conversation_id, if_exists: true)
    end

    def vacuum_and_analyze_statuses
      if options[:compress_database]
        say('Run VACUUM FULL ANALYZE to statuses...')
        ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE statuses')
        say('Run REINDEX to statuses...')
        ActiveRecord::Base.connection.execute('REINDEX TABLE statuses')
      else
        say('Run ANALYZE to statuses...')
        ActiveRecord::Base.connection.execute('ANALYZE statuses')
      end
    end

    def vacuum_and_analyze_conversations
      if options[:compress_database]
        say('Run VACUUM FULL ANALYZE to conversations...')
        ActiveRecord::Base.connection.execute('VACUUM FULL ANALYZE conversations')
        say('Run REINDEX to conversations...')
        ActiveRecord::Base.connection.execute('REINDEX TABLE conversations')
      else
        say('Run ANALYZE to conversations...')
        ActiveRecord::Base.connection.execute('ANALYZE conversations')
      end
    end
  end
end