about summary refs log tree commit diff
path: root/app/models/account_statuses_cleanup_policy.rb
blob: 0a9551ec2850a17a9ee1b3a886ebda400c8af275 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# frozen_string_literal: true

# == Schema Information
#
# Table name: account_statuses_cleanup_policies
#
#  id                 :bigint           not null, primary key
#  account_id         :bigint           not null
#  enabled            :boolean          default(TRUE), not null
#  min_status_age     :integer          default(1209600), not null
#  keep_direct        :boolean          default(TRUE), not null
#  keep_pinned        :boolean          default(TRUE), not null
#  keep_polls         :boolean          default(FALSE), not null
#  keep_media         :boolean          default(FALSE), not null
#  keep_self_fav      :boolean          default(TRUE), not null
#  keep_self_bookmark :boolean          default(TRUE), not null
#  min_favs           :integer
#  min_reblogs        :integer
#  created_at         :datetime         not null
#  updated_at         :datetime         not null
#
# Per-account policy describing which of the account's own statuses may be
# automatically deleted, and which ones are exempted from deletion.
#
# Progress of the cleanup is tracked through a "last inspected" status id
# stored in Redis (see `record_last_inspected` / `last_inspected`).
class AccountStatusesCleanupPolicy < ApplicationRecord
  include Redisable

  # Values accepted for `min_status_age` (enforced by the inclusion validation below).
  ALLOWED_MIN_STATUS_AGE = [
    2.weeks.seconds,
    1.month.seconds,
    2.months.seconds,
    3.months.seconds,
    6.months.seconds,
    1.year.seconds,
    2.years.seconds,
  ].freeze

  # Boolean attributes that, when true, exempt a category of statuses from deletion.
  EXCEPTION_BOOLS      = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze
  # Numeric attributes that exempt statuses reaching the given count from deletion.
  EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze

  # Depending on the cleanup policy, the query to discover the next
  # statuses to delete may get expensive if the account has a lot of old
  # statuses otherwise excluded from deletion by the other exceptions.
  #
  # Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
  # old statuses to be considered for deletion prior to checking exceptions.
  #
  # This is used in `compute_cutoff_id` to provide a `max_id` to
  # `statuses_to_delete`.
  EARLY_SEARCH_CUTOFF = 5_000

  belongs_to :account

  validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
  validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
  validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
  validate :validate_local_account

  before_save :update_last_inspected

  # Builds the relation of the account's statuses that the policy allows to delete.
  #
  # @param limit [Integer] maximum number of statuses to return
  # @param max_id [Integer, nil] optional upper bound (inclusive) on the status id,
  #   applied in addition to the age-based bound from `min_status_age`
  # @param min_id [Integer, nil] optional lower bound (inclusive) on the status id
  # @return [ActiveRecord::Relation] matching statuses, ordered by ascending id
  def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
    # Non-mutating `merge` is used throughout so that the relation handed out
    # by the `account.statuses` association is never modified in place.
    scope = account.statuses.merge(old_enough_scope(max_id))
    scope = scope.where(Status.arel_table[:id].gteq(min_id)) if min_id.present?
    scope = scope.merge(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
    scope = scope.merge(without_direct_scope) if keep_direct?
    scope = scope.merge(without_pinned_scope) if keep_pinned?
    scope = scope.merge(without_poll_scope) if keep_polls?
    scope = scope.merge(without_media_scope) if keep_media?
    scope = scope.merge(without_self_fav_scope) if keep_self_fav?
    scope = scope.merge(without_self_bookmark_scope) if keep_self_bookmark?

    scope.reorder(id: :asc).limit(limit)
  end

  # This computes a toot id such that:
  # - the toot would be old enough to be candidate for deletion
  # - there are at most EARLY_SEARCH_CUTOFF toots between the last inspected toot and this one
  #
  # The idea is to limit expensive SQL queries when an account has lots of toots excluded from
  # deletion, while not starting anew on each run.
  #
  # @return the result of `SELECT MAX(id)` over the bounded subquery
  #   (SQL `MAX` yields NULL when the subquery has no rows)
  def compute_cutoff_id
    min_id = last_inspected || 0
    max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
    subquery = account.statuses.where(Status.arel_table[:id].gteq(min_id)).where(Status.arel_table[:id].lteq(max_id))
    subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)

    # We're textually interpolating a subquery here as ActiveRecord seems not to provide
    # a way to apply the limit to the subquery
    Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
  end

  # The most important thing about `last_inspected` is that any toot older than it is guaranteed
  # not to be kept by the policy regardless of its age.
  #
  # Stores `last_id` in Redis under the account's cleanup key, expiring after one week.
  def record_last_inspected(last_id)
    redis.set(last_inspected_key, last_id, ex: 1.week.seconds)
  end

  # @return [Integer, nil] the stored last-inspected id, or nil when no value is stored
  def last_inspected
    redis.get(last_inspected_key)&.to_i
  end

  # Rewinds the last-inspected marker to `status.id` when an exemption has been
  # lifted from an already-inspected status of this account.
  #
  # Nothing happens when there is no marker, when the status is newer than the
  # marker, when it belongs to another account, or when the matching `keep_*`
  # option is disabled. Any `action` other than the three handled below rewinds
  # the marker unconditionally.
  #
  # @param status [Status] the status whose exemption changed
  # @param action [Symbol] `:unbookmark`, `:unfav` or `:unpin`
  def invalidate_last_inspected(status, action)
    last_value = last_inspected
    return if last_value.nil? || status.id > last_value || status.account_id != account_id

    case action
    when :unbookmark
      return unless keep_self_bookmark?
    when :unfav
      return unless keep_self_fav?
    when :unpin
      return unless keep_pinned?
    end

    record_last_inspected(status.id)
  end

  private

  # Redis key under which the last-inspected status id of this account is stored.
  def last_inspected_key
    "account_cleanup:#{account.id}"
  end

  # `before_save` hook: drops the last-inspected marker when the pending changes
  # widen the policy in such a way that any previously-inspected status may need
  # to be deleted, so the cleanup has to start again.
  def update_last_inspected
    redis.del(last_inspected_key) if exception_bool_disabled? || exception_threshold_loosened?
  end

  # True when any `keep_*` option is about to be switched from true to false.
  def exception_bool_disabled?
    EXCEPTION_BOOLS.any? { |name| attribute_change_to_be_saved(name) == [true, false] }
  end

  # True when a previously-set threshold is about to be removed or raised.
  def exception_threshold_loosened?
    EXCEPTION_THRESHOLDS.any? do |name|
      # `attribute_change_to_be_saved` is nil for unchanged attributes, in which
      # case both values destructure to nil and the check is false.
      old_value, new_value = attribute_change_to_be_saved(name)
      old_value.present? && (new_value.nil? || new_value > old_value)
    end
  end

  # Cleanup policies are only valid for local accounts.
  def validate_local_account
    errors.add(:account, :invalid) unless account&.local?
  end

  def without_direct_scope
    Status.where.not(visibility: :direct)
  end

  # Statuses whose id is at most the id matching `min_status_age` ago, further
  # capped by `max_id` when one is given.
  def old_enough_scope(max_id = nil)
    # Filtering on `id` rather than `min_status_age` ago will treat
    # non-snowflake statuses as older than they really are, but Mastodon
    # has switched to snowflake IDs significantly over 2 years ago anyway.
    max_id = [max_id, Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)].compact.min
    Status.where(Status.arel_table[:id].lteq(max_id))
  end

  # Excludes statuses favourited by their own author.
  def without_self_fav_scope
    Status.where('NOT EXISTS (SELECT * FROM favourites fav WHERE fav.account_id = statuses.account_id AND fav.status_id = statuses.id)')
  end

  # Excludes statuses bookmarked by their own author.
  def without_self_bookmark_scope
    Status.where('NOT EXISTS (SELECT * FROM bookmarks bookmark WHERE bookmark.account_id = statuses.account_id AND bookmark.status_id = statuses.id)')
  end

  # Excludes statuses pinned by their own author.
  def without_pinned_scope
    Status.where('NOT EXISTS (SELECT * FROM status_pins pin WHERE pin.account_id = statuses.account_id AND pin.status_id = statuses.id)')
  end

  # Excludes statuses that have at least one media attachment.
  def without_media_scope
    Status.where('NOT EXISTS (SELECT * FROM media_attachments media WHERE media.status_id = statuses.id)')
  end

  # Excludes statuses that have a poll.
  def without_poll_scope
    Status.where(poll_id: nil)
  end

  # Excludes statuses whose reblog / favourite counts reach the configured
  # thresholds; a missing `status_stats` row counts as zero.
  def without_popular_scope
    scope = Status.left_joins(:status_stat)
    scope = scope.where('COALESCE(status_stats.reblogs_count, 0) < ?', min_reblogs) unless min_reblogs.nil?
    scope = scope.where('COALESCE(status_stats.favourites_count, 0) < ?', min_favs) unless min_favs.nil?
    scope
  end
end