about summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
Diffstat (limited to 'app')
-rw-r--r--app/controllers/statuses_cleanup_controller.rb35
-rw-r--r--app/models/account_statuses_cleanup_policy.rb171
-rw-r--r--app/models/bookmark.rb8
-rw-r--r--app/models/concerns/account_associations.rb3
-rw-r--r--app/models/favourite.rb7
-rw-r--r--app/models/status_pin.rb8
-rw-r--r--app/services/account_statuses_cleanup_service.rb27
-rw-r--r--app/views/statuses_cleanup/show.html.haml45
-rw-r--r--app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb96
9 files changed, 400 insertions, 0 deletions
diff --git a/app/controllers/statuses_cleanup_controller.rb b/app/controllers/statuses_cleanup_controller.rb
new file mode 100644
index 000000000..be234cdcb
--- /dev/null
+++ b/app/controllers/statuses_cleanup_controller.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+class StatusesCleanupController < ApplicationController
+  layout 'admin'
+
+  before_action :authenticate_user!
+  before_action :set_policy
+  before_action :set_body_classes
+
+  def show; end
+
+  def update
+    if @policy.update(resource_params)
+      redirect_to statuses_cleanup_path, notice: I18n.t('generic.changes_saved_msg')
+    else
+      render action: :show
+    end
+  rescue ActionController::ParameterMissing
+    # Do nothing
+  end
+
+  private
+
+  def set_policy
+    @policy = current_account.statuses_cleanup_policy || current_account.build_statuses_cleanup_policy(enabled: false)
+  end
+
+  def resource_params
+    params.require(:account_statuses_cleanup_policy).permit(:enabled, :min_status_age, :keep_direct, :keep_pinned, :keep_polls, :keep_media, :keep_self_fav, :keep_self_bookmark, :min_favs, :min_reblogs)
+  end
+
+  def set_body_classes
+    @body_classes = 'admin'
+  end
+end
diff --git a/app/models/account_statuses_cleanup_policy.rb b/app/models/account_statuses_cleanup_policy.rb
new file mode 100644
index 000000000..705ccff54
--- /dev/null
+++ b/app/models/account_statuses_cleanup_policy.rb
@@ -0,0 +1,171 @@
+# frozen_string_literal: true
+
+# == Schema Information
+#
+# Table name: account_statuses_cleanup_policies
+#
+#  id                 :bigint           not null, primary key
+#  account_id         :bigint           not null
+#  enabled            :boolean          default(TRUE), not null
+#  min_status_age     :integer          default(1209600), not null
+#  keep_direct        :boolean          default(TRUE), not null
+#  keep_pinned        :boolean          default(TRUE), not null
+#  keep_polls         :boolean          default(FALSE), not null
+#  keep_media         :boolean          default(FALSE), not null
+#  keep_self_fav      :boolean          default(TRUE), not null
+#  keep_self_bookmark :boolean          default(TRUE), not null
+#  min_favs           :integer
+#  min_reblogs        :integer
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#
+class AccountStatusesCleanupPolicy < ApplicationRecord
+  include Redisable
+
+  ALLOWED_MIN_STATUS_AGE = [
+    2.weeks.seconds,
+    1.month.seconds,
+    2.months.seconds,
+    3.months.seconds,
+    6.months.seconds,
+    1.year.seconds,
+    2.years.seconds,
+  ].freeze
+
+  EXCEPTION_BOOLS      = %w(keep_direct keep_pinned keep_polls keep_media keep_self_fav keep_self_bookmark).freeze
+  EXCEPTION_THRESHOLDS = %w(min_favs min_reblogs).freeze
+
+  # Depending on the cleanup policy, the query to discover the next
+  # statuses to delete my get expensive if the account has a lot of old
+  # statuses otherwise excluded from deletion by the other exceptions.
+  #
+  # Therefore, `EARLY_SEARCH_CUTOFF` is meant to be the maximum number of
+  # old statuses to be considered for deletion prior to checking exceptions.
+  #
+  # This is used in `compute_cutoff_id` to provide a `max_id` to
+  # `statuses_to_delete`.
+  EARLY_SEARCH_CUTOFF = 5_000
+
+  belongs_to :account
+
+  validates :min_status_age, inclusion: { in: ALLOWED_MIN_STATUS_AGE }
+  validates :min_favs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
+  validates :min_reblogs, numericality: { greater_than_or_equal_to: 1, allow_nil: true }
+  validate :validate_local_account
+
+  before_save :update_last_inspected
+
+  def statuses_to_delete(limit = 50, max_id = nil, min_id = nil)
+    scope = account.statuses
+    scope.merge!(old_enough_scope(max_id))
+    scope = scope.where(Status.arel_table[:id].gteq(min_id)) if min_id.present?
+    scope.merge!(without_popular_scope) unless min_favs.nil? && min_reblogs.nil?
+    scope.merge!(without_direct_scope) if keep_direct?
+    scope.merge!(without_pinned_scope) if keep_pinned?
+    scope.merge!(without_poll_scope) if keep_polls?
+    scope.merge!(without_media_scope) if keep_media?
+    scope.merge!(without_self_fav_scope) if keep_self_fav?
+    scope.merge!(without_self_bookmark_scope) if keep_self_bookmark?
+
+    scope.reorder(id: :asc).limit(limit)
+  end
+
+  # This computes a toot id such that:
+  # - the toot would be old enough to be candidate for deletion
+  # - there are at most EARLY_SEARCH_CUTOFF toots between the last inspected toot and this one
+  #
+  # The idea is to limit expensive SQL queries when an account has lots of toots excluded from
+  # deletion, while not starting anew on each run.
+  def compute_cutoff_id
+    min_id = last_inspected || 0
+    max_id = Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)
+    subquery = account.statuses.where(Status.arel_table[:id].gteq(min_id)).where(Status.arel_table[:id].lteq(max_id))
+    subquery = subquery.select(:id).reorder(id: :asc).limit(EARLY_SEARCH_CUTOFF)
+
+    # We're textually interpolating a subquery here as ActiveRecord seem to not provide
+    # a way to apply the limit to the subquery
+    Status.connection.execute("SELECT MAX(id) FROM (#{subquery.to_sql}) t").values.first.first
+  end
+
+  # The most important thing about `last_inspected` is that any toot older than it is guaranteed
+  # not to be kept by the policy regardless of its age.
+  def record_last_inspected(last_id)
+    redis.set("account_cleanup:#{account.id}", last_id, ex: 1.week.seconds)
+  end
+
+  def last_inspected
+    redis.get("account_cleanup:#{account.id}")&.to_i
+  end
+
+  def invalidate_last_inspected(status, action)
+    last_value = last_inspected
+    return if last_value.nil? || status.id > last_value || status.account_id != account_id
+
+    case action
+    when :unbookmark
+      return unless keep_self_bookmark?
+    when :unfav
+      return unless keep_self_fav?
+    when :unpin
+      return unless keep_pinned?
+    end
+
+    record_last_inspected(status.id)
+  end
+
+  private
+
+  def update_last_inspected
+    if EXCEPTION_BOOLS.map { |name| attribute_change_to_be_saved(name) }.compact.include?([true, false])
+      # Policy has been widened in such a way that any previously-inspected status
+      # may need to be deleted, so we'll have to start again.
+      redis.del("account_cleanup:#{account.id}")
+    end
+    if EXCEPTION_THRESHOLDS.map { |name| attribute_change_to_be_saved(name) }.compact.any? { |old, new| old.present? && (new.nil? || new > old) }
+      redis.del("account_cleanup:#{account.id}")
+    end
+  end
+
+  def validate_local_account
+    errors.add(:account, :invalid) unless account&.local?
+  end
+
+  def without_direct_scope
+    Status.where.not(visibility: :direct)
+  end
+
+  def old_enough_scope(max_id = nil)
+    # Filtering on `id` rather than `min_status_age` ago will treat
+    # non-snowflake statuses as older than they really are, but Mastodon
+    # has switched to snowflake IDs significantly over 2 years ago anyway.
+    max_id = [max_id, Mastodon::Snowflake.id_at(min_status_age.seconds.ago, with_random: false)].compact.min
+    Status.where(Status.arel_table[:id].lteq(max_id))
+  end
+
+  def without_self_fav_scope
+    Status.where('NOT EXISTS (SELECT * FROM favourites fav WHERE fav.account_id = statuses.account_id AND fav.status_id = statuses.id)')
+  end
+
+  def without_self_bookmark_scope
+    Status.where('NOT EXISTS (SELECT * FROM bookmarks bookmark WHERE bookmark.account_id = statuses.account_id AND bookmark.status_id = statuses.id)')
+  end
+
+  def without_pinned_scope
+    Status.where('NOT EXISTS (SELECT * FROM status_pins pin WHERE pin.account_id = statuses.account_id AND pin.status_id = statuses.id)')
+  end
+
+  def without_media_scope
+    Status.where('NOT EXISTS (SELECT * FROM media_attachments media WHERE media.status_id = statuses.id)')
+  end
+
+  def without_poll_scope
+    Status.where(poll_id: nil)
+  end
+
+  def without_popular_scope
+    scope = Status.left_joins(:status_stat)
+    scope = scope.where('COALESCE(status_stats.reblogs_count, 0) <= ?', min_reblogs) unless min_reblogs.nil?
+    scope = scope.where('COALESCE(status_stats.favourites_count, 0) <= ?', min_favs) unless min_favs.nil?
+    scope
+  end
+end
diff --git a/app/models/bookmark.rb b/app/models/bookmark.rb
index 916261a17..f21ea714c 100644
--- a/app/models/bookmark.rb
+++ b/app/models/bookmark.rb
@@ -23,4 +23,12 @@ class Bookmark < ApplicationRecord
   before_validation do
     self.status = status.reblog if status&.reblog?
   end
+
+  after_destroy :invalidate_cleanup_info
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unbookmark)
+  end
 end
diff --git a/app/models/concerns/account_associations.rb b/app/models/concerns/account_associations.rb
index aaf371ebd..f2a4eae77 100644
--- a/app/models/concerns/account_associations.rb
+++ b/app/models/concerns/account_associations.rb
@@ -66,5 +66,8 @@ module AccountAssociations
 
     # Follow recommendations
     has_one :follow_recommendation_suppression, inverse_of: :account, dependent: :destroy
+
+    # Account statuses cleanup policy
+    has_one :statuses_cleanup_policy, class_name: 'AccountStatusesCleanupPolicy', inverse_of: :account, dependent: :destroy
   end
 end
diff --git a/app/models/favourite.rb b/app/models/favourite.rb
index 35028b7dd..ca8bce146 100644
--- a/app/models/favourite.rb
+++ b/app/models/favourite.rb
@@ -28,6 +28,7 @@ class Favourite < ApplicationRecord
 
   after_create :increment_cache_counters
   after_destroy :decrement_cache_counters
+  after_destroy :invalidate_cleanup_info
 
   private
 
@@ -39,4 +40,10 @@ class Favourite < ApplicationRecord
     return if association(:status).loaded? && status.marked_for_destruction?
     status&.decrement_count!(:favourites_count)
   end
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unfav)
+  end
 end
diff --git a/app/models/status_pin.rb b/app/models/status_pin.rb
index afc76bded..93a0ea1c0 100644
--- a/app/models/status_pin.rb
+++ b/app/models/status_pin.rb
@@ -15,4 +15,12 @@ class StatusPin < ApplicationRecord
   belongs_to :status
 
   validates_with StatusPinValidator
+
+  after_destroy :invalidate_cleanup_info
+
+  def invalidate_cleanup_info
+    return unless status&.account_id == account_id && account.local?
+
+    account.statuses_cleanup_policy&.invalidate_last_inspected(status, :unpin)
+  end
 end
diff --git a/app/services/account_statuses_cleanup_service.rb b/app/services/account_statuses_cleanup_service.rb
new file mode 100644
index 000000000..cbadecc63
--- /dev/null
+++ b/app/services/account_statuses_cleanup_service.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+class AccountStatusesCleanupService < BaseService
+  # @param [AccountStatusesCleanupPolicy] account_policy
+  # @param [Integer] budget
+  # @return [Integer]
+  def call(account_policy, budget = 50)
+    return 0 unless account_policy.enabled?
+
+    cutoff_id = account_policy.compute_cutoff_id
+    return 0 if cutoff_id.blank?
+
+    num_deleted = 0
+    last_deleted = nil
+
+    account_policy.statuses_to_delete(budget, cutoff_id, account_policy.last_inspected).reorder(nil).find_each(order: :asc) do |status|
+      status.discard
+      RemovalWorker.perform_async(status.id, redraft: false)
+      num_deleted += 1
+      last_deleted = status.id
+    end
+
+    account_policy.record_last_inspected(last_deleted.presence || cutoff_id)
+
+    num_deleted
+  end
+end
diff --git a/app/views/statuses_cleanup/show.html.haml b/app/views/statuses_cleanup/show.html.haml
new file mode 100644
index 000000000..59de4b5aa
--- /dev/null
+++ b/app/views/statuses_cleanup/show.html.haml
@@ -0,0 +1,45 @@
+- content_for :page_title do
+  = t('settings.statuses_cleanup')
+
+- content_for :heading_actions do
+  = button_tag t('generic.save_changes'), class: 'button', form: 'edit_policy'
+
+= simple_form_for @policy, url: statuses_cleanup_path, method: :put, html: { id: 'edit_policy' } do |f|
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :enabled, as: :boolean, wrapper: :with_label, label: t('statuses_cleanup.enabled'), hint: t('statuses_cleanup.enabled_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_status_age, wrapper: :with_label, label: t('statuses_cleanup.min_age_label'), collection: AccountStatusesCleanupPolicy::ALLOWED_MIN_STATUS_AGE.map(&:to_i), label_method: lambda { |i| t("statuses_cleanup.min_age.#{i}") }, include_blank: false, hint: false
+
+  .flash-message= t('statuses_cleanup.explanation')
+
+  %h4= t('statuses_cleanup.exceptions')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_pinned, wrapper: :with_label, label: t('statuses_cleanup.keep_pinned'), hint: t('statuses_cleanup.keep_pinned_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_direct, wrapper: :with_label, label: t('statuses_cleanup.keep_direct'), hint: t('statuses_cleanup.keep_direct_hint')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_self_fav, wrapper: :with_label, label: t('statuses_cleanup.keep_self_fav'), hint: t('statuses_cleanup.keep_self_fav_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_self_bookmark, wrapper: :with_label, label: t('statuses_cleanup.keep_self_bookmark'), hint: t('statuses_cleanup.keep_self_bookmark_hint')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_polls, wrapper: :with_label, label: t('statuses_cleanup.keep_polls'), hint: t('statuses_cleanup.keep_polls_hint')
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :keep_media, wrapper: :with_label, label: t('statuses_cleanup.keep_media'), hint: t('statuses_cleanup.keep_media_hint')
+
+  %h4= t('statuses_cleanup.interaction_exceptions')
+
+  .fields-row
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_favs, wrapper: :with_label, label: t('statuses_cleanup.min_favs'), hint: t('statuses_cleanup.min_favs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_favs') }
+    .fields-row__column.fields-row__column-6.fields-group
+      = f.input :min_reblogs, wrapper: :with_label, label: t('statuses_cleanup.min_reblogs'), hint: t('statuses_cleanup.min_reblogs_hint'), input_html: { min: 1, placeholder: t('statuses_cleanup.ignore_reblogs') }
+
+  .flash-message= t('statuses_cleanup.interaction_exceptions_explanation')
diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
new file mode 100644
index 000000000..f42d4bca6
--- /dev/null
+++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+
+class Scheduler::AccountsStatusesCleanupScheduler
+  include Sidekiq::Worker
+
+  # This limit is mostly to be nice to the fediverse at large and not
+  # generate too much traffic.
+  # This also helps limiting the running time of the scheduler itself.
+  MAX_BUDGET         = 50
+
+  # This is an attempt to spread the load across instances, as various
+  # accounts are likely to have various followers.
+  PER_ACCOUNT_BUDGET = 5
+
+  # This is an attempt to limit the workload generated by status removal
+  # jobs to something the particular instance can handle.
+  PER_THREAD_BUDGET  = 5
+
+  # Those avoid loading an instance that is already under load
+  MAX_DEFAULT_SIZE    = 2
+  MAX_DEFAULT_LATENCY = 5
+  MAX_PUSH_SIZE       = 5
+  MAX_PUSH_LATENCY    = 10
+  # 'pull' queue has lower priority jobs, and it's unlikely that pushing
+  # deletes would cause much issues with this queue if it didn't cause issues
+  # with default and push. Yet, do not enqueue deletes if the instance is
+  # lagging behind too much.
+  MAX_PULL_SIZE       = 500
+  MAX_PULL_LATENCY    = 300
+
+  # This is less of an issue in general, but deleting old statuses is likely
+  # to cause delivery errors, and thus increase the number of jobs to be retried.
+  # This doesn't directly translate to load, but connection errors and a high
+  # number of dead instances may lead to this spiraling out of control if
+  # unchecked.
+  MAX_RETRY_SIZE = 50_000
+
+  sidekiq_options retry: 0, lock: :until_executed
+
+  def perform
+    return if under_load?
+
+    budget = compute_budget
+    first_policy_id = last_processed_id
+
+    loop do
+      num_processed_accounts = 0
+
+      scope = AccountStatusesCleanupPolicy.where(enabled: true)
+      scope.where(Account.arel_table[:id].gt(first_policy_id)) if first_policy_id.present?
+      scope.find_each(order: :asc) do |policy|
+        num_deleted = AccountStatusesCleanupService.new.call(policy, [budget, PER_ACCOUNT_BUDGET].min)
+        num_processed_accounts += 1 unless num_deleted.zero?
+        budget -= num_deleted
+        if budget.zero?
+          save_last_processed_id(policy.id)
+          break
+        end
+      end
+
+      # The idea here is to loop through all policies at least once until the budget is exhausted
+      # and start back after the last processed account otherwise
+      break if budget.zero? || (num_processed_accounts.zero? && first_policy_id.nil?)
+      first_policy_id = nil
+    end
+  end
+
+  def compute_budget
+    threads = Sidekiq::ProcessSet.new.filter { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
+    [PER_THREAD_BUDGET * threads, MAX_BUDGET].min
+  end
+
+  def under_load?
+    return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE
+    queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
+  end
+
+  private
+
+  def queue_under_load?(name, max_size, max_latency)
+    queue = Sidekiq::Queue.new(name)
+    queue.size > max_size || queue.latency > max_latency
+  end
+
+  def last_processed_id
+    Redis.current.get('account_statuses_cleanup_scheduler:last_account_id')
+  end
+
+  def save_last_processed_id(id)
+    if id.nil?
+      Redis.current.del('account_statuses_cleanup_scheduler:last_account_id')
+    else
+      Redis.current.set('account_statuses_cleanup_scheduler:last_account_id', id, ex: 1.hour.seconds)
+    end
+  end
+end