about summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2022-09-27 03:08:19 +0200
committer GitHub <noreply@github.com> 2022-09-27 03:08:19 +0200
commit 5c9abdeff1d0cf3e14d84c5ae298e6a5beccaf18 (patch)
tree b4bad153eec9f2a39d96a9da342e1618ac43740b /app
parent 3e0999cd1139d638332d62129dbf0b37263802fd (diff)
Add retention policy for cached content and media (#19232)
Diffstat (limited to 'app')
-rw-r--r-- app/lib/redis_configuration.rb 6
-rw-r--r-- app/lib/vacuum.rb 3
-rw-r--r-- app/lib/vacuum/access_tokens_vacuum.rb (renamed from app/workers/scheduler/doorkeeper_cleanup_scheduler.rb) 15
-rw-r--r-- app/lib/vacuum/backups_vacuum.rb 25
-rw-r--r-- app/lib/vacuum/feeds_vacuum.rb 34
-rw-r--r-- app/lib/vacuum/media_attachments_vacuum.rb 40
-rw-r--r-- app/lib/vacuum/preview_cards_vacuum.rb 39
-rw-r--r-- app/lib/vacuum/statuses_vacuum.rb 54
-rw-r--r-- app/lib/vacuum/system_keys_vacuum.rb 13
-rw-r--r-- app/models/content_retention_policy.rb 25
-rw-r--r-- app/models/form/admin_settings.rb 4
-rw-r--r-- app/views/admin/settings/edit.html.haml 8
-rw-r--r-- app/workers/scheduler/backup_cleanup_scheduler.rb 17
-rw-r--r-- app/workers/scheduler/feed_cleanup_scheduler.rb 35
-rw-r--r-- app/workers/scheduler/media_cleanup_scheduler.rb 17
-rw-r--r-- app/workers/scheduler/vacuum_scheduler.rb 56
16 files changed, 312 insertions, 79 deletions
diff --git a/app/lib/redis_configuration.rb b/app/lib/redis_configuration.rb
index e14d6c8b6..f0e86d985 100644
--- a/app/lib/redis_configuration.rb
+++ b/app/lib/redis_configuration.rb
@@ -7,9 +7,7 @@ class RedisConfiguration
       @pool = ConnectionPool.new(size: new_pool_size) { new.connection }
     end
 
-    def with
-      pool.with { |redis| yield redis }
-    end
+    delegate :with, to: :pool
 
     def pool
       @pool ||= establish_pool(pool_size)
@@ -17,7 +15,7 @@ class RedisConfiguration
 
     def pool_size
       if Sidekiq.server?
-        Sidekiq.options[:concurrency]
+        Sidekiq[:concurrency]
       else
         ENV['MAX_THREADS'] || 5
       end
diff --git a/app/lib/vacuum.rb b/app/lib/vacuum.rb
new file mode 100644
index 000000000..9db1ec90b
--- /dev/null
+++ b/app/lib/vacuum.rb
@@ -0,0 +1,3 @@
+# frozen_string_literal: true
+
+module Vacuum; end
diff --git a/app/workers/scheduler/doorkeeper_cleanup_scheduler.rb b/app/lib/vacuum/access_tokens_vacuum.rb
index 9303a352f..4f3878027 100644
--- a/app/workers/scheduler/doorkeeper_cleanup_scheduler.rb
+++ b/app/lib/vacuum/access_tokens_vacuum.rb
@@ -1,13 +1,18 @@
 # frozen_string_literal: true
 
-class Scheduler::DoorkeeperCleanupScheduler
-  include Sidekiq::Worker
+class Vacuum::AccessTokensVacuum
+  def perform
+    vacuum_revoked_access_tokens!
+    vacuum_revoked_access_grants!
+  end
 
-  sidekiq_options retry: 0
+  private
 
-  def perform
+  def vacuum_revoked_access_tokens!
     Doorkeeper::AccessToken.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
+  end
+
+  def vacuum_revoked_access_grants!
     Doorkeeper::AccessGrant.where('revoked_at IS NOT NULL').where('revoked_at < NOW()').delete_all
-    SystemKey.expired.delete_all
   end
 end
diff --git a/app/lib/vacuum/backups_vacuum.rb b/app/lib/vacuum/backups_vacuum.rb
new file mode 100644
index 000000000..3b83072f3
--- /dev/null
+++ b/app/lib/vacuum/backups_vacuum.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class Vacuum::BackupsVacuum
+  def initialize(retention_period)
+    @retention_period = retention_period
+  end
+
+  def perform
+    vacuum_expired_backups! if retention_period?
+  end
+
+  private
+
+  def vacuum_expired_backups!
+    backups_past_retention_period.in_batches.destroy_all
+  end
+
+  def backups_past_retention_period
+    Backup.unscoped.where(Backup.arel_table[:created_at].lt(@retention_period.ago))
+  end
+
+  def retention_period?
+    @retention_period.present?
+  end
+end
diff --git a/app/lib/vacuum/feeds_vacuum.rb b/app/lib/vacuum/feeds_vacuum.rb
new file mode 100644
index 000000000..f46bcf75f
--- /dev/null
+++ b/app/lib/vacuum/feeds_vacuum.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+class Vacuum::FeedsVacuum
+  def perform
+    vacuum_inactive_home_feeds!
+    vacuum_inactive_list_feeds!
+  end
+
+  private
+
+  def vacuum_inactive_home_feeds!
+    inactive_users.select(:id, :account_id).find_in_batches do |users|
+      feed_manager.clean_feeds!(:home, users.map(&:account_id))
+    end
+  end
+
+  def vacuum_inactive_list_feeds!
+    inactive_users_lists.select(:id).find_in_batches do |lists|
+      feed_manager.clean_feeds!(:list, lists.map(&:id))
+    end
+  end
+
+  def inactive_users
+    User.confirmed.inactive
+  end
+
+  def inactive_users_lists
+    List.where(account_id: inactive_users.select(:account_id))
+  end
+
+  def feed_manager
+    FeedManager.instance
+  end
+end
diff --git a/app/lib/vacuum/media_attachments_vacuum.rb b/app/lib/vacuum/media_attachments_vacuum.rb
new file mode 100644
index 000000000..7fb347ce4
--- /dev/null
+++ b/app/lib/vacuum/media_attachments_vacuum.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+class Vacuum::MediaAttachmentsVacuum
+  TTL = 1.day.freeze
+
+  def initialize(retention_period)
+    @retention_period = retention_period
+  end
+
+  def perform
+    vacuum_cached_files! if retention_period?
+    vacuum_orphaned_records!
+  end
+
+  private
+
+  def vacuum_cached_files!
+    media_attachments_past_retention_period.find_each do |media_attachment|
+      media_attachment.file.destroy
+      media_attachment.thumbnail.destroy
+      media_attachment.save
+    end
+  end
+
+  def vacuum_orphaned_records!
+    orphaned_media_attachments.in_batches.destroy_all
+  end
+
+  def media_attachments_past_retention_period
+    MediaAttachment.unscoped.remote.cached.where(MediaAttachment.arel_table[:created_at].lt(@retention_period.ago)).where(MediaAttachment.arel_table[:updated_at].lt(@retention_period.ago))
+  end
+
+  def orphaned_media_attachments
+    MediaAttachment.unscoped.unattached.where(MediaAttachment.arel_table[:created_at].lt(TTL.ago))
+  end
+
+  def retention_period?
+    @retention_period.present?
+  end
+end
diff --git a/app/lib/vacuum/preview_cards_vacuum.rb b/app/lib/vacuum/preview_cards_vacuum.rb
new file mode 100644
index 000000000..84ef100ed
--- /dev/null
+++ b/app/lib/vacuum/preview_cards_vacuum.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+class Vacuum::PreviewCardsVacuum
+  TTL = 1.day.freeze
+
+  def initialize(retention_period)
+    @retention_period = retention_period
+  end
+
+  def perform
+    vacuum_cached_images! if retention_period?
+    vacuum_orphaned_records!
+  end
+
+  private
+
+  def vacuum_cached_images!
+    preview_cards_past_retention_period.find_each do |preview_card|
+      preview_card.image.destroy
+      preview_card.save
+    end
+  end
+
+  def vacuum_orphaned_records!
+    orphaned_preview_cards.in_batches.destroy_all
+  end
+
+  def preview_cards_past_retention_period
+    PreviewCard.cached.where(PreviewCard.arel_table[:updated_at].lt(@retention_period.ago))
+  end
+
+  def orphaned_preview_cards
+    PreviewCard.where('NOT EXISTS (SELECT 1 FROM preview_cards_statuses WHERE preview_cards_statuses.preview_card_id = preview_cards.id)').where(PreviewCard.arel_table[:created_at].lt(TTL.ago))
+  end
+
+  def retention_period?
+    @retention_period.present?
+  end
+end
diff --git a/app/lib/vacuum/statuses_vacuum.rb b/app/lib/vacuum/statuses_vacuum.rb
new file mode 100644
index 000000000..41d6ba270
--- /dev/null
+++ b/app/lib/vacuum/statuses_vacuum.rb
@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+
+class Vacuum::StatusesVacuum
+  include Redisable
+
+  def initialize(retention_period)
+    @retention_period = retention_period
+  end
+
+  def perform
+    vacuum_statuses! if retention_period?
+  end
+
+  private
+
+  def vacuum_statuses!
+    statuses_scope.find_in_batches do |statuses|
+      # Side-effects not covered by foreign keys, such
+      # as the search index, must be handled first.
+
+      remove_from_account_conversations(statuses)
+      remove_from_search_index(statuses)
+
+      # Foreign keys take care of most associated records
+      # for us. Media attachments will be orphaned.
+
+      Status.where(id: statuses.map(&:id)).delete_all
+    end
+  end
+
+  def statuses_scope
+    Status.unscoped.kept.where(account: Account.remote).where(Status.arel_table[:id].lt(retention_period_as_id)).select(:id, :visibility)
+  end
+
+  def retention_period_as_id
+    Mastodon::Snowflake.id_at(@retention_period.ago, with_random: false)
+  end
+
+  def analyze_statuses!
+    ActiveRecord::Base.connection.execute('ANALYZE statuses')
+  end
+
+  def remove_from_account_conversations(statuses)
+    Status.where(id: statuses.select(&:direct_visibility?).map(&:id)).includes(:account, mentions: :account).each(&:unlink_from_conversations)
+  end
+
+  def remove_from_search_index(statuses)
+    with_redis { |redis| redis.sadd('chewy:queue:StatusesIndex', statuses.map(&:id)) } if Chewy.enabled?
+  end
+
+  def retention_period?
+    @retention_period.present?
+  end
+end
diff --git a/app/lib/vacuum/system_keys_vacuum.rb b/app/lib/vacuum/system_keys_vacuum.rb
new file mode 100644
index 000000000..ceee2fd16
--- /dev/null
+++ b/app/lib/vacuum/system_keys_vacuum.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+class Vacuum::SystemKeysVacuum
+  def perform
+    vacuum_expired_system_keys!
+  end
+
+  private
+
+  def vacuum_expired_system_keys!
+    SystemKey.expired.delete_all
+  end
+end
diff --git a/app/models/content_retention_policy.rb b/app/models/content_retention_policy.rb
new file mode 100644
index 000000000..b5e922c8c
--- /dev/null
+++ b/app/models/content_retention_policy.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+class ContentRetentionPolicy
+  def self.current
+    new
+  end
+
+  def media_cache_retention_period
+    retention_period Setting.media_cache_retention_period
+  end
+
+  def content_cache_retention_period
+    retention_period Setting.content_cache_retention_period
+  end
+
+  def backups_retention_period
+    retention_period Setting.backups_retention_period
+  end
+
+  private
+
+  def retention_period(value)
+    value.days if value.is_a?(Integer) && value.positive?
+  end
+end
diff --git a/app/models/form/admin_settings.rb b/app/models/form/admin_settings.rb
index 97fabc6ac..3a7150916 100644
--- a/app/models/form/admin_settings.rb
+++ b/app/models/form/admin_settings.rb
@@ -32,6 +32,9 @@ class Form::AdminSettings
     show_domain_blocks_rationale
     noindex
     require_invite_text
+    media_cache_retention_period
+    content_cache_retention_period
+    backups_retention_period
   ).freeze
 
   BOOLEAN_KEYS = %i(
@@ -64,6 +67,7 @@ class Form::AdminSettings
   validates :bootstrap_timeline_accounts, existing_username: { multiple: true }
   validates :show_domain_blocks, inclusion: { in: %w(disabled users all) }
   validates :show_domain_blocks_rationale, inclusion: { in: %w(disabled users all) }
+  validates :media_cache_retention_period, :content_cache_retention_period, :backups_retention_period, numericality: { only_integer: true }
 
   def initialize(_attributes = {})
     super
diff --git a/app/views/admin/settings/edit.html.haml b/app/views/admin/settings/edit.html.haml
index 64687b7a6..1dfd21643 100644
--- a/app/views/admin/settings/edit.html.haml
+++ b/app/views/admin/settings/edit.html.haml
@@ -45,7 +45,6 @@
 
   .fields-group
     = f.input :require_invite_text, as: :boolean, wrapper: :with_label, label: t('admin.settings.registrations.require_invite_text.title'), hint: t('admin.settings.registrations.require_invite_text.desc_html'), disabled: !approved_registrations?
-  .fields-group
 
   %hr.spacer/
 
@@ -100,5 +99,12 @@
     = f.input :site_terms, wrapper: :with_block_label, as: :text, label: t('admin.settings.site_terms.title'), hint: t('admin.settings.site_terms.desc_html'), input_html: { rows: 8 }
     = f.input :custom_css, wrapper: :with_block_label, as: :text, input_html: { rows: 8 }, label: t('admin.settings.custom_css.title'), hint: t('admin.settings.custom_css.desc_html')
 
+  %hr.spacer/
+
+  .fields-group
+    = f.input :media_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+    = f.input :content_cache_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+    = f.input :backups_retention_period, wrapper: :with_block_label, input_html: { pattern: '[0-9]+' }
+
   .actions
     = f.button :button, t('generic.save_changes'), type: :submit
diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb
deleted file mode 100644
index 85d5312c0..000000000
--- a/app/workers/scheduler/backup_cleanup_scheduler.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::BackupCleanupScheduler
-  include Sidekiq::Worker
-
-  sidekiq_options retry: 0
-
-  def perform
-    old_backups.reorder(nil).find_each(&:destroy!)
-  end
-
-  private
-
-  def old_backups
-    Backup.where('created_at < ?', 7.days.ago)
-  end
-end
diff --git a/app/workers/scheduler/feed_cleanup_scheduler.rb b/app/workers/scheduler/feed_cleanup_scheduler.rb
deleted file mode 100644
index aa0cc8b8d..000000000
--- a/app/workers/scheduler/feed_cleanup_scheduler.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::FeedCleanupScheduler
-  include Sidekiq::Worker
-  include Redisable
-
-  sidekiq_options retry: 0
-
-  def perform
-    clean_home_feeds!
-    clean_list_feeds!
-  end
-
-  private
-
-  def clean_home_feeds!
-    feed_manager.clean_feeds!(:home, inactive_account_ids)
-  end
-
-  def clean_list_feeds!
-    feed_manager.clean_feeds!(:list, inactive_list_ids)
-  end
-
-  def inactive_account_ids
-    @inactive_account_ids ||= User.confirmed.inactive.pluck(:account_id)
-  end
-
-  def inactive_list_ids
-    List.where(account_id: inactive_account_ids).pluck(:id)
-  end
-
-  def feed_manager
-    FeedManager.instance
-  end
-end
diff --git a/app/workers/scheduler/media_cleanup_scheduler.rb b/app/workers/scheduler/media_cleanup_scheduler.rb
deleted file mode 100644
index 24d30a6be..000000000
--- a/app/workers/scheduler/media_cleanup_scheduler.rb
+++ /dev/null
@@ -1,17 +0,0 @@
-# frozen_string_literal: true
-
-class Scheduler::MediaCleanupScheduler
-  include Sidekiq::Worker
-
-  sidekiq_options retry: 0
-
-  def perform
-    unattached_media.find_each(&:destroy)
-  end
-
-  private
-
-  def unattached_media
-    MediaAttachment.reorder(nil).unattached.where('created_at < ?', 1.day.ago)
-  end
-end
diff --git a/app/workers/scheduler/vacuum_scheduler.rb b/app/workers/scheduler/vacuum_scheduler.rb
new file mode 100644
index 000000000..ce88ff204
--- /dev/null
+++ b/app/workers/scheduler/vacuum_scheduler.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+class Scheduler::VacuumScheduler
+  include Sidekiq::Worker
+
+  sidekiq_options retry: 0
+
+  def perform
+    vacuum_operations.each do |operation|
+      operation.perform
+    rescue => e
+      Rails.logger.error("Error while running #{operation.class.name}: #{e}")
+    end
+  end
+
+  private
+
+  def vacuum_operations
+    [
+      statuses_vacuum,
+      media_attachments_vacuum,
+      preview_cards_vacuum,
+      backups_vacuum,
+      access_tokens_vacuum,
+      feeds_vacuum,
+    ]
+  end
+
+  def statuses_vacuum
+    Vacuum::StatusesVacuum.new(content_retention_policy.content_cache_retention_period)
+  end
+
+  def media_attachments_vacuum
+    Vacuum::MediaAttachmentsVacuum.new(content_retention_policy.media_cache_retention_period)
+  end
+
+  def preview_cards_vacuum
+    Vacuum::PreviewCardsVacuum.new(content_retention_policy.media_cache_retention_period)
+  end
+
+  def backups_vacuum
+    Vacuum::BackupsVacuum.new(content_retention_policy.backups_retention_period)
+  end
+
+  def access_tokens_vacuum
+    Vacuum::AccessTokensVacuum.new
+  end
+
+  def feeds_vacuum
+    Vacuum::FeedsVacuum.new
+  end
+
+  def content_retention_policy
+    ContentRetentionPolicy.current
+  end
+end