about summary refs log tree commit diff
diff options
context:
space:
mode:
author ThibG <thib@sitedethib.com> 2020-12-22 23:57:46 +0100
committer GitHub <noreply@github.com> 2020-12-22 23:57:46 +0100
commit 3249d35bdcd9a495af3277dfb4b2129d7ef80f15 (patch)
tree aad517a164b6df760cb20e5dcf4827c180937a62
parent f18349640b686a3c3866711bfcd1d14edd5bc41a (diff)
Improve account deletion performances further (#15407)
* Delete status records by batches of 50

* Do not precompute values that are only used once

* Do not generate redis events for removal of public toots older than two weeks

* Filter reported toots a priori for polls and status deletion

* Do not process reblogs when cleaning up public timelines

As in Mastodon proper, reblogs don't appear in public TLs

* Clean the deleted account's own feed in one go

* Refactor Account#clean_feed_manager and List#clean_feed_manager

* Delete instead of destroy a few more associations

* Fix preloading

Co-authored-by: Claire <claire.github-309c@sitedethib.com>
-rw-r--r-- app/lib/feed_manager.rb 30
-rw-r--r-- app/models/account.rb 13
-rw-r--r-- app/models/list.rb 13
-rw-r--r-- app/services/batched_remove_status_service.rb 24
-rw-r--r-- app/services/delete_account_service.rb 20
-rw-r--r-- app/workers/scheduler/feed_cleanup_scheduler.rb 30
-rw-r--r-- spec/services/batched_remove_status_service_spec.rb 4
7 files changed, 53 insertions, 81 deletions
diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb
index 5e01ef67a..f0ad3e21f 100644
--- a/app/lib/feed_manager.rb
+++ b/app/lib/feed_manager.rb
@@ -230,6 +230,36 @@ class FeedManager
     end
   end
 
+  # Completely clear multiple feeds at once
+  # @param [Symbol] type
+  # @param [Array<Integer>] ids
+  # @return [void]
+  def clean_feeds!(type, ids)
+    reblogged_id_sets = {}
+
+    redis.pipelined do
+      ids.each do |feed_id|
+        redis.del(key(type, feed_id))
+        reblog_key = key(type, feed_id, 'reblogs')
+        # We collect a future for this: we don't block while getting
+        # it, but we can iterate over it later.
+        reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1)
+        redis.del(reblog_key)
+      end
+    end
+
+    # Remove all of the reblog tracking keys we just removed the
+    # references to.
+    redis.pipelined do
+      reblogged_id_sets.each do |feed_id, future|
+        future.value.each do |reblogged_id|
+          reblog_set_key = key(type, feed_id, "reblogs:#{reblogged_id}")
+          redis.del(reblog_set_key)
+        end
+      end
+    end
+  end
+
   private
 
   # Trim a feed to maximum size by removing older items
diff --git a/app/models/account.rb b/app/models/account.rb
index 80eb92a71..e6cf03fa8 100644
--- a/app/models/account.rb
+++ b/app/models/account.rb
@@ -578,17 +578,6 @@ class Account < ApplicationRecord
   end
 
   def clean_feed_manager
-    reblog_key       = FeedManager.instance.key(:home, id, 'reblogs')
-    reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1)
-
-    Redis.current.pipelined do
-      Redis.current.del(FeedManager.instance.key(:home, id))
-      Redis.current.del(reblog_key)
-
-      reblogged_id_set.each do |reblogged_id|
-        reblog_set_key = FeedManager.instance.key(:home, id, "reblogs:#{reblogged_id}")
-        Redis.current.del(reblog_set_key)
-      end
-    end
+    FeedManager.instance.clean_feeds!(:home, [id])
   end
 end
diff --git a/app/models/list.rb b/app/models/list.rb
index 655d55ff6..cdc6ebdb3 100644
--- a/app/models/list.rb
+++ b/app/models/list.rb
@@ -34,17 +34,6 @@ class List < ApplicationRecord
   private
 
   def clean_feed_manager
-    reblog_key       = FeedManager.instance.key(:list, id, 'reblogs')
-    reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1)
-
-    Redis.current.pipelined do
-      Redis.current.del(FeedManager.instance.key(:list, id))
-      Redis.current.del(reblog_key)
-
-      reblogged_id_set.each do |reblogged_id|
-        reblog_set_key = FeedManager.instance.key(:list, id, "reblogs:#{reblogged_id}")
-        Redis.current.del(reblog_set_key)
-      end
-    end
+    FeedManager.instance.clean_feeds!(:list, [id])
   end
 end
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index 3ec000110..61617d958 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -8,7 +8,7 @@ class BatchedRemoveStatusService < BaseService
   # @param [Hash] options
   # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API
   def call(statuses, **options)
-    ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
+    ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, :tags, reblogs: :account])
 
     statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
 
@@ -27,7 +27,7 @@ class BatchedRemoveStatusService < BaseService
     # transaction lock the database, but we use the delete method instead
     # of destroy to avoid all callbacks. We rely on foreign keys to
     # cascade the delete faster without loading the associations.
-    statuses_and_reblogs.each(&:delete)
+    statuses_and_reblogs.each_slice(50) { |slice| Status.where(id: slice.map(&:id)).delete_all }
 
     # Since we skipped all callbacks, we also need to manually
     # deindex the statuses
@@ -35,11 +35,6 @@ class BatchedRemoveStatusService < BaseService
 
     return if options[:skip_side_effects]
 
-    ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
-
-    @tags          = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
-    @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
-
     # Batch by source account
     statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
       account = account_statuses.first.account
@@ -51,8 +46,9 @@ class BatchedRemoveStatusService < BaseService
     end
 
     # Cannot be batched
+    @status_id_cutoff = Mastodon::Snowflake.id_at(2.weeks.ago)
     redis.pipelined do
-      statuses_and_reblogs.each do |status|
+      statuses.each do |status|
         unpush_from_public_timelines(status)
       end
     end
@@ -66,12 +62,6 @@ class BatchedRemoveStatusService < BaseService
         FeedManager.instance.unpush_from_home(follower, status)
       end
     end
-
-    return unless account.local?
-
-    statuses.each do |status|
-      FeedManager.instance.unpush_from_home(account, status)
-    end
   end
 
   def unpush_from_list_timelines(account, statuses)
@@ -83,9 +73,9 @@ class BatchedRemoveStatusService < BaseService
   end
 
   def unpush_from_public_timelines(status)
-    return unless status.public_visibility?
+    return unless status.public_visibility? && status.id > @status_id_cutoff
 
-    payload = @json_payloads[status.id]
+    payload = Oj.dump(event: :delete, payload: status.id.to_s)
 
     redis.publish('timeline:public', payload)
     redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
@@ -95,7 +85,7 @@ class BatchedRemoveStatusService < BaseService
       redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
     end
 
-    @tags[status.id].each do |hashtag|
+    status.tags.map { |tag| tag.name.mb_chars.downcase }.each do |hashtag|
       redis.publish("timeline:hashtag:#{hashtag}", payload)
       redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
     end
diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb
index 5123a4697..58f6ef2ab 100644
--- a/app/services/delete_account_service.rb
+++ b/app/services/delete_account_service.rb
@@ -46,10 +46,12 @@ class DeleteAccountService < BaseService
     featured_tags
     follow_requests
     identity_proofs
+    list_accounts
     migrations
     mute_relationships
     muted_by_relationships
     notifications
+    owned_lists
     scheduled_statuses
     status_pins
   )
@@ -145,15 +147,14 @@ class DeleteAccountService < BaseService
     purge_media_attachments!
     purge_polls!
     purge_generated_notifications!
+    purge_feeds!
     purge_other_associations!
 
     @account.destroy unless keep_account_record?
   end
 
   def purge_statuses!
-    @account.statuses.reorder(nil).find_in_batches do |statuses|
-      statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record?
-
+    @account.statuses.reorder(nil).where.not(id: reported_status_ids).in_batches do |statuses|
       BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?)
     end
   end
@@ -167,11 +168,7 @@ class DeleteAccountService < BaseService
   end
 
   def purge_polls!
-    @account.polls.reorder(nil).find_each do |poll|
-      next if keep_account_record? && reported_status_ids.include?(poll.status_id)
-
-      poll.delete
-    end
+    @account.polls.reorder(nil).where.not(status_id: reported_status_ids).in_batches.delete_all
   end
 
   def purge_generated_notifications!
@@ -187,6 +184,13 @@ class DeleteAccountService < BaseService
     end
   end
 
+  def purge_feeds!
+    return unless @account.local?
+
+    FeedManager.instance.clean_feeds!(:home, [@account.id])
+    FeedManager.instance.clean_feeds!(:list, @account.owned_lists.pluck(:id))
+  end
+
   def purge_profile!
     # If the account is going to be destroyed
     # there is no point wasting time updating
diff --git a/app/workers/scheduler/feed_cleanup_scheduler.rb b/app/workers/scheduler/feed_cleanup_scheduler.rb
index 458fe6193..42b29f4ec 100644
--- a/app/workers/scheduler/feed_cleanup_scheduler.rb
+++ b/app/workers/scheduler/feed_cleanup_scheduler.rb
@@ -14,37 +14,11 @@ class Scheduler::FeedCleanupScheduler
   private
 
   def clean_home_feeds!
-    clean_feeds!(inactive_account_ids, :home)
+    feed_manager.clean_feeds!(:home, inactive_account_ids)
   end
 
   def clean_list_feeds!
-    clean_feeds!(inactive_list_ids, :list)
-  end
-
-  def clean_feeds!(ids, type)
-    reblogged_id_sets = {}
-
-    redis.pipelined do
-      ids.each do |feed_id|
-        redis.del(feed_manager.key(type, feed_id))
-        reblog_key = feed_manager.key(type, feed_id, 'reblogs')
-        # We collect a future for this: we don't block while getting
-        # it, but we can iterate over it later.
-        reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1)
-        redis.del(reblog_key)
-      end
-    end
-
-    # Remove all of the reblog tracking keys we just removed the
-    # references to.
-    redis.pipelined do
-      reblogged_id_sets.each do |feed_id, future|
-        future.value.each do |reblogged_id|
-          reblog_set_key = feed_manager.key(type, feed_id, "reblogs:#{reblogged_id}")
-          redis.del(reblog_set_key)
-        end
-      end
-    end
+    feed_manager.clean_feeds!(:list, inactive_list_ids)
   end
 
   def inactive_account_ids
diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb
index 239859f06..c1f54a6fd 100644
--- a/spec/services/batched_remove_status_service_spec.rb
+++ b/spec/services/batched_remove_status_service_spec.rb
@@ -43,10 +43,6 @@ RSpec.describe BatchedRemoveStatusService, type: :service do
     expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
   end
 
-  it 'notifies streaming API of author' do
-    expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
-  end
-
   it 'notifies streaming API of public timeline' do
     expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
   end