about summary refs log tree commit diff
path: root/app/services/batched_remove_status_service.rb
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2020-12-22 17:13:55 +0100
committerGitHub <noreply@github.com>2020-12-22 17:13:55 +0100
commit9915d11c0d7a15b6775af8e78fcc4d836368f88d (patch)
treeb6b5efe86ab2686eda673e81b10e05d17af30cf9 /app/services/batched_remove_status_service.rb
parent67ebd61f1180e63fcc671c583e7251e1e09755d9 (diff)
Fix unnecessary queries when batch-removing statuses, 100x faster (#15387)
Diffstat (limited to 'app/services/batched_remove_status_service.rb')
-rw-r--r--app/services/batched_remove_status_service.rb91
1 files changed, 51 insertions, 40 deletions
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index 28e5468b3..63ab89f2d 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -3,29 +3,45 @@
 class BatchedRemoveStatusService < BaseService
   include Redisable
 
-  # Delete given statuses and reblogs of them
-  # Remove statuses from home feeds
-  # Push delete events to streaming API for home feeds and public feeds
-  # @param [Enumerable<Status>] statuses A preferably batched array of statuses
+  # Delete multiple statuses and reblogs of them as efficiently as possible
+  # @param [Enumerable<Status>] statuses An array of statuses
   # @param [Hash] options
-  # @option [Boolean] :skip_side_effects
+  # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API
   def call(statuses, **options)
-    statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a }
+    ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account])
 
-    @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a }
-    @tags     = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) }
+    statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs }
 
-    @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+    # The conversations for direct visibility statuses also need
+    # to be manually updated. This part is not efficient but we
+    # rely on direct visibility statuses being relatively rare.
+    statuses_with_account_conversations = statuses.select(&:direct_visibility?)
 
-    statuses.each do |status|
-      status.mark_for_mass_destruction!
-      status.destroy
+    ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account])
+
+    statuses_with_account_conversations.each do |status|
+      status.send(:unlink_from_conversations)
     end
 
+    # We do not batch all deletes into one to avoid having a long-running
+    # transaction lock the database, but we use the delete method instead
+    # of destroy to avoid all callbacks. We rely on foreign keys to
+    # cascade the delete faster without loading the associations.
+    statuses_and_reblogs.each(&:delete)
+
+    # Since we skipped all callbacks, we also need to manually
+    # deindex the statuses
+    Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs)
+
     return if options[:skip_side_effects]
 
+    ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags)
+
+    @tags          = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } }
+    @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) }
+
     # Batch by source account
-    statuses.group_by(&:account_id).each_value do |account_statuses|
+    statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses|
       account = account_statuses.first.account
 
       next unless account
@@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService
     end
 
     # Cannot be batched
-    statuses.each do |status|
-      unpush_from_public_timelines(status)
+    redis.pipelined do
+      statuses_and_reblogs.each do |status|
+        unpush_from_public_timelines(status)
+      end
     end
   end
 
   private
 
   def unpush_from_home_timelines(account, statuses)
-    recipients = account.followers_for_local_distribution.to_a
-
-    recipients << account if account.local?
-
-    recipients.each do |follower|
+    account.followers_for_local_distribution.includes(:user).find_each do |follower|
       statuses.each do |status|
         FeedManager.instance.unpush_from_home(follower, status)
       end
     end
+
+    return unless account.local?
+
+    statuses.each do |status|
+      FeedManager.instance.unpush_from_home(account, status)
+    end
   end
 
   def unpush_from_list_timelines(account, statuses)
-    account.lists_for_local_distribution.select(:id, :account_id).each do |list|
+    account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list|
       statuses.each do |status|
         FeedManager.instance.unpush_from_list(list, status)
       end
@@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService
 
     payload = @json_payloads[status.id]
 
-    redis.pipelined do
-      redis.publish('timeline:public', payload)
-      if status.local?
-        redis.publish('timeline:public:local', payload)
-      else
-        redis.publish('timeline:public:remote', payload)
-      end
-      if status.media_attachments.any?
-        redis.publish('timeline:public:media', payload)
-        if status.local?
-          redis.publish('timeline:public:local:media', payload)
-        else
-          redis.publish('timeline:public:remote:media', payload)
-        end
-      end
+    redis.publish('timeline:public', payload)
+    redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload)
 
-      @tags[status.id].each do |hashtag|
-        redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload)
-        redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local?
-      end
+    if status.media_attachments.any?
+      redis.publish('timeline:public:media', payload)
+      redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload)
+    end
+
+    @tags[status.id].each do |hashtag|
+      redis.publish("timeline:hashtag:#{hashtag}", payload)
+      redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
     end
   end
 end