From 9915d11c0d7a15b6775af8e78fcc4d836368f88d Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 22 Dec 2020 17:13:55 +0100 Subject: Fix unnecessary queries when batch-removing statuses, 100x faster (#15387) --- app/services/batched_remove_status_service.rb | 91 +++++++++++++++------------ 1 file changed, 51 insertions(+), 40 deletions(-) (limited to 'app/services/batched_remove_status_service.rb') diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index 28e5468b3..63ab89f2d 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -3,29 +3,45 @@ class BatchedRemoveStatusService < BaseService include Redisable - # Delete given statuses and reblogs of them - # Remove statuses from home feeds - # Push delete events to streaming API for home feeds and public feeds - # @param [Enumerable] statuses A preferably batched array of statuses + # Delete multiple statuses and reblogs of them as efficiently as possible + # @param [Enumerable] statuses An array of statuses # @param [Hash] options - # @option [Boolean] :skip_side_effects + # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API def call(statuses, **options) - statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a } + ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account]) - @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a } - @tags = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) } + statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs } - @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } + # The conversations for direct visibility statuses also need + # to be manually updated. This part is not efficient but we + # rely on direct visibility statuses being relatively rare. + statuses_with_account_conversations = statuses.select(&:direct_visibility?) - statuses.each do |status| - status.mark_for_mass_destruction! - status.destroy + ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account]) + + statuses_with_account_conversations.each do |status| + status.send(:unlink_from_conversations) end + # We do not batch all deletes into one to avoid having a long-running + # transaction lock the database, but we use the delete method instead + # of destroy to avoid all callbacks. We rely on foreign keys to + # cascade the delete faster without loading the associations. + statuses_and_reblogs.each(&:delete) + + # Since we skipped all callbacks, we also need to manually + # deindex the statuses + Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) + return if options[:skip_side_effects] + ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags) + + @tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } } + @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } + # Batch by source account - statuses.group_by(&:account_id).each_value do |account_statuses| + statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses| account = account_statuses.first.account next unless account @@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService end # Cannot be batched - statuses.each do |status| - unpush_from_public_timelines(status) + redis.pipelined do + statuses_and_reblogs.each do |status| + unpush_from_public_timelines(status) + end end end private def unpush_from_home_timelines(account, statuses) - recipients = account.followers_for_local_distribution.to_a - - recipients << account if account.local? - - recipients.each do |follower| + account.followers_for_local_distribution.includes(:user).find_each do |follower| statuses.each do |status| FeedManager.instance.unpush_from_home(follower, status) end end + + return unless account.local? + + statuses.each do |status| + FeedManager.instance.unpush_from_home(account, status) + end end def unpush_from_list_timelines(account, statuses) - account.lists_for_local_distribution.select(:id, :account_id).each do |list| + account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list| statuses.each do |status| FeedManager.instance.unpush_from_list(list, status) end @@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService payload = @json_payloads[status.id] - redis.pipelined do - redis.publish('timeline:public', payload) - if status.local? - redis.publish('timeline:public:local', payload) - else - redis.publish('timeline:public:remote', payload) - end - if status.media_attachments.any? - redis.publish('timeline:public:media', payload) - if status.local? - redis.publish('timeline:public:local:media', payload) - else - redis.publish('timeline:public:remote:media', payload) - end - end + redis.publish('timeline:public', payload) + redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload) - @tags[status.id].each do |hashtag| - redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload) - redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local? - end + if status.media_attachments.any? + redis.publish('timeline:public:media', payload) + redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload) + end + + @tags[status.id].each do |hashtag| + redis.publish("timeline:hashtag:#{hashtag}", payload) + redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? end end end -- cgit From f18349640b686a3c3866711bfcd1d14edd5bc41a Mon Sep 17 00:00:00 2001 From: ThibG Date: Tue, 22 Dec 2020 20:24:55 +0100 Subject: Fix batch order warnings in BatchedRemoveStatusService (#15409) Co-authored-by: Claire --- app/services/batched_remove_status_service.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'app/services/batched_remove_status_service.rb') diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index 63ab89f2d..3ec000110 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -61,7 +61,7 @@ class BatchedRemoveStatusService < BaseService private def unpush_from_home_timelines(account, statuses) - account.followers_for_local_distribution.includes(:user).find_each do |follower| + account.followers_for_local_distribution.includes(:user).reorder(nil).find_each do |follower| statuses.each do |status| FeedManager.instance.unpush_from_home(follower, status) end @@ -75,7 +75,7 @@ class BatchedRemoveStatusService < BaseService end def unpush_from_list_timelines(account, statuses) - account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list| + account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).reorder(nil).find_each do |list| statuses.each do |status| FeedManager.instance.unpush_from_list(list, status) end -- cgit From 3249d35bdcd9a495af3277dfb4b2129d7ef80f15 Mon Sep 17 00:00:00 2001 From: ThibG Date: Tue, 22 Dec 2020 23:57:46 +0100 Subject: Improve account deletion performances further (#15407) * Delete status records by batches of 50 * Do not precompute values that are only used once * Do not generate redis events for removal of public toots older than two weeks * Filter reported toots a priori for polls and status deletion * Do not process reblogs when cleaning up public timelines As in Mastodon proper, reblogs don't appear in public TLs * Clean the deleted account's own feed in one go * Refactor Account#clean_feed_manager and List#clean_feed_manager * Delete instead of destroy a few more associations * Fix preloading Co-authored-by: Claire --- app/lib/feed_manager.rb | 30 ++++++++++++++++++++++ app/models/account.rb | 13 +--------- app/models/list.rb | 13 +--------- app/services/batched_remove_status_service.rb | 24 +++++------------ app/services/delete_account_service.rb | 20 +++++++++------ app/workers/scheduler/feed_cleanup_scheduler.rb | 30 ++-------------------- .../services/batched_remove_status_service_spec.rb | 4 --- 7 files changed, 53 insertions(+), 81 deletions(-) (limited to 'app/services/batched_remove_status_service.rb') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 5e01ef67a..f0ad3e21f 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -230,6 +230,36 @@ class FeedManager end end + # Completely clear multiple feeds at once + # @param [Symbol] type + # @param [Array] ids + # @return [void] + def clean_feeds!(type, ids) + reblogged_id_sets = {} + + redis.pipelined do + ids.each do |feed_id| + redis.del(key(type, feed_id)) + reblog_key = key(type, feed_id, 'reblogs') + # We collect a future for this: we don't block while getting + # it, but we can iterate over it later. + reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1) + redis.del(reblog_key) + end + end + + # Remove all of the reblog tracking keys we just removed the + # references to. + redis.pipelined do + reblogged_id_sets.each do |feed_id, future| + future.value.each do |reblogged_id| + reblog_set_key = key(type, feed_id, "reblogs:#{reblogged_id}") + redis.del(reblog_set_key) + end + end + end + end + private # Trim a feed to maximum size by removing older items diff --git a/app/models/account.rb b/app/models/account.rb index 80eb92a71..e6cf03fa8 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -578,17 +578,6 @@ class Account < ApplicationRecord end def clean_feed_manager - reblog_key = FeedManager.instance.key(:home, id, 'reblogs') - reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1) - - Redis.current.pipelined do - Redis.current.del(FeedManager.instance.key(:home, id)) - Redis.current.del(reblog_key) - - reblogged_id_set.each do |reblogged_id| - reblog_set_key = FeedManager.instance.key(:home, id, "reblogs:#{reblogged_id}") - Redis.current.del(reblog_set_key) - end - end + FeedManager.instance.clean_feeds!(:home, [id]) end end diff --git a/app/models/list.rb b/app/models/list.rb index 655d55ff6..cdc6ebdb3 100644 --- a/app/models/list.rb +++ b/app/models/list.rb @@ -34,17 +34,6 @@ class List < ApplicationRecord private def clean_feed_manager - reblog_key = FeedManager.instance.key(:list, id, 'reblogs') - reblogged_id_set = Redis.current.zrange(reblog_key, 0, -1) - - Redis.current.pipelined do - Redis.current.del(FeedManager.instance.key(:list, id)) - Redis.current.del(reblog_key) - - reblogged_id_set.each do |reblogged_id| - reblog_set_key = FeedManager.instance.key(:list, id, "reblogs:#{reblogged_id}") - Redis.current.del(reblog_set_key) - end - end + FeedManager.instance.clean_feeds!(:list, [id]) end end diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index 3ec000110..61617d958 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -8,7 +8,7 @@ class BatchedRemoveStatusService < BaseService # @param [Hash] options # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API def call(statuses, **options) - ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account]) + ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, :tags, reblogs: :account]) statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs } @@ -27,7 +27,7 @@ class BatchedRemoveStatusService < BaseService # transaction lock the database, but we use the delete method instead # of destroy to avoid all callbacks. We rely on foreign keys to # cascade the delete faster without loading the associations. - statuses_and_reblogs.each(&:delete) + statuses_and_reblogs.each_slice(50) { |slice| Status.where(id: slice.map(&:id)).delete_all } # Since we skipped all callbacks, we also need to manually # deindex the statuses @@ -35,11 +35,6 @@ class BatchedRemoveStatusService < BaseService return if options[:skip_side_effects] - ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags) - - @tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } } - @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } - # Batch by source account statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses| account = account_statuses.first.account @@ -51,8 +46,9 @@ class BatchedRemoveStatusService < BaseService end # Cannot be batched + @status_id_cutoff = Mastodon::Snowflake.id_at(2.weeks.ago) redis.pipelined do - statuses_and_reblogs.each do |status| + statuses.each do |status| unpush_from_public_timelines(status) end end @@ -66,12 +62,6 @@ class BatchedRemoveStatusService < BaseService FeedManager.instance.unpush_from_home(follower, status) end end - - return unless account.local? - - statuses.each do |status| - FeedManager.instance.unpush_from_home(account, status) - end end def unpush_from_list_timelines(account, statuses) @@ -83,9 +73,9 @@ class BatchedRemoveStatusService < BaseService end def unpush_from_public_timelines(status) - return unless status.public_visibility? + return unless status.public_visibility? && status.id > @status_id_cutoff - payload = @json_payloads[status.id] + payload = Oj.dump(event: :delete, payload: status.id.to_s) redis.publish('timeline:public', payload) redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload) @@ -95,7 +85,7 @@ class BatchedRemoveStatusService < BaseService redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload) end - @tags[status.id].each do |hashtag| + status.tags.map { |tag| tag.name.mb_chars.downcase }.each do |hashtag| redis.publish("timeline:hashtag:#{hashtag}", payload) redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? end diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb index 5123a4697..58f6ef2ab 100644 --- a/app/services/delete_account_service.rb +++ b/app/services/delete_account_service.rb @@ -46,10 +46,12 @@ class DeleteAccountService < BaseService featured_tags follow_requests identity_proofs + list_accounts migrations mute_relationships muted_by_relationships notifications + owned_lists scheduled_statuses status_pins ) @@ -145,15 +147,14 @@ class DeleteAccountService < BaseService purge_media_attachments! purge_polls! purge_generated_notifications! + purge_feeds! purge_other_associations! @account.destroy unless keep_account_record? end def purge_statuses! - @account.statuses.reorder(nil).find_in_batches do |statuses| - statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record? - + @account.statuses.reorder(nil).where.not(id: reported_status_ids).in_batches do |statuses| BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?) end end @@ -167,11 +168,7 @@ class DeleteAccountService < BaseService end def purge_polls! - @account.polls.reorder(nil).find_each do |poll| - next if keep_account_record? && reported_status_ids.include?(poll.status_id) - - poll.delete - end + @account.polls.reorder(nil).where.not(status_id: reported_status_ids).in_batches.delete_all end def purge_generated_notifications! @@ -187,6 +184,13 @@ class DeleteAccountService < BaseService end end + def purge_feeds! + return unless @account.local? + + FeedManager.instance.clean_feeds!(:home, [@account.id]) + FeedManager.instance.clean_feeds!(:list, @account.owned_lists.pluck(:id)) + end + def purge_profile! # If the account is going to be destroyed # there is no point wasting time updating diff --git a/app/workers/scheduler/feed_cleanup_scheduler.rb b/app/workers/scheduler/feed_cleanup_scheduler.rb index 458fe6193..42b29f4ec 100644 --- a/app/workers/scheduler/feed_cleanup_scheduler.rb +++ b/app/workers/scheduler/feed_cleanup_scheduler.rb @@ -14,37 +14,11 @@ class Scheduler::FeedCleanupScheduler private def clean_home_feeds! - clean_feeds!(inactive_account_ids, :home) + feed_manager.clean_feeds!(:home, inactive_account_ids) end def clean_list_feeds! - clean_feeds!(inactive_list_ids, :list) - end - - def clean_feeds!(ids, type) - reblogged_id_sets = {} - - redis.pipelined do - ids.each do |feed_id| - redis.del(feed_manager.key(type, feed_id)) - reblog_key = feed_manager.key(type, feed_id, 'reblogs') - # We collect a future for this: we don't block while getting - # it, but we can iterate over it later. - reblogged_id_sets[feed_id] = redis.zrange(reblog_key, 0, -1) - redis.del(reblog_key) - end - end - - # Remove all of the reblog tracking keys we just removed the - # references to. - redis.pipelined do - reblogged_id_sets.each do |feed_id, future| - future.value.each do |reblogged_id| - reblog_set_key = feed_manager.key(type, feed_id, "reblogs:#{reblogged_id}") - redis.del(reblog_set_key) - end - end - end + feed_manager.clean_feeds!(:list, inactive_list_ids) end def inactive_account_ids diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb index 239859f06..c1f54a6fd 100644 --- a/spec/services/batched_remove_status_service_spec.rb +++ b/spec/services/batched_remove_status_service_spec.rb @@ -43,10 +43,6 @@ RSpec.describe BatchedRemoveStatusService, type: :service do expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once) end - it 'notifies streaming API of author' do - expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once) - end - it 'notifies streaming API of public timeline' do expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once) end -- cgit From 2ab3e91eaf6c386f3933d79007ca87215397f06c Mon Sep 17 00:00:00 2001 From: ThibG Date: Wed, 23 Dec 2020 00:04:52 +0100 Subject: Fix BatchedRemoveStatusService not working without ES in rails console (#15408) Not a huge deal but may cause surprising failures in custom scripts and development. Co-authored-by: Claire --- app/services/batched_remove_status_service.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/services/batched_remove_status_service.rb') diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index 61617d958..5000062e4 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -31,7 +31,7 @@ class BatchedRemoveStatusService < BaseService # Since we skipped all callbacks, we also need to manually # deindex the statuses - Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) + Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled? return if options[:skip_side_effects] -- cgit