From 9915d11c0d7a15b6775af8e78fcc4d836368f88d Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Tue, 22 Dec 2020 17:13:55 +0100 Subject: Fix unnecessary queries when batch-removing statuses, 100x faster (#15387) --- app/models/favourite.rb | 2 +- app/models/status.rb | 12 +- app/services/batched_remove_status_service.rb | 91 ++++++++------- app/services/delete_account_service.rb | 126 ++++++++++++++++----- config/initializers/chewy.rb | 5 + lib/chewy/strategy/custom_sidekiq.rb | 25 +--- .../services/batched_remove_status_service_spec.rb | 5 + 7 files changed, 167 insertions(+), 99 deletions(-) diff --git a/app/models/favourite.rb b/app/models/favourite.rb index bf0ec4449..35028b7dd 100644 --- a/app/models/favourite.rb +++ b/app/models/favourite.rb @@ -36,7 +36,7 @@ class Favourite < ApplicationRecord end def decrement_cache_counters - return if association(:status).loaded? && (status.marked_for_destruction? || status.marked_for_mass_destruction?) + return if association(:status).loaded? && status.marked_for_destruction? status&.decrement_count!(:favourites_count) end end diff --git a/app/models/status.rb b/app/models/status.rb index 96d90e1c2..b426f9d5b 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -228,14 +228,6 @@ class Status < ApplicationRecord @emojis = CustomEmoji.from_text(fields.join(' '), account.domain) end - def mark_for_mass_destruction! - @marked_for_mass_destruction = true - end - - def marked_for_mass_destruction? - @marked_for_mass_destruction - end - def replies_count status_stat&.replies_count || 0 end @@ -430,7 +422,7 @@ class Status < ApplicationRecord end def decrement_counter_caches - return if direct_visibility? || marked_for_mass_destruction? + return if direct_visibility? account&.decrement_count!(:statuses_count) reblog&.decrement_count!(:reblogs_count) if reblog? @@ -440,7 +432,7 @@ class Status < ApplicationRecord def unlink_from_conversations return unless direct_visibility? - mentioned_accounts = mentions.includes(:account).map(&:account) + mentioned_accounts = (association(:mentions).loaded? ? mentions : mentions.includes(:account)).map(&:account) inbox_owners = mentioned_accounts.select(&:local?) + (account.local? ? [account] : []) inbox_owners.each do |inbox_owner| diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb index 28e5468b3..63ab89f2d 100644 --- a/app/services/batched_remove_status_service.rb +++ b/app/services/batched_remove_status_service.rb @@ -3,29 +3,45 @@ class BatchedRemoveStatusService < BaseService include Redisable - # Delete given statuses and reblogs of them - # Remove statuses from home feeds - # Push delete events to streaming API for home feeds and public feeds - # @param [Enumerable] statuses A preferably batched array of statuses + # Delete multiple statuses and reblogs of them as efficiently as possible + # @param [Enumerable] statuses An array of statuses # @param [Hash] options - # @option [Boolean] :skip_side_effects + # @option [Boolean] :skip_side_effects Do not modify feeds and send updates to streaming API def call(statuses, **options) - statuses = Status.where(id: statuses.map(&:id)).includes(:account).flat_map { |status| [status] + status.reblogs.includes(:account).to_a } + ActiveRecord::Associations::Preloader.new.preload(statuses, options[:skip_side_effects] ? :reblogs : [:account, reblogs: :account]) - @mentions = statuses.each_with_object({}) { |s, h| h[s.id] = s.active_mentions.includes(:account).to_a } - @tags = statuses.each_with_object({}) { |s, h| h[s.id] = s.tags.pluck(:name) } + statuses_and_reblogs = statuses.flat_map { |status| [status] + status.reblogs } - @json_payloads = statuses.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } + # The conversations for direct visibility statuses also need + # to be manually updated. This part is not efficient but we + # rely on direct visibility statuses being relatively rare. + statuses_with_account_conversations = statuses.select(&:direct_visibility?) - statuses.each do |status| - status.mark_for_mass_destruction! - status.destroy + ActiveRecord::Associations::Preloader.new.preload(statuses_with_account_conversations, [mentions: :account]) + + statuses_with_account_conversations.each do |status| + status.send(:unlink_from_conversations) end + # We do not batch all deletes into one to avoid having a long-running + # transaction lock the database, but we use the delete method instead + # of destroy to avoid all callbacks. We rely on foreign keys to + # cascade the delete faster without loading the associations. + statuses_and_reblogs.each(&:delete) + + # Since we skipped all callbacks, we also need to manually + # deindex the statuses + Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) + return if options[:skip_side_effects] + ActiveRecord::Associations::Preloader.new.preload(statuses_and_reblogs, :tags) + + @tags = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = s.tags.map { |tag| tag.name.mb_chars.downcase } } + @json_payloads = statuses_and_reblogs.each_with_object({}) { |s, h| h[s.id] = Oj.dump(event: :delete, payload: s.id.to_s) } + # Batch by source account - statuses.group_by(&:account_id).each_value do |account_statuses| + statuses_and_reblogs.group_by(&:account_id).each_value do |account_statuses| account = account_statuses.first.account next unless account @@ -35,27 +51,31 @@ class BatchedRemoveStatusService < BaseService end # Cannot be batched - statuses.each do |status| - unpush_from_public_timelines(status) + redis.pipelined do + statuses_and_reblogs.each do |status| + unpush_from_public_timelines(status) + end end end private def unpush_from_home_timelines(account, statuses) - recipients = account.followers_for_local_distribution.to_a - - recipients << account if account.local? - - recipients.each do |follower| + account.followers_for_local_distribution.includes(:user).find_each do |follower| statuses.each do |status| FeedManager.instance.unpush_from_home(follower, status) end end + + return unless account.local? + + statuses.each do |status| + FeedManager.instance.unpush_from_home(account, status) + end end def unpush_from_list_timelines(account, statuses) - account.lists_for_local_distribution.select(:id, :account_id).each do |list| + account.lists_for_local_distribution.select(:id, :account_id).includes(account: :user).find_each do |list| statuses.each do |status| FeedManager.instance.unpush_from_list(list, status) end @@ -67,26 +87,17 @@ class BatchedRemoveStatusService < BaseService payload = @json_payloads[status.id] - redis.pipelined do - redis.publish('timeline:public', payload) - if status.local? - redis.publish('timeline:public:local', payload) - else - redis.publish('timeline:public:remote', payload) - end - if status.media_attachments.any? - redis.publish('timeline:public:media', payload) - if status.local? - redis.publish('timeline:public:local:media', payload) - else - redis.publish('timeline:public:remote:media', payload) - end - end + redis.publish('timeline:public', payload) + redis.publish(status.local? ? 'timeline:public:local' : 'timeline:public:remote', payload) - @tags[status.id].each do |hashtag| - redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}", payload) - redis.publish("timeline:hashtag:#{hashtag.mb_chars.downcase}:local", payload) if status.local? - end + if status.media_attachments.any? + redis.publish('timeline:public:media', payload) + redis.publish(status.local? ? 'timeline:public:local:media' : 'timeline:public:remote:media', payload) + end + + @tags[status.id].each do |hashtag| + redis.publish("timeline:hashtag:#{hashtag}", payload) + redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local? end end end diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb index fa834e775..5123a4697 100644 --- a/app/services/delete_account_service.rb +++ b/app/services/delete_account_service.rb @@ -6,15 +6,21 @@ class DeleteAccountService < BaseService ASSOCIATIONS_ON_SUSPEND = %w( account_pins active_relationships + aliases block_relationships blocked_by_relationships + bookmarks conversation_mutes conversations custom_filters + devices domain_blocks favourites + featured_tags follow_requests + identity_proofs list_accounts + migrations mute_relationships muted_by_relationships notifications @@ -25,6 +31,29 @@ class DeleteAccountService < BaseService status_pins ).freeze + # The following associations have no important side-effects + # in callbacks and all of their own associations are secured + # by foreign keys, making them safe to delete without loading + # into memory + ASSOCIATIONS_WITHOUT_SIDE_EFFECTS = %w( + account_pins + aliases + conversation_mutes + conversations + custom_filters + devices + domain_blocks + featured_tags + follow_requests + identity_proofs + migrations + mute_relationships + muted_by_relationships + notifications + scheduled_statuses + status_pins + ) + ASSOCIATIONS_ON_DESTROY = %w( reports targeted_moderation_notes @@ -55,19 +84,25 @@ class DeleteAccountService < BaseService @options[:skip_activitypub] = true if @options[:skip_side_effects] - reject_follows! - undo_follows! - purge_user! - purge_profile! + distribute_activities! purge_content! fulfill_deletion_request! end private - def reject_follows! - return if @account.local? || !@account.activitypub? || @options[:skip_activitypub] + def distribute_activities! + return if skip_activitypub? + if @account.local? + delete_actor! + elsif @account.activitypub? + reject_follows! + undo_follows! + end + end + + def reject_follows! # When deleting a remote account, the account obviously doesn't # actually become deleted on its origin server, i.e. unlike a # locally deleted account it continues to have access to its home @@ -81,8 +116,6 @@ class DeleteAccountService < BaseService end def undo_follows! - return if @account.local? || !@account.activitypub? || @options[:skip_activitypub] - # When deleting a remote account, the account obviously doesn't # actually become deleted on its origin server, but following relationships # are severed on our end. Therefore, make the remote server aware that the @@ -97,7 +130,7 @@ class DeleteAccountService < BaseService def purge_user! return if !@account.local? || @account.user.nil? - if @options[:reserve_email] + if keep_user_record? @account.user.disable! @account.user.invites.where(uses: 0).destroy_all else @@ -106,34 +139,52 @@ class DeleteAccountService < BaseService end def purge_content! - distribute_delete_actor! if @account.local? && !@options[:skip_side_effects] + purge_user! + purge_profile! + purge_statuses! + purge_media_attachments! + purge_polls! + purge_generated_notifications! + purge_other_associations! + @account.destroy unless keep_account_record? + end + + def purge_statuses! @account.statuses.reorder(nil).find_in_batches do |statuses| - statuses.reject! { |status| reported_status_ids.include?(status.id) } if @options[:reserve_username] - BatchedRemoveStatusService.new.call(statuses, skip_side_effects: @options[:skip_side_effects]) + statuses.reject! { |status| reported_status_ids.include?(status.id) } if keep_account_record? + + BatchedRemoveStatusService.new.call(statuses, skip_side_effects: skip_side_effects?) end + end + def purge_media_attachments! @account.media_attachments.reorder(nil).find_each do |media_attachment| - next if @options[:reserve_username] && reported_status_ids.include?(media_attachment.status_id) + next if keep_account_record? && reported_status_ids.include?(media_attachment.status_id) media_attachment.destroy end + end + def purge_polls! @account.polls.reorder(nil).find_each do |poll| - next if @options[:reserve_username] && reported_status_ids.include?(poll.status_id) + next if keep_account_record? && reported_status_ids.include?(poll.status_id) - # We can safely delete the poll rather than destroy it, as any non-reported - # status should have been deleted already, as long as we take care of - # notifications. - Notification.where(poll: poll).delete_all poll.delete end + end + def purge_generated_notifications! + # By deleting polls and statuses without callbacks, we've left behind + # polymorphically associated notifications generated by this account + + Notification.where(from_account: @account).in_batches.delete_all + end + + def purge_other_associations! associations_for_destruction.each do |association_name| - destroy_all(@account.public_send(association_name)) + purge_association(association_name) end - - @account.destroy unless @options[:reserve_username] end def purge_profile! @@ -141,7 +192,7 @@ class DeleteAccountService < BaseService # there is no point wasting time updating # its values first - return unless @options[:reserve_username] + return unless keep_account_record? @account.silenced_at = nil @account.suspended_at = @options[:suspended_at] || Time.now.utc @@ -156,6 +207,7 @@ class DeleteAccountService < BaseService @account.followers_count = 0 @account.following_count = 0 @account.moved_to_account = nil + @account.also_known_as = [] @account.trust_level = :untrusted @account.avatar.destroy @account.header.destroy @@ -166,11 +218,17 @@ class DeleteAccountService < BaseService @account.deletion_request&.destroy end - def destroy_all(association) - association.in_batches.destroy_all + def purge_association(association_name) + association = @account.public_send(association_name) + + if ASSOCIATIONS_WITHOUT_SIDE_EFFECTS.include?(association_name) + association.in_batches.delete_all + else + association.in_batches.destroy_all + end end - def distribute_delete_actor! + def delete_actor! ActivityPub::DeliveryWorker.push_bulk(delivery_inboxes) do |inbox_url| [delete_actor_json, @account.id, inbox_url] end @@ -197,10 +255,26 @@ class DeleteAccountService < BaseService end def associations_for_destruction - if @options[:reserve_username] + if keep_account_record? ASSOCIATIONS_ON_SUSPEND else ASSOCIATIONS_ON_SUSPEND + ASSOCIATIONS_ON_DESTROY end end + + def keep_user_record? + @options[:reserve_email] + end + + def keep_account_record? + @options[:reserve_username] + end + + def skip_side_effects? + @options[:skip_side_effects] + end + + def skip_activitypub? + @options[:skip_activitypub] + end end diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb index 8f54abf77..9fc9b2f1a 100644 --- a/config/initializers/chewy.rb +++ b/config/initializers/chewy.rb @@ -12,6 +12,10 @@ Chewy.settings = { sidekiq: { queue: 'pull' }, } +# We use our own async strategy even outside the request-response +# cycle, which takes care of checking if ElasticSearch is enabled +# or not. However, mind that for the Rails console, the :urgent +# strategy is set automatically with no way to override it. Chewy.root_strategy = :custom_sidekiq Chewy.request_strategy = :custom_sidekiq Chewy.use_after_commit_callbacks = false @@ -37,6 +41,7 @@ Elasticsearch::Transport::Client.prepend Module.new { super arguments end } + Elasticsearch::API::Indices::IndicesClient.prepend Module.new { def create(arguments = {}) arguments[:include_type_name] = true diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb index 3e54326ba..794ae4ed4 100644 --- a/lib/chewy/strategy/custom_sidekiq.rb +++ b/lib/chewy/strategy/custom_sidekiq.rb @@ -2,29 +2,10 @@ module Chewy class Strategy - class CustomSidekiq < Base - class Worker - include ::Sidekiq::Worker - - sidekiq_options queue: 'pull' - - def perform(type, ids, options = {}) - options[:refresh] = !Chewy.disable_refresh_async if Chewy.disable_refresh_async - type.constantize.import!(ids, options) - end - end - - def update(type, objects, _options = {}) - return unless Chewy.enabled? - - ids = type.root.id ? Array.wrap(objects) : type.adapter.identify(objects) - - return if ids.empty? - - Worker.perform_async(type.name, ids) + class CustomSidekiq < Sidekiq + def update(_type, _objects, _options = {}) + super if Chewy.enabled? end - - def leave; end end end end diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb index f84256f18..239859f06 100644 --- a/spec/services/batched_remove_status_service_spec.rb +++ b/spec/services/batched_remove_status_service_spec.rb @@ -26,6 +26,11 @@ RSpec.describe BatchedRemoveStatusService, type: :service do subject.call([status1, status2]) end + it 'removes statuses' do + expect { Status.find(status1.id) }.to raise_error ActiveRecord::RecordNotFound + expect { Status.find(status2.id) }.to raise_error ActiveRecord::RecordNotFound + end + it 'removes statuses from author\'s home feed' do expect(HomeFeed.new(alice).get(10)).to_not include([status1.id, status2.id]) end -- cgit