about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2017-06-14 18:01:35 +0200
committerGitHub <noreply@github.com>2017-06-14 18:01:35 +0200
commite17c2e5da5010beaefb46c8fffe35e87cb28f407 (patch)
tree5044d517e80ea2e152621da3520992648bcca42b
parent4a618908e836ecb94f70e99f2198ee7b3ba3b2ec (diff)
Batched remove status service (#3735)
* Make Pubsubhubbub::DistributionWorker handle both single stream entry
arguments, as well as arrays of stream entries

* Add BatchedRemoveStatusService, make SuspendAccountService use it

* Improve method names

* Add test

* Add more tests

* Use PuSH payloads of 100 to have a clear mapping of
1000 input statuses -> 10 PuSH payloads

It was nice while it lasted
-rw-r--r--app/services/batched_remove_status_service.rb114
-rw-r--r--app/services/suspend_account_service.rb5
-rw-r--r--app/workers/pubsubhubbub/distribution_worker.rb42
-rw-r--r--spec/services/batched_remove_status_service_spec.rb61
-rw-r--r--spec/workers/pubsubhubbub/distribution_worker_spec.rb15
5 files changed, 215 insertions, 22 deletions
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
new file mode 100644
index 000000000..b462154ae
--- /dev/null
+++ b/app/services/batched_remove_status_service.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+class BatchedRemoveStatusService < BaseService
+  include StreamEntryRenderer
+
+  # Delete given statuses and reblogs of them
+  # Dispatch PuSH updates of the deleted statuses, but only local ones
+  # Dispatch Salmon deletes, unique per domain, of the deleted statuses, but only local ones
+  # Remove statuses from home feeds
+  # Push delete events to streaming API for home feeds and public feeds
+  # @param [Status] statuses A preferably batched array of statuses
+  def call(statuses)
+    statuses = Status.where(id: statuses.map(&:id)).includes(:account, :stream_entry).flat_map { |status| [status] + status.reblogs.includes(:account, :stream_entry).to_a }
+
+    @mentions = statuses.map { |s| [s.id, s.mentions.includes(:account).to_a] }.to_h
+    @tags     = statuses.map { |s| [s.id, s.tags.pluck(:name)] }.to_h
+
+    @stream_entry_batches = []
+    @salmon_batches       = []
+    @json_payloads        = statuses.map { |s| [s.id, Oj.dump(event: :delete, payload: s.id)] }.to_h
+
+    # Ensure that rendered XML reflects destroyed state
+    Status.where(id: statuses.map(&:id)).in_batches.destroy_all
+
+    # Batch by source account
+    statuses.group_by(&:account_id).each do |_, account_statuses|
+      account = account_statuses.first.account
+
+      unpush_from_home_timelines(account_statuses)
+      batch_stream_entries(account_statuses) if account.local?
+    end
+
+    # Cannot be batched
+    statuses.each do |status|
+      unpush_from_public_timelines(status)
+      batch_salmon_slaps(status) if status.local?
+    end
+
+    Pubsubhubbub::DistributionWorker.push_bulk(@stream_entry_batches) { |batch| batch }
+    NotificationWorker.push_bulk(@salmon_batches) { |batch| batch }
+  end
+
+  private
+
+  def batch_stream_entries(statuses)
+    stream_entry_ids = statuses.map { |s| s.stream_entry.id }
+
+    stream_entry_ids.each_slice(100) do |batch_of_stream_entry_ids|
+      @stream_entry_batches << [batch_of_stream_entry_ids]
+    end
+  end
+
+  def unpush_from_home_timelines(statuses)
+    account    = statuses.first.account
+    recipients = account.followers.local.pluck(:id)
+
+    recipients << account.id if account.local?
+
+    recipients.each do |follower_id|
+      unpush(follower_id, statuses)
+    end
+  end
+
+  def unpush_from_public_timelines(status)
+    payload = @json_payloads[status.id]
+
+    redis.pipelined do
+      redis.publish('timeline:public', payload)
+      redis.publish('timeline:public:local', payload) if status.local?
+
+      @tags[status.id].each do |hashtag|
+        redis.publish("timeline:hashtag:#{hashtag}", payload)
+        redis.publish("timeline:hashtag:#{hashtag}:local", payload) if status.local?
+      end
+    end
+  end
+
+  def batch_salmon_slaps(status)
+    return if @mentions[status.id].empty?
+
+    payload    = stream_entry_to_xml(status.stream_entry.reload)
+    recipients = @mentions[status.id].map(&:account).reject(&:local?).uniq(&:domain).map(&:id)
+
+    recipients.each do |recipient_id|
+      @salmon_batches << [payload, status.account_id, recipient_id]
+    end
+  end
+
+  def unpush(follower_id, statuses)
+    key = FeedManager.instance.key(:home, follower_id)
+
+    originals = statuses.reject(&:reblog?)
+    reblogs   = statuses.reject { |s| !s.reblog? }
+
+    # Quickly remove all originals
+    redis.pipelined do
+      originals.each do |status|
+        redis.zremrangebyscore(key, status.id, status.id)
+        redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
+      end
+    end
+
+    # For reblogs, re-add original status to feed, unless the reblog
+    # was not in the feed in the first place
+    reblogs.each do |status|
+      redis.zadd(key, status.reblog_of_id, status.reblog_of_id) unless redis.zscore(key, status.reblog_of_id).nil?
+      redis.publish("timeline:#{follower_id}", @json_payloads[status.id])
+    end
+  end
+
+  def redis
+    Redis.current
+  end
+end
diff --git a/app/services/suspend_account_service.rb b/app/services/suspend_account_service.rb
index 842500259..1e3a51e4e 100644
--- a/app/services/suspend_account_service.rb
+++ b/app/services/suspend_account_service.rb
@@ -17,9 +17,8 @@ class SuspendAccountService < BaseService
   end
 
   def purge_content
-    @account.statuses.reorder(nil).find_each do |status|
-      # This federates out deletes to previous followers
-      RemoveStatusService.new.call(status)
+    @account.statuses.reorder(nil).find_in_batches do |statuses|
+      BatchedRemoveStatusService.new.call(statuses)
     end
 
     [
diff --git a/app/workers/pubsubhubbub/distribution_worker.rb b/app/workers/pubsubhubbub/distribution_worker.rb
index b8f5c35e1..da01dcd91 100644
--- a/app/workers/pubsubhubbub/distribution_worker.rb
+++ b/app/workers/pubsubhubbub/distribution_worker.rb
@@ -5,24 +5,44 @@ class Pubsubhubbub::DistributionWorker
 
   sidekiq_options queue: 'push'
 
-  def perform(stream_entry_id)
-    stream_entry = StreamEntry.find(stream_entry_id)
+  def perform(stream_entry_ids)
+    stream_entries = StreamEntry.where(id: stream_entry_ids).includes(:status).reject { |e| e.status&.direct_visibility? }
 
-    return if stream_entry.status&.direct_visibility?
+    return if stream_entries.empty?
 
-    @account = stream_entry.account
-    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, [stream_entry]))
+    @account       = stream_entries.first.account
+    @subscriptions = active_subscriptions.to_a
+
+    distribute_public!(stream_entries.reject(&:hidden?))
+    distribute_hidden!(stream_entries.reject { |s| !s.hidden? })
+  end
+
+  private
+
+  def distribute_public!(stream_entries)
+    return if stream_entries.empty?
+
+    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
+
+    Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions) do |subscription|
+      [subscription.id, @payload]
+    end
+  end
+
+  def distribute_hidden!(stream_entries)
+    return if stream_entries.empty?
+
+    @payload = AtomSerializer.render(AtomSerializer.new.feed(@account, stream_entries))
     @domains = @account.followers_domains
 
-    Subscription.where(account: @account).active.select('id, callback_url').find_each do |subscription|
-      next if stream_entry.hidden? && !allowed_to_receive?(subscription.callback_url)
-      Pubsubhubbub::DeliveryWorker.perform_async(subscription.id, @payload)
+    Pubsubhubbub::DeliveryWorker.push_bulk(@subscriptions.reject { |s| !allowed_to_receive?(s.callback_url) }) do |subscription|
+      [subscription.id, @payload]
     end
-  rescue ActiveRecord::RecordNotFound
-    true
   end
 
-  private
+  def active_subscriptions
+    Subscription.where(account: @account).active.select('id, callback_url')
+  end
 
   def allowed_to_receive?(callback_url)
     @domains.include?(Addressable::URI.parse(callback_url).host)
diff --git a/spec/services/batched_remove_status_service_spec.rb b/spec/services/batched_remove_status_service_spec.rb
new file mode 100644
index 000000000..c20085e25
--- /dev/null
+++ b/spec/services/batched_remove_status_service_spec.rb
@@ -0,0 +1,61 @@
+require 'rails_helper'
+
+RSpec.describe BatchedRemoveStatusService do
+  subject { BatchedRemoveStatusService.new }
+
+  let!(:alice)  { Fabricate(:account) }
+  let!(:bob)    { Fabricate(:account, username: 'bob', domain: 'example.com', salmon_url: 'http://example.com/salmon') }
+  let!(:jeff)   { Fabricate(:account) }
+
+  let(:status1) { PostStatusService.new.call(alice, 'Hello @bob@example.com') }
+  let(:status2) { PostStatusService.new.call(alice, 'Another status') }
+
+  before do
+    allow(Redis.current).to receive_messages(publish: nil)
+
+    stub_request(:post, 'http://example.com/push').to_return(status: 200, body: '', headers: {})
+    stub_request(:post, 'http://example.com/salmon').to_return(status: 200, body: '', headers: {})
+
+    Fabricate(:subscription, account: alice, callback_url: 'http://example.com/push', confirmed: true, expires_at: 30.days.from_now)
+    jeff.follow!(alice)
+
+    status1
+    status2
+
+    subject.call([status1, status2])
+  end
+
+  it 'removes statuses from author\'s home feed' do
+    expect(Feed.new(:home, alice).get(10)).to_not include([status1.id, status2.id])
+  end
+
+  it 'removes statuses from local follower\'s home feed' do
+    expect(Feed.new(:home, jeff).get(10)).to_not include([status1.id, status2.id])
+  end
+
+  it 'notifies streaming API of followers' do
+    expect(Redis.current).to have_received(:publish).with("timeline:#{jeff.id}", any_args).at_least(:once)
+  end
+
+  it 'notifies streaming API of author' do
+    expect(Redis.current).to have_received(:publish).with("timeline:#{alice.id}", any_args).at_least(:once)
+  end
+
+  it 'notifies streaming API of public timeline' do
+    expect(Redis.current).to have_received(:publish).with('timeline:public', any_args).at_least(:once)
+  end
+
+  it 'sends PuSH update to PuSH subscribers with two payloads united' do
+    expect(a_request(:post, 'http://example.com/push').with { |req|
+      matches = req.body.scan(TagManager::VERBS[:delete])
+      matches.size == 2
+    }).to have_been_made
+  end
+
+  it 'sends Salmon slap to previously mentioned users' do
+    expect(a_request(:post, "http://example.com/salmon").with { |req|
+      xml = OStatus2::Salmon.new.unpack(req.body)
+      xml.match(TagManager::VERBS[:delete])
+    }).to have_been_made.once
+  end
+end
diff --git a/spec/workers/pubsubhubbub/distribution_worker_spec.rb b/spec/workers/pubsubhubbub/distribution_worker_spec.rb
index 512adb04c..89191c084 100644
--- a/spec/workers/pubsubhubbub/distribution_worker_spec.rb
+++ b/spec/workers/pubsubhubbub/distribution_worker_spec.rb
@@ -16,10 +16,9 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :public) }
 
     it 'delivers payload to all subscriptions' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+      expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([anonymous_subscription, subscription_with_follower])
     end
   end
 
@@ -27,10 +26,10 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :private) }
 
     it 'delivers payload only to subscriptions with followers' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to have_received(:perform_async).with(subscription_with_follower.id, /.*/)
-      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async).with(anonymous_subscription.id, /.*/)
+      expect(Pubsubhubbub::DeliveryWorker).to have_received(:push_bulk).with([subscription_with_follower])
+      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk).with([anonymous_subscription])
     end
   end
 
@@ -38,9 +37,9 @@ describe Pubsubhubbub::DistributionWorker do
     let(:status) { Fabricate(:status, account: alice, text: 'Hello', visibility: :direct) }
 
     it 'does not deliver payload' do
-      allow(Pubsubhubbub::DeliveryWorker).to receive(:perform_async)
+      allow(Pubsubhubbub::DeliveryWorker).to receive(:push_bulk)
       subject.perform(status.stream_entry.id)
-      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:perform_async)
+      expect(Pubsubhubbub::DeliveryWorker).to_not have_received(:push_bulk)
     end
   end
 end