From b1feb47055c121c1b6949bd7ef6734bf56c847bd Mon Sep 17 00:00:00 2001 From: ThibG Date: Thu, 17 Dec 2020 06:51:49 +0100 Subject: Improve searching for private toots from URL (#14856) * Improve searching for private toots from URL Most of the time, when sharing toots, people use the toot URL rather than the toot URI, which makes sense since it is the user-facing URL. In Mastodon's case, the URL and URI are different, and Mastodon does not have an index on URL, which means searching a private toot by URL is done with a slow query that will only succeed for very recent toots. This change gets rid of the slow query, and attempts to guess the URI from URL instead, as Mastodon's are predictable. * Add tests * Only return status with guessed uri if url matches Co-authored-by: Claire --- spec/services/resolve_url_service_spec.rb | 97 +++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'spec/services') diff --git a/spec/services/resolve_url_service_spec.rb b/spec/services/resolve_url_service_spec.rb index aa4204637..a38b23590 100644 --- a/spec/services/resolve_url_service_spec.rb +++ b/spec/services/resolve_url_service_spec.rb @@ -15,5 +15,102 @@ describe ResolveURLService, type: :service do expect(subject.call(url)).to be_nil end + + context 'searching for a remote private status' do + let(:account) { Fabricate(:account) } + let(:poster) { Fabricate(:account, domain: 'example.com') } + let(:url) { 'https://example.com/@foo/42' } + let(:uri) { 'https://example.com/users/foo/statuses/42' } + let!(:status) { Fabricate(:status, url: url, uri: uri, account: poster, visibility: :private) } + + before do + stub_request(:get, url).to_return(status: 404) if url.present? + stub_request(:get, uri).to_return(status: 404) + end + + context 'when the account follows the poster' do + before do + account.follow!(poster) + end + + context 'when the status uses Mastodon-style URLs' do + let(:url) { 'https://example.com/@foo/42' } + let(:uri) { 'https://example.com/users/foo/statuses/42' } + + it 'returns status by url' do + expect(subject.call(url, on_behalf_of: account)).to eq(status) + end + + it 'returns status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to eq(status) + end + end + + context 'when the status uses pleroma-style URLs' do + let(:url) { nil } + let(:uri) { 'https://example.com/objects/0123-456-789-abc-def' } + + it 'returns status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to eq(status) + end + end + end + + context 'when the account does not follow the poster' do + context 'when the status uses Mastodon-style URLs' do + let(:url) { 'https://example.com/@foo/42' } + let(:uri) { 'https://example.com/users/foo/statuses/42' } + + it 'does not return the status by url' do + expect(subject.call(url, on_behalf_of: account)).to be_nil + end + + it 'does not return the status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to be_nil + end + end + + context 'when the status uses pleroma-style URLs' do + let(:url) { nil } + let(:uri) { 'https://example.com/objects/0123-456-789-abc-def' } + + it 'returns status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to be_nil + end + end + end + end + + context 'searching for a local private status' do + let(:account) { Fabricate(:account) } + let(:poster) { Fabricate(:account) } + let!(:status) { Fabricate(:status, account: poster, visibility: :private) } + let(:url) { ActivityPub::TagManager.instance.url_for(status) } + let(:uri) { ActivityPub::TagManager.instance.uri_for(status) } + + context 'when the account follows the poster' do + before do + account.follow!(poster) + end + + it 'returns status by url' do + expect(subject.call(url, on_behalf_of: account)).to eq(status) + end + + it 'returns status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to eq(status) + end + end + + context 'when the account does not follow the poster' do + it 'does not return the status by url' do + expect(subject.call(url, on_behalf_of: account)).to be_nil + end + + it 'does not return the status by uri' do + expect(subject.call(uri, on_behalf_of: account)).to be_nil + end + end + end end end -- cgit From a60d9335d8e7c4aa070f081719ee2a438b0e0202 Mon Sep 17 00:00:00 2001 From: ThibG Date: Fri, 18 Dec 2020 23:26:26 +0100 Subject: Fix resolving accounts sometimes creating duplicate records for a given AP id (#15364) * Fix ResolveAccountService accepting mismatching acct: URI * Set attributes that should be updated regardless of suspension * Fix key fetching * Automatically merge remote accounts with duplicate `uri` * Add tests * Add "tootctl accounts fix-duplicates" Finds duplicate accounts sharing a same ActivityPub `id`, re-fetch them and merge them under the canonical `acct:` URI. Co-authored-by: Claire --- .../activitypub/fetch_remote_account_service.rb | 2 +- .../activitypub/process_account_service.rb | 28 ++++++++--- app/services/resolve_account_service.rb | 17 ++----- app/workers/account_merging_worker.rb | 18 +++++++ lib/mastodon/accounts_cli.rb | 19 ++++++++ spec/services/resolve_account_service_spec.rb | 56 ++++++++++++++++++++-- 6 files changed, 116 insertions(+), 24 deletions(-) create mode 100644 app/workers/account_merging_worker.rb (limited to 'spec/services') diff --git a/app/services/activitypub/fetch_remote_account_service.rb b/app/services/activitypub/fetch_remote_account_service.rb index e5bd0c47c..9d01f5386 100644 --- a/app/services/activitypub/fetch_remote_account_service.rb +++ b/app/services/activitypub/fetch_remote_account_service.rb @@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService return unless only_key || verified_webfinger? - ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key) + ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key) rescue Oj::ParseError nil end diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index 4cb8e09db..6afeb92d6 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -28,6 +28,8 @@ class ActivityPub::ProcessAccountService < BaseService update_account process_tags process_attachments + + process_duplicate_accounts! if @options[:verified_webfinger] else raise Mastodon::RaceConditionError end @@ -69,34 +71,42 @@ class ActivityPub::ProcessAccountService < BaseService @account.protocol = :activitypub set_suspension! + set_immediate_protocol_attributes! + set_fetchable_key! unless @account.suspended? && @account.suspension_origin_local? set_immediate_attributes! unless @account.suspended? - set_fetchable_attributes! unless @options[:only_keys] || @account.suspended? + set_fetchable_attributes! unless @options[:only_key] || @account.suspended? @account.save_with_optional_media! end - def set_immediate_attributes! + def set_immediate_protocol_attributes! @account.inbox_url = @json['inbox'] || '' @account.outbox_url = @json['outbox'] || '' @account.shared_inbox_url = (@json['endpoints'].is_a?(Hash) ? @json['endpoints']['sharedInbox'] : @json['sharedInbox']) || '' @account.followers_url = @json['followers'] || '' - @account.featured_collection_url = @json['featured'] || '' - @account.devices_url = @json['devices'] || '' @account.url = url || @uri @account.uri = @uri + @account.actor_type = actor_type + end + + def set_immediate_attributes! + @account.featured_collection_url = @json['featured'] || '' + @account.devices_url = @json['devices'] || '' @account.display_name = @json['name'] || '' @account.note = @json['summary'] || '' @account.locked = @json['manuallyApprovesFollowers'] || false @account.fields = property_values || {} @account.also_known_as = as_array(@json['alsoKnownAs'] || []).map { |item| value_or_id(item) } - @account.actor_type = actor_type @account.discoverable = @json['discoverable'] || false end + def set_fetchable_key! + @account.public_key = public_key || '' + end + def set_fetchable_attributes! @account.avatar_remote_url = image_url('icon') || '' unless skip_download? @account.header_remote_url = image_url('image') || '' unless skip_download? - @account.public_key = public_key || '' @account.statuses_count = outbox_total_items if outbox_total_items.present? @account.following_count = following_total_items if following_total_items.present? @account.followers_count = followers_total_items if followers_total_items.present? @@ -140,6 +150,12 @@ class ActivityPub::ProcessAccountService < BaseService VerifyAccountLinksWorker.perform_async(@account.id) end + def process_duplicate_accounts! + return unless Account.where(uri: @account.uri).where.not(id: @account.id).exists? + + AccountMergingWorker.perform_async(@account.id) + end + def actor_type if @json['type'].is_a?(Array) @json['type'].find { |type| ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES.include?(type) } diff --git a/app/services/resolve_account_service.rb b/app/services/resolve_account_service.rb index 74b0b82d0..3301aaf51 100644 --- a/app/services/resolve_account_service.rb +++ b/app/services/resolve_account_service.rb @@ -49,7 +49,7 @@ class ResolveAccountService < BaseService # Now it is certain, it is definitely a remote account, and it # either needs to be created, or updated from fresh data - process_account! + fetch_account! rescue Webfinger::Error, Oj::ParseError => e Rails.logger.debug "Webfinger query for #{@uri} failed: #{e}" nil @@ -104,16 +104,12 @@ class ResolveAccountService < BaseService acct.gsub(/\Aacct:/, '').split('@') end - def process_account! + def fetch_account! return unless activitypub_ready? RedisLock.acquire(lock_options) do |lock| if lock.acquired? - @account = Account.find_remote(@username, @domain) - - next if actor_json.nil? - - @account = ActivityPub::ProcessAccountService.new.call(@username, @domain, actor_json) + @account = ActivityPub::FetchRemoteAccountService.new.call(actor_url) else raise Mastodon::RaceConditionError end @@ -136,13 +132,6 @@ class ResolveAccountService < BaseService @actor_url ||= @webfinger.link('self', 'href') end - def actor_json - return @actor_json if defined?(@actor_json) - - json = fetch_resource(actor_url, false) - @actor_json = supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) ? json : nil - end - def gone_from_origin? @gone end diff --git a/app/workers/account_merging_worker.rb b/app/workers/account_merging_worker.rb new file mode 100644 index 000000000..8c234e7ac --- /dev/null +++ b/app/workers/account_merging_worker.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class AccountMergingWorker + include Sidekiq::Worker + + sidekiq_options queue: 'pull' + + def perform(account_id) + account = Account.find(account_id) + + return true if account.nil? || account.local? + + Account.where(uri: account.uri).where.not(id: account.id).find_each do |duplicate| + account.merge_with!(duplicate) + duplicate.destroy + end + end +end diff --git a/lib/mastodon/accounts_cli.rb b/lib/mastodon/accounts_cli.rb index bef4093a8..474643869 100644 --- a/lib/mastodon/accounts_cli.rb +++ b/lib/mastodon/accounts_cli.rb @@ -236,6 +236,25 @@ module Mastodon say('OK', :green) end + desc 'fix-duplicates', 'Find duplicate remote accounts and merge them' + option :dry_run, type: :boolean + long_desc <<-LONG_DESC + Merge known remote accounts sharing an ActivityPub actor identifier. + + Such duplicates can occur when a remote server admin misconfigures their + domain configuration. + LONG_DESC + def fix_duplicates + Account.remote.select(:uri, 'count(*)').group(:uri).having('count(*) > 1').pluck_each(:uri) do |uri| + say("Duplicates found for #{uri}") + begin + ActivityPub::FetchRemotAccountService.new.call(uri) unless options[:dry_run] + rescue => e + say("Error processing #{uri}: #{e}", :red) + end + end + end + desc 'backup USERNAME', 'Request a backup for a user' long_desc <<-LONG_DESC Request a new backup for an account with a given USERNAME. diff --git a/spec/services/resolve_account_service_spec.rb b/spec/services/resolve_account_service_spec.rb index 5bd0ec264..a604e90b5 100644 --- a/spec/services/resolve_account_service_spec.rb +++ b/spec/services/resolve_account_service_spec.rb @@ -60,7 +60,22 @@ RSpec.describe ResolveAccountService, type: :service do context 'with a legitimate webfinger redirection' do before do - webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } + stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) + end + + it 'returns new remote account' do + account = subject.call('Foo@redirected.example.com') + + expect(account.activitypub?).to eq true + expect(account.acct).to eq 'foo@ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + end + end + + context 'with a misconfigured redirection' do + before do + webfinger = { subject: 'acct:Foo@redirected.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) end @@ -75,9 +90,9 @@ RSpec.describe ResolveAccountService, type: :service do context 'with too many webfinger redirections' do before do - webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) - webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://evil.example.com/.well-known/webfinger?resource=acct:foo@evil.example.com').to_return(body: Oj.dump(webfinger2), headers: { 'Content-Type': 'application/jrd+json' }) end @@ -111,6 +126,41 @@ RSpec.describe ResolveAccountService, type: :service do end end + context 'with an already-known actor changing acct: URI' do + let!(:duplicate) { Fabricate(:account, username: 'foo', domain: 'old.example.com', uri: 'https://ap.example.com/users/foo') } + let!(:status) { Fabricate(:status, account: duplicate, text: 'foo') } + + it 'returns new remote account' do + account = subject.call('foo@ap.example.com') + + expect(account.activitypub?).to eq true + expect(account.domain).to eq 'ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + expect(account.uri).to eq 'https://ap.example.com/users/foo' + end + + it 'merges accounts' do + account = subject.call('foo@ap.example.com') + + expect(status.reload.account_id).to eq account.id + expect(Account.where(uri: account.uri).count).to eq 1 + end + end + + context 'with an already-known acct: URI changing ActivityPub id' do + let!(:old_account) { Fabricate(:account, username: 'foo', domain: 'ap.example.com', uri: 'https://old.example.com/users/foo', last_webfingered_at: nil) } + let!(:status) { Fabricate(:status, account: old_account, text: 'foo') } + + it 'returns new remote account' do + account = subject.call('foo@ap.example.com') + + expect(account.activitypub?).to eq true + expect(account.domain).to eq 'ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + expect(account.uri).to eq 'https://ap.example.com/users/foo' + end + end + it 'processes one remote account at a time using locks' do wait_for_start = true fail_occurred = false -- cgit