From b1feb47055c121c1b6949bd7ef6734bf56c847bd Mon Sep 17 00:00:00 2001 From: ThibG Date: Thu, 17 Dec 2020 06:51:49 +0100 Subject: Improve searching for private toots from URL (#14856) * Improve searching for private toots from URL Most of the time, when sharing toots, people use the toot URL rather than the toot URI, which makes sense since it is the user-facing URL. In Mastodon's case, the URL and URI are different, and Mastodon does not have an index on URL, which means searching a private toot by URL is done with a slow query that will only succeed for very recent toots. This change gets rid of the slow query, and attempts to guess the URI from URL instead, as Mastodon's are predictable. * Add tests * Only return status with guessed uri if url matches Co-authored-by: Claire --- app/services/resolve_url_service.rb | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'app/services') diff --git a/app/services/resolve_url_service.rb b/app/services/resolve_url_service.rb index 78080d878..5981e4d98 100644 --- a/app/services/resolve_url_service.rb +++ b/app/services/resolve_url_service.rb @@ -34,7 +34,17 @@ class ResolveURLService < BaseService # It may happen that the resource is a private toot, and thus not fetchable, # but we can return the toot if we already know about it. - status = Status.find_by(uri: @url) || Status.find_by(url: @url) + scope = Status.where(uri: @url) + + # We don't have an index on `url`, so try guessing the `uri` from `url` + parsed_url = Addressable::URI.parse(@url) + parsed_url.path.match(%r{/@(?#{Account::USERNAME_RE})/(?[0-9]+)\Z}) do |matched| + parsed_url.path = "/users/#{matched[:username]}/statuses/#{matched[:status_id]}" + scope = scope.or(Status.where(uri: parsed_url.to_s, url: @url)) + end + + status = scope.first + authorize_with @on_behalf_of, status, :show? unless status.nil? status rescue Mastodon::NotPermittedError -- cgit From eb35be0431b2cdd2bbf3339beb9c5a0839e1088b Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 18 Dec 2020 09:18:31 +0100 Subject: Fix follow limit preventing re-following of a moved account (#14207) --- app/models/follow.rb | 2 +- app/models/follow_request.rb | 2 +- app/models/import.rb | 1 + app/services/import_service.rb | 7 ++---- app/validators/import_validator.rb | 44 ++++++++++++++++++++++++++++++++++++++ config/locales/en.yml | 2 ++ spec/models/follow_spec.rb | 2 +- spec/models/import_spec.rb | 10 +++++++++ 8 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 app/validators/import_validator.rb (limited to 'app/services') diff --git a/app/models/follow.rb b/app/models/follow.rb index 55a9da792..69a1722b3 100644 --- a/app/models/follow.rb +++ b/app/models/follow.rb @@ -26,7 +26,7 @@ class Follow < ApplicationRecord has_one :notification, as: :activity, dependent: :destroy validates :account_id, uniqueness: { scope: :target_account_id } - validates_with FollowLimitValidator, on: :create + validates_with FollowLimitValidator, on: :create, if: :rate_limit? scope :recent, -> { reorder(id: :desc) } diff --git a/app/models/follow_request.rb b/app/models/follow_request.rb index c1f19149b..2d2a77b59 100644 --- a/app/models/follow_request.rb +++ b/app/models/follow_request.rb @@ -26,7 +26,7 @@ class FollowRequest < ApplicationRecord has_one :notification, as: :activity, dependent: :destroy validates :account_id, uniqueness: { scope: :target_account_id } - validates_with FollowLimitValidator, on: :create + validates_with FollowLimitValidator, on: :create, if: :rate_limit? def authorize! account.follow!(target_account, reblogs: show_reblogs, notify: notify, uri: uri) diff --git a/app/models/import.rb b/app/models/import.rb index 702453289..00a54892e 100644 --- a/app/models/import.rb +++ b/app/models/import.rb @@ -27,6 +27,7 @@ class Import < ApplicationRecord enum type: [:following, :blocking, :muting, :domain_blocking, :bookmarks] validates :type, presence: true + validates_with ImportValidator, on: :create has_attached_file :data validates_attachment_content_type :data, content_type: FILE_TYPES diff --git a/app/services/import_service.rb b/app/services/import_service.rb index 288e47f1e..0c6ef2238 100644 --- a/app/services/import_service.rb +++ b/app/services/import_service.rb @@ -27,7 +27,7 @@ class ImportService < BaseService def import_follows! parse_import_data!(['Account address']) - import_relationships!('follow', 'unfollow', @account.following, follow_limit, reblogs: { header: 'Show boosts', default: true }) + import_relationships!('follow', 'unfollow', @account.following, ROWS_PROCESSING_LIMIT, reblogs: { header: 'Show boosts', default: true }) end def import_blocks! @@ -85,6 +85,7 @@ class ImportService < BaseService head_items = items.uniq { |acct, _| acct.split('@')[1] } tail_items = items - head_items + Import::RelationshipWorker.push_bulk(head_items + tail_items) do |acct, extra| [@account.id, acct, action, extra] end @@ -133,10 +134,6 @@ class ImportService < BaseService Paperclip.io_adapters.for(@import.data).read end - def follow_limit - FollowLimitValidator.limit_for_account(@account) - end - def relations_map_for_account(account, account_ids) { blocking: {}, diff --git a/app/validators/import_validator.rb b/app/validators/import_validator.rb new file mode 100644 index 000000000..a182abfa5 --- /dev/null +++ b/app/validators/import_validator.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +class ImportValidator < ActiveModel::Validator + KNOWN_HEADERS = [ + 'Account address', + '#domain', + '#uri', + ].freeze + + def validate(import) + return if import.type.blank? || import.data.blank? + + # We parse because newlines could be part of individual rows. This + # runs on create so we should be reading the local file here before + # it is uploaded to object storage or moved anywhere... + csv_data = CSV.parse(import.data.queued_for_write[:original].read) + + row_count = csv_data.size + row_count -= 1 if KNOWN_HEADERS.include?(csv_data.first&.first) + + import.errors.add(:data, I18n.t('imports.errors.over_rows_processing_limit', count: ImportService::ROWS_PROCESSING_LIMIT)) if row_count > ImportService::ROWS_PROCESSING_LIMIT + + case import.type + when 'following' + validate_following_import(import, row_count) + end + end + + private + + def validate_following_import(import, row_count) + base_limit = FollowLimitValidator.limit_for_account(import.account) + + limit = begin + if import.overwrite? + base_limit + else + base_limit - import.account.following_count + end + end + + import.errors.add(:data, I18n.t('users.follow_limit_reached', limit: base_limit)) if row_count > limit + end +end diff --git a/config/locales/en.yml b/config/locales/en.yml index e91f4344f..18a207f5e 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -923,6 +923,8 @@ en: status: Verification status view_proof: View proof imports: + errors: + over_rows_processing_limit: contains more than %{count} rows modes: merge: Merge merge_long: Keep existing records and add new ones diff --git a/spec/models/follow_spec.rb b/spec/models/follow_spec.rb index 0c84e5e7b..e723a1ef2 100644 --- a/spec/models/follow_spec.rb +++ b/spec/models/follow_spec.rb @@ -5,7 +5,7 @@ RSpec.describe Follow, type: :model do let(:bob) { Fabricate(:account, username: 'bob') } describe 'validations' do - subject { Follow.new(account: alice, target_account: bob) } + subject { Follow.new(account: alice, target_account: bob, rate_limit: true) } it 'has a valid fabricator' do follow = Fabricate.build(:follow) diff --git a/spec/models/import_spec.rb b/spec/models/import_spec.rb index 321761166..a5eec1722 100644 --- a/spec/models/import_spec.rb +++ b/spec/models/import_spec.rb @@ -20,5 +20,15 @@ RSpec.describe Import, type: :model do import = Import.create(account: account, type: type) expect(import).to model_have_error_on_field(:data) end + + it 'is invalid with too many rows in data' do + import = Import.create(account: account, type: type, data: StringIO.new("foo@bar.com\n" * (ImportService::ROWS_PROCESSING_LIMIT + 10))) + expect(import).to model_have_error_on_field(:data) + end + + it 'is invalid when there are more rows when following limit' do + import = Import.create(account: account, type: type, data: StringIO.new("foo@bar.com\n" * (FollowLimitValidator.limit_for_account(account) + 10))) + expect(import).to model_have_error_on_field(:data) + end end end -- cgit From a60d9335d8e7c4aa070f081719ee2a438b0e0202 Mon Sep 17 00:00:00 2001 From: ThibG Date: Fri, 18 Dec 2020 23:26:26 +0100 Subject: Fix resolving accounts sometimes creating duplicate records for a given AP id (#15364) * Fix ResolveAccountService accepting mismatching acct: URI * Set attributes that should be updated regardless of suspension * Fix key fetching * Automatically merge remote accounts with duplicate `uri` * Add tests * Add "tootctl accounts fix-duplicates" Finds duplicate accounts sharing a same ActivityPub `id`, re-fetch them and merge them under the canonical `acct:` URI. Co-authored-by: Claire --- .../activitypub/fetch_remote_account_service.rb | 2 +- .../activitypub/process_account_service.rb | 28 ++++++++--- app/services/resolve_account_service.rb | 17 ++----- app/workers/account_merging_worker.rb | 18 +++++++ lib/mastodon/accounts_cli.rb | 19 ++++++++ spec/services/resolve_account_service_spec.rb | 56 ++++++++++++++++++++-- 6 files changed, 116 insertions(+), 24 deletions(-) create mode 100644 app/workers/account_merging_worker.rb (limited to 'app/services') diff --git a/app/services/activitypub/fetch_remote_account_service.rb b/app/services/activitypub/fetch_remote_account_service.rb index e5bd0c47c..9d01f5386 100644 --- a/app/services/activitypub/fetch_remote_account_service.rb +++ b/app/services/activitypub/fetch_remote_account_service.rb @@ -28,7 +28,7 @@ class ActivityPub::FetchRemoteAccountService < BaseService return unless only_key || verified_webfinger? - ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key) + ActivityPub::ProcessAccountService.new.call(@username, @domain, @json, only_key: only_key, verified_webfinger: !only_key) rescue Oj::ParseError nil end diff --git a/app/services/activitypub/process_account_service.rb b/app/services/activitypub/process_account_service.rb index 4cb8e09db..6afeb92d6 100644 --- a/app/services/activitypub/process_account_service.rb +++ b/app/services/activitypub/process_account_service.rb @@ -28,6 +28,8 @@ class ActivityPub::ProcessAccountService < BaseService update_account process_tags process_attachments + + process_duplicate_accounts! if @options[:verified_webfinger] else raise Mastodon::RaceConditionError end @@ -69,34 +71,42 @@ class ActivityPub::ProcessAccountService < BaseService @account.protocol = :activitypub set_suspension! + set_immediate_protocol_attributes! + set_fetchable_key! unless @account.suspended? && @account.suspension_origin_local? set_immediate_attributes! unless @account.suspended? - set_fetchable_attributes! unless @options[:only_keys] || @account.suspended? + set_fetchable_attributes! unless @options[:only_key] || @account.suspended? @account.save_with_optional_media! end - def set_immediate_attributes! + def set_immediate_protocol_attributes! @account.inbox_url = @json['inbox'] || '' @account.outbox_url = @json['outbox'] || '' @account.shared_inbox_url = (@json['endpoints'].is_a?(Hash) ? @json['endpoints']['sharedInbox'] : @json['sharedInbox']) || '' @account.followers_url = @json['followers'] || '' - @account.featured_collection_url = @json['featured'] || '' - @account.devices_url = @json['devices'] || '' @account.url = url || @uri @account.uri = @uri + @account.actor_type = actor_type + end + + def set_immediate_attributes! + @account.featured_collection_url = @json['featured'] || '' + @account.devices_url = @json['devices'] || '' @account.display_name = @json['name'] || '' @account.note = @json['summary'] || '' @account.locked = @json['manuallyApprovesFollowers'] || false @account.fields = property_values || {} @account.also_known_as = as_array(@json['alsoKnownAs'] || []).map { |item| value_or_id(item) } - @account.actor_type = actor_type @account.discoverable = @json['discoverable'] || false end + def set_fetchable_key! + @account.public_key = public_key || '' + end + def set_fetchable_attributes! @account.avatar_remote_url = image_url('icon') || '' unless skip_download? @account.header_remote_url = image_url('image') || '' unless skip_download? - @account.public_key = public_key || '' @account.statuses_count = outbox_total_items if outbox_total_items.present? @account.following_count = following_total_items if following_total_items.present? @account.followers_count = followers_total_items if followers_total_items.present? @@ -140,6 +150,12 @@ class ActivityPub::ProcessAccountService < BaseService VerifyAccountLinksWorker.perform_async(@account.id) end + def process_duplicate_accounts! + return unless Account.where(uri: @account.uri).where.not(id: @account.id).exists? + + AccountMergingWorker.perform_async(@account.id) + end + def actor_type if @json['type'].is_a?(Array) @json['type'].find { |type| ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES.include?(type) } diff --git a/app/services/resolve_account_service.rb b/app/services/resolve_account_service.rb index 74b0b82d0..3301aaf51 100644 --- a/app/services/resolve_account_service.rb +++ b/app/services/resolve_account_service.rb @@ -49,7 +49,7 @@ class ResolveAccountService < BaseService # Now it is certain, it is definitely a remote account, and it # either needs to be created, or updated from fresh data - process_account! + fetch_account! rescue Webfinger::Error, Oj::ParseError => e Rails.logger.debug "Webfinger query for #{@uri} failed: #{e}" nil @@ -104,16 +104,12 @@ class ResolveAccountService < BaseService acct.gsub(/\Aacct:/, '').split('@') end - def process_account! + def fetch_account! return unless activitypub_ready? RedisLock.acquire(lock_options) do |lock| if lock.acquired? - @account = Account.find_remote(@username, @domain) - - next if actor_json.nil? - - @account = ActivityPub::ProcessAccountService.new.call(@username, @domain, actor_json) + @account = ActivityPub::FetchRemoteAccountService.new.call(actor_url) else raise Mastodon::RaceConditionError end @@ -136,13 +132,6 @@ class ResolveAccountService < BaseService @actor_url ||= @webfinger.link('self', 'href') end - def actor_json - return @actor_json if defined?(@actor_json) - - json = fetch_resource(actor_url, false) - @actor_json = supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) ? json : nil - end - def gone_from_origin? @gone end diff --git a/app/workers/account_merging_worker.rb b/app/workers/account_merging_worker.rb new file mode 100644 index 000000000..8c234e7ac --- /dev/null +++ b/app/workers/account_merging_worker.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +class AccountMergingWorker + include Sidekiq::Worker + + sidekiq_options queue: 'pull' + + def perform(account_id) + account = Account.find(account_id) + + return true if account.nil? || account.local? + + Account.where(uri: account.uri).where.not(id: account.id).find_each do |duplicate| + account.merge_with!(duplicate) + duplicate.destroy + end + end +end diff --git a/lib/mastodon/accounts_cli.rb b/lib/mastodon/accounts_cli.rb index bef4093a8..474643869 100644 --- a/lib/mastodon/accounts_cli.rb +++ b/lib/mastodon/accounts_cli.rb @@ -236,6 +236,25 @@ module Mastodon say('OK', :green) end + desc 'fix-duplicates', 'Find duplicate remote accounts and merge them' + option :dry_run, type: :boolean + long_desc <<-LONG_DESC + Merge known remote accounts sharing an ActivityPub actor identifier. + + Such duplicates can occur when a remote server admin misconfigures their + domain configuration. + LONG_DESC + def fix_duplicates + Account.remote.select(:uri, 'count(*)').group(:uri).having('count(*) > 1').pluck_each(:uri) do |uri| + say("Duplicates found for #{uri}") + begin + ActivityPub::FetchRemotAccountService.new.call(uri) unless options[:dry_run] + rescue => e + say("Error processing #{uri}: #{e}", :red) + end + end + end + desc 'backup USERNAME', 'Request a backup for a user' long_desc <<-LONG_DESC Request a new backup for an account with a given USERNAME. diff --git a/spec/services/resolve_account_service_spec.rb b/spec/services/resolve_account_service_spec.rb index 5bd0ec264..a604e90b5 100644 --- a/spec/services/resolve_account_service_spec.rb +++ b/spec/services/resolve_account_service_spec.rb @@ -60,7 +60,22 @@ RSpec.describe ResolveAccountService, type: :service do context 'with a legitimate webfinger redirection' do before do - webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } + stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) + end + + it 'returns new remote account' do + account = subject.call('Foo@redirected.example.com') + + expect(account.activitypub?).to eq true + expect(account.acct).to eq 'foo@ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + end + end + + context 'with a misconfigured redirection' do + before do + webfinger = { subject: 'acct:Foo@redirected.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) end @@ -75,9 +90,9 @@ RSpec.describe ResolveAccountService, type: :service do context 'with too many webfinger redirections' do before do - webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger = { subject: 'acct:foo@evil.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://redirected.example.com/.well-known/webfinger?resource=acct:Foo@redirected.example.com').to_return(body: Oj.dump(webfinger), headers: { 'Content-Type': 'application/jrd+json' }) - webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo' }] } + webfinger2 = { subject: 'acct:foo@ap.example.com', links: [{ rel: 'self', href: 'https://ap.example.com/users/foo', type: 'application/activity+json' }] } stub_request(:get, 'https://evil.example.com/.well-known/webfinger?resource=acct:foo@evil.example.com').to_return(body: Oj.dump(webfinger2), headers: { 'Content-Type': 'application/jrd+json' }) end @@ -111,6 +126,41 @@ RSpec.describe ResolveAccountService, type: :service do end end + context 'with an already-known actor changing acct: URI' do + let!(:duplicate) { Fabricate(:account, username: 'foo', domain: 'old.example.com', uri: 'https://ap.example.com/users/foo') } + let!(:status) { Fabricate(:status, account: duplicate, text: 'foo') } + + it 'returns new remote account' do + account = subject.call('foo@ap.example.com') + + expect(account.activitypub?).to eq true + expect(account.domain).to eq 'ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + expect(account.uri).to eq 'https://ap.example.com/users/foo' + end + + it 'merges accounts' do + account = subject.call('foo@ap.example.com') + + expect(status.reload.account_id).to eq account.id + expect(Account.where(uri: account.uri).count).to eq 1 + end + end + + context 'with an already-known acct: URI changing ActivityPub id' do + let!(:old_account) { Fabricate(:account, username: 'foo', domain: 'ap.example.com', uri: 'https://old.example.com/users/foo', last_webfingered_at: nil) } + let!(:status) { Fabricate(:status, account: old_account, text: 'foo') } + + it 'returns new remote account' do + account = subject.call('foo@ap.example.com') + + expect(account.activitypub?).to eq true + expect(account.domain).to eq 'ap.example.com' + expect(account.inbox_url).to eq 'https://ap.example.com/users/foo/inbox' + expect(account.uri).to eq 'https://ap.example.com/users/foo' + end + end + it 'processes one remote account at a time using locks' do wait_for_start = true fail_occurred = false -- cgit