From a16e06bbf584412c5a0f812da37fcd6e2c479d1a Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 30 May 2018 02:51:26 +0200 Subject: Deduplicate accounts and make unique username/domain index case-insensitive (#7658) Fix #6937 Fix #6837 Fix #6667 --- .../20180528141303_fix_accounts_unique_index.rb | 84 ++++++++++++++++++++++ db/schema.rb | 5 +- 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 db/migrate/20180528141303_fix_accounts_unique_index.rb diff --git a/db/migrate/20180528141303_fix_accounts_unique_index.rb b/db/migrate/20180528141303_fix_accounts_unique_index.rb new file mode 100644 index 000000000..2e5eca814 --- /dev/null +++ b/db/migrate/20180528141303_fix_accounts_unique_index.rb @@ -0,0 +1,84 @@ +class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def up + say '' + say 'WARNING: This migration may take a *long* time for large instances' + say 'It will *not* lock tables for any significant time, but it may run' + say 'for a very long time. We will pause for 10 seconds to allow you to' + say 'interrupt this migration if you are not ready.' + say '' + say 'This migration will irreversibly delete user accounts with duplicate' + say 'usernames. You may use the `rake mastodon:maintenance:find_duplicate_usernames`' + say 'task to manually deal with such accounts before running this migration.' + + 10.downto(1) do |i| + say "Continuing in #{i} second#{i == 1 ? '' : 's'}...", true + sleep 1 + end + + duplicates = Account.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM accounts GROUP BY lower(username), lower(domain) HAVING count(*) > 1').to_hash + + duplicates.each do |row| + deduplicate_account!(row['ids'].split(',')) + end + + remove_index :accounts, name: 'index_accounts_on_username_and_domain_lower' if index_name_exists?(:accounts, 'index_accounts_on_username_and_domain_lower') + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_accounts_on_username_and_domain_lower ON accounts (lower(username), lower(domain))' } + remove_index :accounts, name: 'index_accounts_on_username_and_domain' if index_name_exists?(:accounts, 'index_accounts_on_username_and_domain') + end + + def down + raise ActiveRecord::IrreversibleMigration + end + + private + + def deduplicate_account!(account_ids) + accounts = Account.where(id: account_ids).to_a + accounts = account.first.local? ? accounts.sort_by(&:created_at) : accounts.sort_by(&:updated_at).reverse + reference_account = accounts.shift + + accounts.each do |other_account| + if other_account.public_key == reference_account.public_key + # The accounts definitely point to the same resource, so + # it's safe to re-attribute content and relationships + merge_accounts!(reference_account, other_account) + elsif other_account.local? + # Since domain is in the GROUP BY clause, both accounts + # are always either going to be local or not local, so only + # one check is needed. Since we cannot support two users with + # the same username locally, one has to go. 😢 + other_account.user.destroy + end + + other_account.destroy + end + end + + def merge_accounts!(main_account, duplicate_account) + [Status, Favourite, Mention, StatusPin, StreamEntry].each do |klass| + klass.where(account_id: duplicate_account.id).update_all(account_id: main_account.id) + end + + # Since it's the same remote resource, the remote resource likely + # already believes we are following/blocking, so it's safe to + # re-attribute the relationships too. However, during the presence + # of the index bug users could have *also* followed the reference + # account already, therefore mass update will not work and we need + # to check for (and skip past) uniqueness errors + [Follow, FollowRequest, Block, Mute].each do |klass| + klass.where(account_id: duplicate_account.id).find_each do |record| + record.update(account_id: main_account.id) + rescue ActiveRecord::RecordNotUnique + next + end + + klass.where(target_account_id: duplicate_account.id).find_each do |record| + record.update(target_account_id: main_account.id) + rescue ActiveRecord::RecordNotUnique + next + end + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 7435b6cc9..c9d4e0fe7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2018_05_14_140000) do +ActiveRecord::Schema.define(version: 2018_05_28_141303) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -77,10 +77,9 @@ ActiveRecord::Schema.define(version: 2018_05_14_140000) do t.jsonb "fields" t.string "actor_type" t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin - t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower" + t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true t.index ["uri"], name: "index_accounts_on_uri" t.index ["url"], name: "index_accounts_on_url" - t.index ["username", "domain"], name: "index_accounts_on_username_and_domain", unique: true end create_table "admin_action_logs", force: :cascade do |t| -- cgit