about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2018-05-30 02:51:26 +0200
committerGitHub <noreply@github.com>2018-05-30 02:51:26 +0200
commita16e06bbf584412c5a0f812da37fcd6e2c479d1a (patch)
tree4d4a7e1dcc7af3bf1e636e761b98b36be898f058
parenta7d726c3836a87006cedcdc4bd186f8aff89d093 (diff)
Deduplicate accounts and make unique username/domain index case-insensitive (#7658)
Fix #6937
Fix #6837
Fix #6667
-rw-r--r--db/migrate/20180528141303_fix_accounts_unique_index.rb84
-rw-r--r--db/schema.rb5
2 files changed, 86 insertions, 3 deletions
diff --git a/db/migrate/20180528141303_fix_accounts_unique_index.rb b/db/migrate/20180528141303_fix_accounts_unique_index.rb
new file mode 100644
index 000000000..2e5eca814
--- /dev/null
+++ b/db/migrate/20180528141303_fix_accounts_unique_index.rb
@@ -0,0 +1,84 @@
+class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2]
+  disable_ddl_transaction!
+
+  def up
+    say ''
+    say 'WARNING: This migration may take a *long* time for large instances'
+    say 'It will *not* lock tables for any significant time, but it may run'
+    say 'for a very long time. We will pause for 10 seconds to allow you to'
+    say 'interrupt this migration if you are not ready.'
+    say ''
+    say 'This migration will irreversibly delete user accounts with duplicate'
+    say 'usernames. You may use the `rake mastodon:maintenance:find_duplicate_usernames`'
+    say 'task to manually deal with such accounts before running this migration.'
+
+    10.downto(1) do |i|
+      say "Continuing in #{i} second#{i == 1 ? '' : 's'}...", true
+      sleep 1
+    end
+
+    duplicates = Account.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM accounts GROUP BY lower(username), lower(domain) HAVING count(*) > 1').to_hash
+
+    duplicates.each do |row|
+      deduplicate_account!(row['ids'].split(','))
+    end
+
+    remove_index :accounts, name: 'index_accounts_on_username_and_domain_lower' if index_name_exists?(:accounts, 'index_accounts_on_username_and_domain_lower')
+    safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_accounts_on_username_and_domain_lower ON accounts (lower(username), lower(domain))' }
+    remove_index :accounts, name: 'index_accounts_on_username_and_domain' if index_name_exists?(:accounts, 'index_accounts_on_username_and_domain')
+  end
+
+  def down
+    raise ActiveRecord::IrreversibleMigration
+  end
+
+  private
+
+  def deduplicate_account!(account_ids)
+    accounts          = Account.where(id: account_ids).to_a
+    accounts          = account.first.local? ? accounts.sort_by(&:created_at) : accounts.sort_by(&:updated_at).reverse
+    reference_account = accounts.shift
+
+    accounts.each do |other_account|
+      if other_account.public_key == reference_account.public_key
+        # The accounts definitely point to the same resource, so
+        # it's safe to re-attribute content and relationships
+        merge_accounts!(reference_account, other_account)
+      elsif other_account.local?
+        # Since domain is in the GROUP BY clause, both accounts
+        # are always either going to be local or not local, so only
+        # one check is needed. Since we cannot support two users with
+        # the same username locally, one has to go. 😢
+        other_account.user.destroy
+      end
+
+      other_account.destroy
+    end
+  end
+
+  def merge_accounts!(main_account, duplicate_account)
+    [Status, Favourite, Mention, StatusPin, StreamEntry].each do |klass|
+      klass.where(account_id: duplicate_account.id).update_all(account_id: main_account.id)
+    end
+
+    # Since it's the same remote resource, the remote resource likely
+    # already believes we are following/blocking, so it's safe to
+    # re-attribute the relationships too. However, during the presence
+    # of the index bug users could have *also* followed the reference
+    # account already, therefore mass update will not work and we need
+    # to check for (and skip past) uniqueness errors
+    [Follow, FollowRequest, Block, Mute].each do |klass|
+      klass.where(account_id: duplicate_account.id).find_each do |record|
+        record.update(account_id: main_account.id)
+      rescue ActiveRecord::RecordNotUnique
+        next
+      end
+
+      klass.where(target_account_id: duplicate_account.id).find_each do |record|
+        record.update(target_account_id: main_account.id)
+      rescue ActiveRecord::RecordNotUnique
+        next
+      end
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 7435b6cc9..c9d4e0fe7 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2018_05_14_140000) do
+ActiveRecord::Schema.define(version: 2018_05_28_141303) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -77,10 +77,9 @@ ActiveRecord::Schema.define(version: 2018_05_14_140000) do
     t.jsonb "fields"
     t.string "actor_type"
     t.index "(((setweight(to_tsvector('simple'::regconfig, (display_name)::text), 'A'::\"char\") || setweight(to_tsvector('simple'::regconfig, (username)::text), 'B'::\"char\")) || setweight(to_tsvector('simple'::regconfig, (COALESCE(domain, ''::character varying))::text), 'C'::\"char\")))", name: "search_index", using: :gin
-    t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower"
+    t.index "lower((username)::text), lower((domain)::text)", name: "index_accounts_on_username_and_domain_lower", unique: true
     t.index ["uri"], name: "index_accounts_on_uri"
     t.index ["url"], name: "index_accounts_on_url"
-    t.index ["username", "domain"], name: "index_accounts_on_username_and_domain", unique: true
   end
 
   create_table "admin_action_logs", force: :cascade do |t|