about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2018-05-31 17:09:09 +0200
committerGitHub <noreply@github.com>2018-05-31 17:09:09 +0200
commit19b4c666f7f760986be4215ea874f951c617a0df (patch)
treeaf189dd6f1479f359d7d2a92947f4a6e60a91286
parent1e938b966e7dd6ec329567be7c285779b6d11599 (diff)
Improve account index migration (#7684)
* Improve account index migration

- Display more progress in stdout
- Catch PG::UniqueViolation when re-attributing favourites
- Skip callbacks and validations when re-attributing other relationships

* Use in_batches to reduce table lock-up during account merge

* Use #say_with_time to benchmark each deduplication
-rw-r--r--db/migrate/20180528141303_fix_accounts_unique_index.rb42
1 files changed, 22 insertions, 20 deletions
diff --git a/db/migrate/20180528141303_fix_accounts_unique_index.rb b/db/migrate/20180528141303_fix_accounts_unique_index.rb
index aadb5b7db..e949436dc 100644
--- a/db/migrate/20180528141303_fix_accounts_unique_index.rb
+++ b/db/migrate/20180528141303_fix_accounts_unique_index.rb
@@ -39,26 +39,28 @@ class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2]
     accounts          = accounts.first.local? ? accounts.sort_by(&:created_at) : accounts.sort_by(&:updated_at).reverse
     reference_account = accounts.shift
 
-    accounts.each do |other_account|
-      if other_account.public_key == reference_account.public_key
-        # The accounts definitely point to the same resource, so
-        # it's safe to re-attribute content and relationships
-        merge_accounts!(reference_account, other_account)
-      elsif other_account.local?
-        # Since domain is in the GROUP BY clause, both accounts
-        # are always either going to be local or not local, so only
-        # one check is needed. Since we cannot support two users with
-        # the same username locally, one has to go. 😢
-        other_account.user&.destroy
-      end
+    say_with_time "Deduplicating @#{reference_account.acct} (#{accounts.size} duplicates)..." do
+      accounts.each do |other_account|
+        if other_account.public_key == reference_account.public_key
+          # The accounts definitely point to the same resource, so
+          # it's safe to re-attribute content and relationships
+          merge_accounts!(reference_account, other_account)
+        elsif other_account.local?
+          # Since domain is in the GROUP BY clause, both accounts
+          # are always either going to be local or not local, so only
+          # one check is needed. Since we cannot support two users with
+          # the same username locally, one has to go. 😢
+          other_account.user&.destroy
+        end
 
-      other_account.destroy
+        other_account.destroy
+      end
     end
   end
 
   def merge_accounts!(main_account, duplicate_account)
-    [Status, Favourite, Mention, StatusPin, StreamEntry].each do |klass|
-      klass.where(account_id: duplicate_account.id).update_all(account_id: main_account.id)
+    [Status, Mention, StatusPin, StreamEntry].each do |klass|
+      klass.where(account_id: duplicate_account.id).in_batches.update_all(account_id: main_account.id)
     end
 
     # Since it's the same remote resource, the remote resource likely
@@ -67,19 +69,19 @@ class FixAccountsUniqueIndex < ActiveRecord::Migration[5.2]
     # of the index bug users could have *also* followed the reference
     # account already, therefore mass update will not work and we need
     # to check for (and skip past) uniqueness errors
-    [Follow, FollowRequest, Block, Mute].each do |klass|
+    [Favourite, Follow, FollowRequest, Block, Mute].each do |klass|
       klass.where(account_id: duplicate_account.id).find_each do |record|
         begin
-          record.update(account_id: main_account.id)
-        rescue ActiveRecord::RecordNotUnique
+          record.update_attribute(:account_id, main_account.id)
+        rescue PG::UniqueViolation
           next
         end
       end
 
       klass.where(target_account_id: duplicate_account.id).find_each do |record|
         begin
-          record.update(target_account_id: main_account.id)
-        rescue ActiveRecord::RecordNotUnique
+          record.update_attribute(:target_account_id, main_account.id)
+        rescue PG::UniqueViolation
           next
         end
       end