about summary refs log tree commit diff
diff options
context:
space:
mode:
authorClaire <claire.github-309c@sitedethib.com>2021-05-05 22:04:52 +0200
committerGitHub <noreply@github.com>2021-05-05 22:04:52 +0200
commitd9ae3db8d5543cf0b7fa44186c191c9bb2472d23 (patch)
tree512309009b33d99557dbfbc7d2513b1b3792bf60
parent351c74459084ccffce1333b57c2af9a6b55cac8d (diff)
Improve performance of follow recommendation scheduler (#16159)
Express follow_recommendations in terms of account_summaries rather than
accounts, integrate filters that are unconditionally used, and materialize
the resulting view.

This should result in the bulk of the computation being performed only once
instead of **once per recommendation language**.
-rw-r--r--app/models/follow_recommendation.rb6
-rw-r--r--app/workers/scheduler/follow_recommendations_scheduler.rb5
-rw-r--r--db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb18
-rw-r--r--db/schema.rb28
-rw-r--r--db/views/follow_recommendations_v02.sql34
5 files changed, 75 insertions, 16 deletions
diff --git a/app/models/follow_recommendation.rb b/app/models/follow_recommendation.rb
index 6670b6560..1ed6dc49b 100644
--- a/app/models/follow_recommendation.rb
+++ b/app/models/follow_recommendation.rb
@@ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord
   belongs_to :account_summary, foreign_key: :account_id
   belongs_to :account, foreign_key: :account_id
 
-  scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) }
   scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) }
-  scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) }
+
+  def self.refresh
+    Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false)
+  end
 
   def readonly?
     true
diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb
index 0a0286496..cb1e15961 100644
--- a/app/workers/scheduler/follow_recommendations_scheduler.rb
+++ b/app/workers/scheduler/follow_recommendations_scheduler.rb
@@ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler
   def perform
     # Maintaining a materialized view speeds-up subsequent queries significantly
     AccountSummary.refresh
+    FollowRecommendation.refresh
 
-    fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id)
+    fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id)
 
     I18n.available_locales.each do |locale|
       recommendations = begin
         if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
-          FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id)
+          FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id)
         else
           {}
         end
diff --git a/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb b/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb
new file mode 100644
index 000000000..9b2a284e4
--- /dev/null
+++ b/db/migrate/20210505174616_update_follow_recommendations_to_version_2.rb
@@ -0,0 +1,18 @@
+class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1]
+  # We're switching from a normal to a materialized view so we need
+  # custom `up` and `down` paths.
+
+  def up
+    drop_view :follow_recommendations
+    create_view :follow_recommendations, version: 2, materialized: true
+
+    # To be able to refresh the view concurrently,
+    # at least one unique index is required
+    safety_assured { add_index :follow_recommendations, :account_id, unique: true }
+  end
+
+  def down
+    drop_view :follow_recommendations, materialized: true
+    create_view :follow_recommendations, version: 1
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 0d951ee95..88e906079 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2021_04_25_135952) do
+ActiveRecord::Schema.define(version: 2021_05_05_174616) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do
   SQL
   add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true
 
-  create_view "follow_recommendations", sql_definition: <<-SQL
+  create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL
       SELECT t0.account_id,
       sum(t0.rank) AS rank,
       array_agg(t0.reason) AS reason
-     FROM ( SELECT accounts.id AS account_id,
+     FROM ( SELECT account_summaries.account_id,
               ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank,
               'most_followed'::text AS reason
-             FROM ((follows
-               JOIN accounts ON ((accounts.id = follows.target_account_id)))
+             FROM (((follows
+               JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id)))
                JOIN users ON ((users.account_id = follows.account_id)))
-            WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
-            GROUP BY accounts.id
+               LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id)))
+            WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
+            GROUP BY account_summaries.account_id
            HAVING (count(follows.id) >= 5)
           UNION ALL
-           SELECT accounts.id AS account_id,
+           SELECT account_summaries.account_id,
               (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank,
               'most_interactions'::text AS reason
-             FROM ((status_stats
+             FROM (((status_stats
                JOIN statuses ON ((statuses.id = status_stats.status_id)))
-               JOIN accounts ON ((accounts.id = statuses.account_id)))
-            WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
-            GROUP BY accounts.id
+               JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id)))
+               LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id)))
+            WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
+            GROUP BY account_summaries.account_id
            HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0
     GROUP BY t0.account_id
     ORDER BY (sum(t0.rank)) DESC;
   SQL
+  add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true
+
 end
diff --git a/db/views/follow_recommendations_v02.sql b/db/views/follow_recommendations_v02.sql
new file mode 100644
index 000000000..673c5cc85
--- /dev/null
+++ b/db/views/follow_recommendations_v02.sql
@@ -0,0 +1,34 @@
+SELECT
+  account_id,
+  sum(rank) AS rank,
+  array_agg(reason) AS reason
+FROM (
+  SELECT
+    account_summaries.account_id AS account_id,
+    count(follows.id) / (1.0 + count(follows.id)) AS rank,
+    'most_followed' AS reason
+  FROM follows
+  INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id
+  INNER JOIN users ON users.account_id = follows.account_id
+  LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id
+  WHERE users.current_sign_in_at >= (now() - interval '30 days')
+    AND account_summaries.sensitive = 'f'
+    AND follow_recommendation_suppressions.id IS NULL
+  GROUP BY account_summaries.account_id
+  HAVING count(follows.id) >= 5
+  UNION ALL
+  SELECT account_summaries.account_id AS account_id,
+         sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank,
+         'most_interactions' AS reason
+  FROM status_stats
+  INNER JOIN statuses ON statuses.id = status_stats.status_id
+  INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id
+  LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id
+  WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16)
+    AND account_summaries.sensitive = 'f'
+    AND follow_recommendation_suppressions.id IS NULL
+  GROUP BY account_summaries.account_id
+  HAVING sum(reblogs_count + favourites_count) >= 5
+) t0
+GROUP BY account_id
+ORDER BY rank DESC