about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-09-02 18:11:13 +0200
committerGitHub <noreply@github.com>2019-09-02 18:11:13 +0200
commit70ddef2654a931827ce5e4323e3042365f6078f2 (patch)
tree05ed40d6d1b218a5131199c88d0c9cd1fa0091a0
parentac6935b629f5e9cd07cca468a6f3588a6e585e25 (diff)
Change trending hashtags to not disappear instantly after midnight (#11712)
-rw-r--r--app/controllers/admin/tags_controller.rb2
-rw-r--r--app/lib/feed_manager.rb2
-rw-r--r--app/models/tag.rb4
-rw-r--r--app/models/trending_tags.rb102
-rw-r--r--app/workers/scheduler/trending_tags_scheduler.rb11
-rw-r--r--config/sidekiq.yml3
-rw-r--r--db/migrate/20190901035623_add_max_score_to_tags.rb6
-rw-r--r--db/post_migrate/20190901040524_remove_score_from_tags.rb12
-rw-r--r--db/schema.rb6
-rw-r--r--spec/models/trending_tags_spec.rb68
10 files changed, 179 insertions, 37 deletions
diff --git a/app/controllers/admin/tags_controller.rb b/app/controllers/admin/tags_controller.rb
index 25d9b7d3d..8bd4e5f8b 100644
--- a/app/controllers/admin/tags_controller.rb
+++ b/app/controllers/admin/tags_controller.rb
@@ -57,7 +57,7 @@ module Admin
       scope = scope.unreviewed if filter_params[:review] == 'unreviewed'
       scope = scope.reviewed.order(reviewed_at: :desc) if filter_params[:review] == 'reviewed'
       scope = scope.pending_review.order(requested_review_at: :desc) if filter_params[:review] == 'pending_review'
-      scope.order(score: :desc)
+      scope.order(max_score: :desc)
     end
 
     def filter_params
diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb
index ca3d890a8..871ec5c19 100644
--- a/app/lib/feed_manager.rb
+++ b/app/lib/feed_manager.rb
@@ -63,7 +63,7 @@ class FeedManager
     reblog_key   = key(type, account_id, 'reblogs')
 
     # Remove any items past the MAX_ITEMS'th entry in our feed
-    redis.zremrangebyrank(timeline_key, '0', (-(FeedManager::MAX_ITEMS + 1)).to_s)
+    redis.zremrangebyrank(timeline_key, 0, -(FeedManager::MAX_ITEMS + 1))
 
     # Get the score of the REBLOG_FALLOFF'th item in our feed, and stop
     # tracking anything after it for deduplication purposes.
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 945e3a3c6..135e0a030 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -7,14 +7,14 @@
 #  name                :string           default(""), not null
 #  created_at          :datetime         not null
 #  updated_at          :datetime         not null
-#  score               :integer
 #  usable              :boolean
 #  trendable           :boolean
 #  listable            :boolean
 #  reviewed_at         :datetime
 #  requested_review_at :datetime
 #  last_status_at      :datetime
-#  last_trend_at       :datetime
+#  max_score           :float
+#  max_score_at        :datetime
 #
 
 class Tag < ApplicationRecord
diff --git a/app/models/trending_tags.rb b/app/models/trending_tags.rb
index e4ce988c1..e1b92b175 100644
--- a/app/models/trending_tags.rb
+++ b/app/models/trending_tags.rb
@@ -7,6 +7,8 @@ class TrendingTags
   THRESHOLD            = 5
   LIMIT                = 10
   REVIEW_THRESHOLD     = 3
+  MAX_SCORE_COOLDOWN   = 3.days.freeze
+  MAX_SCORE_HALFLIFE   = 6.hours.freeze
 
   class << self
     include Redisable
@@ -16,14 +18,75 @@ class TrendingTags
 
       increment_historical_use!(tag.id, at_time)
       increment_unique_use!(tag.id, account.id, at_time)
-      increment_vote!(tag, at_time)
+      increment_use!(tag.id, at_time)
 
       tag.update(last_status_at: Time.now.utc) if tag.last_status_at.nil? || tag.last_status_at < 12.hours.ago
-      tag.update(last_trend_at: Time.now.utc)  if trending?(tag) && (tag.last_trend_at.nil? || tag.last_trend_at < 12.hours.ago)
+    end
+
+    def update!(at_time = Time.now.utc)
+      tag_ids = redis.smembers("#{KEY}:used:#{at_time.beginning_of_day.to_i}") + redis.zrange(KEY, 0, -1)
+      tags    = Tag.where(id: tag_ids.uniq)
+
+      # First pass to calculate scores and update the set
+
+      tags.each do |tag|
+        expected  = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f
+        expected  = 1.0 if expected.zero?
+        observed  = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f
+        max_time  = tag.max_score_at
+        max_score = tag.max_score
+        max_score = 0 if max_time.nil? || max_time < (at_time - MAX_SCORE_COOLDOWN)
+
+        score = begin
+          if expected > observed || observed < THRESHOLD
+            0
+          else
+            ((observed - expected)**2) / expected
+          end
+        end
+
+        if score > max_score
+          max_score = score
+          max_time  = at_time
+
+          # Not interested in triggering any callbacks for this
+          tag.update_columns(max_score: max_score, max_score_at: max_time)
+        end
+
+        decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / MAX_SCORE_HALFLIFE.to_f))
+
+        if decaying_score.zero?
+          redis.zrem(KEY, tag.id)
+        else
+          redis.zadd(KEY, decaying_score, tag.id)
+        end
+      end
+
+      users_for_review = User.staff.includes(:account).to_a.select(&:allows_trending_tag_emails?)
+
+      # Second pass to notify about previously unreviewed trends
+
+      tags.each do |tag|
+        current_rank              = redis.zrevrank(KEY, tag.id)
+        needs_review_notification = tag.requires_review? && !tag.requested_review?
+        rank_passes_threshold     = current_rank.present? && current_rank <= REVIEW_THRESHOLD
+
+        next unless !tag.trendable? && rank_passes_threshold && needs_review_notification
+
+        tag.touch(:requested_review_at)
+
+        users_for_review.each do |user|
+          AdminMailer.new_trending_tag(user.account, tag).deliver_later!
+        end
+      end
+
+      # Trim older items
+
+      redis.zremrangebyrank(KEY, 0, -(LIMIT + 1))
     end
 
     def get(limit, filtered: true)
-      tag_ids = redis.zrevrange("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", 0, LIMIT - 1).map(&:to_i)
+      tag_ids = redis.zrevrange(KEY, 0, LIMIT - 1).map(&:to_i)
 
       tags = Tag.where(id: tag_ids)
       tags = tags.where(trendable: true) if filtered
@@ -33,8 +96,8 @@ class TrendingTags
     end
 
     def trending?(tag)
-      rank = redis.zrevrank("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", tag.id)
-      rank.present? && rank <= LIMIT
+      rank = redis.zrevrank(KEY, tag.id)
+      rank.present? && rank < LIMIT
     end
 
     private
@@ -51,31 +114,10 @@ class TrendingTags
       redis.expire(key, EXPIRE_HISTORY_AFTER)
     end
 
-    def increment_vote!(tag, at_time)
-      key      = "#{KEY}:#{at_time.beginning_of_day.to_i}"
-      expected = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f
-      expected = 1.0 if expected.zero?
-      observed = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f
-
-      if expected > observed || observed < THRESHOLD
-        redis.zrem(key, tag.id)
-      else
-        score    = ((observed - expected)**2) / expected
-        old_rank = redis.zrevrank(key, tag.id)
-
-        redis.zadd(key, score, tag.id)
-        request_review!(tag) if (old_rank.nil? || old_rank > REVIEW_THRESHOLD) && redis.zrevrank(key, tag.id) <= REVIEW_THRESHOLD && !tag.trendable? && tag.requires_review? && !tag.requested_review?
-      end
-
-      redis.expire(key, EXPIRE_TRENDS_AFTER)
-    end
-
-    def request_review!(tag)
-      return unless Setting.trends
-
-      tag.touch(:requested_review_at)
-
-      User.staff.includes(:account).find_each { |u| AdminMailer.new_trending_tag(u.account, tag).deliver_later! if u.allows_trending_tag_emails? }
+    def increment_use!(tag_id, at_time)
+      key = "#{KEY}:used:#{at_time.beginning_of_day.to_i}"
+      redis.sadd(key, tag_id)
+      redis.expire(key, EXPIRE_HISTORY_AFTER)
     end
   end
 end
diff --git a/app/workers/scheduler/trending_tags_scheduler.rb b/app/workers/scheduler/trending_tags_scheduler.rb
new file mode 100644
index 000000000..77f0d5747
--- /dev/null
+++ b/app/workers/scheduler/trending_tags_scheduler.rb
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+class Scheduler::TrendingTagsScheduler
+  include Sidekiq::Worker
+
+  sidekiq_options unique: :until_executed, retry: 0
+
+  def perform
+    TrendingTags.update! if Setting.trends
+  end
+end
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 6ebe450b0..5de25de23 100644
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml
@@ -9,6 +9,9 @@
   scheduled_statuses_scheduler:
     every: '5m'
     class: Scheduler::ScheduledStatusesScheduler
+  trending_tags_scheduler:
+    every: '5m'
+    class: Scheduler::TrendingTagsScheduler
   media_cleanup_scheduler:
     cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
     class: Scheduler::MediaCleanupScheduler
diff --git a/db/migrate/20190901035623_add_max_score_to_tags.rb b/db/migrate/20190901035623_add_max_score_to_tags.rb
new file mode 100644
index 000000000..f936e9871
--- /dev/null
+++ b/db/migrate/20190901035623_add_max_score_to_tags.rb
@@ -0,0 +1,6 @@
+class AddMaxScoreToTags < ActiveRecord::Migration[5.2]
+  def change
+    add_column :tags, :max_score, :float
+    add_column :tags, :max_score_at, :datetime
+  end
+end
diff --git a/db/post_migrate/20190901040524_remove_score_from_tags.rb b/db/post_migrate/20190901040524_remove_score_from_tags.rb
new file mode 100644
index 000000000..a1112700b
--- /dev/null
+++ b/db/post_migrate/20190901040524_remove_score_from_tags.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+class RemoveScoreFromTags < ActiveRecord::Migration[5.2]
+  disable_ddl_transaction!
+
+  def change
+    safety_assured do
+      remove_column :tags, :score, :int
+      remove_column :tags, :last_trend_at, :datetime
+    end
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 482bca367..5576f70bf 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema.define(version: 2019_08_23_221802) do
+ActiveRecord::Schema.define(version: 2019_09_01_040524) do
 
   # These are extensions that must be enabled in order to support this database
   enable_extension "plpgsql"
@@ -664,14 +664,14 @@ ActiveRecord::Schema.define(version: 2019_08_23_221802) do
     t.string "name", default: "", null: false
     t.datetime "created_at", null: false
     t.datetime "updated_at", null: false
-    t.integer "score"
     t.boolean "usable"
     t.boolean "trendable"
     t.boolean "listable"
     t.datetime "reviewed_at"
     t.datetime "requested_review_at"
     t.datetime "last_status_at"
-    t.datetime "last_trend_at"
+    t.float "max_score"
+    t.datetime "max_score_at"
     t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true
   end
 
diff --git a/spec/models/trending_tags_spec.rb b/spec/models/trending_tags_spec.rb
new file mode 100644
index 000000000..b6122c994
--- /dev/null
+++ b/spec/models/trending_tags_spec.rb
@@ -0,0 +1,68 @@
+require 'rails_helper'
+
+RSpec.describe TrendingTags do
+  describe '.record_use!' do
+    pending
+  end
+
+  describe '.update!' do
+    let!(:at_time) { Time.now.utc }
+    let!(:tag1) { Fabricate(:tag, name: 'Catstodon') }
+    let!(:tag2) { Fabricate(:tag, name: 'DogsOfMastodon') }
+    let!(:tag3) { Fabricate(:tag, name: 'OCs') }
+
+    before do
+      allow(Redis.current).to receive(:pfcount) do |key|
+        case key
+        when "activity:tags:#{tag1.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
+          2
+        when "activity:tags:#{tag1.id}:#{at_time.beginning_of_day.to_i}:accounts"
+          16
+        when "activity:tags:#{tag2.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
+          0
+        when "activity:tags:#{tag2.id}:#{at_time.beginning_of_day.to_i}:accounts"
+          4
+        when "activity:tags:#{tag3.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
+          13
+        end
+      end
+
+      Redis.current.zadd('trending_tags', 0.9, tag3.id)
+      Redis.current.sadd("trending_tags:used:#{at_time.beginning_of_day.to_i}", [tag1.id, tag2.id])
+
+      tag3.update(max_score: 0.9, max_score_at: (at_time - 1.day).beginning_of_day + 12.hours)
+
+      described_class.update!(at_time)
+    end
+
+    it 'calculates and re-calculates scores' do
+      expect(described_class.get(10, filtered: false)).to eq [tag1, tag3]
+    end
+
+    it 'omits hashtags below threshold' do
+      expect(described_class.get(10, filtered: false)).to_not include(tag2)
+    end
+
+    it 'decays scores' do
+      expect(Redis.current.zscore('trending_tags', tag3.id)).to be < 0.9
+    end
+  end
+
+  describe '.trending?' do
+    let(:tag) { Fabricate(:tag) }
+
+    before do
+      10.times { |i| Redis.current.zadd('trending_tags', i + 1, Fabricate(:tag).id) }
+    end
+
+    it 'returns true if the hashtag is within limit' do
+      Redis.current.zadd('trending_tags', 11, tag.id)
+      expect(described_class.trending?(tag)).to be true
+    end
+
+    it 'returns false if the hashtag is outside the limit' do
+      Redis.current.zadd('trending_tags', 0, tag.id)
+      expect(described_class.trending?(tag)).to be false
+    end
+  end
+end