about summary refs log tree commit diff
path: root/app/models/trends
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2021-11-25 13:07:38 +0100
committerGitHub <noreply@github.com>2021-11-25 13:07:38 +0100
commit6e50134a42cb303e6e42f89f9ddb5aacf83e7a6d (patch)
treef60727e2c871857422082d814bb0cb28ce88f6c3 /app/models/trends
parent46e62fc4b33f3566eb9bf588b15bac28cae967a3 (diff)
Add trending links (#16917)
* Add trending links

* Add overriding specific links trendability

* Add link type to preview cards and only trend articles

Change trends review notifications from being sent every 5 minutes to being sent every 2 hours

Change threshold from 5 unique accounts to 15 unique accounts

* Fix tests
Diffstat (limited to 'app/models/trends')
-rw-r--r--app/models/trends/base.rb80
-rw-r--r--app/models/trends/history.rb98
-rw-r--r--app/models/trends/links.rb117
-rw-r--r--app/models/trends/tags.rb111
4 files changed, 406 insertions, 0 deletions
diff --git a/app/models/trends/base.rb b/app/models/trends/base.rb
new file mode 100644
index 000000000..b767dcb1a
--- /dev/null
+++ b/app/models/trends/base.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+class Trends::Base
+  include Redisable
+
+  class_attribute :default_options
+
+  attr_reader :options
+
+  # @param [Hash] options
+  # @option options [Integer] :threshold Minimum amount of uses by unique accounts to begin calculating the score
+  # @option options [Integer] :review_threshold Minimum rank (lower = better) before requesting a review
+  # @option options [ActiveSupport::Duration] :max_score_cooldown For this amount of time, the peak score (if bigger than current score) is decayed-from
+  # @option options [ActiveSupport::Duration] :max_score_halflife How quickly a peak score decays
+  def initialize(options = {})
+    @options = self.class.default_options.merge(options)
+  end
+
+  def register(_status)
+    raise NotImplementedError
+  end
+
+  def add(*)
+    raise NotImplementedError
+  end
+
+  def refresh(*)
+    raise NotImplementedError
+  end
+
+  def request_review
+    raise NotImplementedError
+  end
+
+  def get(*)
+    raise NotImplementedError
+  end
+
+  def score(id)
+    redis.zscore("#{key_prefix}:all", id) || 0
+  end
+
+  def rank(id)
+    redis.zrevrank("#{key_prefix}:allowed", id)
+  end
+
+  def currently_trending_ids(allowed, limit)
+    redis.zrevrange(allowed ? "#{key_prefix}:allowed" : "#{key_prefix}:all", 0, limit.positive? ? limit - 1 : limit).map(&:to_i)
+  end
+
+  protected
+
+  def key_prefix
+    raise NotImplementedError
+  end
+
+  def recently_used_ids(at_time = Time.now.utc)
+    redis.smembers(used_key(at_time)).map(&:to_i)
+  end
+
+  def record_used_id(id, at_time = Time.now.utc)
+    redis.sadd(used_key(at_time), id)
+    redis.expire(used_key(at_time), 1.day.seconds)
+  end
+
+  def trim_older_items
+    redis.zremrangebyscore("#{key_prefix}:all", '-inf', '(1')
+    redis.zremrangebyscore("#{key_prefix}:allowed", '-inf', '(1')
+  end
+
+  def score_at_rank(rank)
+    redis.zrevrange("#{key_prefix}:allowed", 0, rank, with_scores: true).last&.last || 0
+  end
+
+  private
+
+  def used_key(at_time)
+    "#{key_prefix}:used:#{at_time.beginning_of_day.to_i}"
+  end
+end
diff --git a/app/models/trends/history.rb b/app/models/trends/history.rb
new file mode 100644
index 000000000..608e33792
--- /dev/null
+++ b/app/models/trends/history.rb
@@ -0,0 +1,98 @@
+# frozen_string_literal: true
+
+class Trends::History
+  include Enumerable
+
+  class Aggregate
+    include Redisable
+
+    def initialize(prefix, id, date_range)
+      @days = date_range.map { |date| Day.new(prefix, id, date.to_time(:utc)) }
+    end
+
+    def uses
+      redis.mget(*@days.map { |day| day.key_for(:uses) }).map(&:to_i).sum
+    end
+
+    def accounts
+      redis.pfcount(*@days.map { |day| day.key_for(:accounts) })
+    end
+  end
+
+  class Day
+    include Redisable
+
+    EXPIRE_AFTER = 14.days.seconds
+
+    def initialize(prefix, id, day)
+      @prefix = prefix
+      @id     = id
+      @day    = day.beginning_of_day
+    end
+
+    attr_reader :day
+
+    def accounts
+      redis.pfcount(key_for(:accounts))
+    end
+
+    def uses
+      redis.get(key_for(:uses))&.to_i || 0
+    end
+
+    def add(account_id)
+      redis.pipelined do
+        redis.incrby(key_for(:uses), 1)
+        redis.pfadd(key_for(:accounts), account_id)
+        redis.expire(key_for(:uses), EXPIRE_AFTER)
+        redis.expire(key_for(:accounts), EXPIRE_AFTER)
+      end
+    end
+
+    def as_json
+      { day: day.to_i.to_s, accounts: accounts.to_s, uses: uses.to_s }
+    end
+
+    def key_for(suffix)
+      case suffix
+      when :accounts
+        "#{key_prefix}:#{suffix}"
+      when :uses
+        key_prefix
+      end
+    end
+
+    def key_prefix
+      "activity:#{@prefix}:#{@id}:#{day.to_i}"
+    end
+  end
+
+  def initialize(prefix, id)
+    @prefix = prefix
+    @id     = id
+  end
+
+  def get(date)
+    Day.new(@prefix, @id, date)
+  end
+
+  def add(account_id, at_time = Time.now.utc)
+    Day.new(@prefix, @id, at_time).add(account_id)
+  end
+
+  def aggregate(date_range)
+    Aggregate.new(@prefix, @id, date_range)
+  end
+
+  def each(&block)
+    if block_given?
+      (0...7).map { |i| block.call(get(i.days.ago)) }
+    else
+      to_enum(:each)
+    end
+  end
+
+  def as_json(*)
+    map(&:as_json)
+  end
+end
diff --git a/app/models/trends/links.rb b/app/models/trends/links.rb
new file mode 100644
index 000000000..a0d65138b
--- /dev/null
+++ b/app/models/trends/links.rb
@@ -0,0 +1,117 @@
+# frozen_string_literal: true
+
+class Trends::Links < Trends::Base
+  PREFIX = 'trending_links'
+
+  self.default_options = {
+    threshold: 15,
+    review_threshold: 10,
+    max_score_cooldown: 2.days.freeze,
+    max_score_halflife: 8.hours.freeze,
+  }
+
+  def register(status, at_time = Time.now.utc)
+    original_status = status.reblog? ? status.reblog : status
+
+    return unless original_status.public_visibility? && status.public_visibility? &&
+                  !original_status.account.silenced? && !status.account.silenced? &&
+                  !original_status.spoiler_text?
+
+    original_status.preview_cards.each do |preview_card|
+      add(preview_card, status.account_id, at_time) if preview_card.appropriate_for_trends?
+    end
+  end
+
+  def add(preview_card, account_id, at_time = Time.now.utc)
+    preview_card.history.add(account_id, at_time)
+    record_used_id(preview_card.id, at_time)
+  end
+
+  def get(allowed, limit)
+    preview_card_ids = currently_trending_ids(allowed, limit)
+    preview_cards = PreviewCard.where(id: preview_card_ids).index_by(&:id)
+    preview_card_ids.map { |id| preview_cards[id] }.compact
+  end
+
+  def refresh(at_time = Time.now.utc)
+    preview_cards = PreviewCard.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq)
+    calculate_scores(preview_cards, at_time)
+    trim_older_items
+  end
+
+  def request_review
+    preview_cards = PreviewCard.where(id: currently_trending_ids(false, -1))
+
+    preview_cards_requiring_review = preview_cards.filter_map do |preview_card|
+      next unless would_be_trending?(preview_card.id) && !preview_card.trendable? && preview_card.requires_review_notification?
+
+      if preview_card.provider.nil?
+        preview_card.provider = PreviewCardProvider.create(domain: preview_card.domain, requested_review_at: Time.now.utc)
+      else
+        preview_card.provider.touch(:requested_review_at)
+      end
+
+      preview_card
+    end
+
+    return if preview_cards_requiring_review.empty?
+
+    User.staff.includes(:account).find_each do |user|
+      AdminMailer.new_trending_links(user.account, preview_cards_requiring_review).deliver_later! if user.allows_trending_tag_emails?
+    end
+  end
+
+  protected
+
+  def key_prefix
+    PREFIX
+  end
+
+  private
+
+  def calculate_scores(preview_cards, at_time)
+    preview_cards.each do |preview_card|
+      expected  = preview_card.history.get(at_time - 1.day).accounts.to_f
+      expected  = 1.0 if expected.zero?
+      observed  = preview_card.history.get(at_time).accounts.to_f
+      max_time  = preview_card.max_score_at
+      max_score = preview_card.max_score
+      max_score = 0 if max_time.nil? || max_time < (at_time - options[:max_score_cooldown])
+
+      score = begin
+        if expected > observed || observed < options[:threshold]
+          0
+        else
+          ((observed - expected)**2) / expected
+        end
+      end
+
+      if score > max_score
+        max_score = score
+        max_time  = at_time
+
+        # Not interested in triggering any callbacks for this
+        preview_card.update_columns(max_score: max_score, max_score_at: max_time)
+      end
+
+      decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
+
+      if decaying_score.zero?
+        redis.zrem("#{PREFIX}:all", preview_card.id)
+        redis.zrem("#{PREFIX}:allowed", preview_card.id)
+      else
+        redis.zadd("#{PREFIX}:all", decaying_score, preview_card.id)
+
+        if preview_card.trendable?
+          redis.zadd("#{PREFIX}:allowed", decaying_score, preview_card.id)
+        else
+          redis.zrem("#{PREFIX}:allowed", preview_card.id)
+        end
+      end
+    end
+  end
+
+  def would_be_trending?(id)
+    score(id) > score_at_rank(options[:review_threshold] - 1)
+  end
+end
diff --git a/app/models/trends/tags.rb b/app/models/trends/tags.rb
new file mode 100644
index 000000000..13e0ab56b
--- /dev/null
+++ b/app/models/trends/tags.rb
@@ -0,0 +1,111 @@
+# frozen_string_literal: true
+
+class Trends::Tags < Trends::Base
+  PREFIX = 'trending_tags'
+
+  self.default_options = {
+    threshold: 15,
+    review_threshold: 10,
+    max_score_cooldown: 2.days.freeze,
+    max_score_halflife: 4.hours.freeze,
+  }
+
+  def register(status, at_time = Time.now.utc)
+    original_status = status.reblog? ? status.reblog : status
+
+    return unless original_status.public_visibility? && status.public_visibility? &&
+                  !original_status.account.silenced? && !status.account.silenced?
+
+    original_status.tags.each do |tag|
+      add(tag, status.account_id, at_time) if tag.usable?
+    end
+  end
+
+  def add(tag, account_id, at_time = Time.now.utc)
+    tag.history.add(account_id, at_time)
+    record_used_id(tag.id, at_time)
+  end
+
+  def refresh(at_time = Time.now.utc)
+    tags = Tag.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq)
+    calculate_scores(tags, at_time)
+    trim_older_items
+  end
+
+  def get(allowed, limit)
+    tag_ids = currently_trending_ids(allowed, limit)
+    tags = Tag.where(id: tag_ids).index_by(&:id)
+    tag_ids.map { |id| tags[id] }.compact
+  end
+
+  def request_review
+    tags = Tag.where(id: currently_trending_ids(false, -1))
+
+    tags_requiring_review = tags.filter_map do |tag|
+      next unless would_be_trending?(tag.id) && !tag.trendable? && tag.requires_review_notification?
+
+      tag.touch(:requested_review_at)
+      tag
+    end
+
+    return if tags_requiring_review.empty?
+
+    User.staff.includes(:account).find_each do |user|
+      AdminMailer.new_trending_tags(user.account, tags_requiring_review).deliver_later! if user.allows_trending_tag_emails?
+    end
+  end
+
+  protected
+
+  def key_prefix
+    PREFIX
+  end
+
+  private
+
+  def calculate_scores(tags, at_time)
+    tags.each do |tag|
+      expected  = tag.history.get(at_time - 1.day).accounts.to_f
+      expected  = 1.0 if expected.zero?
+      observed  = tag.history.get(at_time).accounts.to_f
+      max_time  = tag.max_score_at
+      max_score = tag.max_score
+      max_score = 0 if max_time.nil? || max_time < (at_time - options[:max_score_cooldown])
+
+      score = begin
+        if expected > observed || observed < options[:threshold]
+          0
+        else
+          ((observed - expected)**2) / expected
+        end
+      end
+
+      if score > max_score
+        max_score = score
+        max_time  = at_time
+
+        # Not interested in triggering any callbacks for this
+        tag.update_columns(max_score: max_score, max_score_at: max_time)
+      end
+
+      decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / options[:max_score_halflife].to_f))
+
+      if decaying_score.zero?
+        redis.zrem("#{PREFIX}:all", tag.id)
+        redis.zrem("#{PREFIX}:allowed", tag.id)
+      else
+        redis.zadd("#{PREFIX}:all", decaying_score, tag.id)
+
+        if tag.trendable?
+          redis.zadd("#{PREFIX}:allowed", decaying_score, tag.id)
+        else
+          redis.zrem("#{PREFIX}:allowed", tag.id)
+        end
+      end
+    end
+  end
+
+  def would_be_trending?(id)
+    score(id) > score_at_rank(options[:review_threshold] - 1)
+  end
+end