diff options
Diffstat (limited to 'app/models/trends')
-rw-r--r-- | app/models/trends/base.rb | 67 | ||||
-rw-r--r-- | app/models/trends/history.rb | 98 | ||||
-rw-r--r-- | app/models/trends/links.rb | 122 | ||||
-rw-r--r-- | app/models/trends/tags.rb | 116 |
4 files changed, 403 insertions, 0 deletions
diff --git a/app/models/trends/base.rb b/app/models/trends/base.rb new file mode 100644 index 000000000..788f128a0 --- /dev/null +++ b/app/models/trends/base.rb @@ -0,0 +1,67 @@ +# frozen_string_literal: true + +class Trends::Base + include Redisable + + def register(_status) + raise NotImplementedError + end + + def add(*) + raise NotImplementedError + end + + def refresh(*) + raise NotImplementedError + end + + def request_review + raise NotImplementedError + end + + def get(*) + raise NotImplementedError + end + + def score(id) + redis.zscore("#{key_prefix}:all", id) || 0 + end + + def rank(id) + redis.zrevrank("#{key_prefix}:allowed", id) + end + + def currently_trending_ids(allowed, limit) + redis.zrevrange(allowed ? "#{key_prefix}:allowed" : "#{key_prefix}:all", 0, limit.positive? ? limit - 1 : limit).map(&:to_i) + end + + protected + + def key_prefix + raise NotImplementedError + end + + def recently_used_ids(at_time = Time.now.utc) + redis.smembers(used_key(at_time)).map(&:to_i) + end + + def record_used_id(id, at_time = Time.now.utc) + redis.sadd(used_key(at_time), id) + redis.expire(used_key(at_time), 1.day.seconds) + end + + def trim_older_items + redis.zremrangebyscore("#{key_prefix}:all", '-inf', '(1') + redis.zremrangebyscore("#{key_prefix}:allowed", '-inf', '(1') + end + + def score_at_rank(rank) + redis.zrevrange("#{key_prefix}:allowed", 0, rank, with_scores: true).last&.last || 0 + end + + private + + def used_key(at_time) + "#{key_prefix}:used:#{at_time.beginning_of_day.to_i}" + end +end diff --git a/app/models/trends/history.rb b/app/models/trends/history.rb new file mode 100644 index 000000000..608e33792 --- /dev/null +++ b/app/models/trends/history.rb @@ -0,0 +1,98 @@ +# frozen_string_literal: true + +class Trends::History + include Enumerable + + class Aggregate + include Redisable + + def initialize(prefix, id, date_range) + @days = date_range.map { |date| Day.new(prefix, id, date.to_time(:utc)) } + end + + def uses + redis.mget(*@days.map { |day| day.key_for(:uses) }).map(&:to_i).sum + end + + def accounts + redis.pfcount(*@days.map { |day| day.key_for(:accounts) }) + end + end + + class Day + include Redisable + + EXPIRE_AFTER = 14.days.seconds + + def initialize(prefix, id, day) + @prefix = prefix + @id = id + @day = day.beginning_of_day + end + + attr_reader :day + + def accounts + redis.pfcount(key_for(:accounts)) + end + + def uses + redis.get(key_for(:uses))&.to_i || 0 + end + + def add(account_id) + redis.pipelined do + redis.incrby(key_for(:uses), 1) + redis.pfadd(key_for(:accounts), account_id) + redis.expire(key_for(:uses), EXPIRE_AFTER) + redis.expire(key_for(:accounts), EXPIRE_AFTER) + end + end + + def as_json + { day: day.to_i.to_s, accounts: accounts.to_s, uses: uses.to_s } + end + + def key_for(suffix) + case suffix + when :accounts + "#{key_prefix}:#{suffix}" + when :uses + key_prefix + end + end + + def key_prefix + "activity:#{@prefix}:#{@id}:#{day.to_i}" + end + end + + def initialize(prefix, id) + @prefix = prefix + @id = id + end + + def get(date) + Day.new(@prefix, @id, date) + end + + def add(account_id, at_time = Time.now.utc) + Day.new(@prefix, @id, at_time).add(account_id) + end + + def aggregate(date_range) + Aggregate.new(@prefix, @id, date_range) + end + + def each(&block) + if block_given? + (0...7).map { |i| block.call(get(i.days.ago)) } + else + to_enum(:each) + end + end + + def as_json(*) + map(&:as_json) + end +end diff --git a/app/models/trends/links.rb b/app/models/trends/links.rb new file mode 100644 index 000000000..a7cd9e29d --- /dev/null +++ b/app/models/trends/links.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +class Trends::Links < Trends::Base + PREFIX = 'trending_links' + + # Minimum amount of uses by unique accounts to begin calculating the score + THRESHOLD = 15 + + # Minimum rank (lower = better) before requesting a review + REVIEW_THRESHOLD = 10 + + # For this amount of time, the peak score (if bigger than current score) is decayed-from + MAX_SCORE_COOLDOWN = 2.days.freeze + + # How quickly a peak score decays + MAX_SCORE_HALFLIFE = 8.hours.freeze + + def register(status, at_time = Time.now.utc) + original_status = status.reblog? ? status.reblog : status + + return unless original_status.public_visibility? && status.public_visibility? && + !original_status.account.silenced? && !status.account.silenced? && + !original_status.spoiler_text? + + original_status.preview_cards.each do |preview_card| + add(preview_card, status.account_id, at_time) if preview_card.appropriate_for_trends? + end + end + + def add(preview_card, account_id, at_time = Time.now.utc) + preview_card.history.add(account_id, at_time) + record_used_id(preview_card.id, at_time) + end + + def get(allowed, limit) + preview_card_ids = currently_trending_ids(allowed, limit) + preview_cards = PreviewCard.where(id: preview_card_ids).index_by(&:id) + preview_card_ids.map { |id| preview_cards[id] }.compact + end + + def refresh(at_time = Time.now.utc) + preview_cards = PreviewCard.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq) + calculate_scores(preview_cards, at_time) + trim_older_items + end + + def request_review + preview_cards = PreviewCard.where(id: currently_trending_ids(false, -1)) + + preview_cards_requiring_review = preview_cards.filter_map do |preview_card| + next unless would_be_trending?(preview_card.id) && !preview_card.trendable? && preview_card.requires_review_notification? + + if preview_card.provider.nil? + preview_card.provider = PreviewCardProvider.create(domain: preview_card.domain, requested_review_at: Time.now.utc) + else + preview_card.provider.touch(:requested_review_at) + end + + preview_card + end + + return if preview_cards_requiring_review.empty? + + User.staff.includes(:account).find_each do |user| + AdminMailer.new_trending_links(user.account, preview_cards_requiring_review).deliver_later! if user.allows_trending_tag_emails? + end + end + + protected + + def key_prefix + PREFIX + end + + private + + def calculate_scores(preview_cards, at_time) + preview_cards.each do |preview_card| + expected = preview_card.history.get(at_time - 1.day).accounts.to_f + expected = 1.0 if expected.zero? + observed = preview_card.history.get(at_time).accounts.to_f + max_time = preview_card.max_score_at + max_score = preview_card.max_score + max_score = 0 if max_time.nil? || max_time < (at_time - MAX_SCORE_COOLDOWN) + + score = begin + if expected > observed || observed < THRESHOLD + 0 + else + ((observed - expected)**2) / expected + end + end + + if score > max_score + max_score = score + max_time = at_time + + # Not interested in triggering any callbacks for this + preview_card.update_columns(max_score: max_score, max_score_at: max_time) + end + + decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / MAX_SCORE_HALFLIFE.to_f)) + + if decaying_score.zero? + redis.zrem("#{PREFIX}:all", preview_card.id) + redis.zrem("#{PREFIX}:allowed", preview_card.id) + else + redis.zadd("#{PREFIX}:all", decaying_score, preview_card.id) + + if preview_card.trendable? + redis.zadd("#{PREFIX}:allowed", decaying_score, preview_card.id) + else + redis.zrem("#{PREFIX}:allowed", preview_card.id) + end + end + end + end + + def would_be_trending?(id) + score(id) > score_at_rank(REVIEW_THRESHOLD - 1) + end +end diff --git a/app/models/trends/tags.rb b/app/models/trends/tags.rb new file mode 100644 index 000000000..027e3dbac --- /dev/null +++ b/app/models/trends/tags.rb @@ -0,0 +1,116 @@ +# frozen_string_literal: true + +class Trends::Tags < Trends::Base + PREFIX = 'trending_tags' + + # Minimum amount of uses by unique accounts to begin calculating the score + THRESHOLD = 15 + + # Minimum rank (lower = better) before requesting a review + REVIEW_THRESHOLD = 10 + + # For this amount of time, the peak score (if bigger than current score) is decayed-from + MAX_SCORE_COOLDOWN = 2.days.freeze + + # How quickly a peak score decays + MAX_SCORE_HALFLIFE = 4.hours.freeze + + def register(status, at_time = Time.now.utc) + original_status = status.reblog? ? status.reblog : status + + return unless original_status.public_visibility? && status.public_visibility? && + !original_status.account.silenced? && !status.account.silenced? + + original_status.tags.each do |tag| + add(tag, status.account_id, at_time) if tag.usable? + end + end + + def add(tag, account_id, at_time = Time.now.utc) + tag.history.add(account_id, at_time) + record_used_id(tag.id, at_time) + end + + def refresh(at_time = Time.now.utc) + tags = Tag.where(id: (recently_used_ids(at_time) + currently_trending_ids(false, -1)).uniq) + calculate_scores(tags, at_time) + trim_older_items + end + + def get(allowed, limit) + tag_ids = currently_trending_ids(allowed, limit) + tags = Tag.where(id: tag_ids).index_by(&:id) + tag_ids.map { |id| tags[id] }.compact + end + + def request_review + tags = Tag.where(id: currently_trending_ids(false, -1)) + + tags_requiring_review = tags.filter_map do |tag| + next unless would_be_trending?(tag.id) && !tag.trendable? && tag.requires_review_notification? + + tag.touch(:requested_review_at) + tag + end + + return if tags_requiring_review.empty? + + User.staff.includes(:account).find_each do |user| + AdminMailer.new_trending_tags(user.account, tags_requiring_review).deliver_later! if user.allows_trending_tag_emails? + end + end + + protected + + def key_prefix + PREFIX + end + + private + + def calculate_scores(tags, at_time) + tags.each do |tag| + expected = tag.history.get(at_time - 1.day).accounts.to_f + expected = 1.0 if expected.zero? + observed = tag.history.get(at_time).accounts.to_f + max_time = tag.max_score_at + max_score = tag.max_score + max_score = 0 if max_time.nil? || max_time < (at_time - MAX_SCORE_COOLDOWN) + + score = begin + if expected > observed || observed < THRESHOLD + 0 + else + ((observed - expected)**2) / expected + end + end + + if score > max_score + max_score = score + max_time = at_time + + # Not interested in triggering any callbacks for this + tag.update_columns(max_score: max_score, max_score_at: max_time) + end + + decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / MAX_SCORE_HALFLIFE.to_f)) + + if decaying_score.zero? + redis.zrem("#{PREFIX}:all", tag.id) + redis.zrem("#{PREFIX}:allowed", tag.id) + else + redis.zadd("#{PREFIX}:all", decaying_score, tag.id) + + if tag.trendable? + redis.zadd("#{PREFIX}:allowed", decaying_score, tag.id) + else + redis.zrem("#{PREFIX}:allowed", tag.id) + end + end + end + end + + def would_be_trending?(id) + score(id) > score_at_rank(REVIEW_THRESHOLD - 1) + end +end |