about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- app/helpers/blocklist_helper.rb 51
-rw-r--r-- app/workers/scheduler/janitor_scheduler.rb 148
-rw-r--r-- config/sidekiq.yml 3
3 files changed, 202 insertions, 0 deletions
diff --git a/app/helpers/blocklist_helper.rb b/app/helpers/blocklist_helper.rb
new file mode 100644
index 000000000..5730a2ef5
--- /dev/null
+++ b/app/helpers/blocklist_helper.rb
@@ -0,0 +1,51 @@
+module BlocklistHelper
+  def merged_blocklist
+    # ordered by preference
+    # prefer vulpine b/c they have easy-to-parse reason text
+    blocklist = vulpine_club_blocks | dialup_express_blocks | ten_forward_blocks
+    blocklist.uniq { |entry| entry[:domain] }
+  end
+
+  def dialup_express_blocks
+    admin_id = Account.find_remote('xenon', 'sleeping.town')&.id
+    return [] if admin_id.nil?
+
+    domains = ActiveRecord::Base.connection.select_values("SELECT unnest(regexp_matches(text, '\\m[\\w\\-]+\\.[\\w\-]+(?:\\.[\\w\\-]+)*', 'g')) FROM statuses WHERE account_id = #{admin_id.to_i} AND NOT reply AND created_at >= (NOW() - INTERVAL '2 days') AND tsv @@ to_tsquery('new <-> dialup <-> express <2> block') EXCEPT SELECT domain FROM domain_blocks")
+
+    domains.map! do |domain|
+      {domain: domain, severity: :suspend, reason: '(imported from dialup.express)'}
+    end
+  end
+
+  def ten_forward_blocks
+    admin_id = Account.find_remote('guinan', 'tenforward.social')&.id
+    return [] if admin_id.nil?
+
+    domains += ActiveRecord::Base.connection.select_values("SELECT unnest(regexp_matches(text, '\\m[\\w\\-]+\\.[\\w\-]+(?:\\.[\\w\\-]+)*', 'g')) FROM statuses WHERE account_id = #{admin_id.to_i} AND NOT reply AND created_at >= (NOW() - INTERVAL '2 days') AND tsv @@ to_tsquery('ten <-> forward <-> moderation <-> announcement') EXCEPT SELECT domain FROM domain_blocks")
+
+    domains.map! do |domain|
+      {domain: domain, severity: :suspend, reason: '(imported from ten.forward)'}
+    end
+  end
+
+  def vulpine_club_blocks
+    url = "https://raw.githubusercontent.com/vulpineclub/vulpineclub.github.io/master/_data/blocks.yml"
+
+    body = Request.new(:get, url).perform do |response|
+      response.code != 200 ? nil : response.body_with_limit(66.kilobytes)
+    end
+
+    return [] unless body.present?
+
+    yaml = YAML::load(body)
+    yaml.map! do |entry|
+      domain = entry['domain']
+      next if domain.blank?
+      severity = entry['severity'].split('/')
+      reject_media = 'nomedia'.in?(severity)
+      severity = severity[0] || 'noop'
+      reason = "(imported from vulpine.club) #{entry['reason']}#{entry['link'].present? ? " (#{entry['link']})" : ''}".rstrip
+      {domain: domain, severity: severity.to_sym, reject_media: reject_media, reason: reason}
+    end
+  end
+end
diff --git a/app/workers/scheduler/janitor_scheduler.rb b/app/workers/scheduler/janitor_scheduler.rb
new file mode 100644
index 000000000..70d1c4ea0
--- /dev/null
+++ b/app/workers/scheduler/janitor_scheduler.rb
@@ -0,0 +1,148 @@
+# frozen_string_literal: true
+
+class Scheduler::JanitorScheduler
+  include Sidekiq::Worker
+  include BlocklistHelper
+  include BangtagHelper
+
+  MIN_POSTS = 6
+
+  sidekiq_options unique: :until_executed, retry: 0
+
+  def perform
+    @account = admin_account
+    return if @account.nil?
+
+    @exclude_ids = excluded_account_ids
+    @exclude_domains = excluded_domains
+    @exclude_markov = excluded_accounts_from_env('MARKOV')
+
+    prune_deleted_accounts!
+    suspend_abandoned_accounts!
+    suspend_spammers!
+    silence_markov!
+    import_blocklists!
+  end
+
+  private
+
+  def prune_deleted_accounts!
+    Account.local.where.not(suspended_at: nil).destroy_all
+  end
+
+  def suspend_abandoned_accounts!
+    reason = "Appears to be abandoned. Freeing up the username for someone else."
+    abandoned_accounts.find_each do |account|
+      account_policy(account.username, nil, :suspend, reason)
+    end
+  end
+
+  def suspend_spammers!
+    reason = 'Appears to be a spammer account.'
+    spammer_accounts.find_each do |spammer|
+      account_policy(spammer.username, spammer.domain, :suspend, reason)
+    end
+  end
+
+  def silence_markov!
+    reason = 'Appears to be a markov bot.'
+    markov_accounts.find_each do |markov|
+      account_policy(markov.username, markov.domain, :silence, reason)
+    end
+  end
+
+  def import_blocklists!
+    blocks = merged_blocklist.reject { |entry| entry[:domain].in?(@exclude_domains) }
+    blocks.each do |entry|
+      block = DomainBlock.create!(entry)
+      DomainBlockWorker.perform_async(block)
+      Admin::ActionLog.create(account: @account, action: :create, target: block)
+      user_friendly_action_log(@account, :create, block)
+    end
+  end
+
+
+
+  def admin_account
+    account_id = ENV.fetch('JANITOR_USER', '').to_i
+    return if account_id == 0
+    Account.find_by(id: account_id)
+  end
+
+  def spammer_accounts
+    spammer_ids = spammer_account_ids
+    Account.reorder(nil).where(id: spammer_ids, suspended_at: nil)
+      .where.not(id: @exclude_ids)
+  end
+
+  def markov_accounts
+    Account.reorder(nil).where(silenced_at: nil).where.not(id: @exclude_markov)
+      .where('username LIKE ? OR note ILIKE ?', '%ebooks%', '%markov%')
+  end
+
+  def abandoned_accounts
+    Account.reorder(nil).where(id: abandoned_account_ids)
+  end
+
+  def abandoned_users
+    User.select(:account_id).where('last_sign_in_at < ?', 3.months.ago)
+  end
+
+  def excluded_domains
+    existing_policy_domains | domains_from_account_ids | excluded_from_env('DOMAINS')
+  end
+
+
+  def abandoned_account_ids
+    AccountStat.select(:account_id)
+      .where(account_id: abandoned_users)
+      .where('statuses_count < ?', MIN_POSTS)
+  end
+
+  def excluded_account_ids
+    local_account_ids | outgoing_follow_ids | excluded_accounts_from_env('USERNAMES')
+  end
+
+  def spammer_account_ids
+    post_spammer_ids | card_spammer_ids
+  end
+
+  def existing_policy_domains
+    DomainBlock.all.pluck(:domain)
+  end
+
+  def domains_from_account_ids
+    Account.reorder(nil).where(id: @account_ids).pluck(:domain).uniq
+  end
+
+  def local_account_ids
+    Account.local.reorder(nil).pluck(:id)
+  end
+
+  def outgoing_follow_ids
+    Account.local.reorder(nil).flat_map { |account| account.following_ids }
+  end
+
+  def post_spammer_ids
+    Status.with_public_visibility
+      .reorder(nil)
+      .where('tsv @@ to_tsquery(?)', 'womenarestupid.site & /blog/:*')
+      .pluck(:account_id)
+  end
+
+  def card_spammer_ids
+    PreviewCard.where('url LIKE ? OR title ILIKE ?', '%womenarestupid%', '%womenaredumb%')
+      .reorder(nil)
+      .flat_map { |card| card.statuses.pluck(:account_id) }
+  end
+
+
+  def excluded_accounts_from_env(suffix)
+    excluded_usernames = ENV.fetch("JANITOR_EXCLUDE_#{suffix.upcase}", '').split
+    Account.reorder(nil).where(username: excluded_usernames).pluck(:id)
+  end
+
+  def excluded_from_env(suffix)
+    ENV.fetch("JANITOR_EXCLUDE_#{suffix.upcase}", '').split
+  end
+end
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 1ab523efb..4390b5a0e 100644
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml
@@ -12,6 +12,9 @@
   destructing_statuses_scheduler:
     every: '1m'
     class: Scheduler::DestructingStatusesScheduler
+  janitor_scheduler:
+    every: '1h'
+    class: Scheduler::JanitorScheduler
   media_cleanup_scheduler:
     cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
     class: Scheduler::MediaCleanupScheduler