about summary refs log tree commit diff
path: root/app/lib
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2021-04-11 11:22:50 +0200
committerGitHub <noreply@github.com>2021-04-11 11:22:50 +0200
commit619fad6cf8078ea997554081febe850404bee73c (patch)
treed710dec1c628bedddb5fe88858b355fd78c9acc0 /app/lib
parent7183d9a113f67bb6b0f21bd2b3ba2f0d94b6134c (diff)
Remove spam check and dependency on nilsimsa gem (#16011)
Diffstat (limited to 'app/lib')
-rw-r--r--app/lib/activitypub/activity/create.rb5
-rw-r--r--app/lib/spam_check.rb198
2 files changed, 0 insertions, 203 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index 0fa306cdd..9f6dd9ce0 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -88,7 +88,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
 
     resolve_thread(@status)
     fetch_replies(@status)
-    check_for_spam
     distribute(@status)
     forward_for_reply
   end
@@ -492,10 +491,6 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
     Tombstone.exists?(uri: object_uri)
   end
 
-  def check_for_spam
-    SpamCheck.perform(@status)
-  end
-
   def forward_for_reply
     return unless @status.distributable? && @json['signature'].present? && reply_to_local?
 
diff --git a/app/lib/spam_check.rb b/app/lib/spam_check.rb
deleted file mode 100644
index dcb2db9ca..000000000
--- a/app/lib/spam_check.rb
+++ /dev/null
@@ -1,198 +0,0 @@
-# frozen_string_literal: true
-
-class SpamCheck
-  include Redisable
-  include ActionView::Helpers::TextHelper
-
-  # Threshold over which two Nilsimsa values are considered
-  # to refer to the same text
-  NILSIMSA_COMPARE_THRESHOLD = 95
-
-  # Nilsimsa doesn't work well on small inputs, so below
-  # this size, we check only for exact matches with MD5
-  NILSIMSA_MIN_SIZE = 10
-
-  # How long to keep the trail of digests between updates,
-  # there is no reason to store it forever
-  EXPIRE_SET_AFTER = 1.week.seconds
-
-  # How many digests to keep in an account's trail. If it's
-  # too small, spam could rotate around different message templates
-  MAX_TRAIL_SIZE = 10
-
-  # How many detected duplicates to allow through before
-  # considering the message as spam
-  THRESHOLD = 5
-
-  def initialize(status)
-    @account = status.account
-    @status  = status
-  end
-
-  def skip?
-    disabled? || already_flagged? || trusted? || no_unsolicited_mentions? || solicited_reply?
-  end
-
-  def spam?
-    if insufficient_data?
-      false
-    elsif nilsimsa?
-      digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
-    else
-      digests_over_threshold?('md5') { |_, other_digest| other_digest == digest }
-    end
-  end
-
-  def flag!
-    auto_report_status!
-  end
-
-  def remember!
-    # The scores in sorted sets don't actually have enough bits to hold an exact
-    # value of our snowflake IDs, so we use it only for its ordering property. To
-    # get the correct status ID back, we have to save it in the string value
-
-    redis.zadd(redis_key, @status.id, digest_with_algorithm)
-    redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1))
-    redis.expire(redis_key, EXPIRE_SET_AFTER)
-  end
-
-  def reset!
-    redis.del(redis_key)
-  end
-
-  def hashable_text
-    return @hashable_text if defined?(@hashable_text)
-
-    @hashable_text = @status.text
-    @hashable_text = remove_mentions(@hashable_text)
-    @hashable_text = strip_tags(@hashable_text) unless @status.local?
-    @hashable_text = normalize_unicode(@status.spoiler_text + ' ' + @hashable_text)
-    @hashable_text = remove_whitespace(@hashable_text)
-  end
-
-  def insufficient_data?
-    hashable_text.blank?
-  end
-
-  def digest
-    @digest ||= begin
-      if nilsimsa?
-        Nilsimsa.new(hashable_text).hexdigest
-      else
-        Digest::MD5.hexdigest(hashable_text)
-      end
-    end
-  end
-
-  def digest_with_algorithm
-    if nilsimsa?
-      ['nilsimsa', digest, @status.id].join(':')
-    else
-      ['md5', digest, @status.id].join(':')
-    end
-  end
-
-  class << self
-    def perform(status)
-      spam_check = new(status)
-
-      return if spam_check.skip?
-
-      if spam_check.spam?
-        spam_check.flag!
-      else
-        spam_check.remember!
-      end
-    end
-  end
-
-  private
-
-  def disabled?
-    !Setting.spam_check_enabled
-  end
-
-  def remove_mentions(text)
-    return text.gsub(Account::MENTION_RE, '') if @status.local?
-
-    Nokogiri::HTML.fragment(text).tap do |html|
-      mentions = @status.mentions.map { |mention| ActivityPub::TagManager.instance.url_for(mention.account) }
-
-      html.traverse do |element|
-        element.unlink if element.name == 'a' && mentions.include?(element['href'])
-      end
-    end.to_s
-  end
-
-  def normalize_unicode(text)
-    text.unicode_normalize(:nfkc).downcase
-  end
-
-  def remove_whitespace(text)
-    text.gsub(/\s+/, ' ').strip
-  end
-
-  def auto_report_status!
-    status_ids = Status.where(visibility: %i(public unlisted)).where(id: matching_status_ids).pluck(:id) + [@status.id] if @status.distributable?
-    ReportService.new.call(Account.representative, @account, status_ids: status_ids, comment: I18n.t('spam_check.spam_detected'))
-  end
-
-  def already_flagged?
-    @account.silenced? || @account.targeted_reports.unresolved.where(account_id: -99).exists?
-  end
-
-  def trusted?
-    @account.trust_level > Account::TRUST_LEVELS[:untrusted] || (@account.local? && @account.user_staff?)
-  end
-
-  def no_unsolicited_mentions?
-    @status.mentions.all? { |mention| mention.silent? || (!@account.local? && !mention.account.local?) || mention.account.following?(@account) }
-  end
-
-  def solicited_reply?
-    !@status.thread.nil? && @status.thread.mentions.where(account: @account).exists?
-  end
-
-  def nilsimsa_compare_value(first, second)
-    first  = [first].pack('H*')
-    second = [second].pack('H*')
-    bits   = 0
-
-    0.upto(31) do |i|
-      bits += Nilsimsa::POPC[255 & (first[i].ord ^ second[i].ord)].ord
-    end
-
-    128 - bits # -128 <= Nilsimsa Compare Value <= 128
-  end
-
-  def nilsimsa?
-    hashable_text.size > NILSIMSA_MIN_SIZE
-  end
-
-  def other_digests
-    redis.zrange(redis_key, 0, -1)
-  end
-
-  def digests_over_threshold?(filter_algorithm)
-    other_digests.select do |record|
-      algorithm, other_digest, status_id = record.split(':')
-
-      next unless algorithm == filter_algorithm
-
-      yield algorithm, other_digest, status_id
-    end.size >= THRESHOLD
-  end
-
-  def matching_status_ids
-    if nilsimsa?
-      other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('nilsimsa') && nilsimsa_compare_value(digest, record.split(':')[1]) >= NILSIMSA_COMPARE_THRESHOLD }
-    else
-      other_digests.filter_map { |record| record.split(':')[2] if record.start_with?('md5') && record.split(':')[1] == digest }
-    end
-  end
-
-  def redis_key
-    @redis_key ||= "spam_check:#{@account.id}"
-  end
-end