diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2019-09-18 12:53:13 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-09-18 12:53:13 +0200 |
commit | 4f6af87906175d9ea802ef0c6f050388eac890fa (patch) | |
tree | 8bf6f80bd210420debb05767a9a616a17bb42254 /app | |
parent | 577706987d2e09e598130d37fb9a52cd4a6510ea (diff) |
Change spam check to apply to local accounts and add a threshold (#11806)
Instead of detecting spam on first duplicate message, add a threshold of 5 such messages to reduce false positives
Diffstat (limited to 'app')
-rw-r--r-- | app/lib/activitypub/activity/create.rb | 10 | ||||
-rw-r--r-- | app/lib/spam_check.rb | 46 | ||||
-rw-r--r-- | app/services/process_mentions_service.rb | 5 |
3 files changed, 44 insertions, 17 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index dea7fd43c..e69193b71 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -408,15 +408,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity end def check_for_spam - spam_check = SpamCheck.new(@status) - - return if spam_check.skip? - - if spam_check.spam? - spam_check.flag! - else - spam_check.remember! - end + SpamCheck.perform(@status) end def forward_for_reply diff --git a/app/lib/spam_check.rb b/app/lib/spam_check.rb index 0cf1b8790..441697364 100644 --- a/app/lib/spam_check.rb +++ b/app/lib/spam_check.rb @@ -4,9 +4,25 @@ class SpamCheck include Redisable include ActionView::Helpers::TextHelper + # Threshold over which two Nilsimsa values are considered + # to refer to the same text NILSIMSA_COMPARE_THRESHOLD = 95 - NILSIMSA_MIN_SIZE = 10 - EXPIRE_SET_AFTER = 1.week.seconds + + # Nilsimsa doesn't work well on small inputs, so below + # this size, we check only for exact matches with MD5 + NILSIMSA_MIN_SIZE = 10 + + # How long to keep the trail of digests between updates, + # there is no reason to store it forever + EXPIRE_SET_AFTER = 1.week.seconds + + # How many digests to keep in an account's trail. If it's + # too small, spam could rotate around different message templates + MAX_TRAIL_SIZE = 10 + + # How many detected duplicates to allow through before + # considering the message as spam + THRESHOLD = 5 def initialize(status) @account = status.account @@ -21,9 +37,9 @@ class SpamCheck if insufficient_data? false elsif nilsimsa? - any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD } + digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD } else - any_other_digest?('md5') { |_, other_digest| other_digest == digest } + digests_over_threshold?('md5') { |_, other_digest| other_digest == digest } end end @@ -38,7 +54,7 @@ class SpamCheck # get the correct status ID back, we have to save it in the string value redis.zadd(redis_key, @status.id, digest_with_algorithm) - redis.zremrangebyrank(redis_key, '0', '-10') + redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1)) redis.expire(redis_key, EXPIRE_SET_AFTER) end @@ -78,6 +94,20 @@ class SpamCheck end end + class << self + def perform(status) + spam_check = new(status) + + return if spam_check.skip? + + if spam_check.spam? + spam_check.flag! + else + spam_check.remember! + end + end + end + private def disabled? @@ -149,14 +179,14 @@ class SpamCheck redis.zrange(redis_key, 0, -1) end - def any_other_digest?(filter_algorithm) - other_digests.any? do |record| + def digests_over_threshold?(filter_algorithm) + other_digests.select do |record| algorithm, other_digest, status_id = record.split(':') next unless algorithm == filter_algorithm yield algorithm, other_digest, status_id - end + end.size >= THRESHOLD end def matching_status_ids diff --git a/app/services/process_mentions_service.rb b/app/services/process_mentions_service.rb index 90dca9740..2f7a9e985 100644 --- a/app/services/process_mentions_service.rb +++ b/app/services/process_mentions_service.rb @@ -33,6 +33,7 @@ class ProcessMentionsService < BaseService end status.save! + check_for_spam(status) mentions.each { |mention| create_notification(mention) } end @@ -61,4 +62,8 @@ class ProcessMentionsService < BaseService def resolve_account_service ResolveAccountService.new end + + def check_for_spam(status) + SpamCheck.perform(status) + end end |