diff options
author | Thibaut Girka <thib@sitedethib.com> | 2019-09-18 17:25:56 +0200 |
---|---|---|
committer | Thibaut Girka <thib@sitedethib.com> | 2019-09-18 17:25:56 +0200 |
commit | 5cadb4723832b91068ee51955b9d4b1336502369 (patch) | |
tree | 2e915e53ee0d25ea63ee3910ae8ced44f3295e21 /app/lib | |
parent | ab646fac5f582fe9bef22d8b9a4995fbb4b42d7d (diff) | |
parent | d0c2c5278391b82ba7fa2f230bf237805ff61a0c (diff) |
Merge branch 'master' into glitch-soc/merge-upstream
Conflicts:
- app/controllers/auth/sessions_controller.rb
  Minor conflict due to glitch-soc's theming code
Diffstat (limited to 'app/lib')
-rw-r--r-- | app/lib/activitypub/activity/create.rb | 10 | ||||
-rw-r--r-- | app/lib/activitypub/activity/move.rb | 11 | ||||
-rw-r--r-- | app/lib/spam_check.rb | 46 |
3 files changed, 48 insertions, 19 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index dea7fd43c..e69193b71 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -408,15 +408,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity end def check_for_spam - spam_check = SpamCheck.new(@status) - - return if spam_check.skip? - - if spam_check.spam? - spam_check.flag! - else - spam_check.remember! - end + SpamCheck.perform(@status) end def forward_for_reply diff --git a/app/lib/activitypub/activity/move.rb b/app/lib/activitypub/activity/move.rb index d7a5f595c..6c6a2b967 100644 --- a/app/lib/activitypub/activity/move.rb +++ b/app/lib/activitypub/activity/move.rb @@ -10,10 +10,13 @@ class ActivityPub::Activity::Move < ActivityPub::Activity target_account = ActivityPub::FetchRemoteAccountService.new.call(target_uri) - return if target_account.nil? || !target_account.also_known_as.include?(origin_account.uri) + if target_account.nil? || target_account.suspended? || !target_account.also_known_as.include?(origin_account.uri) + unmark_as_processing! + return + end # In case for some reason we didn't have a redirect for the profile already, set it - origin_account.update(moved_to_account: target_account) if origin_account.moved_to_account_id.nil? + origin_account.update(moved_to_account: target_account) # Initiate a re-follow for each follower origin_account.followers.local.select(:id).find_in_batches do |follower_accounts| @@ -40,4 +43,8 @@ class ActivityPub::Activity::Move < ActivityPub::Activity def mark_as_processing! redis.setex("move_in_progress:#{@account.id}", PROCESSING_COOLDOWN, true) end + + def unmark_as_processing! 
+ redis.del("move_in_progress:#{@account.id}") + end end diff --git a/app/lib/spam_check.rb b/app/lib/spam_check.rb index 0cf1b8790..441697364 100644 --- a/app/lib/spam_check.rb +++ b/app/lib/spam_check.rb @@ -4,9 +4,25 @@ class SpamCheck include Redisable include ActionView::Helpers::TextHelper + # Threshold over which two Nilsimsa values are considered + # to refer to the same text NILSIMSA_COMPARE_THRESHOLD = 95 - NILSIMSA_MIN_SIZE = 10 - EXPIRE_SET_AFTER = 1.week.seconds + + # Nilsimsa doesn't work well on small inputs, so below + # this size, we check only for exact matches with MD5 + NILSIMSA_MIN_SIZE = 10 + + # How long to keep the trail of digests between updates, + # there is no reason to store it forever + EXPIRE_SET_AFTER = 1.week.seconds + + # How many digests to keep in an account's trail. If it's + # too small, spam could rotate around different message templates + MAX_TRAIL_SIZE = 10 + + # How many detected duplicates to allow through before + # considering the message as spam + THRESHOLD = 5 def initialize(status) @account = status.account @@ -21,9 +37,9 @@ class SpamCheck if insufficient_data? false elsif nilsimsa? 
- any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD } + digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD } else - any_other_digest?('md5') { |_, other_digest| other_digest == digest } + digests_over_threshold?('md5') { |_, other_digest| other_digest == digest } end end @@ -38,7 +54,7 @@ class SpamCheck # get the correct status ID back, we have to save it in the string value redis.zadd(redis_key, @status.id, digest_with_algorithm) - redis.zremrangebyrank(redis_key, '0', '-10') + redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1)) redis.expire(redis_key, EXPIRE_SET_AFTER) end @@ -78,6 +94,20 @@ class SpamCheck end end + class << self + def perform(status) + spam_check = new(status) + + return if spam_check.skip? + + if spam_check.spam? + spam_check.flag! + else + spam_check.remember! + end + end + end + private def disabled? @@ -149,14 +179,14 @@ class SpamCheck redis.zrange(redis_key, 0, -1) end - def any_other_digest?(filter_algorithm) - other_digests.any? do |record| + def digests_over_threshold?(filter_algorithm) + other_digests.select do |record| algorithm, other_digest, status_id = record.split(':') next unless algorithm == filter_algorithm yield algorithm, other_digest, status_id - end + end.size >= THRESHOLD end def matching_status_ids |