about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- app/lib/activitypub/activity/create.rb 10
-rw-r--r-- app/lib/spam_check.rb 46
-rw-r--r-- app/services/process_mentions_service.rb 5
-rw-r--r-- spec/lib/spam_check_spec.rb 34
4 files changed, 66 insertions, 29 deletions
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index dea7fd43c..e69193b71 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -408,15 +408,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
   end
 
   def check_for_spam
-    spam_check = SpamCheck.new(@status)
-
-    return if spam_check.skip?
-
-    if spam_check.spam?
-      spam_check.flag!
-    else
-      spam_check.remember!
-    end
+    SpamCheck.perform(@status)
   end
 
   def forward_for_reply
diff --git a/app/lib/spam_check.rb b/app/lib/spam_check.rb
index 0cf1b8790..441697364 100644
--- a/app/lib/spam_check.rb
+++ b/app/lib/spam_check.rb
@@ -4,9 +4,25 @@ class SpamCheck
   include Redisable
   include ActionView::Helpers::TextHelper
 
+  # Threshold over which two Nilsimsa values are considered
+  # to refer to the same text
   NILSIMSA_COMPARE_THRESHOLD = 95
-  NILSIMSA_MIN_SIZE          = 10
-  EXPIRE_SET_AFTER           = 1.week.seconds
+
+  # Nilsimsa doesn't work well on small inputs, so below
+  # this size, we check only for exact matches with MD5
+  NILSIMSA_MIN_SIZE = 10
+
+  # How long to keep the trail of digests between updates,
+  # there is no reason to store it forever
+  EXPIRE_SET_AFTER = 1.week.seconds
+
+  # How many digests to keep in an account's trail. If it's
+  # too small, spam could rotate around different message templates
+  MAX_TRAIL_SIZE = 10
+
+  # How many detected duplicates to allow through before
+  # considering the message as spam
+  THRESHOLD = 5
 
   def initialize(status)
     @account = status.account
@@ -21,9 +37,9 @@ class SpamCheck
     if insufficient_data?
       false
     elsif nilsimsa?
-      any_other_digest?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
+      digests_over_threshold?('nilsimsa') { |_, other_digest| nilsimsa_compare_value(digest, other_digest) >= NILSIMSA_COMPARE_THRESHOLD }
     else
-      any_other_digest?('md5') { |_, other_digest| other_digest == digest }
+      digests_over_threshold?('md5') { |_, other_digest| other_digest == digest }
     end
   end
 
@@ -38,7 +54,7 @@ class SpamCheck
     # get the correct status ID back, we have to save it in the string value
 
     redis.zadd(redis_key, @status.id, digest_with_algorithm)
-    redis.zremrangebyrank(redis_key, '0', '-10')
+    redis.zremrangebyrank(redis_key, 0, -(MAX_TRAIL_SIZE + 1))
     redis.expire(redis_key, EXPIRE_SET_AFTER)
   end
 
@@ -78,6 +94,20 @@ class SpamCheck
     end
   end
 
+  class << self
+    def perform(status)
+      spam_check = new(status)
+
+      return if spam_check.skip?
+
+      if spam_check.spam?
+        spam_check.flag!
+      else
+        spam_check.remember!
+      end
+    end
+  end
+
   private
 
   def disabled?
@@ -149,14 +179,14 @@ class SpamCheck
     redis.zrange(redis_key, 0, -1)
   end
 
-  def any_other_digest?(filter_algorithm)
-    other_digests.any? do |record|
+  def digests_over_threshold?(filter_algorithm)
+    other_digests.select do |record|
       algorithm, other_digest, status_id = record.split(':')
 
       next unless algorithm == filter_algorithm
 
       yield algorithm, other_digest, status_id
-    end
+    end.size >= THRESHOLD
   end
 
   def matching_status_ids
diff --git a/app/services/process_mentions_service.rb b/app/services/process_mentions_service.rb
index 90dca9740..2f7a9e985 100644
--- a/app/services/process_mentions_service.rb
+++ b/app/services/process_mentions_service.rb
@@ -33,6 +33,7 @@ class ProcessMentionsService < BaseService
     end
 
     status.save!
+    check_for_spam(status)
 
     mentions.each { |mention| create_notification(mention) }
   end
@@ -61,4 +62,8 @@ class ProcessMentionsService < BaseService
   def resolve_account_service
     ResolveAccountService.new
   end
+
+  def check_for_spam(status)
+    SpamCheck.perform(status)
+  end
 end
diff --git a/spec/lib/spam_check_spec.rb b/spec/lib/spam_check_spec.rb
index 9e0989216..4cae46111 100644
--- a/spec/lib/spam_check_spec.rb
+++ b/spec/lib/spam_check_spec.rb
@@ -86,23 +86,33 @@ RSpec.describe SpamCheck do
     end
 
     it 'returns true for duplicate statuses to the same recipient' do
-      status1 = status_with_html('@alice Hello')
-      described_class.new(status1).remember!
+      described_class::THRESHOLD.times do
+        status1 = status_with_html('@alice Hello')
+        described_class.new(status1).remember!
+      end
+
       status2 = status_with_html('@alice Hello')
       expect(described_class.new(status2).spam?).to be true
     end
 
     it 'returns true for duplicate statuses to different recipients' do
-      status1 = status_with_html('@alice Hello')
-      described_class.new(status1).remember!
+      described_class::THRESHOLD.times do
+        status1 = status_with_html('@alice Hello')
+        described_class.new(status1).remember!
+      end
+
       status2 = status_with_html('@bob Hello')
       expect(described_class.new(status2).spam?).to be true
     end
 
     it 'returns true for nearly identical statuses with random numbers' do
       source_text = 'Sodium, atomic number 11, was first isolated by Humphry Davy in 1807. A chemical component of salt, he named it Na in honor of the saltiest region on earth, North America.'
-      status1 = status_with_html('@alice ' + source_text + ' 1234')
-      described_class.new(status1).remember!
+
+      described_class::THRESHOLD.times do
+        status1 = status_with_html('@alice ' + source_text + ' 1234')
+        described_class.new(status1).remember!
+      end
+
       status2 = status_with_html('@bob ' + source_text + ' 9568')
       expect(described_class.new(status2).spam?).to be true
     end
@@ -140,9 +150,9 @@ RSpec.describe SpamCheck do
     let(:redis_key) { spam_check.send(:redis_key) }
 
     it 'remembers' do
-      expect do
-        spam_check.remember!
-      end.to change { Redis.current.exists(redis_key) }.from(false).to(true)
+      expect(Redis.current.exists(redis_key)).to be true
+      spam_check.remember!
+      expect(Redis.current.exists(redis_key)).to be true
     end
   end
 
@@ -156,9 +166,9 @@ RSpec.describe SpamCheck do
     end
 
     it 'resets' do
-      expect do
-        spam_check.reset!
-      end.to change { Redis.current.exists(redis_key) }.from(true).to(false)
+      expect(Redis.current.exists(redis_key)).to be true
+      spam_check.reset!
+      expect(Redis.current.exists(redis_key)).to be false
     end
   end