about summary refs log tree commit diff
path: root/spec/lib
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2019-07-13 16:45:50 +0200
committer GitHub <noreply@github.com> 2019-07-13 16:45:50 +0200
commit 6ff67be0f6e79ec403e08c69717ee8c89451c70e (patch)
tree 1746025bdf7b3113ddb373e9fb137877952d2503 /spec/lib
parent 402302776c82c3853e723fe0c0c4dc99c69da3d9 (diff)
Add a spam check (#11217)
* Add a spam check

* Use Nilsimsa to generate locality-sensitive hashes and compare using Levenshtein distance

* Add more tests

* Add exemption when the message is a reply to something that mentions the sender

* Use Nilsimsa Compare Value instead of Levenshtein distance

* Use MD5 for messages shorter than 10 characters

* Add message to automated report, do not add non-public statuses to
automated report, add trust level to accounts and make unsilencing
raise the trust level to prevent repeated spam checks on that account

* Expire spam check data after 3 months

* Add support for local statuses, reduce expiration to 1 week, always create a report

* Add content warnings to the spam check and exempt empty statuses

* Change Nilsimsa threshold to 95 and make sure removed statuses are removed from the spam check

* Add all matched statuses into automatic report
Diffstat (limited to 'spec/lib')
-rw-r--r-- spec/lib/spam_check_spec.rb 160
1 files changed, 160 insertions, 0 deletions
diff --git a/spec/lib/spam_check_spec.rb b/spec/lib/spam_check_spec.rb
new file mode 100644
index 000000000..c722dc642
--- /dev/null
+++ b/spec/lib/spam_check_spec.rb
@@ -0,0 +1,160 @@
+require 'rails_helper'
+
+RSpec.describe SpamCheck do # examples covering SpamCheck's text hashing, duplicate detection, exemptions and flagging
+  let!(:sender) { Fabricate(:account) } # author of every status built by status_with_html
+  let!(:alice) { Fabricate(:account, username: 'alice') } # mention target for '@alice'
+  let!(:bob) { Fabricate(:account, username: 'bob') } # mention target for '@bob'
+
+  def status_with_html(text, options = {}) # helper: posts a status as sender, then rewrites it to look like a non-local one
+    status = PostStatusService.new.call(sender, { text: text }.merge(options)) # extra options (spoiler_text:, thread: below) are handed to the service alongside the text
+    status.update_columns(text: Formatter.instance.format(status), local: false) # stored text becomes Formatter's rendering (HTML, going by the helper's name); row is marked non-local
+    status
+  end
+
+  describe '#hashable_text' do
+    it 'removes mentions from HTML for remote statuses' do
+      status = status_with_html('@alice Hello')
+      expect(described_class.new(status).hashable_text).to eq 'hello' # mention is gone and the expected text is lower-case
+    end
+
+    it 'removes mentions from text for local statuses' do
+      status = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?") # created directly, without the helper's update_columns rewrite
+      expect(described_class.new(status).hashable_text).to eq 'hey , how are you?' # only the mention is stripped; the space and comma around it remain
+    end
+  end
+
+  describe '#insufficient_data?' do
+    it 'returns true when there is no text' do
+      status = status_with_html('@alice') # nothing but a mention
+      expect(described_class.new(status).insufficient_data?).to be true
+    end
+
+    it 'returns false when there is text' do
+      status = status_with_html('@alice h') # a single character besides the mention is enough
+      expect(described_class.new(status).insufficient_data?).to be false
+    end
+  end
+
+  describe '#digest' do
+    it 'returns a string' do
+      status = status_with_html('@alice Hello world')
+      expect(described_class.new(status).digest).to be_a String # only the type is checked, not the digest value
+    end
+  end
+
+  describe '#spam?' do # most examples call remember! on a first status, then ask spam? about a second one by the same sender
+    it 'returns false for a unique status' do
+      status = status_with_html('@alice Hello') # no remember! call precedes the check in this example
+      expect(described_class.new(status).spam?).to be false
+    end
+
+    it 'returns false for different statuses to the same recipient' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Are you available to talk?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for statuses with different content warnings' do
+      status1 = status_with_html('@alice Are you available to talk?')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Are you available to talk?', spoiler_text: 'This is a completely different matter than what I was talking about previously, I swear!') # same body as status1; only the content warning is added
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for different statuses to different recipients' do
+      status1 = status_with_html('@alice How is it going?')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Are you okay?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for very short different statuses to different recipients' do
+      status1 = status_with_html('@alice 🙄') # body is a single emoji
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Huh?')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns false for statuses with no text' do
+      status1 = status_with_html('@alice') # both statuses are mention-only
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob')
+      expect(described_class.new(status2).spam?).to be false
+    end
+
+    it 'returns true for duplicate statuses to the same recipient' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@alice Hello') # identical input to status1
+      expect(described_class.new(status2).spam?).to be true
+    end
+
+    it 'returns true for duplicate statuses to different recipients' do
+      status1 = status_with_html('@alice Hello')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob Hello') # same body as status1, different mention
+      expect(described_class.new(status2).spam?).to be true
+    end
+
+    it 'returns true for nearly identical statuses with random numbers' do
+      source_text = 'Sodium, atomic number 11, was first isolated by Humphry Davy in 1807. A chemical component of salt, he named it Na in honor of the saltiest region on earth, North America.'
+      status1 = status_with_html('@alice ' + source_text + ' 1234')
+      described_class.new(status1).remember!
+      status2 = status_with_html('@bob ' + source_text + ' 9568') # differs from status1 only in the mention and the trailing four digits
+      expect(described_class.new(status2).spam?).to be true
+    end
+  end
+
+  describe '#skip?' do
+    it 'returns true when the sender is already silenced' do
+      status = status_with_html('@alice Hello')
+      sender.silence! # silenced after the status already exists
+      expect(described_class.new(status).skip?).to be true
+    end
+
+    it 'returns true when the mentioned person follows the sender' do
+      status = status_with_html('@alice Hello')
+      alice.follow!(sender) # alice is the only account mentioned
+      expect(described_class.new(status).skip?).to be true
+    end
+
+    it 'returns false when even one mentioned person doesn\'t follow the sender' do
+      status = status_with_html('@alice @bob Hello')
+      alice.follow!(sender) # bob is mentioned too but no follow is set up for him
+      expect(described_class.new(status).skip?).to be false
+    end
+
+    it 'returns true when the sender is replying to a status that mentions the sender' do
+      parent = PostStatusService.new.call(alice, text: "Hey @#{sender.username}, how are you?") # alice's status mentions sender
+      status = status_with_html('@alice @bob Hello', thread: parent) # reply to parent; no follows are set up in this example
+      expect(described_class.new(status).skip?).to be true
+    end
+  end
+
+  describe '#remember!' do
+    pending # placeholder: no examples written for remember! yet
+  end
+
+  describe '#flag!' do
+    let!(:status1) { status_with_html('@alice General Kenobi you are a bold one') }
+    let!(:status2) { status_with_html('@alice @bob General Kenobi, you are a bold one') } # differs from status1 by one comma and an extra mention
+
+    before do
+      described_class.new(status1).remember! # status1 is recorded first
+      described_class.new(status2).flag! # the examples below check the outcome of flagging status2
+    end
+
+    it 'silences the account' do
+      expect(sender.silenced?).to be true
+    end
+
+    it 'creates a report about the account' do
+      expect(sender.targeted_reports.unresolved.count).to eq 1 # exactly one unresolved report
+    end
+
+    it 'attaches both matching statuses to the report' do
+      expect(sender.targeted_reports.first.status_ids).to include(status1.id, status2.id) # the remembered status is reported along with the flagged one
+    end
+  end
+end