From 431503bae2dc6e12bdade7d5d20f707112c2f7c2 Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 6 Nov 2017 16:48:36 -0600 Subject: Also run the keyword matcher on a status' tags. #208. --- app/lib/feed_manager.rb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'app/lib') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 3b16b5d52..414632a8a 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -173,10 +173,14 @@ class FeedManager def keyword_filter?(status, matcher) should_filter = matcher =~ status.text should_filter ||= matcher =~ status.spoiler_text + should_filter ||= status.tags.find_each.any? { |t| matcher =~ t.name } if status.reblog? - should_filter ||= matcher =~ status.reblog.text - should_filter ||= matcher =~ status.reblog.spoiler_text + reblog = status.reblog + + should_filter ||= matcher =~ reblog.text + should_filter ||= matcher =~ reblog.spoiler_text + should_filter ||= reblog.tags.find_each.any? { |t| matcher =~ t.name } end !!should_filter -- cgit From cb4ef24ac9d48e70648135f106fdc275dedf14fc Mon Sep 17 00:00:00 2001 From: David Yip Date: Wed, 15 Nov 2017 17:26:29 -0600 Subject: Match keyword mute filter on hashtags. #208. It is reasonable to expect someone to enter #foo to mute hashtag #foo. However, tags are recorded on statuses without the preceding #. To adjust for this, we build a separate tag matcher and use Tag::HASHTAG_RE to extract a hashtag from the hashtag syntax. --- app/lib/feed_manager.rb | 21 +++++----- app/models/glitch/keyword_mute.rb | 70 ++++++++++++++++++++++++--------- spec/models/glitch/keyword_mute_spec.rb | 4 +- 3 files changed, 66 insertions(+), 29 deletions(-) (limited to 'app/lib') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 414632a8a..e12a5c016 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -141,7 +141,7 @@ class FeedManager return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) - return true if keyword_filter?(status, Glitch::KeywordMute.matcher_for(receiver_id)) + return true if keyword_filter?(status, receiver_id) check_for_mutes = [status.account_id] check_for_mutes.concat(status.mentions.pluck(:account_id)) @@ -170,17 +170,20 @@ class FeedManager false end - def keyword_filter?(status, matcher) - should_filter = matcher =~ status.text - should_filter ||= matcher =~ status.spoiler_text - should_filter ||= status.tags.find_each.any? { |t| matcher =~ t.name } + def keyword_filter?(status, receiver_id) + text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) + tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) + + should_filter = text_matcher =~ status.text + should_filter ||= text_matcher =~ status.spoiler_text + should_filter ||= tag_matcher =~ status.tags if status.reblog? reblog = status.reblog - should_filter ||= matcher =~ reblog.text - should_filter ||= matcher =~ reblog.spoiler_text - should_filter ||= reblog.tags.find_each.any? { |t| matcher =~ t.name } + should_filter ||= text_matcher =~ reblog.text + should_filter ||= text_matcher =~ reblog.spoiler_text + should_filter ||= tag_matcher =~ status.tags end !!should_filter @@ -195,7 +198,7 @@ class FeedManager should_filter = Block.where(account_id: receiver_id, target_account_id: check_for_blocks).any? # Filter if it's from someone I blocked, in reply to someone I blocked, or mentioning someone I blocked should_filter ||= (status.account.silenced? && !Follow.where(account_id: receiver_id, target_account_id: status.account_id).exists?) # of if the account is silenced and I'm not following them - should_filter ||= keyword_filter?(status, Glitch::KeywordMute.matcher_for(receiver_id)) # or if the mention contains a muted keyword + should_filter ||= keyword_filter?(status, receiver_id) # or if the mention contains a muted keyword should_filter end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 009de1880..733dd0bc8 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -16,51 +16,85 @@ class Glitch::KeywordMute < ApplicationRecord validates_presence_of :keyword - after_commit :invalidate_cached_matcher + after_commit :invalidate_cached_matchers - def self.matcher_for(account_id) - Matcher.new(account_id) + def self.text_matcher_for(account_id) + TextMatcher.new(account_id) + end + + def self.tag_matcher_for(account_id) + TagMatcher.new(account_id) end private - def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:regex:#{account_id}") + def invalidate_cached_matchers + Rails.cache.delete(TextMatcher.cache_key(account_id)) + Rails.cache.delete(TagMatcher.cache_key(account_id)) end - class Matcher + class RegexpMatcher attr_reader :account_id attr_reader :regex def initialize(account_id) @account_id = account_id - regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } + regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text } @regex = /#{regex_text}/ end + protected + + def keywords + Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword) + end + + def boundary_regex_for_keyword(keyword) + sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' + eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' + + /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ + end + end + + class TextMatcher < RegexpMatcher + def self.cache_key(account_id) + format('keyword_mutes:regex:%s', account_id) + end + def =~(str) regex =~ str end private - def keywords - Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) - end - - def regex_text_for_account - kws = keywords.find_each.with_object([]) do |kw, a| - a << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : kw.keyword) + def make_regex_text + kws = keywords.map! do |whole_word, keyword| + whole_word ? boundary_regex_for_keyword(keyword) : keyword end Regexp.union(kws).source end + end - def boundary_regex_for_keyword(keyword) - sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' - eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' + class TagMatcher < RegexpMatcher + def self.cache_key(account_id) + format('keyword_mutes:tag:%s', account_id) + end - /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ + def =~(tags) + tags.pluck(:name).detect { |n| regex =~ n } + end + + private + + def make_regex_text + kws = keywords.map! do |whole_word, keyword| + term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword + whole_word ? boundary_regex_for_keyword(term) : term + end + + Regexp.union(kws).source end end end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index 9685c6493..e14af0e6a 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -4,8 +4,8 @@ RSpec.describe Glitch::KeywordMute, type: :model do let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } - describe '.matcher_for' do - let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } + describe '.text_matcher_for' do + let(:matcher) { Glitch::KeywordMute.text_matcher_for(alice.id) } describe 'with no mutes' do before do -- cgit From 08652baab018e65cfc4e1fbcfc909ff01f96ea60 Mon Sep 17 00:00:00 2001 From: David Yip Date: Wed, 15 Nov 2017 18:26:21 -0600 Subject: Replace =~ with #matches?. #208. =~ made sense when we were passing it through to a regex, but we're no longer doing that: TagMatcher looks at individual tags and returns a value that *looks* like what you get out of #=~ but really isn't that meaningful. Probably a good idea to not subvert convention like this and instead use a name with guessable intent. --- app/lib/feed_manager.rb | 14 +++++++------- app/models/glitch/keyword_mute.rb | 8 ++++---- spec/models/glitch/keyword_mute_spec.rb | 28 ++++++++++++++-------------- 3 files changed, 25 insertions(+), 25 deletions(-) (limited to 'app/lib') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index e12a5c016..a99606e1b 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -174,19 +174,19 @@ class FeedManager text_matcher = Glitch::KeywordMute.text_matcher_for(receiver_id) tag_matcher = Glitch::KeywordMute.tag_matcher_for(receiver_id) - should_filter = text_matcher =~ status.text - should_filter ||= text_matcher =~ status.spoiler_text - should_filter ||= tag_matcher =~ status.tags + should_filter = text_matcher.matches?(status.text) + should_filter ||= text_matcher.matches?(status.spoiler_text) + should_filter ||= tag_matcher.matches?(status.tags) if status.reblog? reblog = status.reblog - should_filter ||= text_matcher =~ reblog.text - should_filter ||= text_matcher =~ reblog.spoiler_text - should_filter ||= tag_matcher =~ status.tags + should_filter ||= text_matcher.matches?(reblog.text) + should_filter ||= text_matcher.matches?(reblog.spoiler_text) + should_filter ||= tag_matcher.matches?(status.tags) end - !!should_filter + should_filter end def filter_from_mentions?(status, receiver_id) diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 001be9201..a2481308f 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -62,8 +62,8 @@ class Glitch::KeywordMute < ApplicationRecord format('keyword_mutes:regex:text:%s', account_id) end - def =~(str) - regex =~ str + def matches?(str) + !!(regex =~ str) end private @@ -82,8 +82,8 @@ class Glitch::KeywordMute < ApplicationRecord format('keyword_mutes:regex:tag:%s', account_id) end - def =~(tags) - tags.pluck(:name).detect { |n| regex =~ n } + def matches?(tags) + tags.pluck(:name).any? { |n| regex =~ n } end private diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index e14af0e6a..bab276c26 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -13,7 +13,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do end it 'does not match' do - expect(matcher =~ 'This is a hot take').to be_falsy + expect(matcher.matches?('This is a hot take')).to be_falsy end end @@ -21,75 +21,75 @@ RSpec.describe Glitch::KeywordMute, type: :model do it 'does not match keywords set by a different account' do Glitch::KeywordMute.create!(account: bob, keyword: 'take') - expect(matcher =~ 'This is a hot take').to be_falsy + expect(matcher.matches?('This is a hot take')).to be_falsy end it 'does not match if no keywords match the status text' do Glitch::KeywordMute.create!(account: alice, keyword: 'cold') - expect(matcher =~ 'This is a hot take').to be_falsy + expect(matcher.matches?('This is a hot take')).to be_falsy end it 'considers word boundaries when matching' do Glitch::KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) - expect(matcher =~ 'bobcats').to be_falsy + expect(matcher.matches?('bobcats')).to be_falsy end it 'matches substrings if whole_word is false' do Glitch::KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) - expect(matcher =~ 'This is a shiitake mushroom').to be_truthy + expect(matcher.matches?('This is a shiitake mushroom')).to be_truthy end it 'matches keywords at the beginning of the text' do Glitch::KeywordMute.create!(account: alice, keyword: 'take') - expect(matcher =~ 'Take this').to be_truthy + expect(matcher.matches?('Take this')).to be_truthy end it 'matches keywords at the end of the text' do Glitch::KeywordMute.create!(account: alice, keyword: 'take') - expect(matcher =~ 'This is a hot take').to be_truthy + expect(matcher.matches?('This is a hot take')).to be_truthy end it 'matches if at least one keyword case-insensitively matches the text' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') - expect(matcher =~ 'This is a HOT take').to be_truthy + expect(matcher.matches?('This is a HOT take')).to be_truthy end it 'maintains case-insensitivity when combining keywords into a single matcher' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') Glitch::KeywordMute.create!(account: alice, keyword: 'cold') - expect(matcher =~ 'This is a HOT take').to be_truthy + expect(matcher.matches?('This is a HOT take')).to be_truthy end it 'matches keywords surrounded by non-alphanumeric ornamentation' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') - expect(matcher =~ '(hot take)').to be_truthy + expect(matcher.matches?('(hot take)')).to be_truthy end it 'escapes metacharacters in keywords' do Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)') - expect(matcher =~ '(hot take)').to be_truthy + expect(matcher.matches?('(hot take)')).to be_truthy end it 'uses case-folding rules appropriate for more than just English' do Glitch::KeywordMute.create!(account: alice, keyword: 'großeltern') - expect(matcher =~ 'besuch der grosseltern').to be_truthy + expect(matcher.matches?('besuch der grosseltern')).to be_truthy end it 'matches keywords that are composed of multiple words' do Glitch::KeywordMute.create!(account: alice, keyword: 'a shiitake') - expect(matcher =~ 'This is a shiitake').to be_truthy - expect(matcher =~ 'This is shiitake').to_not be_truthy + expect(matcher.matches?('This is a shiitake')).to be_truthy + expect(matcher.matches?('This is shiitake')).to_not be_truthy end end end -- cgit