From 670e6a33f8eeca628707dc020e02ce32502d74a4 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 21 Oct 2017 14:47:17 -0500 Subject: Move KeywordMute into Glitch namespace. There are two motivations for this: 1. It looks like we're going to add other features that require server-side storage (e.g. user notes). 2. Namespacing glitchsoc modifications is a good idea anyway: even if we do not end up doing (1), if upstream introduces a keyword-mute feature that also uses a "KeywordMute" model, we can avoid some merge conflicts this way and work on the more interesting task of choosing which implementation to use. --- app/models/glitch/keyword_mute.rb | 49 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 app/models/glitch/keyword_mute.rb (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb new file mode 100644 index 000000000..3b0b47f52 --- /dev/null +++ b/app/models/glitch/keyword_mute.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: glitch_keyword_mutes +# +# id :integer not null, primary key +# account_id :integer not null +# keyword :string not null +# whole_word :boolean default(TRUE), not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class Glitch::KeywordMute < ApplicationRecord + belongs_to :account, required: true + + validates_presence_of :keyword + + after_commit :invalidate_cached_matcher + + def self.matcher_for(account_id) + Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + end + + private + + def invalidate_cached_matcher + Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + end + + class Matcher + attr_reader :regex + + def initialize(account_id) + re = [].tap do |arr| + Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| + boundary = m.whole_word ? '\b' : '' + arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + end + end.join('|') + + @regex = /#{re}/i unless re.empty? + end + + def =~(str) + regex ? regex =~ str : false + end + end +end -- cgit From ad86c86fa8e0d577b1a6c7411367420e6beea4ea Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 21 Oct 2017 15:44:47 -0500 Subject: Apply keyword mutes to reblogs. --- app/lib/feed_manager.rb | 5 ++++- app/models/glitch/keyword_mute.rb | 4 ++++ spec/fabricators/glitch_keyword_mute_fabricator.rb | 2 +- spec/lib/feed_manager_spec.rb | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'app/models/glitch') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 1123f88bb..576188324 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,7 +138,9 @@ class FeedManager end def filter_from_home?(status, receiver_id) - return true if Glitch::KeywordMute.matcher_for(receiver_id) =~ status.text + keyword_mute_matcher = Glitch::KeywordMute.matcher_for(receiver_id) + + return true if keyword_mute_matcher =~ status.text return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) @@ -161,6 +163,7 @@ class FeedManager return should_filter elsif status.reblog? # Filter out a reblog should_filter = Block.where(account_id: status.reblog.account_id, target_account_id: receiver_id).exists? # or if the author of the reblogged status is blocking me + should_filter ||= keyword_mute_matcher.matches?(status.reblog.text) should_filter ||= AccountDomainBlock.where(account_id: receiver_id, domain: status.reblog.account.domain).exists? # or the author's domain is blocked return should_filter end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 3b0b47f52..823e252d3 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -45,5 +45,9 @@ class Glitch::KeywordMute < ApplicationRecord def =~(str) regex ? regex =~ str : false end + + def matches?(str) + !!(regex =~ str) + end end end diff --git a/spec/fabricators/glitch_keyword_mute_fabricator.rb b/spec/fabricators/glitch_keyword_mute_fabricator.rb index 8601ed6d7..20d393320 100644 --- a/spec/fabricators/glitch_keyword_mute_fabricator.rb +++ b/spec/fabricators/glitch_keyword_mute_fabricator.rb @@ -1,2 +1,2 @@ -Fabricator(:glitch_keyword_mute) do +Fabricator('Glitch::KeywordMute') do end diff --git a/spec/lib/feed_manager_spec.rb b/spec/lib/feed_manager_spec.rb index 1861cc6ed..c9403d616 100644 --- a/spec/lib/feed_manager_spec.rb +++ b/spec/lib/feed_manager_spec.rb @@ -119,6 +119,23 @@ RSpec.describe FeedManager do reblog = Fabricate(:status, reblog: status, account: jeff) expect(FeedManager.instance.filter?(:home, reblog, alice.id)).to be true end + + it 'returns true for a status containing a muted keyword' do + Fabricate('Glitch::KeywordMute', account: alice, keyword: 'take') + alice.follow!(bob) + status = Fabricate(:status, text: 'This is a hot take', account: bob) + + expect(FeedManager.instance.filter?(:home, status, alice.id)).to be true + end + + it 'returns true for a reblog containing a muted keyword' do + Fabricate('Glitch::KeywordMute', account: alice, keyword: 'take') + alice.follow!(jeff) + status = Fabricate(:status, text: 'This is a hot take', account: bob) + reblog = Fabricate(:status, reblog: status, account: jeff) + + expect(FeedManager.instance.filter?(:home, reblog, alice.id)).to be true + end end context 'for mentions feed' do -- cgit From 4b68e82a19ab2853264515a25af8d39841e43f00 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 22 Oct 2017 00:24:32 -0500 Subject: Don't add \b to whole-word keywords that don't start with word characters. Ditto for ending with \b. Consider muting the phrase "(hot take)". I stipulate it is reasonable to enter this with the default "match whole word" behavior. Under the old behavior, this would be encoded as \b\(hot\ take\)\b However, if \b is before the first character in the string and the first character in the string is not a word character, then the match will fail. Ditto for after. In our example, "(" is not a word character, so this will not match statuses containing "(hot take)", and that's a very surprising behavior. To address this, we only add leading and trailing \b to keywords that start or end with word characters. --- app/models/glitch/keyword_mute.rb | 36 +++++++++++++++++++++++---------- spec/models/glitch/keyword_mute_spec.rb | 12 ++++++++--- 2 files changed, 34 insertions(+), 14 deletions(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 823e252d3..20fd89d9b 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -19,35 +19,49 @@ class Glitch::KeywordMute < ApplicationRecord after_commit :invalidate_cached_matcher def self.matcher_for(account_id) - Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + Matcher.new(account_id) end private def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + Rails.cache.delete("keyword_mutes:regex:#{account_id}") end class Matcher + attr_reader :account_id attr_reader :regex def initialize(account_id) - re = [].tap do |arr| - Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| - boundary = m.whole_word ? '\b' : '' - arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + @account_id = account_id + @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account } + end + + def keywords + Glitch::KeywordMute. + where(account_id: account_id). + select(:keyword, :id, :whole_word) + end + + def regex_for_account + re_text = [].tap do |arr| + keywords.find_each do |kw| + arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) end end.join('|') - @regex = /#{re}/i unless re.empty? + /#{re_text}/i unless re_text.empty? end - def =~(str) - regex ? regex =~ str : false + def boundary_regex_for_keyword(keyword) + sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' + eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' + + "#{sb}#{Regexp.escape(keyword)}#{eb}" end - def matches?(str) - !!(regex =~ str) + def =~(str) + regex ? regex =~ str : false end end end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index 108cdafec..95e59defc 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -7,7 +7,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do describe '.matcher_for' do let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } - describe 'with no Glitch::KeywordMutes for an account' do + describe 'with no mutes' do before do Glitch::KeywordMute.delete_all end @@ -17,7 +17,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do end end - describe 'with Glitch::KeywordMutes for an account' do + describe 'with mutes' do it 'does not match keywords set by a different account' do Glitch::KeywordMute.create!(account: bob, keyword: 'take') @@ -63,7 +63,13 @@ RSpec.describe Glitch::KeywordMute, type: :model do it 'matches keywords surrounded by non-alphanumeric ornamentation' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') - expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy + expect(matcher =~ '(hot take)').to be_truthy + end + + it 'escapes metacharacters in keywords' do + Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)') + + expect(matcher =~ '(hot take)').to be_truthy end it 'uses case-folding rules appropriate for more than just English' do -- cgit From af8f06413ee3bab91f1fb89b5828ed9a44e1a6bd Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 22 Oct 2017 01:11:17 -0500 Subject: KeywordMute matcher: more closely mimic Regexp#=~ behavior. Regexp#=~ returns nil if it does not match. An empty mute set does not match any status, so KeywordMute::Matcher#=~ ought to return nil also. --- app/models/glitch/keyword_mute.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 20fd89d9b..a7ab3650e 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -61,7 +61,7 @@ class Glitch::KeywordMute < ApplicationRecord end def =~(str) - regex ? regex =~ str : false + regex ? regex =~ str : nil end end end -- cgit From 8410d33b49d66683f5765b6c6ee5a4a1af5b098f Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 23 Oct 2017 19:31:59 -0500 Subject: Only cache the regex text, not the regex itself. It is possible to cache a Regexp object, but I'm not sure what happens if e.g. that object remains in cache across two different Ruby versions. Caching a string seems to raise fewer questions. --- app/models/glitch/keyword_mute.rb | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index a7ab3650e..4c3e69de4 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -34,23 +34,20 @@ class Glitch::KeywordMute < ApplicationRecord def initialize(account_id) @account_id = account_id - @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account } + regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } + @regex = /#{regex_text}/i unless regex_text.empty? end def keywords - Glitch::KeywordMute. - where(account_id: account_id). - select(:keyword, :id, :whole_word) + Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) end - def regex_for_account - re_text = [].tap do |arr| + def regex_text_for_account + [].tap do |arr| keywords.find_each do |kw| arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) end end.join('|') - - /#{re_text}/i unless re_text.empty? end def boundary_regex_for_keyword(keyword) -- cgit From f5a32839761f4951dc09ecbf207573183c6e2f80 Mon Sep 17 00:00:00 2001 From: David Yip Date: Tue, 24 Oct 2017 18:31:34 -0500 Subject: Switch to Regexp.union for building the mute expression. Also make the keyword-building methods private: they always probably should have been private, but now I have encoded enough fun and games into them that it now seems wrong for them to *not* be private. --- app/models/glitch/keyword_mute.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 4c3e69de4..f0969c65e 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -35,30 +35,32 @@ class Glitch::KeywordMute < ApplicationRecord def initialize(account_id) @account_id = account_id regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } - @regex = /#{regex_text}/i unless regex_text.empty? + @regex = /#{regex_text}/i end + def =~(str) + regex ? regex =~ str : nil + end + + private + def keywords Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) end def regex_text_for_account - [].tap do |arr| - keywords.find_each do |kw| - arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) - end - end.join('|') + kws = keywords.find_each.with_object([]) do |kw, a| + a << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : kw.keyword) + end + + Regexp.union(kws).source end def boundary_regex_for_keyword(keyword) sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' - "#{sb}#{Regexp.escape(keyword)}#{eb}" - end - - def =~(str) - regex ? regex =~ str : nil + /#{sb}#{Regexp.escape(keyword)}#{eb}/ end end end -- cgit From e40fe4092dfd927dd4b6605b7b398fcd0984d903 Mon Sep 17 00:00:00 2001 From: David Yip Date: Tue, 24 Oct 2017 19:03:59 -0500 Subject: Remove nil check in Glitch::KeywordMute#=~. @regex can no longer be nil, so we don't need to check it. --- app/models/glitch/keyword_mute.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index f0969c65e..73de4d4b7 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -39,7 +39,7 @@ class Glitch::KeywordMute < ApplicationRecord end def =~(str) - regex ? regex =~ str : nil + regex =~ str end private -- cgit From 656d54e9451dc99e212513b799a4deb4d1227bf0 Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 13 Nov 2017 11:06:02 -0600 Subject: Maintain case-insensitivity when merging multiple matchers (#213) When given two regexps, Regexp.union preserves the options set (or not set) on each regex; this meant that none of the multiline (m), case-insensitivity (i), or extended syntax (x) options were set. Our regexps are written expecting the m, i, and x options were set on all of them, so we need to make sure that we preserve that behavior. --- app/models/glitch/keyword_mute.rb | 4 ++-- spec/models/glitch/keyword_mute_spec.rb | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'app/models/glitch') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 73de4d4b7..009de1880 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -35,7 +35,7 @@ class Glitch::KeywordMute < ApplicationRecord def initialize(account_id) @account_id = account_id regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } - @regex = /#{regex_text}/i + @regex = /#{regex_text}/ end def =~(str) @@ -60,7 +60,7 @@ class Glitch::KeywordMute < ApplicationRecord sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' - /#{sb}#{Regexp.escape(keyword)}#{eb}/ + /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/ end end end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index 1423823ba..9685c6493 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -60,6 +60,13 @@ RSpec.describe Glitch::KeywordMute, type: :model do expect(matcher =~ 'This is a HOT take').to be_truthy end + it 'maintains case-insensitivity when combining keywords into a single matcher' do + Glitch::KeywordMute.create!(account: alice, keyword: 'hot') + Glitch::KeywordMute.create!(account: alice, keyword: 'cold') + + expect(matcher =~ 'This is a HOT take').to be_truthy + end + it 'matches keywords surrounded by non-alphanumeric ornamentation' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') -- cgit