From 9093e2de7a133470eec1049a13465f81928d0119 Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 9 Oct 2017 17:28:28 -0500 Subject: Add KeywordMute model. Gist of the proposed keyword mute implementation: Keyword mutes are represented server-side as one keyword per record. For each account, there exists a keyword regex that is generated as one big alternation of all keywords. This regex is cached (in Redis, I guess) so we can quickly get it when filtering in FeedManager. --- app/models/keyword_mute.rb | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 app/models/keyword_mute.rb (limited to 'app/models') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb new file mode 100644 index 000000000..91816eed9 --- /dev/null +++ b/app/models/keyword_mute.rb @@ -0,0 +1,13 @@ +# == Schema Information +# +# Table name: keyword_mutes +# +# id :integer not null, primary key +# account_id :integer not null +# keyword :string not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class KeywordMute < ApplicationRecord +end -- cgit From 4745d6eeca3a422f41775ee5f31989fc036da7d6 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 14 Oct 2017 02:28:20 -0500 Subject: Spec out KeywordMute interface. #164. --- app/lib/feed_manager.rb | 2 ++ app/models/keyword_mute.rb | 2 ++ spec/models/keyword_mute_spec.rb | 18 +++++++++++++++++- 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'app/models') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index ca15745cb..baaa09e86 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,6 +138,8 @@ class FeedManager end def filter_from_home?(status, receiver_id) + return true if KeywordMute.where(account_id: receiver_id).matches?(status.text) + return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index 91816eed9..d397a1f41 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -10,4 +10,6 @@ # class KeywordMute < ApplicationRecord + def self.matches?(text) + end end diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb index cd0881565..cb6e554e4 100644 --- a/spec/models/keyword_mute_spec.rb +++ b/spec/models/keyword_mute_spec.rb @@ -1,5 +1,21 @@ require 'rails_helper' RSpec.describe KeywordMute, type: :model do - pending "add some examples to (or delete) #{__FILE__}" + describe '.matches?' do + let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } + let(:status) { Fabricate(:status, account: alice).tap(&:save!) } + let(:keyword_mute) { Fabricate(:keyword_mute, account: alice, keyword: 'take').tap(&:save!) } + + it 'returns true if any keyword in the set matches the status text' do + status.update_attribute(:text, 'This is a hot take') + + expect(KeywordMute.where(account: alice).matches?(status.text)).to be_truthy + end + + it 'returns false if no keyword in the set matches the status text' + + describe 'matching' do + it 'is case-insensitive' + end + end end -- cgit From 603cf02b703a2df2ae6690077a3e21a5ce64b548 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 14 Oct 2017 20:36:53 -0500 Subject: Rework KeywordMute interface to use a matcher object; spec out matcher. #164. A matcher object that builds a match from KeywordMute data and runs it over text is, in my view, one of the easier ways to write examples for this sort of thing. --- app/lib/feed_manager.rb | 2 +- app/models/keyword_mute.rb | 31 +++++++++++++++++- spec/models/keyword_mute_spec.rb | 70 ++++++++++++++++++++++++++++++++++------ 3 files changed, 91 insertions(+), 12 deletions(-) (limited to 'app/models') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index baaa09e86..516bd81af 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,7 +138,7 @@ class FeedManager end def filter_from_home?(status, receiver_id) - return true if KeywordMute.where(account_id: receiver_id).matches?(status.text) + return true if KeywordMute.matcher_for(receiver_id) =~ status.text return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index d397a1f41..d80fcaa60 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # == Schema Information # # Table name: keyword_mutes @@ -10,6 +11,34 @@ # class KeywordMute < ApplicationRecord - def self.matches?(text) + belongs_to :account, required: true + + validates_presence_of :keyword + + def self.matcher_for(account) + Rails.cache.fetch("keyword_mutes:matcher:#{account}") { Matcher.new(account) } + end + + class Matcher + attr_reader :regex + + def initialize(account) + re = String.new.tap do |str| + scoped = KeywordMute.where(account: account) + keywords = scoped.select(:id, :keyword) + count = scoped.count + + keywords.find_each.with_index do |kw, index| + str << Regexp.escape(kw.keyword.strip) + str << '|' if index < count - 1 + end + end + + @regex = /\b(?:#{re})\b/i unless re.empty? + end + + def =~(str) + @regex ? @regex =~ str : false + end end end diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb index cb6e554e4..211a9b4c6 100644 --- a/spec/models/keyword_mute_spec.rb +++ b/spec/models/keyword_mute_spec.rb @@ -1,21 +1,71 @@ require 'rails_helper' RSpec.describe KeywordMute, type: :model do - describe '.matches?' do - let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } - let(:status) { Fabricate(:status, account: alice).tap(&:save!) } - let(:keyword_mute) { Fabricate(:keyword_mute, account: alice, keyword: 'take').tap(&:save!) } + let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } + let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } - it 'returns true if any keyword in the set matches the status text' do - status.update_attribute(:text, 'This is a hot take') + describe '.matcher_for' do + let(:matcher) { KeywordMute.matcher_for(alice) } - expect(KeywordMute.where(account: alice).matches?(status.text)).to be_truthy + describe 'with no KeywordMutes for an account' do + before do + KeywordMute.delete_all + end + + it 'does not match' do + expect(matcher =~ 'This is a hot take').to be_falsy + end end - it 'returns false if no keyword in the set matches the status text' + describe 'with KeywordMutes for an account' do + it 'does not match keywords set by a different account' do + KeywordMute.create!(account: bob, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'does not match if no keywords match the status text' do + KeywordMute.create!(account: alice, keyword: 'cold') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'does not match substrings matching keywords' do + KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'This is a shiitake mushroom').to be_falsy + end + + it 'matches keywords at the beginning of the text' do + KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'Take this').to be_truthy + end + + it 'matches keywords at the beginning of the text' do + KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_truthy + end + + it 'matches if at least one keyword case-insensitively matches the text' do + KeywordMute.create!(account: alice, keyword: 'hot') + + expect(matcher =~ 'This is a hot take').to be_truthy + end + + it 'uses case-folding rules appropriate for more than just English' do + KeywordMute.create!(account: alice, keyword: 'großeltern') + + expect(matcher =~ 'besuch der grosseltern').to be_truthy + end + + it 'matches keywords that are composed of multiple words' do + KeywordMute.create!(account: alice, keyword: 'a shiitake') - describe 'matching' do - it 'is case-insensitive' + expect(matcher =~ 'This is a shiitake').to be_truthy + expect(matcher =~ 'This is shiitake').to_not be_truthy + end end end end -- cgit From a4851100fd3f20fdc01a3ebf7942fab6d5d03ebf Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 14 Oct 2017 20:45:14 -0500 Subject: Make use of the regex attr_reader. #164. It would also have been valid to get rid of the attr_reader, but I like being able to reach inside KeywordMute::Matcher without resorting to instance_variable_get tomfoolery. --- app/models/keyword_mute.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/models') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index d80fcaa60..e1a8c3712 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -38,7 +38,7 @@ class KeywordMute < ApplicationRecord end def =~(str) - @regex ? @regex =~ str : false + regex ? regex =~ str : false end end end -- cgit From 693c66dfde891a5d540dc4cdc0c712495c31100c Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 15 Oct 2017 02:32:03 -0500 Subject: Use more idiomatic string concatentation. #164. The intent of the previous concatenation was to minimize object allocations, which can end up being a slow killer. However, it turns out that under MRI 2.4.x, the shove-strings-in-an-array-and-join method is not only arguably more common but (in this particular case) actually allocates *fewer* objects than the string concatenation. Or, at least, that's what I gather by running this: words = %w(palmettoes nudged hibernation bullish stockade's tightened Hades Dixie's formalize superego's commissaries Zappa's viceroy's apothecaries tablespoonful's barons Chennai tollgate ticked expands) a = Account.first KeywordMute.transaction do words.each { |w| KeywordMute.create!(keyword: w, account: a) } GC.start s1 = GC.stat re = String.new.tap do |str| scoped = KeywordMute.where(account: a) keywords = scoped.select(:id, :keyword) count = scoped.count keywords.find_each.with_index do |kw, index| str << Regexp.escape(kw.keyword.strip) str << '|' if index < count - 1 end end s2 = GC.stat puts s1.inspect, s2.inspect raise ActiveRecord::Rollback end vs this: words = %w( palmettoes nudged hibernation bullish stockade's tightened Hades Dixie's formalize superego's commissaries Zappa's viceroy's apothecaries tablespoonful's barons Chennai tollgate ticked expands ) a = Account.first KeywordMute.transaction do words.each { |w| KeywordMute.create!(keyword: w, account: a) } GC.start s1 = GC.stat re = [].tap do |arr| KeywordMute.where(account: a).select(:keyword, :id).find_each do |m| arr << Regexp.escape(m.keyword.strip) end end.join('|') s2 = GC.stat puts s1.inspect, s2.inspect raise ActiveRecord::Rollback end Using rails r, here is a comparison of the total_allocated_objects and malloc_increase_bytes GC stat data: total_allocated_objects malloc_increase_bytes string concat 3200241 -> 3201428 (+1187) 1176 -> 45216 (44040) array join 3200380 -> 3201299 (+919) 1176 -> 36448 (35272) --- app/models/keyword_mute.rb | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'app/models') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index e1a8c3712..f94e0f795 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -23,16 +23,11 @@ class KeywordMute < ApplicationRecord attr_reader :regex def initialize(account) - re = String.new.tap do |str| - scoped = KeywordMute.where(account: account) - keywords = scoped.select(:id, :keyword) - count = scoped.count - - keywords.find_each.with_index do |kw, index| - str << Regexp.escape(kw.keyword.strip) - str << '|' if index < count - 1 + re = [].tap do |arr| + KeywordMute.where(account: account).select(:keyword, :id).find_each do |m| + arr << Regexp.escape(m.keyword.strip) end - end + end.join('|') @regex = /\b(?:#{re})\b/i unless re.empty? end -- cgit From b4b657eb1daebd1472384ff8ea1c1b9c4b313c5c Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 15 Oct 2017 02:52:53 -0500 Subject: Invalidate cached matcher objects on KeywordMute commit. #164. --- app/models/keyword_mute.rb | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'app/models') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index f94e0f795..8b54ad696 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -15,16 +15,24 @@ class KeywordMute < ApplicationRecord validates_presence_of :keyword - def self.matcher_for(account) - Rails.cache.fetch("keyword_mutes:matcher:#{account}") { Matcher.new(account) } + after_commit :invalidate_cached_matcher + + def self.matcher_for(account_id) + Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + end + + private + + def invalidate_cached_matcher + Rails.cache.delete("keyword_mutes:matcher:#{account_id}") end class Matcher attr_reader :regex - def initialize(account) + def initialize(account_id) re = [].tap do |arr| - KeywordMute.where(account: account).select(:keyword, :id).find_each do |m| + KeywordMute.where(account_id: account_id).select(:keyword, :id).find_each do |m| arr << Regexp.escape(m.keyword.strip) end end.join('|') -- cgit From 4a64181461cb02599da98166da4b527adbb705ad Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 15 Oct 2017 19:49:22 -0500 Subject: Allow keywords to match either substrings or whole words. Word-boundary matching only works as intended in English and languages that use similar word-breaking characters; it doesn't work so well in (say) Japanese, Chinese, or Thai. It's unacceptable to have a feature that doesn't work as intended for some languages. (Moreso especially considering that it's likely that the largest contingent on the Mastodon bit of the fediverse speaks Japanese.) There are rules specified in Unicode TR29[1] for word-breaking across all languages supported by Unicode, but the rules deliberately do not cover all cases. In fact, TR29 states For example, reliable detection of word boundaries in languages such as Thai, Lao, Chinese, or Japanese requires the use of dictionary lookup, analogous to English hyphenation. So we aren't going to be able to make word detection work with regexes within Mastodon (or glitchsoc). However, for a first pass (even if it's kind of punting) we can allow the user to choose whether they want word or substring detection and warn about the limitations of this implementation in, say, docs. [1]: https://unicode.org/reports/tr29/ https://web.archive.org/web/20171001005125/https://unicode.org/reports/tr29/ --- app/models/keyword_mute.rb | 8 +++++--- db/migrate/20171009222537_create_keyword_mutes.rb | 1 + db/schema.rb | 1 + spec/models/keyword_mute_spec.rb | 12 +++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'app/models') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index 8b54ad696..b0229923d 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -6,6 +6,7 @@ # id :integer not null, primary key # account_id :integer not null # keyword :string not null +# whole_word :boolean default(TRUE), not null # created_at :datetime not null # updated_at :datetime not null # @@ -32,12 +33,13 @@ class KeywordMute < ApplicationRecord def initialize(account_id) re = [].tap do |arr| - KeywordMute.where(account_id: account_id).select(:keyword, :id).find_each do |m| - arr << Regexp.escape(m.keyword.strip) + KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| + boundary = m.whole_word ? '\b' : '' + arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" end end.join('|') - @regex = /\b(?:#{re})\b/i unless re.empty? + @regex = /#{re}/i unless re.empty? end def =~(str) diff --git a/db/migrate/20171009222537_create_keyword_mutes.rb b/db/migrate/20171009222537_create_keyword_mutes.rb index ee690e799..ec0c756fb 100644 --- a/db/migrate/20171009222537_create_keyword_mutes.rb +++ b/db/migrate/20171009222537_create_keyword_mutes.rb @@ -3,6 +3,7 @@ class CreateKeywordMutes < ActiveRecord::Migration[5.1] create_table :keyword_mutes do |t| t.references :account, null: false t.string :keyword, null: false + t.boolean :whole_word, null: false, default: true t.timestamps end diff --git a/db/schema.rb b/db/schema.rb index 420bb0d2e..c0704b13e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -170,6 +170,7 @@ ActiveRecord::Schema.define(version: 20171010025614) do create_table "keyword_mutes", force: :cascade do |t| t.bigint "account_id", null: false t.string "keyword", null: false + t.boolean "whole_word", default: true, null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false t.index ["account_id"], name: "index_keyword_mutes_on_account_id" diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb index de5d32bb4..c74505188 100644 --- a/spec/models/keyword_mute_spec.rb +++ b/spec/models/keyword_mute_spec.rb @@ -30,10 +30,16 @@ RSpec.describe KeywordMute, type: :model do expect(matcher =~ 'This is a hot take').to be_falsy end - it 'does not match substrings matching keywords' do - KeywordMute.create!(account: alice, keyword: 'take') + it 'considers word boundaries when matching' do + KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) + + expect(matcher =~ 'bobcats').to be_falsy + end + + it 'matches substrings if whole_word is false' do + KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) - expect(matcher =~ 'This is a shiitake mushroom').to be_falsy + expect(matcher =~ 'This is a shiitake mushroom').to be_truthy end it 'matches keywords at the beginning of the text' do -- cgit From 670e6a33f8eeca628707dc020e02ce32502d74a4 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 21 Oct 2017 14:47:17 -0500 Subject: Move KeywordMute into Glitch namespace. There are two motivations for this: 1. It looks like we're going to add other features that require server-side storage (e.g. user notes). 2. Namespacing glitchsoc modifications is a good idea anyway: even if we do not end up doing (1), if upstream introduces a keyword-mute feature that also uses a "KeywordMute" model, we can avoid some merge conflicts this way and work on the more interesting task of choosing which implementation to use. --- .../settings/keyword_mutes_controller.rb | 2 +- app/lib/feed_manager.rb | 2 +- app/models/glitch.rb | 7 ++ app/models/glitch/keyword_mute.rb | 49 +++++++++++++ app/models/keyword_mute.rb | 49 ------------- ...900_move_keyword_mutes_into_glitch_namespace.rb | 7 ++ db/schema.rb | 22 +++--- spec/fabricators/glitch_keyword_mute_fabricator.rb | 2 + spec/fabricators/keyword_mute_fabricator.rb | 2 - spec/models/glitch/keyword_mute_spec.rb | 83 ++++++++++++++++++++++ spec/models/keyword_mute_spec.rb | 83 ---------------------- 11 files changed, 161 insertions(+), 147 deletions(-) create mode 100644 app/models/glitch.rb create mode 100644 app/models/glitch/keyword_mute.rb delete mode 100644 app/models/keyword_mute.rb create mode 100644 db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb create mode 100644 spec/fabricators/glitch_keyword_mute_fabricator.rb delete mode 100644 spec/fabricators/keyword_mute_fabricator.rb create mode 100644 spec/models/glitch/keyword_mute_spec.rb delete mode 100644 spec/models/keyword_mute_spec.rb (limited to 'app/models') diff --git a/app/controllers/settings/keyword_mutes_controller.rb b/app/controllers/settings/keyword_mutes_controller.rb index d9f99af09..6ae05108d 100644 --- a/app/controllers/settings/keyword_mutes_controller.rb +++ b/app/controllers/settings/keyword_mutes_controller.rb @@ -55,7 +55,7 @@ class Settings::KeywordMutesController < ApplicationController end def keyword_mutes_for_account - KeywordMute.where(account: @account) + Glitch::KeywordMute.where(account: @account) end def load_keyword_mute diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 516bd81af..1123f88bb 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,7 +138,7 @@ class FeedManager end def filter_from_home?(status, receiver_id) - return true if KeywordMute.matcher_for(receiver_id) =~ status.text + return true if Glitch::KeywordMute.matcher_for(receiver_id) =~ status.text return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) diff --git a/app/models/glitch.rb b/app/models/glitch.rb new file mode 100644 index 000000000..0e497babc --- /dev/null +++ b/app/models/glitch.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module Glitch + def self.table_name_prefix + 'glitch_' + end +end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb new file mode 100644 index 000000000..3b0b47f52 --- /dev/null +++ b/app/models/glitch/keyword_mute.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: glitch_keyword_mutes +# +# id :integer not null, primary key +# account_id :integer not null +# keyword :string not null +# whole_word :boolean default(TRUE), not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class Glitch::KeywordMute < ApplicationRecord + belongs_to :account, required: true + + validates_presence_of :keyword + + after_commit :invalidate_cached_matcher + + def self.matcher_for(account_id) + Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + end + + private + + def invalidate_cached_matcher + Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + end + + class Matcher + attr_reader :regex + + def initialize(account_id) + re = [].tap do |arr| + Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| + boundary = m.whole_word ? '\b' : '' + arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + end + end.join('|') + + @regex = /#{re}/i unless re.empty? + end + + def =~(str) + regex ? regex =~ str : false + end + end +end diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb deleted file mode 100644 index b0229923d..000000000 --- a/app/models/keyword_mute.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true -# == Schema Information -# -# Table name: keyword_mutes -# -# id :integer not null, primary key -# account_id :integer not null -# keyword :string not null -# whole_word :boolean default(TRUE), not null -# created_at :datetime not null -# updated_at :datetime not null -# - -class KeywordMute < ApplicationRecord - belongs_to :account, required: true - - validates_presence_of :keyword - - after_commit :invalidate_cached_matcher - - def self.matcher_for(account_id) - Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } - end - - private - - def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:matcher:#{account_id}") - end - - class Matcher - attr_reader :regex - - def initialize(account_id) - re = [].tap do |arr| - KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| - boundary = m.whole_word ? '\b' : '' - arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" - end - end.join('|') - - @regex = /#{re}/i unless re.empty? - end - - def =~(str) - regex ? regex =~ str : false - end - end -end diff --git a/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb b/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb new file mode 100644 index 000000000..269bb49d6 --- /dev/null +++ b/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb @@ -0,0 +1,7 @@ +class MoveKeywordMutesIntoGlitchNamespace < ActiveRecord::Migration[5.1] + def change + safety_assured do + rename_table :keyword_mutes, :glitch_keyword_mutes + end + end +end diff --git a/db/schema.rb b/db/schema.rb index c0704b13e..c09876c4d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171010025614) do +ActiveRecord::Schema.define(version: 20171021191900) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -155,6 +155,15 @@ ActiveRecord::Schema.define(version: 20171010025614) do t.index ["account_id", "target_account_id"], name: "index_follows_on_account_id_and_target_account_id", unique: true end + create_table "glitch_keyword_mutes", force: :cascade do |t| + t.bigint "account_id", null: false + t.string "keyword", null: false + t.boolean "whole_word", default: true, null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id"], name: "index_glitch_keyword_mutes_on_account_id" + end + create_table "imports", force: :cascade do |t| t.integer "type", null: false t.boolean "approved", default: false, null: false @@ -167,15 +176,6 @@ ActiveRecord::Schema.define(version: 20171010025614) do t.bigint "account_id", null: false end - create_table "keyword_mutes", force: :cascade do |t| - t.bigint "account_id", null: false - t.string "keyword", null: false - t.boolean "whole_word", default: true, null: false - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index ["account_id"], name: "index_keyword_mutes_on_account_id" - end - create_table "media_attachments", force: :cascade do |t| t.bigint "status_id" t.string "file_file_name" @@ -481,8 +481,8 @@ ActiveRecord::Schema.define(version: 20171010025614) do add_foreign_key "follow_requests", "accounts", name: "fk_76d644b0e7", on_delete: :cascade add_foreign_key "follows", "accounts", column: "target_account_id", name: "fk_745ca29eac", on_delete: :cascade add_foreign_key "follows", "accounts", name: "fk_32ed1b5560", on_delete: :cascade + add_foreign_key "glitch_keyword_mutes", "accounts", on_delete: :cascade add_foreign_key "imports", "accounts", name: "fk_6db1b6e408", on_delete: :cascade - add_foreign_key "keyword_mutes", "accounts", on_delete: :cascade add_foreign_key "media_attachments", "accounts", name: "fk_96dd81e81b", on_delete: :nullify add_foreign_key "media_attachments", "statuses", on_delete: :nullify add_foreign_key "mentions", "accounts", name: "fk_970d43f9d1", on_delete: :cascade diff --git a/spec/fabricators/glitch_keyword_mute_fabricator.rb b/spec/fabricators/glitch_keyword_mute_fabricator.rb new file mode 100644 index 000000000..8601ed6d7 --- /dev/null +++ b/spec/fabricators/glitch_keyword_mute_fabricator.rb @@ -0,0 +1,2 @@ +Fabricator(:glitch_keyword_mute) do +end diff --git a/spec/fabricators/keyword_mute_fabricator.rb b/spec/fabricators/keyword_mute_fabricator.rb deleted file mode 100644 index 82cf845c8..000000000 --- a/spec/fabricators/keyword_mute_fabricator.rb +++ /dev/null @@ -1,2 +0,0 @@ -Fabricator(:keyword_mute) do -end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb new file mode 100644 index 000000000..108cdafec --- /dev/null +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -0,0 +1,83 @@ +require 'rails_helper' + +RSpec.describe Glitch::KeywordMute, type: :model do + let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } + let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } + + describe '.matcher_for' do + let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } + + describe 'with no Glitch::KeywordMutes for an account' do + before do + Glitch::KeywordMute.delete_all + end + + it 'does not match' do + expect(matcher =~ 'This is a hot take').to be_falsy + end + end + + describe 'with Glitch::KeywordMutes for an account' do + it 'does not match keywords set by a different account' do + Glitch::KeywordMute.create!(account: bob, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'does not match if no keywords match the status text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'cold') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'considers word boundaries when matching' do + Glitch::KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) + + expect(matcher =~ 'bobcats').to be_falsy + end + + it 'matches substrings if whole_word is false' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) + + expect(matcher =~ 'This is a shiitake mushroom').to be_truthy + end + + it 'matches keywords at the beginning of the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'Take this').to be_truthy + end + + it 'matches keywords at the beginning of the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_truthy + end + + it 'matches if at least one keyword case-insensitively matches the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'hot') + + expect(matcher =~ 'This is a HOT take').to be_truthy + end + + it 'matches keywords surrounded by non-alphanumeric ornamentation' do + Glitch::KeywordMute.create!(account: alice, keyword: 'hot') + + expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy + end + + it 'uses case-folding rules appropriate for more than just English' do + Glitch::KeywordMute.create!(account: alice, keyword: 'großeltern') + + expect(matcher =~ 'besuch der grosseltern').to be_truthy + end + + it 'matches keywords that are composed of multiple words' do + Glitch::KeywordMute.create!(account: alice, keyword: 'a shiitake') + + expect(matcher =~ 'This is a shiitake').to be_truthy + expect(matcher =~ 'This is shiitake').to_not be_truthy + end + end + end +end diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb deleted file mode 100644 index c74505188..000000000 --- a/spec/models/keyword_mute_spec.rb +++ /dev/null @@ -1,83 +0,0 @@ -require 'rails_helper' - -RSpec.describe KeywordMute, type: :model do - let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } - let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } - - describe '.matcher_for' do - let(:matcher) { KeywordMute.matcher_for(alice) } - - describe 'with no KeywordMutes for an account' do - before do - KeywordMute.delete_all - end - - it 'does not match' do - expect(matcher =~ 'This is a hot take').to be_falsy - end - end - - describe 'with KeywordMutes for an account' do - it 'does not match keywords set by a different account' do - KeywordMute.create!(account: bob, keyword: 'take') - - expect(matcher =~ 'This is a hot take').to be_falsy - end - - it 'does not match if no keywords match the status text' do - KeywordMute.create!(account: alice, keyword: 'cold') - - expect(matcher =~ 'This is a hot take').to be_falsy - end - - it 'considers word boundaries when matching' do - KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) - - expect(matcher =~ 'bobcats').to be_falsy - end - - it 'matches substrings if whole_word is false' do - KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) - - expect(matcher =~ 'This is a shiitake mushroom').to be_truthy - end - - it 'matches keywords at the beginning of the text' do - KeywordMute.create!(account: alice, keyword: 'take') - - expect(matcher =~ 'Take this').to be_truthy - end - - it 'matches keywords at the beginning of the text' do - KeywordMute.create!(account: alice, keyword: 'take') - - expect(matcher =~ 'This is a hot take').to be_truthy - end - - it 'matches if at least one keyword case-insensitively matches the text' do - KeywordMute.create!(account: alice, keyword: 'hot') - - expect(matcher =~ 'This is a HOT take').to be_truthy - end - - it 'matches keywords surrounded by non-alphanumeric ornamentation' do - KeywordMute.create!(account: alice, keyword: 'hot') - - expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy - end - - it 'uses case-folding rules appropriate for more than just English' do - KeywordMute.create!(account: alice, keyword: 'großeltern') - - expect(matcher =~ 'besuch der grosseltern').to be_truthy - end - - it 'matches keywords that are composed of multiple words' do - KeywordMute.create!(account: alice, keyword: 'a shiitake') - - expect(matcher =~ 'This is a shiitake').to be_truthy - expect(matcher =~ 'This is shiitake').to_not be_truthy - end - end - end -end -- cgit From ad86c86fa8e0d577b1a6c7411367420e6beea4ea Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 21 Oct 2017 15:44:47 -0500 Subject: Apply keyword mutes to reblogs. --- app/lib/feed_manager.rb | 5 ++++- app/models/glitch/keyword_mute.rb | 4 ++++ spec/fabricators/glitch_keyword_mute_fabricator.rb | 2 +- spec/lib/feed_manager_spec.rb | 17 +++++++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) (limited to 'app/models') diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 1123f88bb..576188324 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,7 +138,9 @@ class FeedManager end def filter_from_home?(status, receiver_id) - return true if Glitch::KeywordMute.matcher_for(receiver_id) =~ status.text + keyword_mute_matcher = Glitch::KeywordMute.matcher_for(receiver_id) + + return true if keyword_mute_matcher =~ status.text return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) @@ -161,6 +163,7 @@ class FeedManager return should_filter elsif status.reblog? # Filter out a reblog should_filter = Block.where(account_id: status.reblog.account_id, target_account_id: receiver_id).exists? # or if the author of the reblogged status is blocking me + should_filter ||= keyword_mute_matcher.matches?(status.reblog.text) should_filter ||= AccountDomainBlock.where(account_id: receiver_id, domain: status.reblog.account.domain).exists? # or the author's domain is blocked return should_filter end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 3b0b47f52..823e252d3 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -45,5 +45,9 @@ class Glitch::KeywordMute < ApplicationRecord def =~(str) regex ? regex =~ str : false end + + def matches?(str) + !!(regex =~ str) + end end end diff --git a/spec/fabricators/glitch_keyword_mute_fabricator.rb b/spec/fabricators/glitch_keyword_mute_fabricator.rb index 8601ed6d7..20d393320 100644 --- a/spec/fabricators/glitch_keyword_mute_fabricator.rb +++ b/spec/fabricators/glitch_keyword_mute_fabricator.rb @@ -1,2 +1,2 @@ -Fabricator(:glitch_keyword_mute) do +Fabricator('Glitch::KeywordMute') do end diff --git a/spec/lib/feed_manager_spec.rb b/spec/lib/feed_manager_spec.rb index 1861cc6ed..c9403d616 100644 --- a/spec/lib/feed_manager_spec.rb +++ b/spec/lib/feed_manager_spec.rb @@ -119,6 +119,23 @@ RSpec.describe FeedManager do reblog = Fabricate(:status, reblog: status, account: jeff) expect(FeedManager.instance.filter?(:home, reblog, alice.id)).to be true end + + it 'returns true for a status containing a muted keyword' do + Fabricate('Glitch::KeywordMute', account: alice, keyword: 'take') + alice.follow!(bob) + status = Fabricate(:status, text: 'This is a hot take', account: bob) + + expect(FeedManager.instance.filter?(:home, status, alice.id)).to be true + end + + it 'returns true for a reblog containing a muted keyword' do + Fabricate('Glitch::KeywordMute', account: alice, keyword: 'take') + alice.follow!(jeff) + status = Fabricate(:status, text: 'This is a hot take', account: bob) + reblog = Fabricate(:status, reblog: status, account: jeff) + + expect(FeedManager.instance.filter?(:home, reblog, alice.id)).to be true + end end context 'for mentions feed' do -- cgit From 4b68e82a19ab2853264515a25af8d39841e43f00 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 22 Oct 2017 00:24:32 -0500 Subject: Don't add \b to whole-word keywords that don't start with word characters. Ditto for ending with \b. Consider muting the phrase "(hot take)". I stipulate it is reasonable to enter this with the default "match whole word" behavior. Under the old behavior, this would be encoded as \b\(hot\ take\)\b However, if \b is before the first character in the string and the first character in the string is not a word character, then the match will fail. Ditto for after. In our example, "(" is not a word character, so this will not match statuses containing "(hot take)", and that's a very surprising behavior. To address this, we only add leading and trailing \b to keywords that start or end with word characters. --- app/models/glitch/keyword_mute.rb | 36 +++++++++++++++++++++++---------- spec/models/glitch/keyword_mute_spec.rb | 12 ++++++++--- 2 files changed, 34 insertions(+), 14 deletions(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 823e252d3..20fd89d9b 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -19,35 +19,49 @@ class Glitch::KeywordMute < ApplicationRecord after_commit :invalidate_cached_matcher def self.matcher_for(account_id) - Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + Matcher.new(account_id) end private def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + Rails.cache.delete("keyword_mutes:regex:#{account_id}") end class Matcher + attr_reader :account_id attr_reader :regex def initialize(account_id) - re = [].tap do |arr| - Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| - boundary = m.whole_word ? '\b' : '' - arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + @account_id = account_id + @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account } + end + + def keywords + Glitch::KeywordMute. + where(account_id: account_id). + select(:keyword, :id, :whole_word) + end + + def regex_for_account + re_text = [].tap do |arr| + keywords.find_each do |kw| + arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) end end.join('|') - @regex = /#{re}/i unless re.empty? + /#{re_text}/i unless re_text.empty? end - def =~(str) - regex ? regex =~ str : false + def boundary_regex_for_keyword(keyword) + sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' + eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' + + "#{sb}#{Regexp.escape(keyword)}#{eb}" end - def matches?(str) - !!(regex =~ str) + def =~(str) + regex ? regex =~ str : false end end end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb index 108cdafec..95e59defc 100644 --- a/spec/models/glitch/keyword_mute_spec.rb +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -7,7 +7,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do describe '.matcher_for' do let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } - describe 'with no Glitch::KeywordMutes for an account' do + describe 'with no mutes' do before do Glitch::KeywordMute.delete_all end @@ -17,7 +17,7 @@ RSpec.describe Glitch::KeywordMute, type: :model do end end - describe 'with Glitch::KeywordMutes for an account' do + describe 'with mutes' do it 'does not match keywords set by a different account' do Glitch::KeywordMute.create!(account: bob, keyword: 'take') @@ -63,7 +63,13 @@ RSpec.describe Glitch::KeywordMute, type: :model do it 'matches keywords surrounded by non-alphanumeric ornamentation' do Glitch::KeywordMute.create!(account: alice, keyword: 'hot') - expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy + expect(matcher =~ '(hot take)').to be_truthy + end + + it 'escapes metacharacters in keywords' do + Glitch::KeywordMute.create!(account: alice, keyword: '(hot take)') + + expect(matcher =~ '(hot take)').to be_truthy end it 'uses case-folding rules appropriate for more than just English' do -- cgit From af8f06413ee3bab91f1fb89b5828ed9a44e1a6bd Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 22 Oct 2017 01:11:17 -0500 Subject: KeywordMute matcher: more closely mimic Regexp#=~ behavior. Regexp#=~ returns nil if it does not match. An empty mute set does not match any status, so KeywordMute::Matcher#=~ ought to return nil also. --- app/models/glitch/keyword_mute.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 20fd89d9b..a7ab3650e 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -61,7 +61,7 @@ class Glitch::KeywordMute < ApplicationRecord end def =~(str) - regex ? regex =~ str : false + regex ? regex =~ str : nil end end end -- cgit From 8410d33b49d66683f5765b6c6ee5a4a1af5b098f Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 23 Oct 2017 19:31:59 -0500 Subject: Only cache the regex text, not the regex itself. It is possible to cache a Regexp object, but I'm not sure what happens if e.g. that object remains in cache across two different Ruby versions. Caching a string seems to raise fewer questions. --- app/models/glitch/keyword_mute.rb | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index a7ab3650e..4c3e69de4 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -34,23 +34,20 @@ class Glitch::KeywordMute < ApplicationRecord def initialize(account_id) @account_id = account_id - @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account } + regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } + @regex = /#{regex_text}/i unless regex_text.empty? end def keywords - Glitch::KeywordMute. - where(account_id: account_id). - select(:keyword, :id, :whole_word) + Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) end - def regex_for_account - re_text = [].tap do |arr| + def regex_text_for_account + [].tap do |arr| keywords.find_each do |kw| arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) end end.join('|') - - /#{re_text}/i unless re_text.empty? end def boundary_regex_for_keyword(keyword) -- cgit From f5a32839761f4951dc09ecbf207573183c6e2f80 Mon Sep 17 00:00:00 2001 From: David Yip Date: Tue, 24 Oct 2017 18:31:34 -0500 Subject: Switch to Regexp.union for building the mute expression. Also make the keyword-building methods private: they always probably should have been private, but now I have encoded enough fun and games into them that it now seems wrong for them to *not* be private. --- app/models/glitch/keyword_mute.rb | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 4c3e69de4..f0969c65e 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -35,30 +35,32 @@ class Glitch::KeywordMute < ApplicationRecord def initialize(account_id) @account_id = account_id regex_text = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_text_for_account } - @regex = /#{regex_text}/i unless regex_text.empty? + @regex = /#{regex_text}/i end + def =~(str) + regex ? regex =~ str : nil + end + + private + def keywords Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word) end def regex_text_for_account - [].tap do |arr| - keywords.find_each do |kw| - arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) - end - end.join('|') + kws = keywords.find_each.with_object([]) do |kw, a| + a << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : kw.keyword) + end + + Regexp.union(kws).source end def boundary_regex_for_keyword(keyword) sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' - "#{sb}#{Regexp.escape(keyword)}#{eb}" - end - - def =~(str) - regex ? regex =~ str : nil + /#{sb}#{Regexp.escape(keyword)}#{eb}/ end end end -- cgit From e40fe4092dfd927dd4b6605b7b398fcd0984d903 Mon Sep 17 00:00:00 2001 From: David Yip Date: Tue, 24 Oct 2017 19:03:59 -0500 Subject: Remove nil check in Glitch::KeywordMute#=~. @regex can no longer be nil, so we don't need to check it. --- app/models/glitch/keyword_mute.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index f0969c65e..73de4d4b7 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -39,7 +39,7 @@ class Glitch::KeywordMute < ApplicationRecord end def =~(str) - regex ? regex =~ str : nil + regex =~ str end private -- cgit