From 9093e2de7a133470eec1049a13465f81928d0119 Mon Sep 17 00:00:00 2001 From: David Yip Date: Mon, 9 Oct 2017 17:28:28 -0500 Subject: Add KeywordMute model. Gist of the proposed keyword mute implementation: Keyword mutes are represented server-side as one keyword per record. For each account, there exists a keyword regex that is generated as one big alternation of all keywords. This regex is cached (in Redis, I guess) so we can quickly get it when filtering in FeedManager. --- db/migrate/20171009222537_create_keyword_mutes.rb | 11 +++++++++++ db/schema.rb | 9 +++++++++ 2 files changed, 20 insertions(+) create mode 100644 db/migrate/20171009222537_create_keyword_mutes.rb (limited to 'db') diff --git a/db/migrate/20171009222537_create_keyword_mutes.rb b/db/migrate/20171009222537_create_keyword_mutes.rb new file mode 100644 index 000000000..ee690e799 --- /dev/null +++ b/db/migrate/20171009222537_create_keyword_mutes.rb @@ -0,0 +1,11 @@ +class CreateKeywordMutes < ActiveRecord::Migration[5.1] + def change + create_table :keyword_mutes do |t| + t.references :account, null: false + t.string :keyword, null: false + t.timestamps + end + + add_foreign_key :keyword_mutes, :accounts, on_delete: :cascade + end +end diff --git a/db/schema.rb b/db/schema.rb index 128f51ee7..420bb0d2e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -167,6 +167,14 @@ ActiveRecord::Schema.define(version: 20171010025614) do t.bigint "account_id", null: false end + create_table "keyword_mutes", force: :cascade do |t| + t.bigint "account_id", null: false + t.string "keyword", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id"], name: "index_keyword_mutes_on_account_id" + end + create_table "media_attachments", force: :cascade do |t| t.bigint "status_id" t.string "file_file_name" @@ -473,6 +481,7 @@ ActiveRecord::Schema.define(version: 20171010025614) do add_foreign_key "follows", "accounts", column: "target_account_id", name: "fk_745ca29eac", on_delete: :cascade add_foreign_key "follows", "accounts", name: "fk_32ed1b5560", on_delete: :cascade add_foreign_key "imports", "accounts", name: "fk_6db1b6e408", on_delete: :cascade + add_foreign_key "keyword_mutes", "accounts", on_delete: :cascade add_foreign_key "media_attachments", "accounts", name: "fk_96dd81e81b", on_delete: :nullify add_foreign_key "media_attachments", "statuses", on_delete: :nullify add_foreign_key "mentions", "accounts", name: "fk_970d43f9d1", on_delete: :cascade -- cgit From 4a64181461cb02599da98166da4b527adbb705ad Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 15 Oct 2017 19:49:22 -0500 Subject: Allow keywords to match either substrings or whole words. Word-boundary matching only works as intended in English and languages that use similar word-breaking characters; it doesn't work so well in (say) Japanese, Chinese, or Thai. It's unacceptable to have a feature that doesn't work as intended for some languages. (Moreso especially considering that it's likely that the largest contingent on the Mastodon bit of the fediverse speaks Japanese.) There are rules specified in Unicode TR29[1] for word-breaking across all languages supported by Unicode, but the rules deliberately do not cover all cases. In fact, TR29 states For example, reliable detection of word boundaries in languages such as Thai, Lao, Chinese, or Japanese requires the use of dictionary lookup, analogous to English hyphenation. So we aren't going to be able to make word detection work with regexes within Mastodon (or glitchsoc). However, for a first pass (even if it's kind of punting) we can allow the user to choose whether they want word or substring detection and warn about the limitations of this implementation in, say, docs. [1]: https://unicode.org/reports/tr29/ https://web.archive.org/web/20171001005125/https://unicode.org/reports/tr29/ --- app/models/keyword_mute.rb | 8 +++++--- db/migrate/20171009222537_create_keyword_mutes.rb | 1 + db/schema.rb | 1 + spec/models/keyword_mute_spec.rb | 12 +++++++++--- 4 files changed, 16 insertions(+), 6 deletions(-) (limited to 'db') diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb index 8b54ad696..b0229923d 100644 --- a/app/models/keyword_mute.rb +++ b/app/models/keyword_mute.rb @@ -6,6 +6,7 @@ # id :integer not null, primary key # account_id :integer not null # keyword :string not null +# whole_word :boolean default(TRUE), not null # created_at :datetime not null # updated_at :datetime not null # @@ -32,12 +33,13 @@ class KeywordMute < ApplicationRecord def initialize(account_id) re = [].tap do |arr| - KeywordMute.where(account_id: account_id).select(:keyword, :id).find_each do |m| - arr << Regexp.escape(m.keyword.strip) + KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| + boundary = m.whole_word ? '\b' : '' + arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" end end.join('|') - @regex = /\b(?:#{re})\b/i unless re.empty? + @regex = /#{re}/i unless re.empty? end def =~(str) diff --git a/db/migrate/20171009222537_create_keyword_mutes.rb b/db/migrate/20171009222537_create_keyword_mutes.rb index ee690e799..ec0c756fb 100644 --- a/db/migrate/20171009222537_create_keyword_mutes.rb +++ b/db/migrate/20171009222537_create_keyword_mutes.rb @@ -3,6 +3,7 @@ class CreateKeywordMutes < ActiveRecord::Migration[5.1] create_table :keyword_mutes do |t| t.references :account, null: false t.string :keyword, null: false + t.boolean :whole_word, null: false, default: true t.timestamps end diff --git a/db/schema.rb b/db/schema.rb index 420bb0d2e..c0704b13e 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -170,6 +170,7 @@ ActiveRecord::Schema.define(version: 20171010025614) do create_table "keyword_mutes", force: :cascade do |t| t.bigint "account_id", null: false t.string "keyword", null: false + t.boolean "whole_word", default: true, null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false t.index ["account_id"], name: "index_keyword_mutes_on_account_id" diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb index de5d32bb4..c74505188 100644 --- a/spec/models/keyword_mute_spec.rb +++ b/spec/models/keyword_mute_spec.rb @@ -30,10 +30,16 @@ RSpec.describe KeywordMute, type: :model do expect(matcher =~ 'This is a hot take').to be_falsy end - it 'does not match substrings matching keywords' do - KeywordMute.create!(account: alice, keyword: 'take') + it 'considers word boundaries when matching' do + KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) + + expect(matcher =~ 'bobcats').to be_falsy + end + + it 'matches substrings if whole_word is false' do + KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) - expect(matcher =~ 'This is a shiitake mushroom').to be_falsy + expect(matcher =~ 'This is a shiitake mushroom').to be_truthy end it 'matches keywords at the beginning of the text' do -- cgit From 670e6a33f8eeca628707dc020e02ce32502d74a4 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sat, 21 Oct 2017 14:47:17 -0500 Subject: Move KeywordMute into Glitch namespace. There are two motivations for this: 1. It looks like we're going to add other features that require server-side storage (e.g. user notes). 2. Namespacing glitchsoc modifications is a good idea anyway: even if we do not end up doing (1), if upstream introduces a keyword-mute feature that also uses a "KeywordMute" model, we can avoid some merge conflicts this way and work on the more interesting task of choosing which implementation to use. --- .../settings/keyword_mutes_controller.rb | 2 +- app/lib/feed_manager.rb | 2 +- app/models/glitch.rb | 7 ++ app/models/glitch/keyword_mute.rb | 49 +++++++++++++ app/models/keyword_mute.rb | 49 ------------- ...900_move_keyword_mutes_into_glitch_namespace.rb | 7 ++ db/schema.rb | 22 +++--- spec/fabricators/glitch_keyword_mute_fabricator.rb | 2 + spec/fabricators/keyword_mute_fabricator.rb | 2 - spec/models/glitch/keyword_mute_spec.rb | 83 ++++++++++++++++++++++ spec/models/keyword_mute_spec.rb | 83 ---------------------- 11 files changed, 161 insertions(+), 147 deletions(-) create mode 100644 app/models/glitch.rb create mode 100644 app/models/glitch/keyword_mute.rb delete mode 100644 app/models/keyword_mute.rb create mode 100644 db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb create mode 100644 spec/fabricators/glitch_keyword_mute_fabricator.rb delete mode 100644 spec/fabricators/keyword_mute_fabricator.rb create mode 100644 spec/models/glitch/keyword_mute_spec.rb delete mode 100644 spec/models/keyword_mute_spec.rb (limited to 'db') diff --git a/app/controllers/settings/keyword_mutes_controller.rb b/app/controllers/settings/keyword_mutes_controller.rb index d9f99af09..6ae05108d 100644 --- a/app/controllers/settings/keyword_mutes_controller.rb +++ b/app/controllers/settings/keyword_mutes_controller.rb @@ -55,7 +55,7 @@ class Settings::KeywordMutesController < ApplicationController end def keyword_mutes_for_account - KeywordMute.where(account: @account) + Glitch::KeywordMute.where(account: @account) end def load_keyword_mute diff --git a/app/lib/feed_manager.rb b/app/lib/feed_manager.rb index 516bd81af..1123f88bb 100644 --- a/app/lib/feed_manager.rb +++ b/app/lib/feed_manager.rb @@ -138,7 +138,7 @@ class FeedManager end def filter_from_home?(status, receiver_id) - return true if KeywordMute.matcher_for(receiver_id) =~ status.text + return true if Glitch::KeywordMute.matcher_for(receiver_id) =~ status.text return false if receiver_id == status.account_id return true if status.reply? && (status.in_reply_to_id.nil? || status.in_reply_to_account_id.nil?) diff --git a/app/models/glitch.rb b/app/models/glitch.rb new file mode 100644 index 000000000..0e497babc --- /dev/null +++ b/app/models/glitch.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +module Glitch + def self.table_name_prefix + 'glitch_' + end +end diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb new file mode 100644 index 000000000..3b0b47f52 --- /dev/null +++ b/app/models/glitch/keyword_mute.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: glitch_keyword_mutes +# +# id :integer not null, primary key +# account_id :integer not null +# keyword :string not null +# whole_word :boolean default(TRUE), not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class Glitch::KeywordMute < ApplicationRecord + belongs_to :account, required: true + + validates_presence_of :keyword + + after_commit :invalidate_cached_matcher + + def self.matcher_for(account_id) + Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + end + + private + + def invalidate_cached_matcher + Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + end + + class Matcher + attr_reader :regex + + def initialize(account_id) + re = [].tap do |arr| + Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| + boundary = m.whole_word ? '\b' : '' + arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + end + end.join('|') + + @regex = /#{re}/i unless re.empty? + end + + def =~(str) + regex ? regex =~ str : false + end + end +end diff --git a/app/models/keyword_mute.rb b/app/models/keyword_mute.rb deleted file mode 100644 index b0229923d..000000000 --- a/app/models/keyword_mute.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true -# == Schema Information -# -# Table name: keyword_mutes -# -# id :integer not null, primary key -# account_id :integer not null -# keyword :string not null -# whole_word :boolean default(TRUE), not null -# created_at :datetime not null -# updated_at :datetime not null -# - -class KeywordMute < ApplicationRecord - belongs_to :account, required: true - - validates_presence_of :keyword - - after_commit :invalidate_cached_matcher - - def self.matcher_for(account_id) - Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } - end - - private - - def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:matcher:#{account_id}") - end - - class Matcher - attr_reader :regex - - def initialize(account_id) - re = [].tap do |arr| - KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| - boundary = m.whole_word ? '\b' : '' - arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" - end - end.join('|') - - @regex = /#{re}/i unless re.empty? - end - - def =~(str) - regex ? regex =~ str : false - end - end -end diff --git a/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb b/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb new file mode 100644 index 000000000..269bb49d6 --- /dev/null +++ b/db/migrate/20171021191900_move_keyword_mutes_into_glitch_namespace.rb @@ -0,0 +1,7 @@ +class MoveKeywordMutesIntoGlitchNamespace < ActiveRecord::Migration[5.1] + def change + safety_assured do + rename_table :keyword_mutes, :glitch_keyword_mutes + end + end +end diff --git a/db/schema.rb b/db/schema.rb index c0704b13e..c09876c4d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20171010025614) do +ActiveRecord::Schema.define(version: 20171021191900) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -155,6 +155,15 @@ ActiveRecord::Schema.define(version: 20171010025614) do t.index ["account_id", "target_account_id"], name: "index_follows_on_account_id_and_target_account_id", unique: true end + create_table "glitch_keyword_mutes", force: :cascade do |t| + t.bigint "account_id", null: false + t.string "keyword", null: false + t.boolean "whole_word", default: true, null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id"], name: "index_glitch_keyword_mutes_on_account_id" + end + create_table "imports", force: :cascade do |t| t.integer "type", null: false t.boolean "approved", default: false, null: false @@ -167,15 +176,6 @@ ActiveRecord::Schema.define(version: 20171010025614) do t.bigint "account_id", null: false end - create_table "keyword_mutes", force: :cascade do |t| - t.bigint "account_id", null: false - t.string "keyword", null: false - t.boolean "whole_word", default: true, null: false - t.datetime "created_at", null: false - t.datetime "updated_at", null: false - t.index ["account_id"], name: "index_keyword_mutes_on_account_id" - end - create_table "media_attachments", force: :cascade do |t| t.bigint "status_id" t.string "file_file_name" @@ -481,8 +481,8 @@ ActiveRecord::Schema.define(version: 20171010025614) do add_foreign_key "follow_requests", "accounts", name: "fk_76d644b0e7", on_delete: :cascade add_foreign_key "follows", "accounts", column: "target_account_id", name: "fk_745ca29eac", on_delete: :cascade add_foreign_key "follows", "accounts", name: "fk_32ed1b5560", on_delete: :cascade + add_foreign_key "glitch_keyword_mutes", "accounts", on_delete: :cascade add_foreign_key "imports", "accounts", name: "fk_6db1b6e408", on_delete: :cascade - add_foreign_key "keyword_mutes", "accounts", on_delete: :cascade add_foreign_key "media_attachments", "accounts", name: "fk_96dd81e81b", on_delete: :nullify add_foreign_key "media_attachments", "statuses", on_delete: :nullify add_foreign_key "mentions", "accounts", name: "fk_970d43f9d1", on_delete: :cascade diff --git a/spec/fabricators/glitch_keyword_mute_fabricator.rb b/spec/fabricators/glitch_keyword_mute_fabricator.rb new file mode 100644 index 000000000..8601ed6d7 --- /dev/null +++ b/spec/fabricators/glitch_keyword_mute_fabricator.rb @@ -0,0 +1,2 @@ +Fabricator(:glitch_keyword_mute) do +end diff --git a/spec/fabricators/keyword_mute_fabricator.rb b/spec/fabricators/keyword_mute_fabricator.rb deleted file mode 100644 index 82cf845c8..000000000 --- a/spec/fabricators/keyword_mute_fabricator.rb +++ /dev/null @@ -1,2 +0,0 @@ -Fabricator(:keyword_mute) do -end diff --git a/spec/models/glitch/keyword_mute_spec.rb b/spec/models/glitch/keyword_mute_spec.rb new file mode 100644 index 000000000..108cdafec --- /dev/null +++ b/spec/models/glitch/keyword_mute_spec.rb @@ -0,0 +1,83 @@ +require 'rails_helper' + +RSpec.describe Glitch::KeywordMute, type: :model do + let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } + let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } + + describe '.matcher_for' do + let(:matcher) { Glitch::KeywordMute.matcher_for(alice) } + + describe 'with no Glitch::KeywordMutes for an account' do + before do + Glitch::KeywordMute.delete_all + end + + it 'does not match' do + expect(matcher =~ 'This is a hot take').to be_falsy + end + end + + describe 'with Glitch::KeywordMutes for an account' do + it 'does not match keywords set by a different account' do + Glitch::KeywordMute.create!(account: bob, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'does not match if no keywords match the status text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'cold') + + expect(matcher =~ 'This is a hot take').to be_falsy + end + + it 'considers word boundaries when matching' do + Glitch::KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) + + expect(matcher =~ 'bobcats').to be_falsy + end + + it 'matches substrings if whole_word is false' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) + + expect(matcher =~ 'This is a shiitake mushroom').to be_truthy + end + + it 'matches keywords at the beginning of the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'Take this').to be_truthy + end + + it 'matches keywords at the beginning of the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'take') + + expect(matcher =~ 'This is a hot take').to be_truthy + end + + it 'matches if at least one keyword case-insensitively matches the text' do + Glitch::KeywordMute.create!(account: alice, keyword: 'hot') + + expect(matcher =~ 'This is a HOT take').to be_truthy + end + + it 'matches keywords surrounded by non-alphanumeric ornamentation' do + Glitch::KeywordMute.create!(account: alice, keyword: 'hot') + + expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy + end + + it 'uses case-folding rules appropriate for more than just English' do + Glitch::KeywordMute.create!(account: alice, keyword: 'großeltern') + + expect(matcher =~ 'besuch der grosseltern').to be_truthy + end + + it 'matches keywords that are composed of multiple words' do + Glitch::KeywordMute.create!(account: alice, keyword: 'a shiitake') + + expect(matcher =~ 'This is a shiitake').to be_truthy + expect(matcher =~ 'This is shiitake').to_not be_truthy + end + end + end +end diff --git a/spec/models/keyword_mute_spec.rb b/spec/models/keyword_mute_spec.rb deleted file mode 100644 index c74505188..000000000 --- a/spec/models/keyword_mute_spec.rb +++ /dev/null @@ -1,83 +0,0 @@ -require 'rails_helper' - -RSpec.describe KeywordMute, type: :model do - let(:alice) { Fabricate(:account, username: 'alice').tap(&:save!) } - let(:bob) { Fabricate(:account, username: 'bob').tap(&:save!) } - - describe '.matcher_for' do - let(:matcher) { KeywordMute.matcher_for(alice) } - - describe 'with no KeywordMutes for an account' do - before do - KeywordMute.delete_all - end - - it 'does not match' do - expect(matcher =~ 'This is a hot take').to be_falsy - end - end - - describe 'with KeywordMutes for an account' do - it 'does not match keywords set by a different account' do - KeywordMute.create!(account: bob, keyword: 'take') - - expect(matcher =~ 'This is a hot take').to be_falsy - end - - it 'does not match if no keywords match the status text' do - KeywordMute.create!(account: alice, keyword: 'cold') - - expect(matcher =~ 'This is a hot take').to be_falsy - end - - it 'considers word boundaries when matching' do - KeywordMute.create!(account: alice, keyword: 'bob', whole_word: true) - - expect(matcher =~ 'bobcats').to be_falsy - end - - it 'matches substrings if whole_word is false' do - KeywordMute.create!(account: alice, keyword: 'take', whole_word: false) - - expect(matcher =~ 'This is a shiitake mushroom').to be_truthy - end - - it 'matches keywords at the beginning of the text' do - KeywordMute.create!(account: alice, keyword: 'take') - - expect(matcher =~ 'Take this').to be_truthy - end - - it 'matches keywords at the beginning of the text' do - KeywordMute.create!(account: alice, keyword: 'take') - - expect(matcher =~ 'This is a hot take').to be_truthy - end - - it 'matches if at least one keyword case-insensitively matches the text' do - KeywordMute.create!(account: alice, keyword: 'hot') - - expect(matcher =~ 'This is a HOT take').to be_truthy - end - - it 'matches keywords surrounded by non-alphanumeric ornamentation' do - KeywordMute.create!(account: alice, keyword: 'hot') - - expect(matcher =~ 'This is a ~*HOT*~ take').to be_truthy - end - - it 'uses case-folding rules appropriate for more than just English' do - KeywordMute.create!(account: alice, keyword: 'großeltern') - - expect(matcher =~ 'besuch der grosseltern').to be_truthy - end - - it 'matches keywords that are composed of multiple words' do - KeywordMute.create!(account: alice, keyword: 'a shiitake') - - expect(matcher =~ 'This is a shiitake').to be_truthy - expect(matcher =~ 'This is shiitake').to_not be_truthy - end - end - end -end -- cgit