From 37d495eeeb810127e0c6c62bff865c1ba66f45c8 Mon Sep 17 00:00:00 2001
From: David Yip
Date: Sun, 3 Jun 2018 18:12:55 -0500
Subject: keyword mute: Store keywords as a list

This has a couple of advantages over the regex approach:

- Keywords are individually addressable, which makes it easier to gather statistics (#363)
- Keywords can be individually applied to different feeds, e.g. skipping mentions (#454)

It *does* end up creating many more Regexp objects. I'm not yet sure if the difference is significant.
---
 app/models/glitch/keyword_mute.rb | 72 +++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb
index 17ebc5b5e..e7cbbe617 100644
--- a/app/models/glitch/keyword_mute.rb
+++ b/app/models/glitch/keyword_mute.rb
@@ -33,68 +33,74 @@ class Glitch::KeywordMute < ApplicationRecord
     Rails.cache.delete(TagMatcher.cache_key(account_id))
   end
 
-  class RegexpMatcher
+  class CachedKeywordMute
+    attr_reader :keyword
+    attr_reader :whole_word
+
+    def initialize(keyword, whole_word)
+      @keyword = keyword
+      @whole_word = whole_word
+    end
+
+    def boundary_regex_for_keyword
+      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
+      eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
+
+      /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
+    end
+
+    def matches?(str)
+      str =~ (whole_word ? boundary_regex_for_keyword : /#{keyword}/i)
+    end
+  end
+
+  class Matcher
     attr_reader :account_id
-    attr_reader :regex
+    attr_reader :words
 
     def initialize(account_id)
       @account_id = account_id
-      regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text }
-      @regex = /#{regex_text}/
+      @words = Rails.cache.fetch(self.class.cache_key(account_id)) { fetch_keywords }
     end
 
     protected
 
-    def keywords
-      Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)
+    def fetch_keywords
+      Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword).map do |whole_word, keyword|
+        CachedKeywordMute.new(transform_keyword(keyword), whole_word)
+      end
     end
 
-    def boundary_regex_for_keyword(keyword)
-      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
-      eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
-
-      /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
+    def transform_keyword(keyword)
+      keyword
     end
   end
 
-  class TextMatcher < RegexpMatcher
+  class TextMatcher < Matcher
     def self.cache_key(account_id)
       format('keyword_mutes:regex:text:%s', account_id)
     end
 
     def matches?(str)
-      !!(regex =~ str)
-    end
-
-    private
-
-    def make_regex_text
-      kws = keywords.map! do |whole_word, keyword|
-        whole_word ? boundary_regex_for_keyword(keyword) : /(?i:#{Regexp.escape(keyword)})/
-      end
-
-      Regexp.union(kws).source
+      words.any? { |w| w.matches?(str) }
     end
   end
 
-  class TagMatcher < RegexpMatcher
+  class TagMatcher < Matcher
     def self.cache_key(account_id)
       format('keyword_mutes:regex:tag:%s', account_id)
     end
 
     def matches?(tags)
-      tags.pluck(:name).any? { |n| regex =~ n }
+      tags.pluck(:name).any? do |n|
+        words.any? { |w| w.matches?(n) }
+      end
     end
 
-    private
-
-    def make_regex_text
-      kws = keywords.map! do |whole_word, keyword|
-        term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword
-        whole_word ? boundary_regex_for_keyword(term) : term
-      end
+    protected
 
-      Regexp.union(kws).source
+    def transform_keyword(kw)
+      Tag::HASHTAG_RE =~ kw ? $1 : kw
     end
   end
 end
-- 
cgit