keyword mute: Store keywords as a list

This has a couple of advantages over the regex approach:

- Keywords are individually addressable, which makes it easier to gather statistics (#363)
- Keywords can be individually applied to different feeds, e.g. skipping mentions (#454)

It *does* end up creating many more Regexp objects. I'm not yet sure if the difference is significant.
author: David Yip <yipdw@member.fsf.org> 2018-06-03 18:12:55 -0500
committer: David Yip <yipdw@member.fsf.org> 2018-06-03 18:12:55 -0500
commit: 37d495eeeb810127e0c6c62bff865c1ba66f45c8 (patch)
tree: 262d909a584565de86673e7b06c335329915d1ac /app/models/glitch
parent: 5dd2a78034c4ab8d7eac6f4553185eadc48b6c26 (diff)
1 files changed, 39 insertions, 33 deletions
diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb
index 17ebc5b5e..e7cbbe617 100644
--- a/app/models/glitch/keyword_mute.rb
+++ b/app/models/glitch/keyword_mute.rb
@@ -33,68 +33,74 @@ class Glitch::KeywordMute < ApplicationRecord
     Rails.cache.delete(TagMatcher.cache_key(account_id))
   end
 
-  class RegexpMatcher
+  # One muted keyword plus its whole-word flag; matches? tests a string against it.
+  class CachedKeywordMute
+    attr_reader :keyword, :whole_word
+
+    def initialize(keyword, whole_word)
+      @keyword = keyword
+      @whole_word = whole_word
+    end
+
+    def boundary_regex_for_keyword
+      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
+      eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
+
+      /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
+    end
+    # The keyword is escaped in both branches so regex metacharacters match literally.
+    def matches?(str)
+      str =~ (whole_word ? boundary_regex_for_keyword : /#{Regexp.escape(keyword)}/i)
+    end
+  end
+
+  class Matcher
     attr_reader :account_id
-    attr_reader :regex
+    attr_reader :words
 
     def initialize(account_id)
       @account_id = account_id
-      regex_text = Rails.cache.fetch(self.class.cache_key(account_id)) { make_regex_text }
-      @regex = /#{regex_text}/
+      @words = Rails.cache.fetch(self.class.cache_key(account_id)) { fetch_keywords }
     end
 
     protected
 
-    def keywords
-      Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword)
+    def fetch_keywords
+      Glitch::KeywordMute.where(account_id: account_id).pluck(:whole_word, :keyword).map do |whole_word, keyword|
+        CachedKeywordMute.new(transform_keyword(keyword), whole_word)
+      end
     end
 
-    def boundary_regex_for_keyword(keyword)
-      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
-      eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
-
-      /(?mix:#{sb}#{Regexp.escape(keyword)}#{eb})/
+    def transform_keyword(keyword)
+      keyword
     end
   end
 
-  class TextMatcher < RegexpMatcher
+  class TextMatcher < Matcher
     def self.cache_key(account_id)
       format('keyword_mutes:regex:text:%s', account_id)
     end
 
     def matches?(str)
-      !!(regex =~ str)
-    end
-
-    private
-
-    def make_regex_text
-      kws = keywords.map! do |whole_word, keyword|
-        whole_word ? boundary_regex_for_keyword(keyword) : /(?i:#{Regexp.escape(keyword)})/
-      end
-
-      Regexp.union(kws).source
+      words.any? { |w| w.matches?(str) }
     end
   end
 
-  class TagMatcher < RegexpMatcher
+  class TagMatcher < Matcher
     def self.cache_key(account_id)
       format('keyword_mutes:regex:tag:%s', account_id)
     end
 
     def matches?(tags)
-      tags.pluck(:name).any? { |n| regex =~ n }
+      tags.pluck(:name).any? do |n|
+        words.any? { |w| w.matches?(n) }
+      end
     end
 
-    private
-
-    def make_regex_text
-      kws = keywords.map! do |whole_word, keyword|
-        term = (Tag::HASHTAG_RE =~ keyword) ? $1 : keyword
-        whole_word ? boundary_regex_for_keyword(term) : term
-      end
+    protected
 
-      Regexp.union(kws).source
+    def transform_keyword(kw)
+      Tag::HASHTAG_RE =~ kw ? $1 : kw
     end
   end
 end
author	David Yip <yipdw@member.fsf.org>	2018-06-03 18:12:55 -0500
committer	David Yip <yipdw@member.fsf.org>	2018-06-03 18:12:55 -0500
commit	37d495eeeb810127e0c6c62bff865c1ba66f45c8 (patch)
tree	262d909a584565de86673e7b06c335329915d1ac /app/models/glitch
parent	5dd2a78034c4ab8d7eac6f4553185eadc48b6c26 (diff)