about summary refs log tree commit diff
path: root/app/models
diff options
context:
space:
mode:
author: David Yip <yipdw@member.fsf.org> 2017-10-22 00:24:32 -0500
committer: David Yip <yipdw@member.fsf.org> 2017-10-22 00:38:54 -0500
commit: 4b68e82a19ab2853264515a25af8d39841e43f00 (patch)
tree: 814f30ea153dce13ba09df0aa25f68dac77ba9f8 /app/models
parent: 19826774f06244b0c84a1973b3a366df0d7f0f5a (diff)
Don't add \b to whole-word keywords that don't start with word characters.
Likewise, don't add a trailing \b to keywords that don't end with a word character.

Consider muting the phrase "(hot take)".  I stipulate it is reasonable
to enter this with the default "match whole word" behavior.  Under the
old behavior, this would be encoded as

    \b\(hot\ take\)\b

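However, \b only matches at a transition between a word character and a
non-word character (or the start/end of the text).  If \b is placed before
the first character of the keyword and that character is not a word
character, then the match will fail unless the character preceding the
keyword in the status happens to be a word character.  Ditto for a \b
placed after the last character.  In our example, "(" is not a word
character, so this will not match a status such as "my (hot take) is",
and that's very surprising behavior.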

To address this, we add a leading \b only to keywords that start with a
word character, and a trailing \b only to keywords that end with one.
Diffstat (limited to 'app/models')
-rw-r--r-- app/models/glitch/keyword_mute.rb 36
1 files changed, 25 insertions, 11 deletions
diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb
index 823e252d3..20fd89d9b 100644
--- a/app/models/glitch/keyword_mute.rb
+++ b/app/models/glitch/keyword_mute.rb
@@ -19,35 +19,49 @@ class Glitch::KeywordMute < ApplicationRecord
   after_commit :invalidate_cached_matcher
 
   def self.matcher_for(account_id)
-    Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) }
+    Matcher.new(account_id)
   end
 
   private
 
   def invalidate_cached_matcher
-    Rails.cache.delete("keyword_mutes:matcher:#{account_id}")
+    Rails.cache.delete("keyword_mutes:regex:#{account_id}")
   end
 
   class Matcher
+    attr_reader :account_id
     attr_reader :regex
 
     def initialize(account_id)
-      re = [].tap do |arr|
-        Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m|
-          boundary = m.whole_word ? '\b' : ''
-          arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}"
+      @account_id = account_id
+      @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account }
+    end
+
+    def keywords
+      Glitch::KeywordMute.
+        where(account_id: account_id).
+        select(:keyword, :id, :whole_word)
+    end
+
+    def regex_for_account
+      re_text = [].tap do |arr|
+        keywords.find_each do |kw|
+          arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword))
         end
       end.join('|')
 
-      @regex = /#{re}/i unless re.empty?
+      /#{re_text}/i unless re_text.empty?
     end
 
-    def =~(str)
-      regex ? regex =~ str : false
+    def boundary_regex_for_keyword(keyword)
+      sb = keyword =~ /\A[[:word:]]/ ? '\b' : ''
+      eb = keyword =~ /[[:word:]]\Z/ ? '\b' : ''
+
+      "#{sb}#{Regexp.escape(keyword)}#{eb}"
     end
 
-    def matches?(str)
-      !!(regex =~ str)
+    def =~(str)
+      regex ? regex =~ str : false
     end
   end
 end