From 4b68e82a19ab2853264515a25af8d39841e43f00 Mon Sep 17 00:00:00 2001 From: David Yip Date: Sun, 22 Oct 2017 00:24:32 -0500 Subject: Don't add \b to whole-word keywords that don't start with word characters. Ditto for ending with \b. Consider muting the phrase "(hot take)". I stipulate it is reasonable to enter this with the default "match whole word" behavior. Under the old behavior, this would be encoded as \b\(hot\ take\)\b However, if \b is before the first character in the string and the first character in the string is not a word character, then the match will fail. Ditto for after. In our example, "(" is not a word character, so this will not match statuses containing "(hot take)", and that's a very surprising behavior. To address this, we only add leading and trailing \b to keywords that start or end with word characters. --- app/models/glitch/keyword_mute.rb | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) (limited to 'app/models') diff --git a/app/models/glitch/keyword_mute.rb b/app/models/glitch/keyword_mute.rb index 823e252d3..20fd89d9b 100644 --- a/app/models/glitch/keyword_mute.rb +++ b/app/models/glitch/keyword_mute.rb @@ -19,35 +19,49 @@ class Glitch::KeywordMute < ApplicationRecord after_commit :invalidate_cached_matcher def self.matcher_for(account_id) - Rails.cache.fetch("keyword_mutes:matcher:#{account_id}") { Matcher.new(account_id) } + Matcher.new(account_id) end private def invalidate_cached_matcher - Rails.cache.delete("keyword_mutes:matcher:#{account_id}") + Rails.cache.delete("keyword_mutes:regex:#{account_id}") end class Matcher + attr_reader :account_id attr_reader :regex def initialize(account_id) - re = [].tap do |arr| - Glitch::KeywordMute.where(account_id: account_id).select(:keyword, :id, :whole_word).find_each do |m| - boundary = m.whole_word ? '\b' : '' - arr << "#{boundary}#{Regexp.escape(m.keyword.strip)}#{boundary}" + @account_id = account_id + @regex = Rails.cache.fetch("keyword_mutes:regex:#{account_id}") { regex_for_account } + end + + def keywords + Glitch::KeywordMute. + where(account_id: account_id). + select(:keyword, :id, :whole_word) + end + + def regex_for_account + re_text = [].tap do |arr| + keywords.find_each do |kw| + arr << (kw.whole_word ? boundary_regex_for_keyword(kw.keyword) : Regexp.escape(kw.keyword)) end end.join('|') - @regex = /#{re}/i unless re.empty? + /#{re_text}/i unless re_text.empty? end - def =~(str) - regex ? regex =~ str : false + def boundary_regex_for_keyword(keyword) + sb = keyword =~ /\A[[:word:]]/ ? '\b' : '' + eb = keyword =~ /[[:word:]]\Z/ ? '\b' : '' + + "#{sb}#{Regexp.escape(keyword)}#{eb}" end - def matches?(str) - !!(regex =~ str) + def =~(str) + regex ? regex =~ str : false end end end -- cgit