about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-07-29 20:40:21 +0200
committerGitHub <noreply@github.com>2019-07-29 20:40:21 +0200
commite136112ab76a37cbde5c1bcfeb562c8e0a5b5116 (patch)
treedb91ad2e6b5579c6682b4cab0c3488bad506595d
parentaefeb65656605f3e32021acbc78c8e21307921fd (diff)
Fix tag normalization and migration not removing duplicate tags (#11441)
Fix #11428
-rw-r--r--app/models/tag.rb8
-rw-r--r--db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb13
-rw-r--r--spec/models/tag_spec.rb34
3 files changed, 51 insertions, 4 deletions
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 972242064..46e3a3ec0 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -65,7 +65,7 @@ class Tag < ApplicationRecord
 
   class << self
     def find_or_create_by_names(name_or_names)
-      Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name|
+      Array(name_or_names).map(&method(:normalize)).uniq { |str| str.mb_chars.downcase.to_s }.map do |normalized_name|
         tag = matching_name(normalized_name).first || create(name: normalized_name)
 
         yield tag if block_given?
@@ -77,7 +77,7 @@ class Tag < ApplicationRecord
     def search_for(term, limit = 5, offset = 0)
       pattern = sanitize_sql_like(normalize(term.strip)) + '%'
 
-      Tag.where(arel_table[:name].lower.matches(pattern.downcase))
+      Tag.where(arel_table[:name].lower.matches(pattern.mb_chars.downcase.to_s))
          .order(:name)
          .limit(limit)
          .offset(offset)
@@ -92,7 +92,7 @@ class Tag < ApplicationRecord
     end
 
     def matching_name(name_or_names)
-      names = Array(name_or_names).map { |name| normalize(name).downcase }
+      names = Array(name_or_names).map { |name| normalize(name).mb_chars.downcase.to_s }
 
       if names.size == 1
         where(arel_table[:name].lower.eq(names.first))
@@ -104,7 +104,7 @@ class Tag < ApplicationRecord
     private
 
     def normalize(str)
-      str.gsub(/\A#/, '').mb_chars.to_s
+      str.gsub(/\A#/, '')
     end
   end
 
diff --git a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb
index 6fa8c0ec4..057fc86ba 100644
--- a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb
+++ b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb
@@ -2,6 +2,19 @@ class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2]
   disable_ddl_transaction!
 
   def up
+    Tag.connection.select_all('SELECT string_agg(id::text, \',\') AS ids FROM tags GROUP BY lower(name) HAVING count(*) > 1').to_hash.each do |row|
+      canonical_tag_id  = row['ids'].split(',').first
+      redundant_tag_ids = row['ids'].split(',')[1..-1]
+
+      safety_assured do
+        execute "UPDATE accounts_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM accounts_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
+        execute "UPDATE statuses_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')}) AND NOT EXISTS (SELECT t1.tag_id FROM statuses_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.status_id = t0.status_id)"
+        execute "UPDATE featured_tags AS t0 SET tag_id = #{canonical_tag_id} WHERE tag_id IN (#{redundant_tag_ids.join(', ')})  AND NOT EXISTS (SELECT t1.tag_id FROM featured_tags AS t1 WHERE t1.tag_id = #{canonical_tag_id} AND t1.account_id = t0.account_id)"
+      end
+
+      Tag.where(id: redundant_tag_ids).in_batches.delete_all
+    end
+
     safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' }
     remove_index :tags, name: 'index_tags_on_name'
     remove_index :tags, name: 'hashtag_search_index'
diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb
index 9a30ceaa5..5f07fd618 100644
--- a/spec/models/tag_spec.rb
+++ b/spec/models/tag_spec.rb
@@ -82,6 +82,40 @@ RSpec.describe Tag, type: :model do
     end
   end
 
+  describe '.find_normalized' do
+    it 'returns tag for a multibyte case-insensitive name' do
+      upcase_string   = 'abcABCabcABCやゆよ'
+      downcase_string = 'abcabcabcabcやゆよ';
+
+      tag = Fabricate(:tag, name: downcase_string)
+      expect(Tag.find_normalized(upcase_string)).to eq tag
+    end
+  end
+
+  describe '.matching_name' do
+    it 'returns tags for multibyte case-insensitive names' do
+      upcase_string   = 'abcABCabcABCやゆよ'
+      downcase_string = 'abcabcabcabcやゆよ';
+
+      tag = Fabricate(:tag, name: downcase_string)
+      expect(Tag.matching_name(upcase_string)).to eq [tag]
+    end
+  end
+
+  describe '.find_or_create_by_names' do
+    it 'runs a passed block once per tag regardless of duplicates' do
+      upcase_string   = 'abcABCabcABCやゆよ'
+      downcase_string = 'abcabcabcabcやゆよ';
+      count           = 0
+
+      Tag.find_or_create_by_names([upcase_string, downcase_string]) do |tag|
+        count += 1
+      end
+
+      expect(count).to eq 1
+    end
+  end
+
   describe '.search_for' do
     it 'finds tag records with matching names' do
       tag = Fabricate(:tag, name: "match")