about summary refs log tree commit diff
diff options
context:
space:
mode:
author Takeshi Umeda <noel.yoshiba@gmail.com> 2021-11-19 06:02:08 +0900
committer GitHub <noreply@github.com> 2021-11-18 22:02:08 +0100
commit 3419d3ec84c3aa4f450265642e0a85dcdd3c36d0 (patch)
tree dfd530fcfaed29a5421138087ef3a240d3c45a3c
parent 2b6a25c609e2d1814e5dd586e9f40868d944c8bc (diff)
Bump chewy from 5.2.0 to 7.2.3 (supports Elasticsearch 7.x) (#16915)
* Bump chewy from 5.2.0 to 7.2.2

* fix style (codeclimate)

* fix style

* fix style

* Bump chewy from 7.2.2 to 7.2.3
-rw-r--r-- Gemfile 2
-rw-r--r-- Gemfile.lock 38
-rw-r--r-- app/chewy/accounts_index.rb 24
-rw-r--r-- app/chewy/statuses_index.rb 48
-rw-r--r-- app/chewy/tags_index.rb 16
-rw-r--r-- app/models/account.rb 2
-rw-r--r-- app/models/account_stat.rb 2
-rw-r--r-- app/models/bookmark.rb 2
-rw-r--r-- app/models/favourite.rb 2
-rw-r--r-- app/models/status.rb 2
-rw-r--r-- app/models/tag.rb 2
-rw-r--r-- app/services/batched_remove_status_service.rb 2
-rw-r--r-- app/services/delete_account_service.rb 4
-rw-r--r-- config/initializers/chewy.rb 20
-rw-r--r-- lib/mastodon/search_cli.rb 99
15 files changed, 128 insertions, 137 deletions
diff --git a/Gemfile b/Gemfile
index a420f0577..57d78f637 100644
--- a/Gemfile
+++ b/Gemfile
@@ -30,7 +30,7 @@ gem 'bootsnap', '~> 1.9.1', require: false
 gem 'browser'
 gem 'charlock_holmes', '~> 0.7.7'
 gem 'iso-639'
-gem 'chewy', '~> 5.2'
+gem 'chewy', '~> 7.2'
 gem 'cld3', '~> 3.4.2'
 gem 'devise', '~> 4.8'
 gem 'devise-two-factor', '~> 4.0'
diff --git a/Gemfile.lock b/Gemfile.lock
index 31dade486..30fe32a22 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -147,9 +147,9 @@ GEM
       activesupport
     cbor (0.5.9.6)
     charlock_holmes (0.7.7)
-    chewy (5.2.0)
+    chewy (7.2.3)
       activesupport (>= 5.2)
-      elasticsearch (>= 2.0.0)
+      elasticsearch (>= 7.12.0, < 7.14.0)
       elasticsearch-dsl
     chunky_png (1.4.0)
     cld3 (3.4.2)
@@ -197,13 +197,13 @@ GEM
       railties (>= 3.2)
     e2mmap (0.1.0)
     ed25519 (1.2.4)
-    elasticsearch (7.10.1)
-      elasticsearch-api (= 7.10.1)
-      elasticsearch-transport (= 7.10.1)
-    elasticsearch-api (7.10.1)
+    elasticsearch (7.13.3)
+      elasticsearch-api (= 7.13.3)
+      elasticsearch-transport (= 7.13.3)
+    elasticsearch-api (7.13.3)
       multi_json
-    elasticsearch-dsl (0.1.9)
-    elasticsearch-transport (7.10.1)
+    elasticsearch-dsl (0.1.10)
+    elasticsearch-transport (7.13.3)
       faraday (~> 1)
       multi_json
     encryptor (3.0.0)
@@ -214,11 +214,25 @@ GEM
     fabrication (2.22.0)
     faker (2.19.0)
       i18n (>= 1.6, < 2)
-    faraday (1.3.0)
+    faraday (1.8.0)
+      faraday-em_http (~> 1.0)
+      faraday-em_synchrony (~> 1.0)
+      faraday-excon (~> 1.1)
+      faraday-httpclient (~> 1.0.1)
       faraday-net_http (~> 1.0)
+      faraday-net_http_persistent (~> 1.1)
+      faraday-patron (~> 1.0)
+      faraday-rack (~> 1.0)
       multipart-post (>= 1.2, < 3)
-      ruby2_keywords
+      ruby2_keywords (>= 0.0.4)
+    faraday-em_http (1.0.0)
+    faraday-em_synchrony (1.0.0)
+    faraday-excon (1.1.0)
+    faraday-httpclient (1.0.1)
     faraday-net_http (1.0.1)
+    faraday-net_http_persistent (1.2.0)
+    faraday-patron (1.0.0)
+    faraday-rack (1.0.0)
     fast_blank (1.0.1)
     fastimage (2.2.5)
     ffi (1.15.4)
@@ -539,7 +553,7 @@ GEM
     ruby-saml (1.13.0)
       nokogiri (>= 1.10.5)
       rexml
-    ruby2_keywords (0.0.4)
+    ruby2_keywords (0.0.5)
     rufus-scheduler (3.7.0)
       fugit (~> 1.1, >= 1.1.6)
     safety_net_attestation (0.4.0)
@@ -686,7 +700,7 @@ DEPENDENCIES
   capistrano-yarn (~> 2.0)
   capybara (~> 3.36)
   charlock_holmes (~> 0.7.7)
-  chewy (~> 5.2)
+  chewy (~> 7.2)
   cld3 (~> 3.4.2)
   climate_control (~> 0.2)
   color_diff (~> 0.1)
diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index b814e009e..6f9ea76e9 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -23,21 +23,21 @@ class AccountsIndex < Chewy::Index
     },
   }
 
-  define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do
-    root date_detection: false do
-      field :id, type: 'long'
+  index_scope ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? }
 
-      field :display_name, type: 'text', analyzer: 'content' do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
+  root date_detection: false do
+    field :id, type: 'long'
 
-      field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
+    field :display_name, type: 'text', analyzer: 'content' do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
+    end
 
-      field :following_count, type: 'long', value: ->(account) { account.following.local.count }
-      field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
-      field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
+    field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
     end
+
+    field :following_count, type: 'long', value: ->(account) { account.following.local.count }
+    field :followers_count, type: 'long', value: ->(account) { account.followers.local.count }
+    field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at }
   end
 end
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 47cb856ea..1903c2ea3 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -31,36 +31,36 @@ class StatusesIndex < Chewy::Index
     },
   }
 
-  define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll) do
-    crutch :mentions do |collection|
-      data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
+  index_scope ::Status.unscoped.kept.without_reblogs.includes(:media_attachments, :preloadable_poll)
 
-    crutch :favourites do |collection|
-      data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
+  crutch :mentions do |collection|
+    data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local, silent: false).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
 
-    crutch :reblogs do |collection|
-      data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
+  crutch :favourites do |collection|
+    data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
 
-    crutch :bookmarks do |collection|
-      data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
-      data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
-    end
+  crutch :reblogs do |collection|
+    data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
 
-    root date_detection: false do
-      field :id, type: 'long'
-      field :account_id, type: 'long'
+  crutch :bookmarks do |collection|
+    data = ::Bookmark.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id)
+    data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) }
+  end
 
-      field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
-        field :stemmed, type: 'text', analyzer: 'content'
-      end
+  root date_detection: false do
+    field :id, type: 'long'
+    field :account_id, type: 'long'
 
-      field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
+    field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do
+      field :stemmed, type: 'text', analyzer: 'content'
     end
+
+    field :searchable_by, type: 'long', value: ->(status, crutches) { status.searchable_by(crutches) }
   end
 end
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index 300fc128f..f811a8d67 100644
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb
@@ -23,15 +23,15 @@ class TagsIndex < Chewy::Index
     },
   }
 
-  define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do
-    root date_detection: false do
-      field :name, type: 'text', analyzer: 'content' do
-        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
-      end
+  index_scope ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? }
 
-      field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
-      field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
-      field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
+  root date_detection: false do
+    field :name, type: 'text', analyzer: 'content' do
+      field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
     end
+
+    field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
+    field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } }
+    field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
   end
 end
diff --git a/app/models/account.rb b/app/models/account.rb
index 291d3e571..d289c5e53 100644
--- a/app/models/account.rb
+++ b/app/models/account.rb
@@ -143,7 +143,7 @@ class Account < ApplicationRecord
 
   delegate :chosen_languages, to: :user, prefix: false, allow_nil: true
 
-  update_index('accounts#account', :self)
+  update_index('accounts', :self)
 
   def local?
     domain.nil?
diff --git a/app/models/account_stat.rb b/app/models/account_stat.rb
index e702fa4a4..b49827267 100644
--- a/app/models/account_stat.rb
+++ b/app/models/account_stat.rb
@@ -19,5 +19,5 @@ class AccountStat < ApplicationRecord
 
   belongs_to :account, inverse_of: :account_stat
 
-  update_index('accounts#account', :account)
+  update_index('accounts', :account)
 end
diff --git a/app/models/bookmark.rb b/app/models/bookmark.rb
index f21ea714c..6334ef0df 100644
--- a/app/models/bookmark.rb
+++ b/app/models/bookmark.rb
@@ -13,7 +13,7 @@
 class Bookmark < ApplicationRecord
   include Paginable
 
-  update_index('statuses#status', :status) if Chewy.enabled?
+  update_index('statuses', :status) if Chewy.enabled?
 
   belongs_to :account, inverse_of: :bookmarks
   belongs_to :status,  inverse_of: :bookmarks
diff --git a/app/models/favourite.rb b/app/models/favourite.rb
index ca8bce146..2f355739a 100644
--- a/app/models/favourite.rb
+++ b/app/models/favourite.rb
@@ -13,7 +13,7 @@
 class Favourite < ApplicationRecord
   include Paginable
 
-  update_index('statuses#status', :status)
+  update_index('statuses', :status)
 
   belongs_to :account, inverse_of: :favourites
   belongs_to :status,  inverse_of: :favourites
diff --git a/app/models/status.rb b/app/models/status.rb
index c7f761bc6..749a23718 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -42,7 +42,7 @@ class Status < ApplicationRecord
   # will be based on current time instead of `created_at`
   attr_accessor :override_timestamps
 
-  update_index('statuses#status', :proper)
+  update_index('statuses', :proper)
 
   enum visibility: [:public, :unlisted, :private, :direct, :limited], _suffix: :visibility
 
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 735c30608..dcce28391 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -39,7 +39,7 @@ class Tag < ApplicationRecord
   scope :recently_used, ->(account) { joins(:statuses).where(statuses: { id: account.statuses.select(:id).limit(1000) }).group(:id).order(Arel.sql('count(*) desc')) }
   scope :matches_name, ->(term) { where(arel_table[:name].lower.matches(arel_table.lower("#{sanitize_sql_like(Tag.normalize(term))}%"), nil, true)) } # Search with case-sensitive to use B-tree index
 
-  update_index('tags#tag', :self)
+  update_index('tags', :self)
 
   def to_param
     name
diff --git a/app/services/batched_remove_status_service.rb b/app/services/batched_remove_status_service.rb
index b54bcae35..5000062e4 100644
--- a/app/services/batched_remove_status_service.rb
+++ b/app/services/batched_remove_status_service.rb
@@ -31,7 +31,7 @@ class BatchedRemoveStatusService < BaseService
 
     # Since we skipped all callbacks, we also need to manually
     # deindex the statuses
-    Chewy.strategy.current.update(StatusesIndex::Status, statuses_and_reblogs) if Chewy.enabled?
+    Chewy.strategy.current.update(StatusesIndex, statuses_and_reblogs) if Chewy.enabled?
 
     return if options[:skip_side_effects]
 
diff --git a/app/services/delete_account_service.rb b/app/services/delete_account_service.rb
index d8270498a..ac571d7e2 100644
--- a/app/services/delete_account_service.rb
+++ b/app/services/delete_account_service.rb
@@ -189,7 +189,7 @@ class DeleteAccountService < BaseService
     @account.favourites.in_batches do |favourites|
       ids = favourites.pluck(:status_id)
       StatusStat.where(status_id: ids).update_all('favourites_count = GREATEST(0, favourites_count - 1)')
-      Chewy.strategy.current.update(StatusesIndex::Status, ids) if Chewy.enabled?
+      Chewy.strategy.current.update(StatusesIndex, ids) if Chewy.enabled?
       Rails.cache.delete_multi(ids.map { |id| "statuses/#{id}" })
       favourites.delete_all
     end
@@ -197,7 +197,7 @@ class DeleteAccountService < BaseService
 
   def purge_bookmarks!
     @account.bookmarks.in_batches do |bookmarks|
-      Chewy.strategy.current.update(StatusesIndex::Status, bookmarks.pluck(:status_id)) if Chewy.enabled?
+      Chewy.strategy.current.update(StatusesIndex, bookmarks.pluck(:status_id)) if Chewy.enabled?
       bookmarks.delete_all
     end
   end
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index 820182a21..fbbcbbcde 100644
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb
@@ -37,23 +37,3 @@ end
 # Mastodon is run with hidden services enabled, because
 # ElasticSearch is *not* supposed to be accessed through a proxy
 Faraday.ignore_env_proxy = true
-
-# Elasticsearch 7.x workaround
-Elasticsearch::Transport::Client.prepend Module.new {
-  def search(arguments = {})
-    arguments[:rest_total_hits_as_int] = true
-    super arguments
-  end
-}
-
-Elasticsearch::API::Indices::IndicesClient.prepend Module.new {
-  def create(arguments = {})
-    arguments[:include_type_name] = true
-    super arguments
-  end
-
-  def put_mapping(arguments = {})
-    arguments[:include_type_name] = true
-    super arguments
-  end
-}
diff --git a/lib/mastodon/search_cli.rb b/lib/mastodon/search_cli.rb
index 0126dfcff..2d1ca1c05 100644
--- a/lib/mastodon/search_cli.rb
+++ b/lib/mastodon/search_cli.rb
@@ -64,11 +64,7 @@ module Mastodon
       progress.title = 'Estimating workload '
 
       # Estimate the amount of data that has to be imported first
-      indices.each do |index|
-        index.types.each do |type|
-          progress.total = (progress.total || 0) + type.adapter.default_scope.count
-        end
-      end
+      progress.total = indices.sum { |index| index.adapter.default_scope.count }
 
       # Now import all the actual data. Mind that unlike chewy:sync, we don't
       # fetch and compare all record IDs from the database and the index to
@@ -80,67 +76,68 @@ module Mastodon
         batch_size     = 1_000
         slice_size     = (batch_size / options[:concurrency]).ceil
 
-        index.types.each do |type|
-          type.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
-            futures = []
+        index.adapter.default_scope.reorder(nil).find_in_batches(batch_size: batch_size) do |batch|
+          futures = []
 
-            batch.each_slice(slice_size) do |records|
-              futures << Concurrent::Future.execute(executor: pool) do
-                begin
-                  if !progress.total.nil? && progress.progress + records.size > progress.total
-                    # The number of items has changed between start and now,
-                    # since there is no good way to predict the final count from
-                    # here, just change the progress bar to an indeterminate one
+          batch.each_slice(slice_size) do |records|
+            futures << Concurrent::Future.execute(executor: pool) do
+              begin
+                if !progress.total.nil? && progress.progress + records.size > progress.total
+                  # The number of items has changed between start and now,
+                  # since there is no good way to predict the final count from
+                  # here, just change the progress bar to an indeterminate one
 
-                    progress.total = nil
-                  end
+                  progress.total = nil
+                end
 
-                  grouped_records = nil
-                  bulk_body       = nil
-                  index_count     = 0
-                  delete_count    = 0
+                grouped_records = nil
+                bulk_body       = nil
+                index_count     = 0
+                delete_count    = 0
 
-                  ActiveRecord::Base.connection_pool.with_connection do
-                    grouped_records = type.adapter.send(:grouped_objects, records)
-                    bulk_body       = Chewy::Type::Import::BulkBuilder.new(type, **grouped_records).bulk_body
+                ActiveRecord::Base.connection_pool.with_connection do
+                  grouped_records = records.to_a.group_by do |record|
+                    index.adapter.send(:delete_from_index?, record) ? :delete : :to_index
                   end
 
-                  index_count  = grouped_records[:index].size  if grouped_records.key?(:index)
-                  delete_count = grouped_records[:delete].size if grouped_records.key?(:delete)
-
-                  # The following is an optimization for statuses specifically, since
-                  # we want to de-index statuses that cannot be searched by anybody,
-                  # but can't use Chewy's delete_if logic because it doesn't use
-                  # crutches and our searchable_by logic depends on them
-                  if type == StatusesIndex::Status
-                    bulk_body.map! do |entry|
-                      if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
-                        index_count  -= 1
-                        delete_count += 1
-
-                        { delete: entry[:index].except(:data) }
-                      else
-                        entry
-                      end
+                  bulk_body = Chewy::Index::Import::BulkBuilder.new(index, **grouped_records).bulk_body
+                end
+
+                index_count  = grouped_records[:to_index].size  if grouped_records.key?(:to_index)
+                delete_count = grouped_records[:delete].size    if grouped_records.key?(:delete)
+
+                # The following is an optimization for statuses specifically, since
+                # we want to de-index statuses that cannot be searched by anybody,
+                # but can't use Chewy's delete_if logic because it doesn't use
+                # crutches and our searchable_by logic depends on them
+                if index == StatusesIndex
+                  bulk_body.map! do |entry| # entries are keyed by bulk action (:index / :delete), not by the :to_index group
+                    if entry[:index] && entry.dig(:index, :data, 'searchable_by').blank?
+                      index_count  -= 1
+                      delete_count += 1
+
+                      { delete: entry[:index].except(:data) } # turn the index action into a delete of the same document
+                    else
+                      entry
                     end
                   end
+                end
 
-                  Chewy::Type::Import::BulkRequest.new(type).perform(bulk_body)
+                Chewy::Index::Import::BulkRequest.new(index).perform(bulk_body)
 
-                  progress.progress += records.size
+                progress.progress += records.size
 
-                  added.increment(index_count)
-                  removed.increment(delete_count)
+                added.increment(index_count)
+                removed.increment(delete_count)
 
-                  sleep 1
-                rescue => e
-                  progress.log pastel.red("Error importing #{index}: #{e}")
-                end
+                sleep 1
+              rescue => e
+                progress.log pastel.red("Error importing #{index}: #{e}")
               end
             end
-
-            futures.map(&:value)
           end
+
+          futures.map(&:value)
         end
       end