about summary refs log tree commit diff
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2022-05-18 23:29:14 +0200
committer GitHub <noreply@github.com> 2022-05-18 23:29:14 +0200
commit 679b7158e3cd3881e8cbaf2d2c0c97725b3b5fd9 (patch)
tree 9cc1dd73c3cd894207bf3c806234c8c919459985
parent ded5a0254a4d29a7384ef766a1e92467fe4ebd2b (diff)
Change search indexing to use batches to minimize resource usage (#18451)
-rw-r--r-- Gemfile 2
-rw-r--r-- app/chewy/accounts_index.rb 2
-rw-r--r-- app/chewy/statuses_index.rb 2
-rw-r--r-- app/chewy/tags_index.rb 2
-rw-r--r-- app/workers/scheduler/indexing_scheduler.rb 26
-rw-r--r-- config/application.rb 2
-rw-r--r-- config/initializers/chewy.rb 5
-rw-r--r-- config/sidekiq.yml 4
-rw-r--r-- lib/chewy/strategy/custom_sidekiq.rb 11
-rw-r--r-- lib/chewy/strategy/mastodon.rb 27
10 files changed, 64 insertions, 19 deletions
diff --git a/Gemfile b/Gemfile
index 445b10496..2e77fb42a 100644
--- a/Gemfile
+++ b/Gemfile
@@ -81,7 +81,7 @@ gem 'scenic', '~> 1.6'
 gem 'sidekiq', '~> 6.4'
 gem 'sidekiq-scheduler', '~> 4.0'
 gem 'sidekiq-unique-jobs', '~> 7.1'
-gem 'sidekiq-bulk', '~>0.2.0'
+gem 'sidekiq-bulk', '~> 0.2.0'
 gem 'simple-navigation', '~> 4.3'
 gem 'simple_form', '~> 5.1'
 gem 'sprockets-rails', '~> 3.4', require: 'sprockets/railtie'
diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb
index 6f9ea76e9..763958a3f 100644
--- a/app/chewy/accounts_index.rb
+++ b/app/chewy/accounts_index.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class AccountsIndex < Chewy::Index
-  settings index: { refresh_interval: '5m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'whitespace',
diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb
index 1304aeedb..c20009879 100644
--- a/app/chewy/statuses_index.rb
+++ b/app/chewy/statuses_index.rb
@@ -3,7 +3,7 @@
 class StatusesIndex < Chewy::Index
   include FormattingHelper
 
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     filter: {
       english_stop: {
         type: 'stop',
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
index f9db2b03a..a5b139bca 100644
--- a/app/chewy/tags_index.rb
+++ b/app/chewy/tags_index.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 
 class TagsIndex < Chewy::Index
-  settings index: { refresh_interval: '15m' }, analysis: {
+  settings index: { refresh_interval: '30s' }, analysis: {
     analyzer: {
       content: {
         tokenizer: 'keyword',
diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb
new file mode 100644
index 000000000..3a6f47a29
--- /dev/null
+++ b/app/workers/scheduler/indexing_scheduler.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+class Scheduler::IndexingScheduler
+  include Sidekiq::Worker
+  include Redisable
+
+  sidekiq_options retry: 0
+
+  def perform
+    indexes.each do |type|
+      with_redis do |redis|
+        ids = redis.smembers("chewy:queue:#{type.name}")
+
+        type.import!(ids)
+
+        redis.pipelined do |pipeline|
+          ids.each { |id| pipeline.srem("chewy:queue:#{type.name}", id) }
+        end
+      end
+    end
+  end
+
+  def indexes
+    [AccountsIndex, TagsIndex, StatusesIndex]
+  end
+end
diff --git a/config/application.rb b/config/application.rb
index 64987cfe7..24fa2a978 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -38,7 +38,7 @@ require_relative '../lib/mastodon/version'
 require_relative '../lib/mastodon/rack_middleware'
 require_relative '../lib/devise/two_factor_ldap_authenticatable'
 require_relative '../lib/devise/two_factor_pam_authenticatable'
-require_relative '../lib/chewy/strategy/custom_sidekiq'
+require_relative '../lib/chewy/strategy/mastodon'
 require_relative '../lib/webpacker/manifest_extensions'
 require_relative '../lib/webpacker/helper_extensions'
 require_relative '../lib/rails/engine_extensions'
diff --git a/config/initializers/chewy.rb b/config/initializers/chewy.rb
index f303fc54d..752fc3c6d 100644
--- a/config/initializers/chewy.rb
+++ b/config/initializers/chewy.rb
@@ -13,15 +13,14 @@ Chewy.settings = {
   journal: false,
   user: user,
   password: password,
-  sidekiq: { queue: 'pull' },
 }
 
 # We use our own async strategy even outside the request-response
 # cycle, which takes care of checking if Elasticsearch is enabled
 # or not. However, mind that for the Rails console, the :urgent
 # strategy is set automatically with no way to override it.
-Chewy.root_strategy              = :custom_sidekiq
-Chewy.request_strategy           = :custom_sidekiq
+Chewy.root_strategy              = :mastodon
+Chewy.request_strategy           = :mastodon
 Chewy.use_after_commit_callbacks = false
 
 module Chewy
diff --git a/config/sidekiq.yml b/config/sidekiq.yml
index 26be26326..2a3871468 100644
--- a/config/sidekiq.yml
+++ b/config/sidekiq.yml
@@ -21,6 +21,10 @@
     every: '6h'
     class: Scheduler::Trends::ReviewNotificationsScheduler
     queue: scheduler
+  indexing_scheduler:
+    every: '5m'
+    class: Scheduler::IndexingScheduler
+    queue: scheduler
   media_cleanup_scheduler:
     cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
     class: Scheduler::MediaCleanupScheduler
diff --git a/lib/chewy/strategy/custom_sidekiq.rb b/lib/chewy/strategy/custom_sidekiq.rb
deleted file mode 100644
index 794ae4ed4..000000000
--- a/lib/chewy/strategy/custom_sidekiq.rb
+++ /dev/null
@@ -1,11 +0,0 @@
-# frozen_string_literal: true
-
-module Chewy
-  class Strategy
-    class CustomSidekiq < Sidekiq
-      def update(_type, _objects, _options = {})
-        super if Chewy.enabled?
-      end
-    end
-  end
-end
diff --git a/lib/chewy/strategy/mastodon.rb b/lib/chewy/strategy/mastodon.rb
new file mode 100644
index 000000000..ee8b92186
--- /dev/null
+++ b/lib/chewy/strategy/mastodon.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Chewy
+  class Strategy
+    class Mastodon < Base
+      def initialize
+        super
+
+        @stash = Hash.new { |hash, key| hash[key] = [] }
+      end
+
+      def update(type, objects, _options = {})
+        @stash[type].concat(type.root.id ? Array.wrap(objects) : type.adapter.identify(objects)) if Chewy.enabled?
+      end
+
+      def leave
+        RedisConfiguration.with do |redis|
+          redis.pipelined do |pipeline|
+            @stash.each do |type, ids|
+              pipeline.sadd("chewy:queue:#{type.name}", ids)
+            end
+          end
+        end
+      end
+    end
+  end
+end