Diffstat (limited to 'app/workers/scheduler')
4 files changed, 28 insertions, 37 deletions
diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
index bd92fe32c..f237f1dc9 100644
--- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
+++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb
@@ -7,7 +7,7 @@ class Scheduler::AccountsStatusesCleanupScheduler
   # This limit is mostly to be nice to the fediverse at large and not
   # generate too much traffic.
   # This also helps limiting the running time of the scheduler itself.
-  MAX_BUDGET = 50
+  MAX_BUDGET = 150
 
   # This is an attempt to spread the load across instances, as various
   # accounts are likely to have various followers.
@@ -15,28 +15,22 @@ class Scheduler::AccountsStatusesCleanupScheduler
   # This is an attempt to limit the workload generated by status removal
   # jobs to something the particular instance can handle.
-  PER_THREAD_BUDGET = 5
+  PER_THREAD_BUDGET = 6
 
   # Those avoid loading an instance that is already under load
-  MAX_DEFAULT_SIZE    = 2
+  MAX_DEFAULT_SIZE    = 200
   MAX_DEFAULT_LATENCY = 5
-  MAX_PUSH_SIZE       = 5
+  MAX_PUSH_SIZE       = 500
   MAX_PUSH_LATENCY    = 10
+
   # 'pull' queue has lower priority jobs, and it's unlikely that pushing
   # deletes would cause much issues with this queue if it didn't cause issues
   # with default and push. Yet, do not enqueue deletes if the instance is
   # lagging behind too much.
-  MAX_PULL_SIZE    = 500
-  MAX_PULL_LATENCY = 300
-
-  # This is less of an issue in general, but deleting old statuses is likely
-  # to cause delivery errors, and thus increase the number of jobs to be retried.
-  # This doesn't directly translate to load, but connection errors and a high
-  # number of dead instances may lead to this spiraling out of control if
-  # unchecked.
-  MAX_RETRY_SIZE = 50_000
+  MAX_PULL_SIZE    = 10_000
+  MAX_PULL_LATENCY = 5.minutes.to_i
 
-  sidekiq_options retry: 0, lock: :until_executed
+  sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i
 
   def perform
     return if under_load?
 
@@ -62,17 +56,17 @@ class Scheduler::AccountsStatusesCleanupScheduler
       # The idea here is to loop through all policies at least once until the budget is exhausted
       # and start back after the last processed account otherwise
       break if budget.zero? || (num_processed_accounts.zero? && first_policy_id.nil?)
+      first_policy_id = nil
     end
   end
 
   def compute_budget
-    threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.map { |x| x['concurrency'] }.sum
+    threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.pluck('concurrency').sum
 
     [PER_THREAD_BUDGET * threads, MAX_BUDGET].min
   end
 
   def under_load?
-    return true if Sidekiq::Stats.new.retry_size > MAX_RETRY_SIZE
     queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY)
   end
 
diff --git a/app/workers/scheduler/follow_recommendations_scheduler.rb b/app/workers/scheduler/follow_recommendations_scheduler.rb
index 57f78170e..17cf3f2cc 100644
--- a/app/workers/scheduler/follow_recommendations_scheduler.rb
+++ b/app/workers/scheduler/follow_recommendations_scheduler.rb
@@ -19,13 +19,11 @@ class Scheduler::FollowRecommendationsScheduler
     fallback_recommendations = FollowRecommendation.order(rank: :desc).limit(SET_SIZE)
 
     Trends.available_locales.each do |locale|
-      recommendations = begin
-        if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
-          FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.account_id, recommendation.rank] }
-        else
-          []
-        end
-      end
+      recommendations = if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
+                          FollowRecommendation.localized(locale).order(rank: :desc).limit(SET_SIZE).map { |recommendation| [recommendation.rank, recommendation.account_id] }
+                        else
+                          []
+                        end
 
       # Use language-agnostic results if there are not enough language-specific ones
       missing = SET_SIZE - recommendations.size
@@ -35,14 +33,14 @@ class Scheduler::FollowRecommendationsScheduler
       # Language-specific results should be above language-agnostic ones,
      # otherwise language-agnostic ones will always overshadow them
-      recommendations.map! { |(account_id, rank)| [account_id, rank + max_fallback_rank] }
+      recommendations.map! { |(rank, account_id)| [rank + max_fallback_rank, account_id] }
 
       added = 0
 
       fallback_recommendations.each do |recommendation|
-        next if recommendations.any? { |(account_id, _)| account_id == recommendation.account_id }
+        next if recommendations.any? { |(_, account_id)| account_id == recommendation.account_id }
 
-        recommendations << [recommendation.account_id, recommendation.rank]
+        recommendations << [recommendation.rank, recommendation.account_id]
         added += 1
 
         break if added >= missing
       end
@@ -51,10 +49,7 @@ class Scheduler::FollowRecommendationsScheduler
 
       redis.multi do |multi|
         multi.del(key(locale))
-
-        recommendations.each do |(account_id, rank)|
-          multi.zadd(key(locale), rank, account_id)
-        end
+        multi.zadd(key(locale), recommendations)
       end
     end
   end
diff --git a/app/workers/scheduler/indexing_scheduler.rb b/app/workers/scheduler/indexing_scheduler.rb
index c42396629..d622f5586 100644
--- a/app/workers/scheduler/indexing_scheduler.rb
+++ b/app/workers/scheduler/indexing_scheduler.rb
@@ -6,17 +6,19 @@ class Scheduler::IndexingScheduler
 
   sidekiq_options retry: 0
 
+  IMPORT_BATCH_SIZE = 1000
+  SCAN_BATCH_SIZE = 10 * IMPORT_BATCH_SIZE
+
   def perform
     return unless Chewy.enabled?
 
     indexes.each do |type|
       with_redis do |redis|
-        ids = redis.smembers("chewy:queue:#{type.name}")
-
-        type.import!(ids)
-
-        redis.pipelined do |pipeline|
-          ids.each { |id| pipeline.srem("chewy:queue:#{type.name}", id) }
+        redis.sscan_each("chewy:queue:#{type.name}", count: SCAN_BATCH_SIZE).each_slice(IMPORT_BATCH_SIZE) do |ids|
+          type.import!(ids)
+          redis.pipelined do |pipeline|
+            pipeline.srem("chewy:queue:#{type.name}", ids)
+          end
         end
       end
     end
diff --git a/app/workers/scheduler/user_cleanup_scheduler.rb b/app/workers/scheduler/user_cleanup_scheduler.rb
index 63f9ed78c..45cfbc62e 100644
--- a/app/workers/scheduler/user_cleanup_scheduler.rb
+++ b/app/workers/scheduler/user_cleanup_scheduler.rb
@@ -15,7 +15,7 @@ class Scheduler::UserCleanupScheduler
   def clean_unconfirmed_accounts!
     User.where('confirmed_at is NULL AND confirmation_sent_at <= ?', 2.days.ago).reorder(nil).find_in_batches do |batch|
       # We have to do it separately because of missing database constraints
-      AccountModerationNote.where(account_id: batch.map(&:account_id)).delete_all
+      AccountModerationNote.where(target_account_id: batch.map(&:account_id)).delete_all
       Account.where(id: batch.map(&:account_id)).delete_all
       User.where(id: batch.map(&:id)).delete_all
     end
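Note on the follow-recommendations change above: the recommendation tuples flip from [account_id, rank] to [rank, account_id] so the whole array can be handed to redis-rb's zadd in a single call inside the MULTI block, since the bulk form of zadd takes [score, member] pairs. The following is a minimal standalone sketch of that bulk form, not code from this diff; it uses the redis gem directly, and the key name is only an illustration of key(locale).

require 'redis'

redis = Redis.new

# [score, member] pairs, i.e. [rank, account_id], in the order the bulk
# form of redis-rb's zadd expects them.
recommendations = [[0.9, 42], [0.7, 17], [0.5, 23]]

redis.multi do |multi|
  multi.del('follow_recommendations:en')                    # mirrors multi.del(key(locale))
  multi.zadd('follow_recommendations:en', recommendations)  # one ZADD for all pairs
end

# Members come back ordered by rank, highest first.
p redis.zrevrange('follow_recommendations:en', 0, -1) # => ["42", "17", "23"]

One ZADD carrying every pair replaces the per-recommendation loop while producing the same sorted set.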