From 41eeb9ebaa65abe3fcbab60847b69d2469726d8a Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 25 Aug 2020 20:39:35 +0900 Subject: Use Status.group instead of Status.distinct in HashQueryService (#14662) DISTINCT clause removes duplicated records according to all the selected attributes. In reality, it can remove duplicated records only looking at statuses.id, but the clause confuses the query planner and yields insufficient performance. The behavior is also problematic if the scope produced by HashQueryService is used to query columns without id (using pluck method, for example). The scope is expected to contain unique statuses, but the uniquness will be evaluated with some arbitrary columns other than id. GROUP BY clause resolves those problem by explicitly specifying the column to take into account for the record distinction. A workaround for the problem of DISTINCT clause in Api::V1::Timelines::TagController is no longer necessary and removed. --- app/controllers/api/v1/timelines/tag_controller.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'app/controllers/api/v1/timelines') diff --git a/app/controllers/api/v1/timelines/tag_controller.rb b/app/controllers/api/v1/timelines/tag_controller.rb index 2d6ad5a80..62f34d3f7 100644 --- a/app/controllers/api/v1/timelines/tag_controller.rb +++ b/app/controllers/api/v1/timelines/tag_controller.rb @@ -33,9 +33,7 @@ class Api::V1::Timelines::TagController < Api::BaseController ) if truthy_param?(:only_media) - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - status_ids = statuses.joins(:media_attachments).distinct(:id).pluck(:id) - statuses.where(id: status_ids) + statuses.joins(:media_attachments) else statuses end -- cgit From e26e7a1cb5992375eecedbc10ab9bcef4e603a88 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 19:29:59 +0900 Subject: Replace incorrect use of distinct with group (#14675) Some uses of ActiveRecord::QueryMethods#distinct pass field names but they are incorrect for the current version of Rails. ActiveRecord::QueryMethods#group provides the expected behavior and benefits performance. See commit 6da24aad4cafdef8d8a2c92bac2002a5fc2fe9c8. --- app/controllers/api/v1/accounts/statuses_controller.rb | 12 +----------- app/controllers/api/v1/timelines/public_controller.rb | 4 +--- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'app/controllers/api/v1/timelines') diff --git a/app/controllers/api/v1/accounts/statuses_controller.rb b/app/controllers/api/v1/accounts/statuses_controller.rb index 114ee0a82..1af768e54 100644 --- a/app/controllers/api/v1/accounts/statuses_controller.rb +++ b/app/controllers/api/v1/accounts/statuses_controller.rb @@ -41,17 +41,7 @@ class Api::V1::Accounts::StatusesController < Api::BaseController end def only_media_scope - Status.where(id: account_media_status_ids) - end - - def account_media_status_ids - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - # Also, Avoid getting slow by not narrowing down by `statuses.account_id`. - # When narrowing down by `statuses.account_id`, `index_statuses_20180106` will be used - # and the table will be joined by `Merge Semi Join`, so the query will be slow. - @account.statuses.joins(:media_attachments).merge(@account.media_attachments).permitted_for(@account, current_account) - .paginate_by_max_id(limit_param(DEFAULT_STATUSES_LIMIT), params[:max_id], params[:since_id]) - .reorder(id: :desc).distinct(:id).pluck(:id) + Status.joins(:media_attachments).merge(@account.media_attachments.reorder(nil)).group(:id) end def pinned_scope diff --git a/app/controllers/api/v1/timelines/public_controller.rb b/app/controllers/api/v1/timelines/public_controller.rb index c6e7854d9..6ca903c16 100644 --- a/app/controllers/api/v1/timelines/public_controller.rb +++ b/app/controllers/api/v1/timelines/public_controller.rb @@ -30,9 +30,7 @@ class Api::V1::Timelines::PublicController < Api::BaseController ) if truthy_param?(:only_media) - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - status_ids = statuses.joins(:media_attachments).distinct(:id).pluck(:id) - statuses.where(id: status_ids) + statuses.joins(:media_attachments).group(:id) else statuses end -- cgit From 64ef37b89de806f49cc59e011aa0ee2039c82c46 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 19:31:56 +0900 Subject: Introduce ApplicationController#cache_collection_paginated_by_id (#14677) * Replace incorrect use of distinct with group Some uses of ActiveRecord::QueryMethods#distinct pass field names but they are incorrect for the current version of Rails. ActiveRecord::QueryMethods#group provides the expected behavior and benefits performance. See commit 6da24aad4cafdef8d8a2c92bac2002a5fc2fe9c8. * Introduce ApplicationController#cache_collection_paginated_by_id ApplicationController#cache_collection_paginated_by_id fuses ApplicationController#cache_collection and Paginable.paginate_by_id. An advantage of this method is that it prevents from modifying scope which Paginable.paginate_by_id may provide. ApplicationController#cache_collection always return an array and there is no possibility of the scope modification. It is also clear for a programmer, considering the implication of "cache". This method can also emit more efficient queries by using Cacheable.cache_ids before calling Paginable.paginate_by_id. --- app/controllers/accounts_controller.rb | 12 ++++++++---- app/controllers/activitypub/outboxes_controller.rb | 8 ++++++-- app/controllers/api/v1/accounts/statuses_controller.rb | 11 ++++++----- app/controllers/api/v1/notifications_controller.rb | 8 +++----- app/controllers/api/v1/timelines/public_controller.rb | 16 +++++++++------- app/controllers/api/v1/timelines/tag_controller.rb | 17 ++++++----------- app/controllers/concerns/cache_concern.rb | 4 ++++ 7 files changed, 42 insertions(+), 34 deletions(-) (limited to 'app/controllers/api/v1/timelines') diff --git a/app/controllers/accounts_controller.rb b/app/controllers/accounts_controller.rb index db77b628c..d97d88fd9 100644 --- a/app/controllers/accounts_controller.rb +++ b/app/controllers/accounts_controller.rb @@ -28,8 +28,7 @@ class AccountsController < ApplicationController end @pinned_statuses = cache_collection(@account.pinned_statuses, Status) if show_pinned_statuses? - @statuses = filtered_status_page - @statuses = cache_collection(@statuses, Status) + @statuses = cached_filtered_status_page @rss_url = rss_url unless @statuses.empty? @@ -142,8 +141,13 @@ class AccountsController < ApplicationController request.path.split('.').first.ends_with?(Addressable::URI.parse("/tagged/#{params[:tag]}").normalize) end - def filtered_status_page - filtered_statuses.paginate_by_id(PAGE_SIZE, params_slice(:max_id, :min_id, :since_id)) + def cached_filtered_status_page + cache_collection_paginated_by_id( + filtered_statuses, + Status, + PAGE_SIZE, + params_slice(:max_id, :min_id, :since_id) + ) end def params_slice(*keys) diff --git a/app/controllers/activitypub/outboxes_controller.rb b/app/controllers/activitypub/outboxes_controller.rb index e25a4bc07..c33c15255 100644 --- a/app/controllers/activitypub/outboxes_controller.rb +++ b/app/controllers/activitypub/outboxes_controller.rb @@ -50,8 +50,12 @@ class ActivityPub::OutboxesController < ActivityPub::BaseController return unless page_requested? @statuses = @account.statuses.permitted_for(@account, signed_request_account) - @statuses = @statuses.paginate_by_id(LIMIT, params_slice(:max_id, :min_id, :since_id)) - @statuses = cache_collection(@statuses, Status) + @statuses = cache_collection_paginated_by_id( + @statuses, + Status, + LIMIT, + params_slice(:max_id, :min_id, :since_id) + ) end def page_requested? diff --git a/app/controllers/api/v1/accounts/statuses_controller.rb b/app/controllers/api/v1/accounts/statuses_controller.rb index 1af768e54..85a9133e3 100644 --- a/app/controllers/api/v1/accounts/statuses_controller.rb +++ b/app/controllers/api/v1/accounts/statuses_controller.rb @@ -22,10 +22,6 @@ class Api::V1::Accounts::StatusesController < Api::BaseController end def cached_account_statuses - cache_collection account_statuses, Status - end - - def account_statuses statuses = truthy_param?(:pinned) ? pinned_scope : permitted_account_statuses statuses.merge!(only_media_scope) if truthy_param?(:only_media) @@ -33,7 +29,12 @@ class Api::V1::Accounts::StatusesController < Api::BaseController statuses.merge!(no_reblogs_scope) if truthy_param?(:exclude_reblogs) statuses.merge!(hashtag_scope) if params[:tagged].present? - statuses.paginate_by_id(limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id)) + cache_collection_paginated_by_id( + statuses, + Status, + limit_param(DEFAULT_STATUSES_LIMIT), + params_slice(:max_id, :since_id, :min_id) + ) end def permitted_account_statuses diff --git a/app/controllers/api/v1/notifications_controller.rb b/app/controllers/api/v1/notifications_controller.rb index 8ac227765..9d03cb879 100644 --- a/app/controllers/api/v1/notifications_controller.rb +++ b/app/controllers/api/v1/notifications_controller.rb @@ -31,11 +31,9 @@ class Api::V1::NotificationsController < Api::BaseController private def load_notifications - cache_collection paginated_notifications, Notification - end - - def paginated_notifications - browserable_account_notifications.paginate_by_id( + cache_collection_paginated_by_id( + browserable_account_notifications, + Notification, limit_param(DEFAULT_NOTIFICATIONS_LIMIT), params_slice(:max_id, :since_id, :min_id) ) diff --git a/app/controllers/api/v1/timelines/public_controller.rb b/app/controllers/api/v1/timelines/public_controller.rb index 6ca903c16..26d877b00 100644 --- a/app/controllers/api/v1/timelines/public_controller.rb +++ b/app/controllers/api/v1/timelines/public_controller.rb @@ -16,18 +16,20 @@ class Api::V1::Timelines::PublicController < Api::BaseController end def load_statuses - cached_public_statuses + cached_public_statuses_page end - def cached_public_statuses - cache_collection public_statuses, Status - end - - def public_statuses - statuses = public_timeline_statuses.paginate_by_id( + def cached_public_statuses_page + cache_collection_paginated_by_id( + public_statuses, + Status, limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) + end + + def public_statuses + statuses = public_timeline_statuses if truthy_param?(:only_media) statuses.joins(:media_attachments).group(:id) diff --git a/app/controllers/api/v1/timelines/tag_controller.rb b/app/controllers/api/v1/timelines/tag_controller.rb index 62f34d3f7..76f7d3590 100644 --- a/app/controllers/api/v1/timelines/tag_controller.rb +++ b/app/controllers/api/v1/timelines/tag_controller.rb @@ -20,23 +20,18 @@ class Api::V1::Timelines::TagController < Api::BaseController end def cached_tagged_statuses - cache_collection tagged_statuses, Status - end - - def tagged_statuses if @tag.nil? [] else - statuses = tag_timeline_statuses.paginate_by_id( + statuses = tag_timeline_statuses + statuses = statuses.joins(:media_attachments) if truthy_param?(:only_media) + + cache_collection_paginated_by_id( + statuses, + Status, limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) - - if truthy_param?(:only_media) - statuses.joins(:media_attachments) - else - statuses - end end end diff --git a/app/controllers/concerns/cache_concern.rb b/app/controllers/concerns/cache_concern.rb index c7d25ae00..189b92012 100644 --- a/app/controllers/concerns/cache_concern.rb +++ b/app/controllers/concerns/cache_concern.rb @@ -47,4 +47,8 @@ module CacheConcern raw.map { |item| cached_keys_with_value[item.id] || uncached[item.id] }.compact end + + def cache_collection_paginated_by_id(raw, klass, limit, options) + cache_collection raw.cache_ids.paginate_by_id(limit, options), klass + end end -- cgit