From 41eeb9ebaa65abe3fcbab60847b69d2469726d8a Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 25 Aug 2020 20:39:35 +0900 Subject: Use Status.group instead of Status.distinct in HashQueryService (#14662) DISTINCT clause removes duplicated records according to all the selected attributes. In reality, it can remove duplicated records only looking at statuses.id, but the clause confuses the query planner and yields insufficient performance. The behavior is also problematic if the scope produced by HashQueryService is used to query columns without id (using pluck method, for example). The scope is expected to contain unique statuses, but the uniquness will be evaluated with some arbitrary columns other than id. GROUP BY clause resolves those problem by explicitly specifying the column to take into account for the record distinction. A workaround for the problem of DISTINCT clause in Api::V1::Timelines::TagController is no longer necessary and removed. --- app/controllers/api/v1/timelines/tag_controller.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'app/controllers/api/v1') diff --git a/app/controllers/api/v1/timelines/tag_controller.rb b/app/controllers/api/v1/timelines/tag_controller.rb index 2d6ad5a80..62f34d3f7 100644 --- a/app/controllers/api/v1/timelines/tag_controller.rb +++ b/app/controllers/api/v1/timelines/tag_controller.rb @@ -33,9 +33,7 @@ class Api::V1::Timelines::TagController < Api::BaseController ) if truthy_param?(:only_media) - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - status_ids = statuses.joins(:media_attachments).distinct(:id).pluck(:id) - statuses.where(id: status_ids) + statuses.joins(:media_attachments) else statuses end -- cgit From 552e886b648faa2a2229d86c7fd9abc8bb5ff99c Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 16:27:33 +0900 Subject: Eagerly load statuses with the main query in Api::V1::FavouritesController (#14673) The old implementation had two queries: 1. The query constructed in Api::V1::FavouritesController#results 2. The query constructed in #cached_favourites, which is merged with 1. Both of them are issued againt PostgreSQL. The combination of the two queries caused the following problems: - The small window between the two queries involves race conditions. - Minor performance inefficiency. Moreover, the construction of query 2, which involves merging with query 1 has a bug. Query 1 is finalized with paginate_by_id, but paginate_by_id returns an array when min_id parameter is specified. The behavior prevents from merging the query, and in the real world, ActiveRecord simply ignores the merge (!), which results in querying the entire scan of statuses and favourites table. This change fixes these issues by simply letting query 1 get all the works done. --- app/controllers/api/v1/favourites_controller.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'app/controllers/api/v1') diff --git a/app/controllers/api/v1/favourites_controller.rb b/app/controllers/api/v1/favourites_controller.rb index 3e242905d..71a707d2a 100644 --- a/app/controllers/api/v1/favourites_controller.rb +++ b/app/controllers/api/v1/favourites_controller.rb @@ -17,14 +17,11 @@ class Api::V1::FavouritesController < Api::BaseController end def cached_favourites - cache_collection( - Status.reorder(nil).joins(:favourites).merge(results), - Status - ) + cache_collection(results.map(&:status), Status) end def results - @_results ||= account_favourites.paginate_by_id( + @_results ||= account_favourites.eager_load(:status).paginate_by_id( limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) -- cgit From e26e7a1cb5992375eecedbc10ab9bcef4e603a88 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 19:29:59 +0900 Subject: Replace incorrect use of distinct with group (#14675) Some uses of ActiveRecord::QueryMethods#distinct pass field names but they are incorrect for the current version of Rails. ActiveRecord::QueryMethods#group provides the expected behavior and benefits performance. See commit 6da24aad4cafdef8d8a2c92bac2002a5fc2fe9c8. --- app/controllers/api/v1/accounts/statuses_controller.rb | 12 +----------- app/controllers/api/v1/timelines/public_controller.rb | 4 +--- 2 files changed, 2 insertions(+), 14 deletions(-) (limited to 'app/controllers/api/v1') diff --git a/app/controllers/api/v1/accounts/statuses_controller.rb b/app/controllers/api/v1/accounts/statuses_controller.rb index 114ee0a82..1af768e54 100644 --- a/app/controllers/api/v1/accounts/statuses_controller.rb +++ b/app/controllers/api/v1/accounts/statuses_controller.rb @@ -41,17 +41,7 @@ class Api::V1::Accounts::StatusesController < Api::BaseController end def only_media_scope - Status.where(id: account_media_status_ids) - end - - def account_media_status_ids - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - # Also, Avoid getting slow by not narrowing down by `statuses.account_id`. - # When narrowing down by `statuses.account_id`, `index_statuses_20180106` will be used - # and the table will be joined by `Merge Semi Join`, so the query will be slow. - @account.statuses.joins(:media_attachments).merge(@account.media_attachments).permitted_for(@account, current_account) - .paginate_by_max_id(limit_param(DEFAULT_STATUSES_LIMIT), params[:max_id], params[:since_id]) - .reorder(id: :desc).distinct(:id).pluck(:id) + Status.joins(:media_attachments).merge(@account.media_attachments.reorder(nil)).group(:id) end def pinned_scope diff --git a/app/controllers/api/v1/timelines/public_controller.rb b/app/controllers/api/v1/timelines/public_controller.rb index c6e7854d9..6ca903c16 100644 --- a/app/controllers/api/v1/timelines/public_controller.rb +++ b/app/controllers/api/v1/timelines/public_controller.rb @@ -30,9 +30,7 @@ class Api::V1::Timelines::PublicController < Api::BaseController ) if truthy_param?(:only_media) - # `SELECT DISTINCT id, updated_at` is too slow, so pluck ids at first, and then select id, updated_at with ids. - status_ids = statuses.joins(:media_attachments).distinct(:id).pluck(:id) - statuses.where(id: status_ids) + statuses.joins(:media_attachments).group(:id) else statuses end -- cgit From b63ede5005d33b52266650ec716d345f166e2df0 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 19:30:23 +0900 Subject: Eagerly load statuses with the main query in Api::V1::BookmarksController (#14674) This is same with commit 552e886b648faa2a2229d86c7fd9abc8bb5ff99c except that it was for Api::V1::FavouritesController while this is for Api::V1::BookmarksController. --- app/controllers/api/v1/bookmarks_controller.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'app/controllers/api/v1') diff --git a/app/controllers/api/v1/bookmarks_controller.rb b/app/controllers/api/v1/bookmarks_controller.rb index c15212f0a..5c72f4a1a 100644 --- a/app/controllers/api/v1/bookmarks_controller.rb +++ b/app/controllers/api/v1/bookmarks_controller.rb @@ -17,14 +17,11 @@ class Api::V1::BookmarksController < Api::BaseController end def cached_bookmarks - cache_collection( - Status.reorder(nil).joins(:bookmarks).merge(results), - Status - ) + cache_collection(results.map(&:status), Status) end def results - @_results ||= account_bookmarks.paginate_by_id( + @_results ||= account_bookmarks.eager_load(:status).paginate_by_id( limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) -- cgit From 64ef37b89de806f49cc59e011aa0ee2039c82c46 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 28 Aug 2020 19:31:56 +0900 Subject: Introduce ApplicationController#cache_collection_paginated_by_id (#14677) * Replace incorrect use of distinct with group Some uses of ActiveRecord::QueryMethods#distinct pass field names but they are incorrect for the current version of Rails. ActiveRecord::QueryMethods#group provides the expected behavior and benefits performance. See commit 6da24aad4cafdef8d8a2c92bac2002a5fc2fe9c8. * Introduce ApplicationController#cache_collection_paginated_by_id ApplicationController#cache_collection_paginated_by_id fuses ApplicationController#cache_collection and Paginable.paginate_by_id. An advantage of this method is that it prevents from modifying scope which Paginable.paginate_by_id may provide. ApplicationController#cache_collection always return an array and there is no possibility of the scope modification. It is also clear for a programmer, considering the implication of "cache". This method can also emit more efficient queries by using Cacheable.cache_ids before calling Paginable.paginate_by_id. --- app/controllers/accounts_controller.rb | 12 ++++++++---- app/controllers/activitypub/outboxes_controller.rb | 8 ++++++-- app/controllers/api/v1/accounts/statuses_controller.rb | 11 ++++++----- app/controllers/api/v1/notifications_controller.rb | 8 +++----- app/controllers/api/v1/timelines/public_controller.rb | 16 +++++++++------- app/controllers/api/v1/timelines/tag_controller.rb | 17 ++++++----------- app/controllers/concerns/cache_concern.rb | 4 ++++ 7 files changed, 42 insertions(+), 34 deletions(-) (limited to 'app/controllers/api/v1') diff --git a/app/controllers/accounts_controller.rb b/app/controllers/accounts_controller.rb index db77b628c..d97d88fd9 100644 --- a/app/controllers/accounts_controller.rb +++ b/app/controllers/accounts_controller.rb @@ -28,8 +28,7 @@ class AccountsController < ApplicationController end @pinned_statuses = cache_collection(@account.pinned_statuses, Status) if show_pinned_statuses? - @statuses = filtered_status_page - @statuses = cache_collection(@statuses, Status) + @statuses = cached_filtered_status_page @rss_url = rss_url unless @statuses.empty? @@ -142,8 +141,13 @@ class AccountsController < ApplicationController request.path.split('.').first.ends_with?(Addressable::URI.parse("/tagged/#{params[:tag]}").normalize) end - def filtered_status_page - filtered_statuses.paginate_by_id(PAGE_SIZE, params_slice(:max_id, :min_id, :since_id)) + def cached_filtered_status_page + cache_collection_paginated_by_id( + filtered_statuses, + Status, + PAGE_SIZE, + params_slice(:max_id, :min_id, :since_id) + ) end def params_slice(*keys) diff --git a/app/controllers/activitypub/outboxes_controller.rb b/app/controllers/activitypub/outboxes_controller.rb index e25a4bc07..c33c15255 100644 --- a/app/controllers/activitypub/outboxes_controller.rb +++ b/app/controllers/activitypub/outboxes_controller.rb @@ -50,8 +50,12 @@ class ActivityPub::OutboxesController < ActivityPub::BaseController return unless page_requested? @statuses = @account.statuses.permitted_for(@account, signed_request_account) - @statuses = @statuses.paginate_by_id(LIMIT, params_slice(:max_id, :min_id, :since_id)) - @statuses = cache_collection(@statuses, Status) + @statuses = cache_collection_paginated_by_id( + @statuses, + Status, + LIMIT, + params_slice(:max_id, :min_id, :since_id) + ) end def page_requested? diff --git a/app/controllers/api/v1/accounts/statuses_controller.rb b/app/controllers/api/v1/accounts/statuses_controller.rb index 1af768e54..85a9133e3 100644 --- a/app/controllers/api/v1/accounts/statuses_controller.rb +++ b/app/controllers/api/v1/accounts/statuses_controller.rb @@ -22,10 +22,6 @@ class Api::V1::Accounts::StatusesController < Api::BaseController end def cached_account_statuses - cache_collection account_statuses, Status - end - - def account_statuses statuses = truthy_param?(:pinned) ? pinned_scope : permitted_account_statuses statuses.merge!(only_media_scope) if truthy_param?(:only_media) @@ -33,7 +29,12 @@ class Api::V1::Accounts::StatusesController < Api::BaseController statuses.merge!(no_reblogs_scope) if truthy_param?(:exclude_reblogs) statuses.merge!(hashtag_scope) if params[:tagged].present? - statuses.paginate_by_id(limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id)) + cache_collection_paginated_by_id( + statuses, + Status, + limit_param(DEFAULT_STATUSES_LIMIT), + params_slice(:max_id, :since_id, :min_id) + ) end def permitted_account_statuses diff --git a/app/controllers/api/v1/notifications_controller.rb b/app/controllers/api/v1/notifications_controller.rb index 8ac227765..9d03cb879 100644 --- a/app/controllers/api/v1/notifications_controller.rb +++ b/app/controllers/api/v1/notifications_controller.rb @@ -31,11 +31,9 @@ class Api::V1::NotificationsController < Api::BaseController private def load_notifications - cache_collection paginated_notifications, Notification - end - - def paginated_notifications - browserable_account_notifications.paginate_by_id( + cache_collection_paginated_by_id( + browserable_account_notifications, + Notification, limit_param(DEFAULT_NOTIFICATIONS_LIMIT), params_slice(:max_id, :since_id, :min_id) ) diff --git a/app/controllers/api/v1/timelines/public_controller.rb b/app/controllers/api/v1/timelines/public_controller.rb index 6ca903c16..26d877b00 100644 --- a/app/controllers/api/v1/timelines/public_controller.rb +++ b/app/controllers/api/v1/timelines/public_controller.rb @@ -16,18 +16,20 @@ class Api::V1::Timelines::PublicController < Api::BaseController end def load_statuses - cached_public_statuses + cached_public_statuses_page end - def cached_public_statuses - cache_collection public_statuses, Status - end - - def public_statuses - statuses = public_timeline_statuses.paginate_by_id( + def cached_public_statuses_page + cache_collection_paginated_by_id( + public_statuses, + Status, limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) + end + + def public_statuses + statuses = public_timeline_statuses if truthy_param?(:only_media) statuses.joins(:media_attachments).group(:id) diff --git a/app/controllers/api/v1/timelines/tag_controller.rb b/app/controllers/api/v1/timelines/tag_controller.rb index 62f34d3f7..76f7d3590 100644 --- a/app/controllers/api/v1/timelines/tag_controller.rb +++ b/app/controllers/api/v1/timelines/tag_controller.rb @@ -20,23 +20,18 @@ class Api::V1::Timelines::TagController < Api::BaseController end def cached_tagged_statuses - cache_collection tagged_statuses, Status - end - - def tagged_statuses if @tag.nil? [] else - statuses = tag_timeline_statuses.paginate_by_id( + statuses = tag_timeline_statuses + statuses = statuses.joins(:media_attachments) if truthy_param?(:only_media) + + cache_collection_paginated_by_id( + statuses, + Status, limit_param(DEFAULT_STATUSES_LIMIT), params_slice(:max_id, :since_id, :min_id) ) - - if truthy_param?(:only_media) - statuses.joins(:media_attachments) - else - statuses - end end end diff --git a/app/controllers/concerns/cache_concern.rb b/app/controllers/concerns/cache_concern.rb index c7d25ae00..189b92012 100644 --- a/app/controllers/concerns/cache_concern.rb +++ b/app/controllers/concerns/cache_concern.rb @@ -47,4 +47,8 @@ module CacheConcern raw.map { |item| cached_keys_with_value[item.id] || uncached[item.id] }.compact end + + def cache_collection_paginated_by_id(raw, klass, limit, options) + cache_collection raw.cache_ids.paginate_by_id(limit, options), klass + end end -- cgit