From dc786c0cf4467ade8db7d8b17e09f16923bfc1e8 Mon Sep 17 00:00:00 2001 From: Surinna Curtis Date: Wed, 2 May 2018 05:40:24 -0500 Subject: Support Actors/Statuses with multiple types (#7305) * Add equals_or_includes_any? helper in JsonLdHelper * Support arrays in JSON-LD type fields for actors/tags/objects. * Spec for resolving accounts with extension types * Style tweaks for codeclimate --- app/helpers/jsonld_helper.rb | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'app/helpers/jsonld_helper.rb') diff --git a/app/helpers/jsonld_helper.rb b/app/helpers/jsonld_helper.rb index dfb8fcb8b..a3cfdadb8 100644 --- a/app/helpers/jsonld_helper.rb +++ b/app/helpers/jsonld_helper.rb @@ -5,6 +5,10 @@ module JsonLdHelper haystack.is_a?(Array) ? haystack.include?(needle) : haystack == needle end + def equals_or_includes_any?(haystack, needles) + needles.any? { |needle| equals_or_includes?(haystack, needle) } + end + def first_of_value(value) value.is_a?(Array) ? value.first : value end -- cgit From cb5b5cb5f79bb2187d8124df91af4c8e1bfd7256 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 2 May 2018 18:58:48 +0200 Subject: Slightly reduce RAM usage (#7301) * No need to re-require sidekiq plugins, they are required via Gemfile * Add derailed_benchmarks tool, no need to require TTY gems in Gemfile * Replace ruby-oembed with FetchOEmbedService Reduce startup by 45382 allocated objects * Remove preloaded JSON-LD in favour of caching HTTP responses Reduce boot RAM by about 6 MiB * Fix tests * Fix test suite by stubbing out JSON-LD contexts --- Gemfile | 12 +- Gemfile.lock | 22 +- app/controllers/api/web/embeds_controller.rb | 11 +- .../settings/follower_domains_controller.rb | 2 - app/helpers/jsonld_helper.rb | 17 +- app/lib/provider_discovery.rb | 47 --- app/services/fan_out_on_write_service.rb | 2 - app/services/fetch_link_card_service.rb | 38 +- app/services/fetch_oembed_service.rb | 71 ++++ app/workers/scheduler/backup_cleanup_scheduler.rb | 1 - .../scheduler/doorkeeper_cleanup_scheduler.rb | 1 - app/workers/scheduler/email_scheduler.rb | 1 - app/workers/scheduler/feed_cleanup_scheduler.rb | 1 - app/workers/scheduler/ip_cleanup_scheduler.rb | 1 - app/workers/scheduler/media_cleanup_scheduler.rb | 1 - .../scheduler/subscriptions_cleanup_scheduler.rb | 2 - app/workers/scheduler/subscriptions_scheduler.rb | 3 - app/workers/scheduler/user_cleanup_scheduler.rb | 1 - app/workers/soft_block_domain_followers_worker.rb | 2 - config/initializers/json_ld.rb | 5 - config/initializers/oembed.rb | 4 - lib/json_ld/activitystreams.rb | 153 -------- lib/json_ld/identity.rb | 86 ----- lib/json_ld/security.rb | 50 --- lib/tasks/mastodon.rake | 2 + spec/fixtures/requests/json-ld.activitystreams.txt | 391 +++++++++++++++++++++ spec/fixtures/requests/json-ld.identity.txt | 100 ++++++ spec/fixtures/requests/json-ld.security.txt | 61 ++++ spec/lib/activitypub/linked_data_signature_spec.rb | 4 + spec/lib/provider_discovery_spec.rb | 118 ------- spec/rails_helper.rb | 14 + spec/services/account_search_service_spec.rb | 2 +- .../fetch_remote_account_service_spec.rb | 2 +- .../fetch_remote_status_service_spec.rb | 2 +- .../activitypub/process_account_service_spec.rb | 2 +- .../activitypub/process_collection_service_spec.rb | 2 +- spec/services/after_block_service_spec.rb | 2 +- spec/services/authorize_follow_service_spec.rb | 2 +- .../services/batched_remove_status_service_spec.rb | 2 +- .../block_domain_from_account_service_spec.rb | 2 +- spec/services/block_domain_service_spec.rb | 2 +- spec/services/block_service_spec.rb | 2 +- spec/services/bootstrap_timeline_service_spec.rb | 2 +- spec/services/fan_out_on_write_service_spec.rb | 2 +- spec/services/favourite_service_spec.rb | 2 +- spec/services/fetch_atom_service_spec.rb | 2 +- spec/services/fetch_link_card_service_spec.rb | 2 +- spec/services/fetch_oembed_service_spec.rb | 125 +++++++ spec/services/fetch_remote_account_service_spec.rb | 2 +- spec/services/fetch_remote_status_service_spec.rb | 2 +- spec/services/follow_service_spec.rb | 2 +- spec/services/mute_service_spec.rb | 2 +- spec/services/notify_service_spec.rb | 2 +- spec/services/post_status_service_spec.rb | 2 +- spec/services/precompute_feed_service_spec.rb | 2 +- spec/services/process_feed_service_spec.rb | 2 +- spec/services/process_interaction_service_spec.rb | 2 +- spec/services/process_mentions_service_spec.rb | 2 +- .../pubsubhubbub/subscribe_service_spec.rb | 2 +- .../pubsubhubbub/unsubscribe_service_spec.rb | 2 +- spec/services/reblog_service_spec.rb | 2 +- spec/services/reject_follow_service_spec.rb | 2 +- spec/services/remove_status_service_spec.rb | 2 +- spec/services/report_service_spec.rb | 2 +- spec/services/resolve_account_service_spec.rb | 2 +- spec/services/resolve_url_service_spec.rb | 2 +- spec/services/search_service_spec.rb | 2 +- spec/services/send_interaction_service_spec.rb | 2 +- spec/services/subscribe_service_spec.rb | 2 +- spec/services/suspend_account_service_spec.rb | 2 +- spec/services/unblock_domain_service_spec.rb | 2 +- spec/services/unblock_service_spec.rb | 2 +- spec/services/unfollow_service_spec.rb | 2 +- spec/services/unmute_service_spec.rb | 2 +- spec/services/unsubscribe_service_spec.rb | 2 +- .../services/update_remote_profile_service_spec.rb | 2 +- spec/spec_helper.rb | 12 +- 77 files changed, 881 insertions(+), 568 deletions(-) delete mode 100644 app/lib/provider_discovery.rb create mode 100644 app/services/fetch_oembed_service.rb delete mode 100644 config/initializers/json_ld.rb delete mode 100644 config/initializers/oembed.rb delete mode 100644 lib/json_ld/activitystreams.rb delete mode 100644 lib/json_ld/identity.rb delete mode 100644 lib/json_ld/security.rb create mode 100644 spec/fixtures/requests/json-ld.activitystreams.txt create mode 100644 spec/fixtures/requests/json-ld.identity.txt create mode 100644 spec/fixtures/requests/json-ld.security.txt delete mode 100644 spec/lib/provider_discovery_spec.rb create mode 100644 spec/services/fetch_oembed_service_spec.rb (limited to 'app/helpers/jsonld_helper.rb') diff --git a/Gemfile b/Gemfile index a33748568..f1665ce95 100644 --- a/Gemfile +++ b/Gemfile @@ -54,7 +54,7 @@ gem 'httplog', '~> 1.0' gem 'idn-ruby', require: 'idn' gem 'kaminari', '~> 1.1' gem 'link_header', '~> 0.0' -gem 'mime-types', '~> 3.1' +gem 'mime-types', '~> 3.1', require: 'mime/types/columnar' gem 'nokogiri', '~> 1.8' gem 'nsa', '~> 0.2' gem 'oj', '~> 3.5' @@ -70,7 +70,6 @@ gem 'rails-settings-cached', '~> 0.6' gem 'redis', '~> 4.0', require: ['redis', 'redis/connection/hiredis'] gem 'mario-redis-lock', '~> 1.2', require: 'redis_lock' gem 'rqrcode', '~> 0.10' -gem 'ruby-oembed', '~> 0.12', require: 'oembed' gem 'ruby-progressbar', '~> 1.4' gem 'sanitize', '~> 4.6' gem 'sidekiq', '~> 5.1' @@ -82,14 +81,14 @@ gem 'simple_form', '~> 4.0' gem 'sprockets-rails', '~> 3.2', require: 'sprockets/railtie' gem 'stoplight', '~> 2.1.3' gem 'strong_migrations', '~> 0.2' -gem 'tty-command', '~> 0.8' -gem 'tty-prompt', '~> 0.16' +gem 'tty-command', '~> 0.8', require: false +gem 'tty-prompt', '~> 0.16', require: false gem 'twitter-text', '~> 1.14' gem 'tzinfo-data', '~> 1.2018' gem 'webpacker', '~> 3.4' gem 'webpush' -gem 'json-ld-preloaded', '~> 2.2' +gem 'json-ld', '~> 2.2' gem 'rdf-normalize', '~> 0.3' group :development, :test do @@ -135,6 +134,9 @@ group :development do gem 'capistrano-rails', '~> 1.3' gem 'capistrano-rbenv', '~> 2.1' gem 'capistrano-yarn', '~> 2.0' + + gem 'derailed_benchmarks' + gem 'stackprof' end group :production do diff --git a/Gemfile.lock b/Gemfile.lock index d96165dcf..94ab0b7ca 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -75,6 +75,7 @@ GEM aws-sigv4 (~> 1.0) aws-sigv4 (1.0.2) bcrypt (3.1.11) + benchmark-ips (2.7.2) better_errors (2.4.0) coderay (>= 1.0.0) erubi (>= 1.0.0) @@ -138,6 +139,14 @@ GEM css_parser (1.6.0) addressable debug_inspector (0.0.3) + derailed_benchmarks (1.3.4) + benchmark-ips (~> 2) + get_process_mem (~> 0) + heapy (~> 0) + memory_profiler (~> 0) + rack (>= 1) + rake (> 10, < 13) + thor (~> 0.19) devise (4.4.3) bcrypt (~> 3.0) orm_adapter (~> 0.1) @@ -206,6 +215,7 @@ GEM fuubar (2.3.1) rspec-core (~> 3.0) ruby-progressbar (~> 1.4) + get_process_mem (0.2.1) globalid (0.4.1) activesupport (>= 4.2.0) goldfinger (2.1.0) @@ -226,6 +236,7 @@ GEM concurrent-ruby (~> 1.0) hashdiff (0.3.7) hashie (3.5.7) + heapy (0.1.3) highline (1.7.10) hiredis (0.6.1) hitimes (1.2.6) @@ -264,10 +275,6 @@ GEM json-ld (2.2.1) multi_json (~> 1.12) rdf (>= 2.2.8, < 4.0) - json-ld-preloaded (2.2.3) - json-ld (>= 2.2, < 4.0) - multi_json (~> 1.12) - rdf (>= 2.2, < 4.0) jsonapi-renderer (0.2.0) jwt (2.1.0) kaminari (1.1.1) @@ -502,7 +509,6 @@ GEM rainbow (>= 2.2.2, < 4.0) ruby-progressbar (~> 1.7) unicode-display_width (~> 1.0, >= 1.0.1) - ruby-oembed (0.12.0) ruby-progressbar (1.9.0) ruby-saml (1.7.2) nokogiri (>= 1.5.10) @@ -557,6 +563,7 @@ GEM sshkit (1.16.0) net-scp (>= 1.1.2) net-ssh (>= 2.8.0) + stackprof (0.2.11) statsd-ruby (1.2.1) stoplight (2.1.3) streamio-ffmpeg (3.0.2) @@ -645,6 +652,7 @@ DEPENDENCIES chewy (~> 5.0) cld3 (~> 3.2.0) climate_control (~> 0.2) + derailed_benchmarks devise (~> 4.4) devise-two-factor (~> 3.0) devise_pam_authenticatable2 (~> 9.1) @@ -668,7 +676,7 @@ DEPENDENCIES i18n-tasks (~> 0.9) idn-ruby iso-639 - json-ld-preloaded (~> 2.2) + json-ld (~> 2.2) kaminari (~> 1.1) letter_opener (~> 1.4) letter_opener_web (~> 1.3) @@ -714,7 +722,6 @@ DEPENDENCIES rspec-retry (~> 0.5) rspec-sidekiq (~> 3.0) rubocop (~> 0.55) - ruby-oembed (~> 0.12) ruby-progressbar (~> 1.4) sanitize (~> 4.6) scss_lint (~> 0.57) @@ -726,6 +733,7 @@ DEPENDENCIES simple_form (~> 4.0) simplecov (~> 0.16) sprockets-rails (~> 3.2) + stackprof stoplight (~> 2.1.3) streamio-ffmpeg (~> 3.0) strong_migrations (~> 0.2) diff --git a/app/controllers/api/web/embeds_controller.rb b/app/controllers/api/web/embeds_controller.rb index f2fe74b17..987290a14 100644 --- a/app/controllers/api/web/embeds_controller.rb +++ b/app/controllers/api/web/embeds_controller.rb @@ -9,9 +9,12 @@ class Api::Web::EmbedsController < Api::Web::BaseController status = StatusFinder.new(params[:url]).status render json: status, serializer: OEmbedSerializer, width: 400 rescue ActiveRecord::RecordNotFound - oembed = OEmbed::Providers.get(params[:url]) - render json: Oj.dump(oembed.fields) - rescue OEmbed::NotFound - render json: {}, status: :not_found + oembed = FetchOEmbedService.new.call(params[:url]) + + if oembed + render json: oembed + else + render json: {}, status: :not_found + end end end diff --git a/app/controllers/settings/follower_domains_controller.rb b/app/controllers/settings/follower_domains_controller.rb index 213d9e96d..91b521e7f 100644 --- a/app/controllers/settings/follower_domains_controller.rb +++ b/app/controllers/settings/follower_domains_controller.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'sidekiq-bulk' - class Settings::FollowerDomainsController < ApplicationController layout 'admin' diff --git a/app/helpers/jsonld_helper.rb b/app/helpers/jsonld_helper.rb index a3cfdadb8..e9056166c 100644 --- a/app/helpers/jsonld_helper.rb +++ b/app/helpers/jsonld_helper.rb @@ -48,7 +48,7 @@ module JsonLdHelper end def canonicalize(json) - graph = RDF::Graph.new << JSON::LD::API.toRdf(json) + graph = RDF::Graph.new << JSON::LD::API.toRdf(json, documentLoader: method(:load_jsonld_context)) graph.dump(:normalize) end @@ -90,4 +90,19 @@ module JsonLdHelper request.add_headers('Accept' => 'application/activity+json, application/ld+json') request end + + def load_jsonld_context(url, _options = {}, &_block) + json = Rails.cache.fetch("jsonld:context:#{url}", expires_in: 30.days, raw: true) do + request = Request.new(:get, url) + request.add_headers('Accept' => 'application/ld+json') + + request.perform do |res| + raise JSON::LD::JsonLdError::LoadingDocumentFailed unless res.code == 200 && res.mime_type == 'application/ld+json' + res.body_with_limit + end + end + + doc = JSON::LD::API::RemoteDocument.new(url, json) + block_given? ? yield(doc) : doc + end end diff --git a/app/lib/provider_discovery.rb b/app/lib/provider_discovery.rb deleted file mode 100644 index 3bec7211b..000000000 --- a/app/lib/provider_discovery.rb +++ /dev/null @@ -1,47 +0,0 @@ -# frozen_string_literal: true - -class ProviderDiscovery < OEmbed::ProviderDiscovery - class << self - def get(url, **options) - provider = discover_provider(url, options) - - options.delete(:html) - - provider.get(url, options) - end - - def discover_provider(url, **options) - format = options[:format] - - html = if options[:html] - Nokogiri::HTML(options[:html]) - else - Request.new(:get, url).perform do |res| - raise OEmbed::NotFound, url if res.code != 200 || res.mime_type != 'text/html' - Nokogiri::HTML(res.body_with_limit) - end - end - - if format.nil? || format == :json - provider_endpoint ||= html.at_xpath('//link[@type="application/json+oembed"]')&.attribute('href')&.value - format ||= :json if provider_endpoint - end - - if format.nil? || format == :xml - provider_endpoint ||= html.at_xpath('//link[@type="text/xml+oembed"]')&.attribute('href')&.value - format ||= :xml if provider_endpoint - end - - raise OEmbed::NotFound, url if provider_endpoint.nil? - begin - provider_endpoint = Addressable::URI.parse(provider_endpoint) - provider_endpoint.query = nil - provider_endpoint = provider_endpoint.to_s - rescue Addressable::URI::InvalidURIError - raise OEmbed::NotFound, url - end - - OEmbed::Provider.new(provider_endpoint, format) - end - end -end diff --git a/app/services/fan_out_on_write_service.rb b/app/services/fan_out_on_write_service.rb index 0f77556dc..510b80c82 100644 --- a/app/services/fan_out_on_write_service.rb +++ b/app/services/fan_out_on_write_service.rb @@ -1,7 +1,5 @@ # frozen_string_literal: true -require 'sidekiq-bulk' - class FanOutOnWriteService < BaseService # Push a status into home and mentions feeds # @param [Status] status diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index d5920a417..77d4aa538 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -85,42 +85,40 @@ class FetchLinkCardService < BaseService end def attempt_oembed - embed = OEmbed::Providers.get(@url, html: @html) + embed = FetchOEmbedService.new.call(@url, html: @html) - return false unless embed.respond_to?(:type) + return false if embed.nil? - @card.type = embed.type - @card.title = embed.respond_to?(:title) ? embed.title : '' - @card.author_name = embed.respond_to?(:author_name) ? embed.author_name : '' - @card.author_url = embed.respond_to?(:author_url) ? embed.author_url : '' - @card.provider_name = embed.respond_to?(:provider_name) ? embed.provider_name : '' - @card.provider_url = embed.respond_to?(:provider_url) ? embed.provider_url : '' + @card.type = embed[:type] + @card.title = embed[:title] || '' + @card.author_name = embed[:author_name] || '' + @card.author_url = embed[:author_url] || '' + @card.provider_name = embed[:provider_name] || '' + @card.provider_url = embed[:provider_url] || '' @card.width = 0 @card.height = 0 case @card.type when 'link' - @card.image_remote_url = embed.thumbnail_url if embed.respond_to?(:thumbnail_url) + @card.image_remote_url = embed[:thumbnail_url] if embed[:thumbnail_url].present? when 'photo' - return false unless embed.respond_to?(:url) + return false if embed[:url].blank? - @card.embed_url = embed.url - @card.image_remote_url = embed.url - @card.width = embed.width.presence || 0 - @card.height = embed.height.presence || 0 + @card.embed_url = embed[:url] + @card.image_remote_url = embed[:url] + @card.width = embed[:width].presence || 0 + @card.height = embed[:height].presence || 0 when 'video' - @card.width = embed.width.presence || 0 - @card.height = embed.height.presence || 0 - @card.html = Formatter.instance.sanitize(embed.html, Sanitize::Config::MASTODON_OEMBED) - @card.image_remote_url = embed.thumbnail_url if embed.respond_to?(:thumbnail_url) + @card.width = embed[:width].presence || 0 + @card.height = embed[:height].presence || 0 + @card.html = Formatter.instance.sanitize(embed[:html], Sanitize::Config::MASTODON_OEMBED) + @card.image_remote_url = embed[:thumbnail_url] if embed[:thumbnail_url].present? when 'rich' # Most providers rely on