From 5d3feed191bcbe2769512119752b426108152fe9 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 10 Jul 2019 18:59:28 +0200 Subject: Refactor fetching of remote resources (#11251) --- app/services/fetch_resource_service.rb | 68 ++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 app/services/fetch_resource_service.rb (limited to 'app/services/fetch_resource_service.rb') diff --git a/app/services/fetch_resource_service.rb b/app/services/fetch_resource_service.rb new file mode 100644 index 000000000..c0473f3ad --- /dev/null +++ b/app/services/fetch_resource_service.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +class FetchResourceService < BaseService + include JsonLdHelper + + ACCEPT_HEADER = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", text/html' + + def call(url) + return if url.blank? + + process(url) + rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e + Rails.logger.debug "Error fetching resource #{@url}: #{e}" + nil + end + + private + + def process(url, terminal = false) + @url = url + + perform_request { |response| process_response(response, terminal) } + end + + def perform_request(&block) + Request.new(:get, @url).add_headers('Accept' => ACCEPT_HEADER).perform(&block) + end + + def process_response(response, terminal = false) + return nil if response.code != 200 + + if ['application/activity+json', 'application/ld+json'].include?(response.mime_type) + body = response.body_with_limit + json = body_to_json(body) + + [json['id'], { prefetched_body: body, id: true }, :activitypub] if supported_context?(json) && (equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) || expected_type?(json)) + elsif !terminal + link_header = response['Link'] && parse_link_header(response) + + if link_header&.find_link(%w(rel alternate)) + process_link_headers(link_header) + elsif response.mime_type == 'text/html' + process_html(response) + end + end + end + + def expected_type?(json) + equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) + end + + def process_html(response) + page = Nokogiri::HTML(response.body_with_limit) + json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) } + + process(json_link['href'], terminal: true) unless json_link.nil? + end + + def process_link_headers(link_header) + json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"']) + + process(json_link.href, terminal: true) unless json_link.nil? + end + + def parse_link_header(response) + LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link']) + end +end -- cgit