about summary refs log tree commit diff
path: root/app/services/fetch_atom_service.rb
blob: 98ee1db845effd311c6eff73aeafbfadf2f9abb4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# frozen_string_literal: true

# Locates and downloads the Atom feed associated with a URL.
#
# The URL may point directly at an Atom document, or at a resource that
# advertises its feed through a `Link` response header or through an HTML
# `<link rel="alternate" type="application/atom+xml">` element.
class FetchAtomService < BaseService
  # Probes +url+ and returns the Atom feed it refers to.
  #
  # @param url [String] address of the feed, or of a page advertising one
  # @return [Array(String, String), nil] the feed URL and the feed body, or
  #   nil when the final response is not a 200, when no Atom alternate is
  #   advertised, or when the TLS handshake fails
  def call(url)
    response = http_client.head(url)
    Rails.logger.debug "Remote status HEAD request returned code #{response.code}"

    # 405 means the server refuses HEAD for this resource; retry with GET.
    # The GET status is only logged when a GET was actually performed.
    if response.code == 405
      response = http_client.get(url)
      Rails.logger.debug "Remote status GET request returned code #{response.code}"
    end

    return if response.code != 200
    return [url, fetch(url)] if response.mime_type == 'application/atom+xml'
    return process_headers(url, response) if response['Link'].present?

    process_html(fetch(url))
  rescue OpenSSL::SSL::SSLError => e
    Rails.logger.debug "SSL error: #{e}"
    # Return nil explicitly; otherwise the logger's return value would leak
    # out as this method's result.
    nil
  end

  private

  # Searches an HTML document for an Atom alternate link and fetches it.
  #
  # @param body [String] HTML markup
  # @return [Array(String, String), nil] the link's href and the fetched
  #   body, or nil when the document has no Atom alternate link
  def process_html(body)
    Rails.logger.debug 'Processing HTML'

    page = Nokogiri::HTML(body)
    alternate_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' }

    return if alternate_link.nil?

    [alternate_link['href'], fetch(alternate_link['href'])]
  end

  # Searches the `Link` response header(s) for an Atom alternate and fetches
  # it, falling back to scanning the HTML body at +url+ when none is found.
  #
  # @param url [String] the originally requested address
  # @param response [#[]] response whose 'Link' entry is a String, or an
  #   Array of Strings when the header was sent more than once
  # @return [Array(String, String), nil] see #process_html
  def process_headers(url, response)
    Rails.logger.debug 'Processing link header'

    # Repeated header fields are equivalent to a single comma-separated
    # field, so merge them rather than inspecting only the first one.
    link_header    = LinkHeader.parse(Array(response['Link']).join(', '))
    alternate_link = link_header.find_link(%w(rel alternate), %w(type application/atom+xml))

    return process_html(fetch(url)) if alternate_link.nil?

    [alternate_link.href, fetch(alternate_link.href)]
  end

  # Performs a GET request and returns the response body as a String.
  def fetch(url)
    http_client.get(url).to_s
  end

  # Builds a fresh HTTP client with per-operation timeouts and `follow`
  # enabled (redirect following in http.rb).
  def http_client
    HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50).follow
  end
end