about summary refs log tree commit diff
path: root/app/services/fetch_oembed_service.rb
blob: 4cbaa04c623ab263bcdbd4390464a0a3be607877 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# frozen_string_literal: true

# Locates the OEmbed endpoint for a URL (either from a previously cached
# endpoint template or by discovering it in the page's HTML), requests it,
# and returns the parsed OEmbed document when it passes validation.
class FetchOEmbedService
  # How long a discovered endpoint template is kept in Rails.cache.
  ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze

  # Matches a `=http(s)://...` query-string value (plain or percent-encoded
  # scheme separator) up to the next `&`. Used to turn a concrete endpoint
  # URL into a reusable `={url}` template.
  URL_REGEX                 = /(=(http[s]?(%3A|:)(\/\/|%2F%2F)))([^&]*)/i.freeze

  attr_reader :url, :options, :format, :endpoint_url

  # @param url [String] the page URL to fetch OEmbed data for
  # @param options [Hash] recognised keys, as read by this class:
  #   :cached_endpoint - Hash with :endpoint (URI template) and :format;
  #                      when present, HTML discovery is skipped
  #   :format          - :json or :xml to restrict discovery to that format
  #   :html            - page HTML to use instead of requesting `url`
  # @return [Hash, nil] the OEmbed document (indifferent access) when it is
  #   a hash with version 1.0 and a type, otherwise nil
  def call(url, options = {})
    @url     = url
    @options = options

    if @options[:cached_endpoint]
      parse_cached_endpoint!
    else
      discover_endpoint!
    end

    fetch!
  end

  private

  # Looks for OEmbed <link> tags in the page HTML and, when one is found,
  # stores the absolute endpoint URL and its format, then caches a template.
  # Leaves @endpoint_url nil when the resulting URL is invalid.
  def discover_endpoint!
    return if html.nil?

    @format = @options[:format]
    page    = Nokogiri::HTML(html)

    # JSON is tried first; XML is only consulted when JSON was not found
    # (or when the caller explicitly asked for :xml).
    if @format.nil? || @format == :json
      @endpoint_url ||= page.at_xpath('//link[@type="application/json+oembed"]')&.attribute('href')&.value
      @format       ||= :json if @endpoint_url
    end

    if @format.nil? || @format == :xml
      @endpoint_url ||= page.at_xpath('//link[@type="text/xml+oembed"]')&.attribute('href')&.value
      @format       ||= :xml if @endpoint_url
    end

    return if @endpoint_url.blank?

    base_url = Addressable::URI.parse(@url)

    # If the OEmbed endpoint is given as http but the URL we opened
    # was served over https, we can assume OEmbed will be available
    # through https as well
    absolute_url = base_url + @endpoint_url
    absolute_url.scheme = base_url.scheme if base_url.scheme == 'https'

    @endpoint_url = absolute_url.to_s

    cache_endpoint!
  rescue Addressable::URI::InvalidURIError
    @endpoint_url = nil
  end

  # Builds the endpoint URL by expanding the cached URI template with the
  # current URL. Does nothing when the cached entry is incomplete.
  def parse_cached_endpoint!
    cached = @options[:cached_endpoint]

    return if cached[:endpoint].nil? || cached[:format].nil?

    @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s
    @format       = cached[:format]
  end

  # Stores a `{url}` template of the discovered endpoint, keyed by the
  # normalized host of the page URL. Skipped when the endpoint does not
  # carry a URL-valued query parameter to substitute.
  def cache_endpoint!
    return unless URL_REGEX.match?(@endpoint_url)

    url_domain = Addressable::URI.parse(@url).normalized_host

    endpoint_hash = {
      endpoint: @endpoint_url.gsub(URL_REGEX, '={url}'),
      format: @format,
    }

    Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
  end

  # Requests the endpoint and returns the validated OEmbed document.
  # Returns nil when there is no endpoint, the response is not a 200,
  # the body is blank or unparseable, or validation fails.
  def fetch!
    return if @endpoint_url.blank?

    body = Request.new(:get, @endpoint_url).perform do |res|
      res.code == 200 ? res.body_with_limit : nil
    end

    validate(parse_for_format(body)) if body.present?
  rescue Oj::ParseError, Ox::ParseError
    nil
  end

  # Parses the response body according to @format.
  # Returns nil for an unknown format or when the parsed document is not a
  # hash (e.g. a JSON array, scalar or `null`), instead of raising.
  def parse_for_format(body)
    case @format
    when :json
      indifferent_hash(Oj.load(body, mode: :strict))
    when :xml
      indifferent_hash(Ox.load(body, mode: :hash_no_attrs))&.dig(:oembed)
    end
  end

  # Converts a parsed Hash to indifferent access; anything else becomes nil.
  def indifferent_hash(parsed)
    parsed.with_indifferent_access if parsed.is_a?(Hash)
  end

  # Returns the document when it is a hash declaring version 1.0 and a
  # non-blank type, otherwise nil.
  def validate(oembed)
    # The parsed payload may be nil or a non-hash value (for instance the
    # text content of an <oembed> element); treat those as invalid.
    return unless oembed.is_a?(Hash)

    # `to_s` so that a JSON numeric 1.0 is accepted as well as the string.
    oembed if oembed[:version].to_s == '1.0' && oembed[:type].present?
  end

  # Page HTML, memoized (including a nil result). Uses the :html option
  # when given; otherwise requests the URL and keeps the body only for a
  # 200 response with a text/html mime type.
  def html
    return @html if defined?(@html)

    @html = @options[:html] || Request.new(:get, @url).add_headers('Accept' => 'text/html').perform do |res|
      res.body_with_limit if res.code == 200 && res.mime_type == 'text/html'
    end
  end
end