about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-11-17 18:40:33 +0100
committerGitHub <noreply@github.com>2019-11-17 18:40:33 +0100
commitd14e74eff52352f1a2fb4bc2053bbb28c1aa29e0 (patch)
tree5fd20f50b1bdc8315d5afa59ea66e6c912d9931e
parent5a2c0707f163f14565d186db48bf0b4fe0b05b4f (diff)
Add cache for OEmbed endpoints to avoid extra HTTP requests (#12403)
* add youtube oembed endpoint

* add check for oembed endpoint

* change unless for a more readable if

* clear blank lines

* endpoint via https

* Fix string literal in condition

* use cache for endpoints

* use cache for endpoints

* clean up and adding check

* clean up and remove redundant return

* add html check

* add false to return

* use double quotes

* use double quotes

* Clean up
-rw-r--r--app/services/fetch_link_card_service.rb23
-rw-r--r--app/services/fetch_oembed_service.rb31
-rw-r--r--spec/services/fetch_oembed_service_spec.rb18
3 files changed, 64 insertions, 8 deletions
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 640c60fd4..29880e8d8 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -39,6 +39,12 @@ class FetchLinkCardService < BaseService
   def process_url
     @card ||= PreviewCard.new(url: @url)
 
+    attempt_oembed || attempt_opengraph
+  end
+
+  def html
+    return @html if defined?(@html)
+
     Request.new(:get, @url).perform do |res|
       if res.code == 200 && res.mime_type == 'text/html'
         @html = res.body_with_limit
@@ -48,10 +54,6 @@ class FetchLinkCardService < BaseService
         @html_charset = nil
       end
     end
-
-    return if @html.nil?
-
-    attempt_oembed || attempt_opengraph
   end
 
   def attach_card
@@ -88,12 +90,17 @@ class FetchLinkCardService < BaseService
   end
 
   def attempt_oembed
-    service = FetchOEmbedService.new
-    embed   = service.call(@url, html: @html)
-    url     = Addressable::URI.parse(service.endpoint_url)
+    service         = FetchOEmbedService.new
+    url_domain      = Addressable::URI.parse(@url).normalized_host
+    cached_endpoint = Rails.cache.read("oembed_endpoint:#{url_domain}")
+
+    embed   = service.call(@url, cached_endpoint: cached_endpoint) unless cached_endpoint.nil?
+    embed ||= service.call(@url, html: html) unless html.nil?
 
     return false if embed.nil?
 
+    url = Addressable::URI.parse(service.endpoint_url)
+
     @card.type          = embed[:type]
     @card.title         = embed[:title]         || ''
     @card.author_name   = embed[:author_name]   || ''
@@ -127,6 +134,8 @@ class FetchLinkCardService < BaseService
   end
 
   def attempt_opengraph
+    return if html.nil?
+
     detector = CharlockHolmes::EncodingDetector.new
     detector.strip_tags = true
 
diff --git a/app/services/fetch_oembed_service.rb b/app/services/fetch_oembed_service.rb
index 10176cfb9..4f8498c62 100644
--- a/app/services/fetch_oembed_service.rb
+++ b/app/services/fetch_oembed_service.rb
@@ -1,13 +1,20 @@
 # frozen_string_literal: true
 
 class FetchOEmbedService
+  ENDPOINT_CACHE_EXPIRES_IN = 24.hours.freeze
+
   attr_reader :url, :options, :format, :endpoint_url
 
   def call(url, options = {})
     @url     = url
     @options = options
 
-    discover_endpoint!
+    if @options[:cached_endpoint]
+      parse_cached_endpoint!
+    else
+      discover_endpoint!
+    end
+
     fetch!
   end
 
@@ -32,10 +39,32 @@ class FetchOEmbedService
     return if @endpoint_url.blank?
 
     @endpoint_url = (Addressable::URI.parse(@url) + @endpoint_url).to_s
+
+    cache_endpoint!
   rescue Addressable::URI::InvalidURIError
     @endpoint_url = nil
   end
 
+  def parse_cached_endpoint!
+    cached = @options[:cached_endpoint]
+
+    return if cached[:endpoint].nil? || cached[:format].nil?
+
+    @endpoint_url = Addressable::Template.new(cached[:endpoint]).expand(url: @url).to_s
+    @format       = cached[:format]
+  end
+
+  def cache_endpoint!
+    url_domain = Addressable::URI.parse(@url).normalized_host
+
+    endpoint_hash = {
+      endpoint: @endpoint_url.gsub(URI.encode_www_form_component(@url), '{url}'),
+      format: @format,
+    }
+
+    Rails.cache.write("oembed_endpoint:#{url_domain}", endpoint_hash, expires_in: ENDPOINT_CACHE_EXPIRES_IN)
+  end
+
   def fetch!
     return if @endpoint_url.blank?
 
diff --git a/spec/services/fetch_oembed_service_spec.rb b/spec/services/fetch_oembed_service_spec.rb
index 5789fb53b..a4262b040 100644
--- a/spec/services/fetch_oembed_service_spec.rb
+++ b/spec/services/fetch_oembed_service_spec.rb
@@ -113,6 +113,24 @@ describe FetchOEmbedService, type: :service do
 
     end
 
+    context 'when endpoint is cached' do
+      before do
+        stub_request(:get, 'http://www.youtube.com/oembed?format=json&url=https://www.youtube.com/watch?v=dqwpQarrDwk').to_return(
+          status: 200,
+          headers: { 'Content-Type': 'text/html' },
+          body: request_fixture('oembed_json_empty.html')
+        )
+      end
+
+      it 'returns new provider without fetching original URL first' do
+        subject.call('https://www.youtube.com/watch?v=dqwpQarrDwk', cached_endpoint: { endpoint: 'http://www.youtube.com/oembed?format=json&url={url}', format: :json })
+        expect(a_request(:get, 'https://www.youtube.com/watch?v=dqwpQarrDwk')).to_not have_been_made
+        expect(subject.endpoint_url).to eq 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk'
+        expect(subject.format).to eq :json
+        expect(a_request(:get, 'http://www.youtube.com/oembed?format=json&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DdqwpQarrDwk')).to have_been_made
+      end
+    end
+
     context 'when status code is not 200' do
       before do
         stub_request(:get, 'https://host.test/oembed.html').to_return(