about summary refs log tree commit diff
path: root/app/services/fetch_atom_service.rb
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2016-09-26 16:42:38 +0200
committerEugen Rochko <eugen@zeonfederated.com>2016-09-26 16:44:40 +0200
commitc6b0311b8626b42bc7e79e0195047a50e5b64dd1 (patch)
treee68f4b6922de5ec6277de7bc76b2bdb47a8c3e89 /app/services/fetch_atom_service.rb
parent0bd4608ad1b29328f04fcad6e7a20ef61668d239 (diff)
Fix #54 - Fetch remote accounts by URL from mentions
Atom fetching has been extracted from FetchRemoteAccountService and FetchRemoteStatusService
into FetchAtomService. Mentions of the constant "http://activityschema.org/collection/public"
are skipped, as it is not a real URL/user.
Diffstat (limited to 'app/services/fetch_atom_service.rb')
-rw-r--r--app/services/fetch_atom_service.rb46
1 files changed, 46 insertions, 0 deletions
diff --git a/app/services/fetch_atom_service.rb b/app/services/fetch_atom_service.rb
new file mode 100644
index 000000000..57f789ada
--- /dev/null
+++ b/app/services/fetch_atom_service.rb
@@ -0,0 +1,46 @@
+# Discovers and downloads the Atom feed associated with a URL.
+#
+# The URL is probed with a HEAD request first. Depending on that response the
+# feed is taken from the URL itself, from a target advertised in the `Link`
+# response header, or from a `<link rel="alternate">` element in the HTML
+# document behind the URL.
+class FetchAtomService < BaseService
+  ATOM_MIME_TYPE = 'application/atom+xml'.freeze
+
+  # @param url [String] address of the resource to inspect
+  # @return [Array(String, String), nil] the feed URL and the feed body, or
+  #   nil when the HEAD request does not answer 200 or no Atom feed is
+  #   advertised anywhere
+  def call(url)
+    response = http_client.head(url)
+
+    Rails.logger.debug "Remote status HEAD request returned code #{response.code}"
+    return if response.code != 200
+    return [url, fetch(url)] if response.mime_type == ATOM_MIME_TYPE
+
+    # A Link header that advertises no Atom alternate must not end the search:
+    # the document itself may still carry a <link rel="alternate"> element.
+    process_headers(response) || process_html(fetch(url))
+  end
+
+  private
+
+  # Looks for an Atom alternate among the <link> elements of an HTML document.
+  #
+  # @param body [String] HTML source to scan
+  # @return [Array(String, String), nil] feed URL and feed body, or nil when
+  #   no Atom alternate with a usable href is present
+  def process_html(body)
+    Rails.logger.debug 'Processing HTML'
+
+    page = Nokogiri::HTML(body)
+    alternate_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == ATOM_MIME_TYPE }
+
+    # A matching element without an href gives us nothing to download.
+    href = alternate_link && alternate_link['href']
+    return if href.blank?
+
+    [href, fetch(href)]
+  end
+
+  # Looks for an Atom alternate in the `Link` header of the HEAD response.
+  #
+  # @param response [#[]] response whose 'Link' header is inspected
+  # @return [Array(String, String), nil] feed URL and feed body, or nil when
+  #   the header is absent or lists no Atom alternate
+  def process_headers(response)
+    # The header lookup yields an Array when the server repeats the header;
+    # comma-joining produces the equivalent single-value form for the parser.
+    raw_header = Array(response['Link']).join(', ')
+    return if raw_header.blank?
+
+    Rails.logger.debug 'Processing link header'
+
+    link_header    = LinkHeader.parse(raw_header)
+    alternate_link = link_header.find_link(['rel', 'alternate'], ['type', ATOM_MIME_TYPE])
+    return if alternate_link.nil?
+
+    [alternate_link.href, fetch(alternate_link.href)]
+  end
+
+  # Downloads a URL with GET and returns the response body as a String.
+  # NOTE(review): the response status is not inspected here, so an error page
+  # body would be returned as-is — confirm callers tolerate that.
+  def fetch(url)
+    http_client.get(url).to_s
+  end
+
+  # Builds an HTTP client with per-operation timeouts
+  # (presumably expressed in seconds — confirm against the http gem version).
+  def http_client
+    HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50)
+  end
+end