about summary refs log tree commit diff
path: root/app/services/fetch_link_card_service.rb
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2017-05-17 00:41:15 +0200
committer GitHub <noreply@github.com> 2017-05-17 00:41:15 +0200
commit 1efda1c4535d453b3f9e71c61a225d7cc0d7e75f (patch)
tree fcb0c558381c690ae574eab7634efd2416767853 /app/services/fetch_link_card_service.rb
parent a51c8074dfced016d967b062132d9d8213daf543 (diff)
Fix #2572 - Resolve preview cards for remote statuses as well as local ones (#3088)
Diffstat (limited to 'app/services/fetch_link_card_service.rb')
-rw-r--r-- app/services/fetch_link_card_service.rb 26
1 files changed, 24 insertions, 2 deletions
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 27d7c2a70..dec4aabd6 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -7,11 +7,11 @@ class FetchLinkCardService < BaseService
 
   def call(status)
     # Get first http/https URL that isn't local
-    url = status.text.match(URL_PATTERN).to_a.reject { |uri| TagManager.instance.local_url?(uri) }.first
+    url = parse_urls(status)
 
     return if url.nil?
 
-    url  = Addressable::URI.parse(url).normalize.to_s
+    url  = url.to_s
     card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url)
     res  = http_client.head(url)
 
@@ -22,6 +22,28 @@ class FetchLinkCardService < BaseService
 
   private
 
+  def parse_urls(status)
+    if status.local?
+      urls = status.text.match(URL_PATTERN).to_a.map { |uri| Addressable::URI.parse(uri).normalize }
+    else
+      html  = Nokogiri::HTML(status.text)
+      links = html.css('a')
+      urls  = links.map { |a| Addressable::URI.parse(a['href']).normalize unless skip_link?(a) }.compact
+    end
+
+    urls.reject { |uri| bad_url?(uri) }.first
+  end
+
+  def bad_url?(uri)
+    # Avoid local instance URLs and invalid URLs
+    TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme) || uri.host.blank?
+  end
+
+  def skip_link?(a)
+    # Avoid links for hashtags and mentions (microformats)
+    a['rel']&.include?('tag') || a['class']&.include?('u-url')
+  end
+
   def attempt_oembed(card, url)
     response = OEmbed::Providers.get(url)