about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- app/services/fetch_link_card_service.rb | 26
-rw-r--r-- app/services/process_feed_service.rb | 4
-rw-r--r-- spec/services/fetch_link_card_service_spec.rb | 28
-rw-r--r-- spec/services/process_feed_service_spec.rb | 2
4 files changed, 54 insertions, 6 deletions
diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb
index 27d7c2a70..dec4aabd6 100644
--- a/app/services/fetch_link_card_service.rb
+++ b/app/services/fetch_link_card_service.rb
@@ -7,11 +7,11 @@ class FetchLinkCardService < BaseService
 
   def call(status)
     # Get first http/https URL that isn't local
-    url = status.text.match(URL_PATTERN).to_a.reject { |uri| TagManager.instance.local_url?(uri) }.first
+    url = parse_urls(status)
 
     return if url.nil?
 
-    url  = Addressable::URI.parse(url).normalize.to_s
+    url  = url.to_s
     card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url)
     res  = http_client.head(url)
 
@@ -22,6 +22,28 @@ class FetchLinkCardService < BaseService
 
   private
 
+  def parse_urls(status)
+    if status.local?
+      urls = status.text.match(URL_PATTERN).to_a.map { |uri| Addressable::URI.parse(uri).normalize }
+    else
+      html  = Nokogiri::HTML(status.text)
+      links = html.css('a')
+      urls  = links.map { |a| Addressable::URI.parse(a['href']).normalize unless skip_link?(a) }.compact
+    end
+
+    urls.reject { |uri| bad_url?(uri) }.first
+  end
+
+  def bad_url?(uri)
+    # Avoid local instance URLs and invalid URLs
+    TagManager.instance.local_url?(uri.to_s) || !%w(http https).include?(uri.scheme) || uri.host.blank?
+  end
+
+  def skip_link?(a)
+    # Avoid links for hashtags and mentions (microformats)
+    a['rel']&.include?('tag') || a['class']&.include?('u-url')
+  end
+
   def attempt_oembed(card, url)
     response = OEmbed::Providers.get(url)
 
diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb
index c573c0490..04d6a100f 100644
--- a/app/services/process_feed_service.rb
+++ b/app/services/process_feed_service.rb
@@ -69,8 +69,12 @@ class ProcessFeedService < BaseService
 
       notify_about_mentions!(status) unless status.reblog?
       notify_about_reblog!(status) if status.reblog? && status.reblog.account.local?
+
       Rails.logger.debug "Queuing remote status #{status.id} (#{id}) for distribution"
+
+      LinkCrawlWorker.perform_async(status.id) unless status.spoiler_text.present?
       DistributionWorker.perform_async(status.id)
+
       status
     end
 
diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb
index 681c99c11..9df41cf55 100644
--- a/spec/services/fetch_link_card_service_spec.rb
+++ b/spec/services/fetch_link_card_service_spec.rb
@@ -1,15 +1,35 @@
 require 'rails_helper'
 
 RSpec.describe FetchLinkCardService do
+  subject { FetchLinkCardService.new }
+
   before do
     stub_request(:head, 'http://example.xn--fiqs8s/').to_return(status: 200, headers: { 'Content-Type' => 'text/html' })
     stub_request(:get, 'http://example.xn--fiqs8s/').to_return(request_fixture('idn.txt'))
+    stub_request(:head, 'https://github.com/qbi/WannaCry').to_return(status: 404)
+
+    subject.call(status)
+  end
+
+  context 'in a local status' do
+    context do
+      let(:status) { Fabricate(:status, text: 'Check out http://example.中国') }
+
+      it 'works with IDN URLs' do
+        expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once
+      end
+    end
   end
 
-  it 'works with IDN URLs' do
-    status = Fabricate(:status, text: 'Check out http://example.中国')
+  context 'in a remote status' do
+    let(:status) { Fabricate(:status, uri: 'abc', text: 'Habt ihr ein paar gute Links zu #<span class="tag"><a href="https://quitter.se/tag/wannacry" target="_blank" rel="tag noopener" title="https://quitter.se/tag/wannacry">Wannacry</a></span> herumfliegen?   Ich will mal unter <br> <a href="https://github.com/qbi/WannaCry" target="_blank" rel="noopener" title="https://github.com/qbi/WannaCry">https://github.com/qbi/WannaCry</a> was sammeln. !<a href="http://sn.jonkman.ca/group/416/id" target="_blank" rel="noopener" title="http://sn.jonkman.ca/group/416/id">security</a>&nbsp;') }
+
+    it 'parses out URLs' do
+      expect(a_request(:head, 'https://github.com/qbi/WannaCry')).to have_been_made.at_least_once
+    end
 
-    FetchLinkCardService.new.call(status)
-    expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once
+    it 'ignores URLs to hashtags' do
+      expect(a_request(:head, 'https://quitter.se/tag/wannacry')).to_not have_been_made
+    end
   end
 end
diff --git a/spec/services/process_feed_service_spec.rb b/spec/services/process_feed_service_spec.rb
index 9ac22cbba..d85ee3b56 100644
--- a/spec/services/process_feed_service_spec.rb
+++ b/spec/services/process_feed_service_spec.rb
@@ -9,6 +9,8 @@ RSpec.describe ProcessFeedService do
 
     before do
       stub_request(:post, "https://pubsubhubbub.superfeedr.com/").to_return(:status => 200, :body => "", :headers => {})
+      stub_request(:head, "http://kickass.zone/media/2").to_return(:status => 404)
+      stub_request(:head, "http://kickass.zone/media/3").to_return(:status => 404)
       stub_request(:get, "http://kickass.zone/system/accounts/avatars/000/000/001/large/eris.png").to_return(request_fixture('avatar.txt'))
       stub_request(:get, "http://kickass.zone/system/media_attachments/files/000/000/002/original/morpheus_linux.jpg?1476059910").to_return(request_fixture('attachment1.txt'))
       stub_request(:get, "http://kickass.zone/system/media_attachments/files/000/000/003/original/gizmo.jpg?1476060065").to_return(request_fixture('attachment2.txt'))