about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Eugen Rochko <eugen@zeonfederated.com>, 2022-03-25 19:31:35 +0100
committer: GitHub <noreply@github.com>, 2022-03-25 19:31:35 +0100
commit: b58db8f12eb19787ee3bd1ec8abab21027b3d4ef (patch)
tree: 7050325491e122dbe1b8bc31484ef9b78e69583a
parent: 9565b17d5d743d7ae4028d484b407d649210f6d2 (diff)
Add workaround for YouTube Shorts links (#17869)
* Add workaround for YouTube Shorts links

* Update link_details_extractor_spec.rb
-rw-r--r-- app/lib/link_details_extractor.rb | 2
-rw-r--r-- spec/lib/link_details_extractor_spec.rb | 8
2 files changed, 9 insertions, 1 deletion
diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb
index fabbd244d..b0c4e4f42 100644
--- a/app/lib/link_details_extractor.rb
+++ b/app/lib/link_details_extractor.rb
@@ -208,7 +208,7 @@ class LinkDetailsExtractor
   end
 
   def valid_url_or_nil(str, same_origin_only: false)
-    return if str.blank?
+    return if str.blank? || str == 'null'
 
     url = @original_url + Addressable::URI.parse(str)
 
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 84bb4579c..7ea867c61 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -25,6 +25,14 @@ RSpec.describe LinkDetailsExtractor do
         expect(subject.canonical_url).to eq 'https://foo.com/article'
       end
     end
+
+    context 'when canonical URL is set to "null"' do
+      let(:html) { '<!doctype html><link rel="canonical" href="null" />' }
+
+      it 'ignores the canonical URLs' do
+        expect(subject.canonical_url).to eq original_url
+      end
+    end
   end
 
   context 'when structured data is present' do