diff options
author | Starfall <us@starfall.systems> | 2023-04-14 19:22:47 -0500 |
---|---|---|
committer | Starfall <us@starfall.systems> | 2023-04-14 19:22:47 -0500 |
commit | 4fe1689de43f4404eb9530fcfbcbfb26d6c1c13a (patch) | |
tree | 6811b845bb7f4966b10dcefa3dea404246f161c7 /spec/lib/link_details_extractor_spec.rb | |
parent | 65c1e53a32cabcdbb7bca57002bb0f6acdebe07e (diff) | |
parent | bed63f6dae0879ac840066b031229e0d139089cd (diff) |
Merge remote-tracking branch 'glitch/main'
Diffstat (limited to 'spec/lib/link_details_extractor_spec.rb')
-rw-r--r-- | spec/lib/link_details_extractor_spec.rb | 130 |
1 files changed, 66 insertions, 64 deletions
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb index 7ea867c61..a46dd743a 100644 --- a/spec/lib/link_details_extractor_spec.rb +++ b/spec/lib/link_details_extractor_spec.rb @@ -1,12 +1,14 @@ +# frozen_string_literal: true + require 'rails_helper' RSpec.describe LinkDetailsExtractor do + subject { described_class.new(original_url, html, html_charset) } + let(:original_url) { '' } let(:html) { '' } let(:html_charset) { nil } - subject { described_class.new(original_url, html, html_charset) } - describe '#canonical_url' do let(:original_url) { 'https://foo.com/article?bar=baz123' } @@ -39,17 +41,17 @@ RSpec.describe LinkDetailsExtractor do let(:original_url) { 'https://example.com/page.html' } context 'and is wrapped in CDATA tags' do - let(:html) { <<-HTML } -<!doctype html> -<html> -<head> - <script type="application/ld+json"> - //<![CDATA[ - {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}} - //]]> - </script> -</head> -</html> + let(:html) { <<~HTML } + <!doctype html> + <html> + <head> + <script type="application/ld+json"> + //<![CDATA[ + {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}} + //]]> + </script> + </head> + </html> HTML describe '#title' do @@ -78,57 +80,57 @@ RSpec.describe LinkDetailsExtractor do end context 'but the first tag is invalid JSON' do - let(:html) { <<-HTML } -<!doctype html> -<html> -<body> - <script type="application/ld+json"> - { - "@context":"https://schema.org", - "@type":"ItemList", - "url":"https://example.com/page.html", - "name":"Foo", - "description":"Bar" - }, - { - "@context": "https://schema.org", - "@type": "BreadcrumbList", - "itemListElement":[ - { - "@type":"ListItem", - "position":1, - "item":{ - "@id":"https://www.example.com", - "name":"Baz" - } - } - ] - } - </script> - <script type="application/ld+json"> - { - "@context":"https://schema.org", - "@type":"NewsArticle", - "mainEntityOfPage": { - "@type":"WebPage", - "@id": "http://example.com/page.html" - }, - "headline": "Foo", - "description": "Bar", - "datePublished": "2022-01-31T19:46:00+00:00", - "author": { - "@type": "Organization", - "name": "Hoge" - }, - "publisher": { - "@type": "NewsMediaOrganization", - "name":"Baz", - "url":"https://example.com/" - } - } - </script> -</body> -</html> + let(:html) { <<~HTML } + <!doctype html> + <html> + <body> + <script type="application/ld+json"> + { + "@context":"https://schema.org", + "@type":"ItemList", + "url":"https://example.com/page.html", + "name":"Foo", + "description":"Bar" + }, + { + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement":[ + { + "@type":"ListItem", + "position":1, + "item":{ + "@id":"https://www.example.com", + "name":"Baz" + } + } + ] + } + </script> + <script type="application/ld+json"> + { + "@context":"https://schema.org", + "@type":"NewsArticle", + "mainEntityOfPage": { + "@type":"WebPage", + "@id": "http://example.com/page.html" + }, + "headline": "Foo", + "description": "Bar", + "datePublished": "2022-01-31T19:46:00+00:00", + "author": { + "@type": "Organization", + "name": "Hoge" + }, + "publisher": { + "@type": "NewsMediaOrganization", + "name":"Baz", + "url":"https://example.com/" + } + } + </script> + </body> + </html> HTML describe '#title' do |