diff options
author | Claire <claire.github-309c@sitedethib.com> | 2022-02-08 19:43:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 19:43:59 +0100 |
commit | 2fd1db7c9d0fe8c1cca159b9b0818c72e7c318aa (patch) | |
tree | bc7319ae242a889bb1d05b7afdd365d78a43ac1d /spec/lib/link_details_extractor_spec.rb | |
parent | b1983623aec8e0b066d115736d2151e0c74407fa (diff) | |
parent | 692963d43beb5e66a86e15d63b5aa3eeca82f0a1 (diff) |
Merge pull request #1680 from ClearlyClaire/glitch-soc/merge-upstream
Merge upstream changes
Diffstat (limited to 'spec/lib/link_details_extractor_spec.rb')
-rw-r--r-- | spec/lib/link_details_extractor_spec.rb | 122 |
1 file changed, 122 insertions, 0 deletions
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 850857b2d..84bb4579c 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -26,4 +26,126 @@ RSpec.describe LinkDetailsExtractor do
       end
     end
   end
+
+  context 'when structured data is present' do
+    let(:original_url) { 'https://example.com/page.html' }
+
+    context 'and is wrapped in CDATA tags' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<head>
+  <script type="application/ld+json">
+  //<![CDATA[
+  {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}}
+  //]]>
+  </script>
+</head>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+
+    context 'but the first tag is invalid JSON' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<body>
+  <script type="application/ld+json">
+  {
+    "@context":"https://schema.org",
+    "@type":"ItemList",
+    "url":"https://example.com/page.html",
+    "name":"Foo",
+    "description":"Bar"
+  },
+  {
+    "@context": "https://schema.org",
+    "@type": "BreadcrumbList",
+    "itemListElement":[
+      {
+        "@type":"ListItem",
+        "position":1,
+        "item":{
+          "@id":"https://www.example.com",
+          "name":"Baz"
+        }
+      }
+    ]
+  }
+  </script>
+  <script type="application/ld+json">
+  {
+    "@context":"https://schema.org",
+    "@type":"NewsArticle",
+    "mainEntityOfPage": {
+      "@type":"WebPage",
+      "@id": "http://example.com/page.html"
+    },
+    "headline": "Foo",
+    "description": "Bar",
+    "datePublished": "2022-01-31T19:46:00+00:00",
+    "author": {
+      "@type": "Organization",
+      "name": "Hoge"
+    },
+    "publisher": {
+      "@type": "NewsMediaOrganization",
+      "name":"Baz",
+      "url":"https://example.com/"
+    }
+  }
+  </script>
+</body>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+  end
 end