diff options
author | Claire <claire.github-309c@sitedethib.com> | 2022-02-08 19:43:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-08 19:43:59 +0100 |
commit | 2fd1db7c9d0fe8c1cca159b9b0818c72e7c318aa (patch) | |
tree | bc7319ae242a889bb1d05b7afdd365d78a43ac1d /spec/lib | |
parent | b1983623aec8e0b066d115736d2151e0c74407fa (diff) | |
parent | 692963d43beb5e66a86e15d63b5aa3eeca82f0a1 (diff) |
Merge pull request #1680 from ClearlyClaire/glitch-soc/merge-upstream
Merge upstream changes
Diffstat (limited to 'spec/lib')
-rw-r--r-- | spec/lib/language_detector_spec.rb | 134 | ||||
-rw-r--r-- | spec/lib/link_details_extractor_spec.rb | 122 |
2 files changed, 122 insertions, 134 deletions
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb deleted file mode 100644 index b7ba0f6c4..000000000 --- a/spec/lib/language_detector_spec.rb +++ /dev/null @@ -1,134 +0,0 @@ -# frozen_string_literal: true - -require 'rails_helper' - -describe LanguageDetector do - describe 'prepare_text' do - it 'returns unmodified string without special cases' do - string = 'just a regular string' - result = described_class.instance.send(:prepare_text, string) - - expect(result).to eq string - end - - it 'collapses spacing in strings' do - string = 'The formatting in this is very odd' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'The formatting in this is very odd' - end - - it 'strips usernames from strings before detection' do - string = '@username Yeah, very surreal...! also @friend' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Yeah, very surreal...! also' - end - - it 'strips URLs from strings before detection' do - string = 'Our website is https://example.com and also http://localhost.dev' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Our website is and also' - end - - it 'converts #hashtags back to normal text before detection' do - string = 'Hey look at all the #animals and #FishAndChips' - - result = described_class.instance.send(:prepare_text, string) - expect(result).to eq 'Hey look at all the animals and fish and chips' - end - end - - describe 'detect' do - let(:account_without_user_locale) { Fabricate(:user, locale: nil).account } - let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') } - - it 'detects english language for basic strings' do - strings = [ - "Hello and welcome to mastodon how are you today?", - "I'd rather not!", - "a lot of people just want to feel righteous all the time and that's all that matters", - ] - strings.each do |string| - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq(:en), string - end - end - - it 'detects spanish language' do - string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon' - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq :es - end - - describe 'when language can\'t be detected' do - it 'uses nil when sent an empty document' do - result = described_class.instance.detect('', account_without_user_locale) - expect(result).to eq nil - end - - describe 'because of a URL' do - it 'uses nil when sent just a URL' do - string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4' - cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string) - expect(cld_result).not_to eq :en - - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'with an account' do - it 'uses the account locale when present' do - account = double(user_locale: 'fr') - result = described_class.instance.detect('', account) - - expect(result).to eq nil - end - - it 'uses nil when account is present but has no locale' do - result = described_class.instance.detect('', account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'with an `en` default locale' do - it 'uses nil for undetectable string' do - result = described_class.instance.detect('', account_without_user_locale) - - expect(result).to eq nil - end - end - - describe 'remote user' do - it 'detects Korean language' do - string = '안녕하세요' - result = described_class.instance.detect(string, account_remote) - - expect(result).to eq :ko - end - end - - describe 'with a non-`en` default locale' do - around(:each) do |example| - before = I18n.default_locale - I18n.default_locale = :ja - example.run - I18n.default_locale = before - end - - it 'uses nil for undetectable string' do - string = '' - result = described_class.instance.detect(string, account_without_user_locale) - - expect(result).to eq nil - end - end - end - end -end diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb index 850857b2d..84bb4579c 100644 --- a/spec/lib/link_details_extractor_spec.rb +++ b/spec/lib/link_details_extractor_spec.rb @@ -26,4 +26,126 @@ RSpec.describe LinkDetailsExtractor do end end end + + context 'when structured data is present' do + let(:original_url) { 'https://example.com/page.html' } + + context 'and is wrapped in CDATA tags' do + let(:html) { <<-HTML } +<!doctype html> +<html> +<head> + <script type="application/ld+json"> + //<![CDATA[ + {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}} + //]]> + </script> +</head> +</html> + HTML + + describe '#title' do + it 'returns the title from structured data' do + expect(subject.title).to eq 'Foo' + end + end + + describe '#description' do + it 'returns the description from structured data' do + expect(subject.description).to eq 'Bar' + end + end + + describe '#provider_name' do + it 'returns the provider name from structured data' do + expect(subject.provider_name).to eq 'Baz' + end + end + + describe '#author_name' do + it 'returns the author name from structured data' do + expect(subject.author_name).to eq 'Hoge' + end + end + end + + context 'but the first tag is invalid JSON' do + let(:html) { <<-HTML } +<!doctype html> +<html> +<body> + <script type="application/ld+json"> + { + "@context":"https://schema.org", + "@type":"ItemList", + "url":"https://example.com/page.html", + "name":"Foo", + "description":"Bar" + }, + { + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement":[ + { + "@type":"ListItem", + "position":1, + "item":{ + "@id":"https://www.example.com", + "name":"Baz" + } + } + ] + } + </script> + <script type="application/ld+json"> + { + "@context":"https://schema.org", + "@type":"NewsArticle", + "mainEntityOfPage": { + "@type":"WebPage", + "@id": "http://example.com/page.html" + }, + "headline": "Foo", + "description": "Bar", + "datePublished": "2022-01-31T19:46:00+00:00", + "author": { + "@type": "Organization", + "name": "Hoge" + }, + "publisher": { + "@type": "NewsMediaOrganization", + "name":"Baz", + "url":"https://example.com/" + } + } + </script> +</body> +</html> + HTML + + describe '#title' do + it 'returns the title from structured data' do + expect(subject.title).to eq 'Foo' + end + end + + describe '#description' do + it 'returns the description from structured data' do + expect(subject.description).to eq 'Bar' + end + end + + describe '#provider_name' do + it 'returns the provider name from structured data' do + expect(subject.provider_name).to eq 'Baz' + end + end + + describe '#author_name' do + it 'returns the author name from structured data' do + expect(subject.author_name).to eq 'Hoge' + end + end + end + end end |