about summary refs log tree commit diff
path: root/spec/lib
diff options
context:
space:
mode:
author Claire <claire.github-309c@sitedethib.com> 2022-02-08 18:23:53 +0100
committer Claire <claire.github-309c@sitedethib.com> 2022-02-08 18:23:53 +0100
commit 692963d43beb5e66a86e15d63b5aa3eeca82f0a1 (patch)
tree bc7319ae242a889bb1d05b7afdd365d78a43ac1d /spec/lib
parent b1983623aec8e0b066d115736d2151e0c74407fa (diff)
parent b6d7726ecbc833abd00f6a9d36b24d9776cfe623 (diff)
Merge branch 'main' into glitch-soc/merge-upstream
Diffstat (limited to 'spec/lib')
-rw-r--r-- spec/lib/language_detector_spec.rb 134
-rw-r--r-- spec/lib/link_details_extractor_spec.rb 122
2 files changed, 122 insertions, 134 deletions
diff --git a/spec/lib/language_detector_spec.rb b/spec/lib/language_detector_spec.rb
deleted file mode 100644
index b7ba0f6c4..000000000
--- a/spec/lib/language_detector_spec.rb
+++ /dev/null
@@ -1,134 +0,0 @@
-# frozen_string_literal: true
-
-require 'rails_helper'
-
-describe LanguageDetector do
-  describe 'prepare_text' do
-    it 'returns unmodified string without special cases' do
-      string = 'just a regular string'
-      result = described_class.instance.send(:prepare_text, string)
-
-      expect(result).to eq string
-    end
-
-    it 'collapses spacing in strings' do
-      string = 'The formatting   in    this is very        odd'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'The formatting in this is very odd'
-    end
-
-    it 'strips usernames from strings before detection' do
-      string = '@username Yeah, very surreal...! also @friend'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Yeah, very surreal...! also'
-    end
-
-    it 'strips URLs from strings before detection' do
-      string = 'Our website is https://example.com and also http://localhost.dev'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Our website is and also'
-    end
-
-    it 'converts #hashtags back to normal text before detection' do
-      string = 'Hey look at all the #animals and #FishAndChips'
-
-      result = described_class.instance.send(:prepare_text, string)
-      expect(result).to eq 'Hey look at all the animals and fish and chips'
-    end
-  end
-
-  describe 'detect' do
-    let(:account_without_user_locale) { Fabricate(:user, locale: nil).account }
-    let(:account_remote) { Fabricate(:account, domain: 'joinmastodon.org') }
-
-    it 'detects english language for basic strings' do
-      strings = [
-        "Hello and welcome to mastodon how are you today?",
-        "I'd rather not!",
-        "a lot of people just want to feel righteous all the time and that's all that matters",
-      ]
-      strings.each do |string|
-        result = described_class.instance.detect(string, account_without_user_locale)
-
-        expect(result).to eq(:en), string
-      end
-    end
-
-    it 'detects spanish language' do
-      string = 'Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon. Obtener un Hola y bienvenidos a Mastodon'
-      result = described_class.instance.detect(string, account_without_user_locale)
-
-      expect(result).to eq :es
-    end
-
-    describe 'when language can\'t be detected' do
-      it 'uses nil when sent an empty document' do
-        result = described_class.instance.detect('', account_without_user_locale)
-        expect(result).to eq nil
-      end
-
-      describe 'because of a URL' do
-        it 'uses nil when sent just a URL' do
-          string = 'http://example.com/media/2kFTgOJLXhQf0g2nKB4'
-          cld_result = CLD3::NNetLanguageIdentifier.new(0, 2048).find_language(string)
-          expect(cld_result).not_to eq :en
-
-          result = described_class.instance.detect(string, account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'with an account' do
-        it 'uses the account locale when present' do
-          account = double(user_locale: 'fr')
-          result  = described_class.instance.detect('', account)
-
-          expect(result).to eq nil
-        end
-
-        it 'uses nil when account is present but has no locale' do
-          result = described_class.instance.detect('', account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'with an `en` default locale' do
-        it 'uses nil for undetectable string' do
-          result = described_class.instance.detect('', account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-
-      describe 'remote user' do
-        it 'detects Korean language' do
-          string = '안녕하세요'
-          result = described_class.instance.detect(string, account_remote)
-
-          expect(result).to eq :ko
-        end
-      end
-
-      describe 'with a non-`en` default locale' do
-        around(:each) do |example|
-          before = I18n.default_locale
-          I18n.default_locale = :ja
-          example.run
-          I18n.default_locale = before
-        end
-
-        it 'uses nil for undetectable string' do
-          string = ''
-          result = described_class.instance.detect(string, account_without_user_locale)
-
-          expect(result).to eq nil
-        end
-      end
-    end
-  end
-end
diff --git a/spec/lib/link_details_extractor_spec.rb b/spec/lib/link_details_extractor_spec.rb
index 850857b2d..84bb4579c 100644
--- a/spec/lib/link_details_extractor_spec.rb
+++ b/spec/lib/link_details_extractor_spec.rb
@@ -26,4 +26,126 @@ RSpec.describe LinkDetailsExtractor do
       end
     end
   end
+
+  context 'when structured data is present' do
+    let(:original_url) { 'https://example.com/page.html' }
+
+    context 'and is wrapped in CDATA tags' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<head>
+  <script type="application/ld+json">
+  //<![CDATA[
+  {"@context":"http://schema.org","@type":"NewsArticle","mainEntityOfPage":"https://example.com/page.html","headline":"Foo","datePublished":"2022-01-31T19:53:00+00:00","url":"https://example.com/page.html","description":"Bar","author":{"@type":"Person","name":"Hoge"},"publisher":{"@type":"Organization","name":"Baz"}}
+  //]]>
+  </script>
+</head>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+
+    context 'but the first tag is invalid JSON' do
+      let(:html) { <<-HTML }
+<!doctype html>
+<html>
+<body>
+  <script type="application/ld+json">
+    {
+      "@context":"https://schema.org",
+      "@type":"ItemList",
+      "url":"https://example.com/page.html",
+      "name":"Foo",
+      "description":"Bar"
+    },
+    {
+      "@context": "https://schema.org",
+      "@type": "BreadcrumbList",
+      "itemListElement":[
+        {
+          "@type":"ListItem",
+          "position":1,
+          "item":{
+            "@id":"https://www.example.com",
+            "name":"Baz"
+          }
+        }
+      ]
+    }
+  </script>
+  <script type="application/ld+json">
+    {
+      "@context":"https://schema.org",
+      "@type":"NewsArticle",
+      "mainEntityOfPage": {
+        "@type":"WebPage",
+        "@id": "http://example.com/page.html"
+      },
+      "headline": "Foo",
+      "description": "Bar",
+      "datePublished": "2022-01-31T19:46:00+00:00",
+      "author": {
+        "@type": "Organization",
+        "name": "Hoge"
+      },
+      "publisher": {
+        "@type": "NewsMediaOrganization",
+        "name":"Baz",
+        "url":"https://example.com/"
+      }
+    }
+  </script>
+</body>
+</html>
+      HTML
+
+      describe '#title' do
+        it 'returns the title from structured data' do
+          expect(subject.title).to eq 'Foo'
+        end
+      end
+
+      describe '#description' do
+        it 'returns the description from structured data' do
+          expect(subject.description).to eq 'Bar'
+        end
+      end
+
+      describe '#provider_name' do
+        it 'returns the provider name from structured data' do
+          expect(subject.provider_name).to eq 'Baz'
+        end
+      end
+
+      describe '#author_name' do
+        it 'returns the author name from structured data' do
+          expect(subject.author_name).to eq 'Hoge'
+        end
+      end
+    end
+  end
 end