about summary refs log tree commit diff
path: root/app/lib/link_details_extractor.rb
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2021-11-25 13:07:38 +0100
committer GitHub <noreply@github.com> 2021-11-25 13:07:38 +0100
commit 6e50134a42cb303e6e42f89f9ddb5aacf83e7a6d (patch)
tree f60727e2c871857422082d814bb0cb28ce88f6c3 /app/lib/link_details_extractor.rb
parent 46e62fc4b33f3566eb9bf588b15bac28cae967a3 (diff)
Add trending links (#16917)
* Add trending links

* Add overriding specific links trendability

* Add link type to preview cards and only trend articles

Change trends review notifications from being sent every 5 minutes to being sent every 2 hours

Change threshold from 5 unique accounts to 15 unique accounts

* Fix tests
Diffstat (limited to 'app/lib/link_details_extractor.rb')
-rw-r--r-- app/lib/link_details_extractor.rb 49
1 files changed, 48 insertions, 1 deletions
diff --git a/app/lib/link_details_extractor.rb b/app/lib/link_details_extractor.rb
index 8b38e8d0c..56ad0717b 100644
--- a/app/lib/link_details_extractor.rb
+++ b/app/lib/link_details_extractor.rb
@@ -4,6 +4,11 @@ class LinkDetailsExtractor
   include ActionView::Helpers::TagHelper
 
   class StructuredData
+    SUPPORTED_TYPES = %w(
+      NewsArticle
+      WebPage
+    ).freeze
+
     def initialize(data)
       @data = data
     end
@@ -16,6 +21,14 @@ class LinkDetailsExtractor
       json['description']
     end
 
+    def language
+      json['inLanguage']
+    end
+
+    def type
+      json['@type']
+    end
+
     def image
       obj = first_of_value(json['image'])
 
@@ -44,6 +57,10 @@ class LinkDetailsExtractor
       publisher['name']
     end
 
+    def publisher_logo
+      publisher.dig('logo', 'url')
+    end
+
     private
 
     def author
@@ -58,8 +75,12 @@ class LinkDetailsExtractor
       arr.is_a?(Array) ? arr.first : arr
     end
 
+    def root_array(root)
+      root.is_a?(Array) ? root : [root]
+    end
+
     def json # First top-level JSON-LD object of a supported @type, or {}; non-Hash entries are skipped
-      @json ||= first_of_value(Oj.load(@data))
+      @json ||= root_array(Oj.load(@data)).find { |obj| obj.is_a?(Hash) && SUPPORTED_TYPES.include?(obj['@type']) } || {}
     end
   end
 
@@ -75,6 +96,7 @@ class LinkDetailsExtractor
       description: description || '',
       image_remote_url: image,
       type: type,
+      link_type: link_type,
       width: width || 0,
       height: height || 0,
       html: html || '',
@@ -83,6 +105,7 @@ class LinkDetailsExtractor
       author_name: author_name || '',
       author_url: author_url || '',
       embed_url: embed_url || '',
+      language: language,
     }
   end
 
@@ -90,6 +113,14 @@ class LinkDetailsExtractor
     player_url.present? ? :video : :link
   end
 
+  def link_type
+    if structured_data&.type == 'NewsArticle' || opengraph_tag('og:type') == 'article'
+      :article
+    else
+      :unknown
+    end
+  end
+
   def html
     player_url.present? ? content_tag(:iframe, nil, src: player_url, width: width, height: height, allowtransparency: 'true', scrolling: 'no', frameborder: '0') : nil
   end
@@ -138,6 +169,14 @@ class LinkDetailsExtractor
     valid_url_or_nil(opengraph_tag('twitter:player:stream'))
   end
 
+  def language
+    valid_locale_or_nil(structured_data&.language || opengraph_tag('og:locale') || document.xpath('//html').map { |element| element['lang'] }.first)
+  end
+
+  def icon # Publisher logo from structured data, else the apple-touch-icon / shortcut icon <link> href
+    valid_url_or_nil(structured_data&.publisher_logo || link_tag('apple-touch-icon') || link_tag('shortcut icon'))
+  end
+
   private
 
   def player_url
@@ -162,6 +201,14 @@ class LinkDetailsExtractor
     nil
   end
 
+  def valid_locale_or_nil(str)
+    return nil if str.blank?
+
+    code, = str.to_s.split(/[_-]/) # Keep only the language part of e.g. en_US or ja-JP (split on "_" or "-")
+    locale = ISO_639.find(code)
+    locale&.alpha2
+  end
+
   def link_tag(name)
     document.xpath("//link[@rel=\"#{name}\"]").map { |link| link['href'] }.first
   end