diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2017-04-27 14:42:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-04-27 14:42:22 +0200 |
commit | 88725d6ce85115ea3b0652007db5d40a1c069be3 (patch) | |
tree | 7a8965abda1cfc3b6c319ea19ee216755ac2f2df | |
parent | be0a01145b5f303c5c506858146ccf6c6d5cee72 (diff) |
OEmbed support for PreviewCard (#2337)
* OEmbed support for PreviewCard * Improve ProviderDiscovery code failure treatment * Do not crawl links if there is a content warning, since those don't display a link card anyway * Reset db schema * Fresh migrate * Fix rubocop style issues Fix #1681 - return existing access token when applicable instead of creating new * Fix test * Extract http client to helper * Improve oembed controller
22 files changed, 278 insertions, 36 deletions
diff --git a/Gemfile b/Gemfile index 701c724ee..2909d8e45 100644 --- a/Gemfile +++ b/Gemfile @@ -49,6 +49,7 @@ gem 'rails-settings-cached' gem 'redis', '~>3.2', require: ['redis', 'redis/connection/hiredis'] gem 'rqrcode' gem 'ruby-oembed', require: 'oembed' +gem 'sanitize' gem 'sidekiq' gem 'sidekiq-unique-jobs' gem 'simple-navigation' diff --git a/Gemfile.lock b/Gemfile.lock index 14567dc5a..fc8d28104 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -123,6 +123,7 @@ GEM connection_pool (2.2.1) crack (0.4.3) safe_yaml (~> 1.0.0) + crass (1.0.2) debug_inspector (0.0.2) devise (4.2.1) bcrypt (~> 3.0) @@ -258,6 +259,8 @@ GEM nio4r (2.0.0) nokogiri (1.7.1) mini_portile2 (~> 2.1.0) + nokogumbo (1.4.10) + nokogiri oj (2.18.5) openssl (2.0.3) orm_adapter (0.5.0) @@ -398,6 +401,10 @@ GEM ruby-oembed (0.12.0) ruby-progressbar (1.8.1) safe_yaml (1.0.4) + sanitize (4.4.0) + crass (~> 1.0.2) + nokogiri (>= 1.4.4) + nokogumbo (~> 1.4.1) sass (3.4.23) sass-rails (5.0.6) railties (>= 4.0.0, < 6) @@ -540,6 +547,7 @@ DEPENDENCIES rspec-sidekiq rubocop ruby-oembed + sanitize sass-rails (~> 5.0) sidekiq sidekiq-unique-jobs diff --git a/app/assets/javascripts/components/actions/cards.jsx b/app/assets/javascripts/components/actions/cards.jsx index d4c1eda60..805be9709 100644 --- a/app/assets/javascripts/components/actions/cards.jsx +++ b/app/assets/javascripts/components/actions/cards.jsx @@ -13,7 +13,7 @@ export function fetchStatusCard(id) { dispatch(fetchStatusCardRequest(id)); api(getState).get(`/api/v1/statuses/${id}/card`).then(response => { - if (!response.data.url || !response.data.title || !response.data.description) { + if (!response.data.url) { return; } diff --git a/app/assets/javascripts/components/features/status/components/card.jsx b/app/assets/javascripts/components/features/status/components/card.jsx index 1b5722b6a..a5ce7f08a 100644 --- a/app/assets/javascripts/components/features/status/components/card.jsx +++ b/app/assets/javascripts/components/features/status/components/card.jsx @@ -14,14 +14,11 @@ const getHostname = url => { class Card extends React.PureComponent { - render () { + renderLink () { const { card } = this.props; - if (card === null) { - return null; - } - - let image = ''; + let image = ''; + let provider = card.get('provider_name'); if (card.get('image')) { image = ( @@ -31,18 +28,64 @@ class Card extends React.PureComponent { ); } + if (provider.length < 1) { + provider = getHostname(card.get('url')) + } + return ( <a href={card.get('url')} className='status-card' target='_blank' rel='noopener'> {image} <div className='status-card__content'> <strong className='status-card__title' title={card.get('title')}>{card.get('title')}</strong> - <p className='status-card__description'>{card.get('description').substring(0, 50)}</p> - <span className='status-card__host' style={hostStyle}>{getHostname(card.get('url'))}</span> + <p className='status-card__description'>{(card.get('description') || '').substring(0, 50)}</p> + <span className='status-card__host' style={hostStyle}>{provider}</span> </div> </a> ); } + + renderPhoto () { + const { card } = this.props; + + return ( + <a href={card.get('url')} className='status-card-photo' target='_blank' rel='noopener'> + <img src={card.get('url')} alt={card.get('title')} width={card.get('width')} height={card.get('height')} /> + </a> + ); + } + + renderVideo () { + const { card } = this.props; + const content = { __html: card.get('html') }; + + return ( + <div + className='status-card-video' + dangerouslySetInnerHTML={content} + /> + ); + } + + render () { + const { card } = this.props; + + if (card === null) { + return null; + } + + switch(card.get('type')) { + case 'link': + return this.renderLink(); + case 'photo': + return this.renderPhoto(); + case 'video': + return this.renderVideo(); + case 'rich': + default: + return null; + } + } } Card.propTypes = { diff --git a/app/assets/stylesheets/components.scss b/app/assets/stylesheets/components.scss index cbbe746c1..422575639 100644 --- a/app/assets/stylesheets/components.scss +++ b/app/assets/stylesheets/components.scss @@ -1734,6 +1734,28 @@ button.icon-button.active i.fa-retweet { } } +.status-card-video, .status-card-rich, .status-card-photo { + margin-top: 14px; + overflow: hidden; + + iframe { + width: 100%; + height: auto; + } +} + +.status-card-photo { + display: block; + text-decoration: none; + + img { + display: block; + width: 100%; + height: auto; + margin: 0; + } +} + .status-card__title { display: block; font-weight: 500; diff --git a/app/controllers/api/oembed_controller.rb b/app/controllers/api/oembed_controller.rb index 2ea482296..58d8207f6 100644 --- a/app/controllers/api/oembed_controller.rb +++ b/app/controllers/api/oembed_controller.rb @@ -14,8 +14,20 @@ class Api::OEmbedController < ApiController def stream_entry_from_url(url) params = Rails.application.routes.recognize_path(url) - raise ActiveRecord::RecordNotFound unless params[:controller] == 'stream_entries' && params[:action] == 'show' + raise ActiveRecord::RecordNotFound unless recognized_stream_entry_url?(params) - StreamEntry.find(params[:id]) + stream_entry(params) + end + + def recognized_stream_entry_url?(params) + %w(stream_entries statuses).include?(params[:controller]) && params[:action] == 'show' + end + + def stream_entry(params) + if params[:controller] == 'stream_entries' + StreamEntry.find(params[:id]) + else + Status.find(params[:id]).stream_entry + end end end diff --git a/app/helpers/http_helper.rb b/app/helpers/http_helper.rb new file mode 100644 index 000000000..1e1ac8256 --- /dev/null +++ b/app/helpers/http_helper.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +module HttpHelper + USER_AGENT = "#{HTTP::Request::USER_AGENT} (Mastodon/#{Mastodon::VERSION}; +http://#{Rails.configuration.x.local_domain}/)" + + def http_client(options = {}) + timeout = { write: 10, connect: 10, read: 10 }.merge(options) + + HTTP.headers(user_agent: USER_AGENT) + .timeout(:per_operation, timeout) + .follow + end +end diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 1d8e90d1f..5ae6238d9 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true require 'singleton' +require_relative './sanitize_config' class Formatter include Singleton include RoutingHelper include ActionView::Helpers::TextHelper - include ActionView::Helpers::SanitizeHelper def format(status) return reformat(status.content) unless status.local? @@ -23,7 +23,7 @@ class Formatter end def reformat(html) - sanitize(html, tags: %w(a br p span), attributes: %w(href rel class)) + sanitize(html, Sanitize::Config::MASTODON_STRICT) end def plaintext(status) @@ -43,6 +43,10 @@ class Formatter html.html_safe # rubocop:disable Rails/OutputSafety end + def sanitize(html, config) + Sanitize.fragment(html, config) + end + private def encode(html) diff --git a/app/lib/provider_discovery.rb b/app/lib/provider_discovery.rb new file mode 100644 index 000000000..761ddae0f --- /dev/null +++ b/app/lib/provider_discovery.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +class ProviderDiscovery < OEmbed::ProviderDiscovery + include HttpHelper + + class << self + def discover_provider(url, options = {}) + res = http_client.get(url) + format = options[:format] + + raise OEmbed::NotFound, url if res.code != 200 || res.mime_type != 'text/html' + + html = Nokogiri::HTML(res.to_s) + + if format.nil? || format == :json + provider_endpoint ||= html.at_xpath('//link[@type="application/json+oembed"]')&.attribute('href')&.value + format ||= :json if provider_endpoint + end + + if format.nil? || format == :xml + provider_endpoint ||= html.at_xpath('//link[@type="application/xml+oembed"]')&.attribute('href')&.value + format ||= :xml if provider_endpoint + end + + begin + provider_endpoint = Addressable::URI.parse(provider_endpoint) + provider_endpoint.query = nil + provider_endpoint = provider_endpoint.to_s + rescue Addressable::URI::InvalidURIError + raise OEmbed::NotFound, url + end + + OEmbed::Provider.new(provider_endpoint, format || OEmbed::Formatter.default) + end + end +end diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb new file mode 100644 index 000000000..7cf1c3062 --- /dev/null +++ b/app/lib/sanitize_config.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +class Sanitize + module Config + HTTP_PROTOCOLS ||= ['http', 'https', :relative].freeze + + MASTODON_STRICT ||= freeze_config( + elements: %w(p br span a), + + attributes: { + 'a' => %w(href), + 'span' => %w(class), + }, + + protocols: { + 'a' => { 'href' => HTTP_PROTOCOLS }, + } + ) + + MASTODON_OEMBED ||= freeze_config merge( + RELAXED, + elements: RELAXED[:elements] + %w(audio embed iframe source video), + + attributes: merge( + RELAXED[:attributes], + 'audio' => %w(controls), + 'embed' => %w(height src type width), + 'iframe' => %w(allowfullscreen frameborder height scrolling src width), + 'source' => %w(src type), + 'video' => %w(controls height loop width), + 'div' => [:data] + ), + + protocols: merge( + RELAXED[:protocols], + 'embed' => { 'src' => HTTP_PROTOCOLS }, + 'iframe' => { 'src' => HTTP_PROTOCOLS }, + 'source' => { 'src' => HTTP_PROTOCOLS } + ) + ) + end +end diff --git a/app/models/preview_card.rb b/app/models/preview_card.rb index e59b05eb8..0aa771101 100644 --- a/app/models/preview_card.rb +++ b/app/models/preview_card.rb @@ -3,6 +3,10 @@ class PreviewCard < ApplicationRecord IMAGE_MIME_TYPES = ['image/jpeg', 'image/png', 'image/gif'].freeze + self.inheritance_column = false + + enum type: [:link, :photo, :video, :rich] + belongs_to :status has_attached_file :image, styles: { original: '120x120#' }, convert_options: { all: '-quality 80 -strip' } diff --git a/app/services/fetch_atom_service.rb b/app/services/fetch_atom_service.rb index c3dad1eb9..9c514aa9f 100644 --- a/app/services/fetch_atom_service.rb +++ b/app/services/fetch_atom_service.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true class FetchAtomService < BaseService + include HttpHelper + def call(url) return if url.blank? @@ -45,8 +47,4 @@ class FetchAtomService < BaseService def fetch(url) http_client.get(url).to_s end - - def http_client - HTTP.timeout(:per_operation, write: 10, connect: 10, read: 10).follow - end end diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index f74a0d34d..416c5fdad 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -1,8 +1,9 @@ # frozen_string_literal: true class FetchLinkCardService < BaseService + include HttpHelper + URL_PATTERN = %r{https?://\S+} - USER_AGENT = "#{HTTP::Request::USER_AGENT} (Mastodon/#{Mastodon::VERSION}; +http://#{Rails.configuration.x.local_domain}/)" def call(status) # Get first http/https URL that isn't local @@ -10,13 +11,53 @@ class FetchLinkCardService < BaseService return if url.nil? + card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url) + attempt_opengraph(card, url) unless attempt_oembed(card, url) + end + + private + + def attempt_oembed(card, url) + response = OEmbed::Providers.get(url) + + card.type = response.type + card.title = response.respond_to?(:title) ? response.title : '' + card.author_name = response.respond_to?(:author_name) ? response.author_name : '' + card.author_url = response.respond_to?(:author_url) ? response.author_url : '' + card.provider_name = response.respond_to?(:provider_name) ? response.provider_name : '' + card.provider_url = response.respond_to?(:provider_url) ? response.provider_url : '' + card.width = 0 + card.height = 0 + + case card.type + when 'link' + card.image = URI.parse(response.thumbnail_url) if response.respond_to?(:thumbnail_url) + when 'photo' + card.url = response.url + card.width = response.width.presence || 0 + card.height = response.height.presence || 0 + when 'video' + card.width = response.width.presence || 0 + card.height = response.height.presence || 0 + card.html = Formatter.instance.sanitize(response.html, Sanitize::Config::MASTODON_OEMBED) + when 'rich' + # Most providers rely on <script> tags, which is a no-no + return false + end + + card.save_with_optional_image! + rescue OEmbed::NotFound + false + end + + def attempt_opengraph(card, url) response = http_client.get(url) return if response.code != 200 || response.mime_type != 'text/html' page = Nokogiri::HTML(response.to_s) - card = PreviewCard.where(status: status).first_or_initialize(status: status, url: url) + card.type = :link card.title = meta_property(page, 'og:title') || page.at_xpath('//title')&.content card.description = meta_property(page, 'og:description') || meta_property(page, 'description') card.image = URI.parse(Addressable::URI.parse(meta_property(page, 'og:image')).normalize.to_s) if meta_property(page, 'og:image') @@ -26,12 +67,6 @@ class FetchLinkCardService < BaseService card.save_with_optional_image! end - private - - def http_client - HTTP.headers(user_agent: USER_AGENT).timeout(:per_operation, write: 10, connect: 10, read: 10).follow - end - def meta_property(html, property) html.at_xpath("//meta[@property=\"#{property}\"]")&.attribute('content')&.value || html.at_xpath("//meta[@name=\"#{property}\"]")&.attribute('content')&.value end diff --git a/app/services/follow_remote_account_service.rb b/app/services/follow_remote_account_service.rb index fbf983093..0b5abf58a 100644 --- a/app/services/follow_remote_account_service.rb +++ b/app/services/follow_remote_account_service.rb @@ -2,6 +2,7 @@ class FollowRemoteAccountService < BaseService include OStatus2::MagicKey + include HttpHelper DFRN_NS = 'http://purl.org/macgirvin/dfrn/1.0' @@ -73,7 +74,7 @@ class FollowRemoteAccountService < BaseService end def get_feed(url) - response = http_client.get(Addressable::URI.parse(url).normalize) + response = http_client(write: 20, connect: 20, read: 50).get(Addressable::URI.parse(url).normalize) [response.to_s, Nokogiri::XML(response)] end @@ -98,8 +99,4 @@ class FollowRemoteAccountService < BaseService def get_profile(body, account) RemoteProfileUpdateWorker.perform_async(account.id, body.force_encoding('UTF-8'), false) end - - def http_client - HTTP.timeout(:per_operation, write: 20, connect: 20, read: 50) - end end diff --git a/app/services/post_status_service.rb b/app/services/post_status_service.rb index 30b8032ed..b665391e3 100644 --- a/app/services/post_status_service.rb +++ b/app/services/post_status_service.rb @@ -34,7 +34,7 @@ class PostStatusService < BaseService process_mentions_service.call(status) process_hashtags_service.call(status) - LinkCrawlWorker.perform_async(status.id) + LinkCrawlWorker.perform_async(status.id) unless status.spoiler_text.present? DistributionWorker.perform_async(status.id) Pubsubhubbub::DistributionWorker.perform_async(status.stream_entry.id) diff --git a/app/views/api/v1/statuses/card.rabl b/app/views/api/v1/statuses/card.rabl index 8ba8dcbb1..5d8d7af3b 100644 --- a/app/views/api/v1/statuses/card.rabl +++ b/app/views/api/v1/statuses/card.rabl @@ -1,5 +1,7 @@ object @card -attributes :url, :title, :description +attributes :url, :title, :description, :type, + :author_name, :author_url, :provider_name, + :provider_url, :html, :width, :height node(:image) { |card| card.image? ? full_asset_url(card.image.url(:original)) : nil } diff --git a/config/initializers/doorkeeper.rb b/config/initializers/doorkeeper.rb index 2317733eb..b618bf344 100644 --- a/config/initializers/doorkeeper.rb +++ b/config/initializers/doorkeeper.rb @@ -36,7 +36,7 @@ Doorkeeper.configure do # Reuse access token for the same resource owner within an application (disabled by default) # Rationale: https://github.com/doorkeeper-gem/doorkeeper/issues/383 - # reuse_access_token + reuse_access_token # Issue access tokens with refresh token (disabled by default) # use_refresh_token diff --git a/config/initializers/kaminari_config.rb b/config/initializers/kaminari_config.rb index bd455f382..27b183eeb 100644 --- a/config/initializers/kaminari_config.rb +++ b/config/initializers/kaminari_config.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + Kaminari.configure do |config| config.default_per_page = 40 config.window = 1 diff --git a/config/initializers/oembed.rb b/config/initializers/oembed.rb new file mode 100644 index 000000000..208e586cb --- /dev/null +++ b/config/initializers/oembed.rb @@ -0,0 +1,4 @@ +# frozen_string_literal: true + +require_relative '../../app/lib/provider_discovery' +OEmbed::Providers.register_fallback(ProviderDiscovery) diff --git a/db/migrate/20170425202925_add_oembed_to_preview_cards.rb b/db/migrate/20170425202925_add_oembed_to_preview_cards.rb new file mode 100644 index 000000000..6a932bbf2 --- /dev/null +++ b/db/migrate/20170425202925_add_oembed_to_preview_cards.rb @@ -0,0 +1,12 @@ +class AddOEmbedToPreviewCards < ActiveRecord::Migration[5.0] + def change + add_column :preview_cards, :type, :integer, default: 0, null: false + add_column :preview_cards, :html, :text, null: false, default: '' + add_column :preview_cards, :author_name, :string, null: false, default: '' + add_column :preview_cards, :author_url, :string, null: false, default: '' + add_column :preview_cards, :provider_name, :string, null: false, default: '' + add_column :preview_cards, :provider_url, :string, null: false, default: '' + add_column :preview_cards, :width, :integer, default: 0, null: false + add_column :preview_cards, :height, :integer, default: 0, null: false + end +end diff --git a/db/schema.rb b/db/schema.rb index cd31eb528..66326f2e2 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20170425131920) do +ActiveRecord::Schema.define(version: 20170425202925) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -203,6 +203,14 @@ ActiveRecord::Schema.define(version: 20170425131920) do t.datetime "image_updated_at" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.integer "type", default: 0, null: false + t.text "html", default: "", null: false + t.string "author_name", default: "", null: false + t.string "author_url", default: "", null: false + t.string "provider_name", default: "", null: false + t.string "provider_url", default: "", null: false + t.integer "width", default: 0, null: false + t.integer "height", default: 0, null: false t.index ["status_id"], name: "index_preview_cards_on_status_id", unique: true, using: :btree end @@ -333,4 +341,4 @@ ActiveRecord::Schema.define(version: 20170425131920) do end add_foreign_key "statuses", "statuses", column: "reblog_of_id", on_delete: :cascade -end \ No newline at end of file +end diff --git a/spec/services/fetch_link_card_service_spec.rb b/spec/services/fetch_link_card_service_spec.rb index 5d72d40b6..46fec295d 100644 --- a/spec/services/fetch_link_card_service_spec.rb +++ b/spec/services/fetch_link_card_service_spec.rb @@ -9,6 +9,6 @@ RSpec.describe FetchLinkCardService do status = Fabricate(:status, text: 'Check out http://example.中国') FetchLinkCardService.new.call(status) - expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made + expect(a_request(:get, 'http://example.xn--fiqs8s/')).to have_been_made.at_least_once end end |