about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2020-06-30 23:58:02 +0200
committerGitHub <noreply@github.com>2020-06-30 23:58:02 +0200
commit7aaf2b44ec698fd4f20b927fcac7edc0394a2647 (patch)
tree036b96777e1f525b2b76aab36204dee30cfa3345
parent65506bac3f3fe233b5b7b3241020bd74eb5c9259 (diff)
Fix remote files not using Content-Type header, streaming (#14184)
-rw-r--r--app/lib/response_with_limit.rb10
-rw-r--r--app/models/concerns/attachmentable.rb17
-rw-r--r--app/models/concerns/remotable.rb40
-rw-r--r--config/application.rb2
-rw-r--r--config/initializers/paperclip.rb1
-rw-r--r--lib/paperclip/image_extractor.rb45
-rw-r--r--lib/paperclip/media_type_spoof_detector_extensions.rb27
-rw-r--r--lib/paperclip/response_with_limit_adapter.rb55
-rw-r--r--spec/models/concerns/remotable_spec.rb10
9 files changed, 139 insertions, 68 deletions
diff --git a/app/lib/response_with_limit.rb b/app/lib/response_with_limit.rb
new file mode 100644
index 000000000..2cc17bc5f
--- /dev/null
+++ b/app/lib/response_with_limit.rb
@@ -0,0 +1,10 @@
+# frozen_string_literal: true
+
+class ResponseWithLimit
+  def initialize(response, limit)
+    @response = response
+    @limit = limit
+  end
+
+  attr_reader :response, :limit
+end
diff --git a/app/models/concerns/attachmentable.rb b/app/models/concerns/attachmentable.rb
index 18b872c1e..c5febb828 100644
--- a/app/models/concerns/attachmentable.rb
+++ b/app/models/concerns/attachmentable.rb
@@ -8,6 +8,17 @@ module Attachmentable
   MAX_MATRIX_LIMIT = 16_777_216 # 4096x4096px or approx. 16MB
   GIF_MATRIX_LIMIT = 921_600    # 1280x720px
 
+  # For some file extensions, there exist different content
+  # type variants, and browsers often send the wrong one,
+  # for example, sending an audio .ogg file as video/ogg,
+  # likewise, MimeMagic also misreports them as such. For
+  # those files, it is necessary to use the output of the
+  # `file` utility instead
+  INCORRECT_CONTENT_TYPES = %w(
+    video/ogg
+    video/webm
+  ).freeze
+
   included do
     before_post_process :obfuscate_file_name
     before_post_process :set_file_extensions
@@ -21,7 +32,7 @@ module Attachmentable
     self.class.attachment_definitions.each_key do |attachment_name|
       attachment = send(attachment_name)
 
-      next if attachment.blank? || attachment.queued_for_write[:original].blank?
+      next if attachment.blank? || attachment.queued_for_write[:original].blank? || !INCORRECT_CONTENT_TYPES.include?(attachment.instance_read(:content_type))
 
       attachment.instance_write :content_type, calculated_content_type(attachment)
     end
@@ -63,9 +74,7 @@ module Attachmentable
   end
 
   def calculated_content_type(attachment)
-    content_type = Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp
-    content_type = 'video/mp4' if content_type == 'video/x-m4v'
-    content_type
+    Paperclip.run('file', '-b --mime :file', file: attachment.queued_for_write[:original].path).split(/[:;\s]+/).first.chomp
   rescue Terrapin::CommandLineError
     ''
   end
diff --git a/app/models/concerns/remotable.rb b/app/models/concerns/remotable.rb
index 53ebc0835..c6d0c7f1f 100644
--- a/app/models/concerns/remotable.rb
+++ b/app/models/concerns/remotable.rb
@@ -24,28 +24,16 @@ module Remotable
           Request.new(:get, url).perform do |response|
             raise Mastodon::UnexpectedResponseError, response unless (200...300).cover?(response.code)
 
-            content_type = parse_content_type(response.headers.get('content-type').last)
-            extname      = detect_extname_from_content_type(content_type)
-
-            if extname.nil?
-              disposition = response.headers.get('content-disposition').last
-              matches     = disposition&.match(/filename="([^"]*)"/)
-              filename    = matches.nil? ? parsed_url.path.split('/').last : matches[1]
-              extname     = filename.nil? ? '' : File.extname(filename)
-            end
-
-            basename = SecureRandom.hex(8)
-
-            public_send("#{attachment_name}_file_name=", basename + extname)
-            public_send("#{attachment_name}=", StringIO.new(response.body_with_limit(limit)))
+            public_send("#{attachment_name}=", ResponseWithLimit.new(response, limit))
           end
         rescue Mastodon::UnexpectedResponseError, HTTP::TimeoutError, HTTP::ConnectionError, OpenSSL::SSL::SSLError => e
           Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
           raise e unless suppress_errors
         rescue Paperclip::Errors::NotIdentifiedByImageMagickError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError, Paperclip::Error, Mastodon::DimensionsValidationError => e
           Rails.logger.debug "Error fetching remote #{attachment_name}: #{e}"
-          nil
         end
+
+        nil
       end
 
       define_method("#{attribute_name}=") do |url|
@@ -59,26 +47,4 @@ module Remotable
       alias_method("reset_#{attachment_name}!", "download_#{attachment_name}!")
     end
   end
-
-  private
-
-  def detect_extname_from_content_type(content_type)
-    return if content_type.nil?
-
-    type = MIME::Types[content_type].first
-
-    return if type.nil?
-
-    extname = type.extensions.first
-
-    return if extname.nil?
-
-    ".#{extname}"
-  end
-
-  def parse_content_type(content_type)
-    return if content_type.nil?
-
-    content_type.split(/\s*;\s*/).first
-  end
 end
diff --git a/config/application.rb b/config/application.rb
index 8348649df..a1da9d61f 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -9,10 +9,12 @@ Bundler.require(*Rails.groups)
 require_relative '../app/lib/exceptions'
 require_relative '../lib/paperclip/url_generator_extensions'
 require_relative '../lib/paperclip/attachment_extensions'
+require_relative '../lib/paperclip/media_type_spoof_detector_extensions'
 require_relative '../lib/paperclip/lazy_thumbnail'
 require_relative '../lib/paperclip/gif_transcoder'
 require_relative '../lib/paperclip/video_transcoder'
 require_relative '../lib/paperclip/type_corrector'
+require_relative '../lib/paperclip/response_with_limit_adapter'
 require_relative '../lib/mastodon/snowflake'
 require_relative '../lib/mastodon/version'
 require_relative '../lib/devise/two_factor_ldap_authenticatable'
diff --git a/config/initializers/paperclip.rb b/config/initializers/paperclip.rb
index ebb009065..b4849370d 100644
--- a/config/initializers/paperclip.rb
+++ b/config/initializers/paperclip.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 Paperclip::DataUriAdapter.register
+Paperclip::ResponseWithLimitAdapter.register
 
 Paperclip.interpolates :filename do |attachment, style|
   if style == :original
diff --git a/lib/paperclip/image_extractor.rb b/lib/paperclip/image_extractor.rb
index 114852e8b..f5a54d1a5 100644
--- a/lib/paperclip/image_extractor.rb
+++ b/lib/paperclip/image_extractor.rb
@@ -4,28 +4,10 @@ require 'mime/types/columnar'
 
 module Paperclip
   class ImageExtractor < Paperclip::Processor
-    IMAGE_EXTRACTION_OPTIONS = {
-      convert_options: {
-        output: {
-          'loglevel' => 'fatal',
-          vf: 'scale=\'min(400\, iw):min(400\, ih)\':force_original_aspect_ratio=decrease',
-        }.freeze,
-      }.freeze,
-      format: 'png',
-      time: -1,
-      file_geometry_parser: FastGeometryParser,
-    }.freeze
-
     def make
       return @file unless options[:style] == :original
 
-      image = begin
-        begin
-          Paperclip::Transcoder.make(file, IMAGE_EXTRACTION_OPTIONS.dup, attachment)
-        rescue Paperclip::Error, ::Av::CommandError
-          nil
-        end
-      end
+      image = extract_image_from_file!
 
       unless image.nil?
         begin
@@ -36,7 +18,7 @@ module Paperclip
           # to make sure it's cleaned up
 
           begin
-            FileUtils.rm(image)
+            image.close(true)
           rescue Errno::ENOENT
             nil
           end
@@ -45,5 +27,28 @@ module Paperclip
 
       @file
     end
+
+    private
+
+    def extract_image_from_file!
+      ::Av.logger = Paperclip.logger
+
+      cli = ::Av.cli
+      dst = Tempfile.new([File.basename(@file.path, '.*'), '.png'])
+      dst.binmode
+
+      cli.add_source(@file.path)
+      cli.add_destination(dst.path)
+      cli.add_output_param loglevel: 'fatal'
+
+      begin
+        cli.run
+      rescue Cocaine::ExitStatusError
+        dst.close(true)
+        return nil
+      end
+
+      dst
+    end
   end
 end
diff --git a/lib/paperclip/media_type_spoof_detector_extensions.rb b/lib/paperclip/media_type_spoof_detector_extensions.rb
new file mode 100644
index 000000000..9c0557356
--- /dev/null
+++ b/lib/paperclip/media_type_spoof_detector_extensions.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Paperclip
+  module MediaTypeSpoofDetectorExtensions
+    def calculated_content_type
+      @calculated_content_type ||= type_from_mime_magic || type_from_file_command
+    end
+
+    def type_from_mime_magic
+      @type_from_mime_magic ||= begin
+        begin
+          File.open(@file.path) do |file|
+            MimeMagic.by_magic(file)&.type
+          end
+        rescue Errno::ENOENT
+          ''
+        end
+      end
+    end
+
+    def type_from_file_command
+      @type_from_file_command ||= FileCommandContentTypeDetector.new(@file.path).detect
+    end
+  end
+end
+
+Paperclip::MediaTypeSpoofDetector.prepend(Paperclip::MediaTypeSpoofDetectorExtensions)
diff --git a/lib/paperclip/response_with_limit_adapter.rb b/lib/paperclip/response_with_limit_adapter.rb
new file mode 100644
index 000000000..7d897b8d6
--- /dev/null
+++ b/lib/paperclip/response_with_limit_adapter.rb
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module Paperclip
+  class ResponseWithLimitAdapter < AbstractAdapter
+    def self.register
+      Paperclip.io_adapters.register self do |target|
+        target.is_a?(ResponseWithLimit)
+      end
+    end
+
+    def initialize(target, options = {})
+      super
+      cache_current_values
+    end
+
+    private
+
+    def cache_current_values
+      @original_filename = filename_from_content_disposition || filename_from_path || 'data'
+      @size = @target.response.content_length
+      @tempfile = copy_to_tempfile(@target)
+      @content_type = @target.response.mime_type || ContentTypeDetector.new(@tempfile.path).detect
+    end
+
+    def copy_to_tempfile(source)
+      bytes_read = 0
+
+      source.response.body.each do |chunk|
+        bytes_read += chunk.bytesize
+
+        destination.write(chunk)
+        chunk.clear
+
+        raise Mastodon::LengthValidationError if bytes_read > source.limit
+      end
+
+      destination.rewind
+      destination
+    rescue Mastodon::LengthValidationError
+      destination.close(true)
+      raise
+    ensure
+      source.response.connection.close
+    end
+
+    def filename_from_content_disposition
+      disposition = @target.response.headers['content-disposition']
+      disposition&.match(/filename="([^"]*)"/)&.captures&.first
+    end
+
+    def filename_from_path
+      @target.response.uri.path.split('/').last
+    end
+  end
+end
diff --git a/spec/models/concerns/remotable_spec.rb b/spec/models/concerns/remotable_spec.rb
index 2e6c8a9c6..9cc849ded 100644
--- a/spec/models/concerns/remotable_spec.rb
+++ b/spec/models/concerns/remotable_spec.rb
@@ -162,19 +162,15 @@ RSpec.describe Remotable do
           let(:headers)   { { 'content-disposition' => file } }
 
           it 'assigns file' do
-            string_io = StringIO.new('')
-            extname   = '.txt'
-            basename  = '0123456789abcdef'
+            response_with_limit = ResponseWithLimit.new(nil, 0)
 
-            allow(SecureRandom).to receive(:hex).and_return(basename)
-            allow(StringIO).to receive(:new).with(anything).and_return(string_io)
+            allow(ResponseWithLimit).to receive(:new).with(anything, anything).and_return(response_with_limit)
 
             expect(foo).to receive(:public_send).with("download_#{hoge}!", url)
 
             foo.hoge_remote_url = url
 
-            expect(foo).to receive(:public_send).with("#{hoge}=", string_io)
-            expect(foo).to receive(:public_send).with("#{hoge}_file_name=", basename + extname)
+            expect(foo).to receive(:public_send).with("#{hoge}=", response_with_limit)
 
             foo.download_hoge!(url)
           end