about summary refs log tree commit diff
diff options
context:
space:
mode:
authorMarkus Unterwaditzer <markus@unterwaditzer.net>2023-01-11 21:59:13 +0100
committerGitHub <noreply@github.com>2023-01-11 21:59:13 +0100
commit0c689b9d014324aba5b8751dacec4c0fc20b2038 (patch)
treece2835e33c6014072832a068c4278aca89e5432e
parentfd33bcb3b25d3eaf593ade0aa8709a1184fc254e (diff)
fix: allow verification when page size exceeds 1MB (using HTML5 parser) (#22879)
* fix: allow verification when page size exceeds 1MB
Truncates the page after 1MB instead

Closes #15316

* switch to HTML5 parser, fix rubocop errors

* undo rubocop fixes

Co-authored-by: Chris Zubak-Skees <chriszs@gmail.com>
-rw-r--r--app/lib/request.rb14
-rw-r--r--app/services/verify_link_service.rb2
-rw-r--r--spec/lib/request_spec.rb5
-rw-r--r--spec/services/verify_link_service_spec.rb27
4 files changed, 43 insertions, 5 deletions
diff --git a/app/lib/request.rb b/app/lib/request.rb
index b2819c8ed..0508169dc 100644
--- a/app/lib/request.rb
+++ b/app/lib/request.rb
@@ -154,9 +154,7 @@ class Request
   end
 
   module ClientLimit
-    def body_with_limit(limit = 1.megabyte)
-      raise Mastodon::LengthValidationError if content_length.present? && content_length > limit
-
+    def truncated_body(limit = 1.megabyte)
       if charset.nil?
         encoding = Encoding::BINARY
       else
@@ -173,11 +171,19 @@ class Request
         contents << chunk
         chunk.clear
 
-        raise Mastodon::LengthValidationError if contents.bytesize > limit
+        break if contents.bytesize > limit
       end
 
       contents
     end
+
+    def body_with_limit(limit = 1.megabyte)
+      raise Mastodon::LengthValidationError if content_length.present? && content_length > limit
+
+      contents = truncated_body(limit)
+      raise Mastodon::LengthValidationError if contents.bytesize > limit
+      contents
+    end
   end
 
   if ::HTTP::Response.methods.include?(:body_with_limit) && !Rails.env.production?
diff --git a/app/services/verify_link_service.rb b/app/services/verify_link_service.rb
index 7496fe2d5..d049b52d1 100644
--- a/app/services/verify_link_service.rb
+++ b/app/services/verify_link_service.rb
@@ -26,7 +26,7 @@ class VerifyLinkService < BaseService
   def link_back_present?
     return false if @body.blank?
 
-    links = Nokogiri::HTML(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]')
+    links = Nokogiri::HTML5(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]')
 
     if links.any? { |link| link['href']&.downcase == @link_back.downcase }
       true
diff --git a/spec/lib/request_spec.rb b/spec/lib/request_spec.rb
index 5eccf3201..8539944e2 100644
--- a/spec/lib/request_spec.rb
+++ b/spec/lib/request_spec.rb
@@ -120,6 +120,11 @@ describe Request do
       expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError
     end
 
+    it 'truncates large monolithic body' do
+      stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.megabytes), headers: { 'Content-Length' => 2.megabytes })
+      expect(subject.perform { |response| response.truncated_body.bytesize }).to be < 2.megabytes
+    end
+
     it 'uses binary encoding if Content-Type does not tell encoding' do
       stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html' })
       expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY
diff --git a/spec/services/verify_link_service_spec.rb b/spec/services/verify_link_service_spec.rb
index 52ba454cc..391560f1c 100644
--- a/spec/services/verify_link_service_spec.rb
+++ b/spec/services/verify_link_service_spec.rb
@@ -73,6 +73,33 @@ RSpec.describe VerifyLinkService, type: :service do
       end
     end
 
+    context 'when a document is truncated but the link back is valid' do
+      let(:html) do
+        "
+          <!doctype html>
+          <body>
+            <a rel=\"me\" href=\"#{ActivityPub::TagManager.instance.url_for(account)}\"
+        "
+      end
+
+      it 'marks the field as not verified' do
+        expect(field.verified?).to be false
+      end
+    end
+
+    context 'when a link back might be truncated' do
+      let(:html) do
+        "
+          <!doctype html>
+          <body>
+            <a rel=\"me\" href=\"#{ActivityPub::TagManager.instance.url_for(account)}"
+      end
+
+      it 'does not mark the field as verified' do
+        expect(field.verified?).to be false
+      end
+    end
+
     context 'when a link does not contain a link back' do
       let(:html) { '' }