about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Gemfile 2
-rw-r--r-- Gemfile.lock 1
-rw-r--r-- app/javascript/mastodon/features/emoji/emoji.js 80
-rw-r--r-- app/lib/emoji_formatter.rb 68
-rw-r--r-- config/initializers/rack_attack.rb 49
-rw-r--r-- config/routes.rb 6
-rw-r--r-- spec/config/initializers/rack_attack_spec.rb 82
7 files changed, 193 insertions, 95 deletions
diff --git a/Gemfile b/Gemfile
index 44f08b5cd..1bff6cc7d 100644
--- a/Gemfile
+++ b/Gemfile
@@ -122,6 +122,7 @@ group :test do
   gem 'simplecov', '~> 0.21', require: false
   gem 'webmock', '~> 3.18'
   gem 'rspec_junit_formatter', '~> 0.6'
+  gem 'rack-test', '~> 2.0'
 end
 
 group :development do
@@ -152,7 +153,6 @@ end
 
 gem 'concurrent-ruby', require: false
 gem 'connection_pool', require: false
-
 gem 'xorcist', '~> 1.1'
 
 gem 'hcaptcha', '~> 7.1'
diff --git a/Gemfile.lock b/Gemfile.lock
index bfe69991d..ddd89fa16 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -818,6 +818,7 @@ DEPENDENCIES
   rack (~> 2.2.4)
   rack-attack (~> 6.6)
   rack-cors (~> 1.1)
+  rack-test (~> 2.0)
   rails (~> 6.1.7)
   rails-controller-testing (~> 1.0)
   rails-i18n (~> 6.0)
diff --git a/app/javascript/mastodon/features/emoji/emoji.js b/app/javascript/mastodon/features/emoji/emoji.js
index fb1a3804c..0ab32767a 100644
--- a/app/javascript/mastodon/features/emoji/emoji.js
+++ b/app/javascript/mastodon/features/emoji/emoji.js
@@ -19,15 +19,23 @@ const emojiFilename = (filename) => {
   return borderedEmoji.includes(filename) ? (filename + '_border') : filename;
 };
 
-const emojify = (str, customEmojis = {}) => {
-  const tagCharsWithoutEmojis = '<&';
-  const tagCharsWithEmojis = Object.keys(customEmojis).length ? '<&:' : '<&';
-  let rtn = '', tagChars = tagCharsWithEmojis, invisible = 0;
+const emojifyTextNode = (node, customEmojis) => {
+  const parentElement = node.parentElement;
+  let str = node.textContent;
+
   for (;;) {
-    let match, i = 0, tag;
-    while (i < str.length && (tag = tagChars.indexOf(str[i])) === -1 && (invisible || !(match = trie.search(str.slice(i))))) {
-      i += str.codePointAt(i) < 65536 ? 1 : 2;
+    let match, i = 0;
+
+    if (customEmojis === null) {
+      while (i < str.length && !(match = trie.search(str.slice(i)))) {
+        i += str.codePointAt(i) < 65536 ? 1 : 2;
+      }
+    } else {
+      while (i < str.length && str[i] !== ':' && !(match = trie.search(str.slice(i)))) {
+        i += str.codePointAt(i) < 65536 ? 1 : 2;
+      }
     }
+
     let rend, replacement = '';
     if (i === str.length) {
       break;
@@ -35,8 +43,6 @@ const emojify = (str, customEmojis = {}) => {
       if (!(() => {
         rend = str.indexOf(':', i + 1) + 1;
         if (!rend) return false; // no pair of ':'
-        const lt = str.indexOf('<', i + 1);
-        if (!(lt === -1 || lt >= rend)) return false; // tag appeared before closing ':'
         const shortname = str.slice(i, rend);
         // now got a replacee as ':shortname:'
         // if you want additional emoji handler, add statements below which set replacement and return true.
@@ -47,29 +53,6 @@ const emojify = (str, customEmojis = {}) => {
         }
         return false;
       })()) rend = ++i;
-    } else if (tag >= 0) { // <, &
-      rend = str.indexOf('>;'[tag], i + 1) + 1;
-      if (!rend) {
-        break;
-      }
-      if (tag === 0) {
-        if (invisible) {
-          if (str[i + 1] === '/') { // closing tag
-            if (!--invisible) {
-              tagChars = tagCharsWithEmojis;
-            }
-          } else if (str[rend - 2] !== '/') { // opening tag
-            invisible++;
-          }
-        } else {
-          if (str.startsWith('<span class="invisible">', i)) {
-            // avoid emojifying on invisible text
-            invisible = 1;
-            tagChars = tagCharsWithoutEmojis;
-          }
-        }
-      }
-      i = rend;
     } else { // matched to unicode emoji
       const { filename, shortCode } = unicodeMapping[match];
       const title = shortCode ? `:${shortCode}:` : '';
@@ -80,10 +63,39 @@ const emojify = (str, customEmojis = {}) => {
         rend += 1;
       }
     }
-    rtn += str.slice(0, i) + replacement;
+
+    node.textContent = str.slice(0, i);
+    parentElement.insertAdjacentHTML('beforeend', replacement);
     str = str.slice(rend);
+    node = document.createTextNode(str);
+    parentElement.append(node);
+  }
+};
+
+const emojifyNode = (node, customEmojis) => {
+  for (const child of node.childNodes) {
+    switch(child.nodeType) {
+    case Node.TEXT_NODE:
+      emojifyTextNode(child, customEmojis);
+      break;
+    case Node.ELEMENT_NODE:
+      if (!child.classList.contains('invisible'))
+        emojifyNode(child, customEmojis);
+      break;
+    }
   }
-  return rtn + str;
+};
+
+const emojify = (str, customEmojis = {}) => {
+  const wrapper = document.createElement('div');
+  wrapper.innerHTML = str;
+
+  if (!Object.keys(customEmojis).length)
+    customEmojis = null;
+
+  emojifyNode(wrapper, customEmojis);
+
+  return wrapper.innerHTML;
 };
 
 export default emojify;
diff --git a/app/lib/emoji_formatter.rb b/app/lib/emoji_formatter.rb
index 194849c23..a9785d5f9 100644
--- a/app/lib/emoji_formatter.rb
+++ b/app/lib/emoji_formatter.rb
@@ -23,48 +23,40 @@ class EmojiFormatter
   def to_s
     return html if custom_emojis.empty? || html.blank?
 
-    i                     = -1
-    tag_open_index        = nil
-    inside_shortname      = false
-    shortname_start_index = -1
-    invisible_depth       = 0
-    last_index            = 0
-    result                = ''.dup
-
-    while i + 1 < html.size
-      i += 1
-
-      if invisible_depth.zero? && inside_shortname && html[i] == ':'
-        inside_shortname = false
-        shortcode = html[shortname_start_index + 1..i - 1]
-        char_after = html[i + 1]
-
-        next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
-
-        result << html[last_index..shortname_start_index - 1] if shortname_start_index.positive?
-        result << image_for_emoji(shortcode, emoji)
-        last_index = i + 1
-      elsif tag_open_index && html[i] == '>'
-        tag = html[tag_open_index..i]
-        tag_open_index = nil
-
-        if invisible_depth.positive?
-          invisible_depth += count_tag_nesting(tag)
-        elsif tag == '<span class="invisible">'
-          invisible_depth = 1
+    tree = Nokogiri::HTML.fragment(html)
+    tree.xpath('./text()|.//text()[not(ancestor::*[@class="invisible"])]').to_a.each do |node| # ancestor::* is the XPath axis; a bare "ancestor" would test for a child element named <ancestor>
+      i                     = -1
+      inside_shortname      = false
+      shortname_start_index = -1
+      last_index            = 0
+      text                  = node.content
+      result                = Nokogiri::XML::NodeSet.new(tree.document)
+
+      while i + 1 < text.size
+        i += 1
+
+        if inside_shortname && text[i] == ':'
+          inside_shortname = false
+          shortcode = text[shortname_start_index + 1..i - 1]
+          char_after = text[i + 1]
+
+          next unless (char_after.nil? || !DISALLOWED_BOUNDING_REGEX.match?(char_after)) && (emoji = emoji_map[shortcode])
+
+          result << Nokogiri::XML::Text.new(text[last_index..shortname_start_index - 1], tree.document) if shortname_start_index.positive?
+          result << Nokogiri::HTML.fragment(image_for_emoji(shortcode, emoji))
+
+          last_index = i + 1
+        elsif text[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(text[i - 1]))
+          inside_shortname = true
+          shortname_start_index = i
         end
-      elsif html[i] == '<'
-        tag_open_index = i
-        inside_shortname = false
-      elsif !tag_open_index && html[i] == ':' && (i.zero? || !DISALLOWED_BOUNDING_REGEX.match?(html[i - 1]))
-        inside_shortname = true
-        shortname_start_index = i
       end
-    end
 
-    result << html[last_index..-1]
+      result << Nokogiri::XML::Text.new(text[last_index..-1], tree.document)
+      node.replace(result)
+    end
 
-    result.html_safe # rubocop:disable Rails/OutputSafety
+    tree.to_html.html_safe # rubocop:disable Rails/OutputSafety
   end
 
   private
diff --git a/config/initializers/rack_attack.rb b/config/initializers/rack_attack.rb
index 745eb5d3b..72ef7ba80 100644
--- a/config/initializers/rack_attack.rb
+++ b/config/initializers/rack_attack.rb
@@ -17,6 +17,18 @@ class Rack::Attack
       @remote_ip ||= (@env["action_dispatch.remote_ip"] || ip).to_s
     end
 
+    def throttleable_remote_ip
+      @throttleable_remote_ip ||= begin
+        ip = IPAddr.new(remote_ip)
+
+        if ip.ipv6?
+          ip.mask(64)
+        else
+          ip
+        end
+      end.to_s
+    end
+
     def authenticated_user_id
       authenticated_token&.resource_owner_id
     end
@@ -29,6 +41,10 @@ class Rack::Attack
       path.start_with?('/api')
     end
 
+    def path_matches?(other_path)
+      path.match?(/\A#{Regexp.escape(other_path)}(\..*)?\z/) # true/false: path is other_path, optionally followed by a ".format" suffix
+    end
+
     def web_request?
       !api_request?
     end
@@ -51,19 +67,19 @@ class Rack::Attack
   end
 
   throttle('throttle_unauthenticated_api', limit: 300, period: 5.minutes) do |req|
-    req.remote_ip if req.api_request? && req.unauthenticated?
+    req.throttleable_remote_ip if req.api_request? && req.unauthenticated?
   end
 
   throttle('throttle_api_media', limit: 30, period: 30.minutes) do |req|
-    req.authenticated_user_id if req.post? && req.path.match?('^/api/v\d+/media')
+    req.authenticated_user_id if req.post? && req.path.match?(/\A\/api\/v\d+\/media\z/i)
   end
 
   throttle('throttle_media_proxy', limit: 30, period: 10.minutes) do |req|
-    req.remote_ip if req.path.start_with?('/media_proxy')
+    req.throttleable_remote_ip if req.path.start_with?('/media_proxy')
   end
 
   throttle('throttle_api_sign_up', limit: 5, period: 30.minutes) do |req|
-    req.remote_ip if req.post? && req.path == '/api/v1/accounts'
+    req.throttleable_remote_ip if req.post? && req.path == '/api/v1/accounts'
   end
 
   throttle('throttle_authenticated_paging', limit: 300, period: 15.minutes) do |req|
@@ -71,39 +87,34 @@ class Rack::Attack
   end
 
   throttle('throttle_unauthenticated_paging', limit: 300, period: 15.minutes) do |req|
-    req.remote_ip if req.paging_request? && req.unauthenticated?
+    req.throttleable_remote_ip if req.paging_request? && req.unauthenticated?
   end
 
-  API_DELETE_REBLOG_REGEX = /\A\/api\/v1\/statuses\/[\d]+\/unreblog/.freeze
-  API_DELETE_STATUS_REGEX = /\A\/api\/v1\/statuses\/[\d]+/.freeze
+  API_DELETE_REBLOG_REGEX = /\A\/api\/v1\/statuses\/[\d]+\/unreblog\z/.freeze
+  API_DELETE_STATUS_REGEX = /\A\/api\/v1\/statuses\/[\d]+\z/.freeze
 
   throttle('throttle_api_delete', limit: 30, period: 30.minutes) do |req|
     req.authenticated_user_id if (req.post? && req.path.match?(API_DELETE_REBLOG_REGEX)) || (req.delete? && req.path.match?(API_DELETE_STATUS_REGEX))
   end
 
   throttle('throttle_sign_up_attempts/ip', limit: 25, period: 5.minutes) do |req|
-    if req.post? && req.path == '/auth'
-      addr = req.remote_ip
-      addr = IPAddr.new(addr) if addr.is_a?(String)
-      addr = addr.mask(64) if addr.ipv6?
-      addr.to_s
-    end
+    req.throttleable_remote_ip if req.post? && req.path_matches?('/auth')
   end
 
   throttle('throttle_password_resets/ip', limit: 25, period: 5.minutes) do |req|
-    req.remote_ip if req.post? && req.path == '/auth/password'
+    req.throttleable_remote_ip if req.post? && req.path_matches?('/auth/password')
   end
 
   throttle('throttle_password_resets/email', limit: 5, period: 30.minutes) do |req|
-    req.params.dig('user', 'email').presence if req.post? && req.path == '/auth/password'
+    req.params.dig('user', 'email').presence if req.post? && req.path_matches?('/auth/password')
   end
 
   throttle('throttle_email_confirmations/ip', limit: 25, period: 5.minutes) do |req|
-    req.remote_ip if req.post? && %w(/auth/confirmation /api/v1/emails/confirmations).include?(req.path)
+    req.throttleable_remote_ip if req.post? && (req.path_matches?('/auth/confirmation') || req.path == '/api/v1/emails/confirmations')
   end
 
   throttle('throttle_email_confirmations/email', limit: 5, period: 30.minutes) do |req|
-    if req.post? && req.path == '/auth/password'
+    if req.post? && req.path_matches?('/auth/confirmation') # confirmation resend; '/auth/password' is already keyed by email in throttle_password_resets/email
       req.params.dig('user', 'email').presence
     elsif req.post? && req.path == '/api/v1/emails/confirmations'
       req.authenticated_user_id
@@ -111,11 +122,11 @@ class Rack::Attack
   end
 
   throttle('throttle_login_attempts/ip', limit: 25, period: 5.minutes) do |req|
-    req.remote_ip if req.post? && req.path == '/auth/sign_in'
+    req.throttleable_remote_ip if req.post? && req.path_matches?('/auth/sign_in')
   end
 
   throttle('throttle_login_attempts/email', limit: 25, period: 1.hour) do |req|
-    req.session[:attempt_user_id] || req.params.dig('user', 'email').presence if req.post? && req.path == '/auth/sign_in'
+    req.session[:attempt_user_id] || req.params.dig('user', 'email').presence if req.post? && req.path_matches?('/auth/sign_in')
   end
 
   self.throttled_responder = lambda do |request|
diff --git a/config/routes.rb b/config/routes.rb
index 1c7b4eda2..8639f0ef5 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -74,7 +74,7 @@ Rails.application.routes.draw do
     end
   end
 
-  devise_for :users, path: 'auth', controllers: {
+  devise_for :users, path: 'auth', format: false, controllers: {
     omniauth_callbacks: 'auth/omniauth_callbacks',
     sessions:           'auth/sessions',
     registrations:      'auth/registrations',
@@ -219,7 +219,7 @@ Rails.application.routes.draw do
   resource :relationships, only: [:show, :update]
   resource :statuses_cleanup, controller: :statuses_cleanup, only: [:show, :update]
 
-  get '/media_proxy/:id/(*any)', to: 'media_proxy#show', as: :media_proxy
+  get '/media_proxy/:id/(*any)', to: 'media_proxy#show', as: :media_proxy, format: false
 
   resource :authorize_interaction, only: [:show, :create]
   resource :share, only: [:show, :create]
@@ -426,7 +426,7 @@ Rails.application.routes.draw do
 
   get '/admin', to: redirect('/admin/dashboard', status: 302)
 
-  namespace :api do
+  namespace :api, format: false do
     # OEmbed
     get '/oembed', to: 'oembed#show', as: :oembed
 
diff --git a/spec/config/initializers/rack_attack_spec.rb b/spec/config/initializers/rack_attack_spec.rb
new file mode 100644
index 000000000..581021cb9
--- /dev/null
+++ b/spec/config/initializers/rack_attack_spec.rb
@@ -0,0 +1,82 @@
+require 'rails_helper'
+
+describe Rack::Attack do
+  include Rack::Test::Methods
+
+  def app
+    Rails.application
+  end
+
+  shared_examples 'throttled endpoint' do
+    context 'when the number of requests is lower than the limit' do
+      it 'does not change the request status' do
+        limit.times do
+          request.call
+          expect(last_response.status).to_not eq(429)
+        end
+      end
+    end
+
+    context 'when the number of requests is higher than the limit' do
+      it 'returns http too many requests' do
+        (limit * 2).times do |i|
+          request.call
+          expect(last_response.status).to eq(429) if i >= limit # i is 0-based, so i == limit is request number limit + 1
+        end
+      end
+    end
+  end
+
+  let(:remote_ip) { '1.2.3.5' }
+
+  describe 'throttle excessive sign-up requests by IP address' do
+    context 'through the website' do
+      let(:limit) { 25 }
+      let(:request) { ->() { post path, {}, 'REMOTE_ADDR' => remote_ip } }
+
+      context 'for exact path' do
+        let(:path)  { '/auth' }
+        it_behaves_like 'throttled endpoint'
+      end
+
+      context 'for path with format' do
+        let(:path)  { '/auth.html' }
+        it_behaves_like 'throttled endpoint'
+      end
+    end
+
+    context 'through the API' do
+      let(:limit) { 5 }
+      let(:request) { ->() { post path, {}, 'REMOTE_ADDR' => remote_ip } }
+
+      context 'for exact path' do
+        let(:path)  { '/api/v1/accounts' }
+        it_behaves_like 'throttled endpoint'
+      end
+
+      context 'for path with format' do
+        let(:path)  { '/api/v1/accounts.json' }
+
+        it 'returns http not found' do
+          request.call
+          expect(last_response.status).to eq(404)
+        end
+      end
+    end
+  end
+
+  describe 'throttle excessive sign-in requests by IP address' do
+    let(:limit) { 25 }
+    let(:request) { ->() { post path, {}, 'REMOTE_ADDR' => remote_ip } }
+
+    context 'for exact path' do
+      let(:path)  { '/auth/sign_in' }
+      it_behaves_like 'throttled endpoint'
+    end
+
+    context 'for path with format' do
+      let(:path)  { '/auth/sign_in.html' }
+      it_behaves_like 'throttled endpoint'
+    end
+  end
+end