about summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
author Claire <claire.github-309c@sitedethib.com> 2022-03-30 15:02:56 +0200
committer Claire <claire.github-309c@sitedethib.com> 2022-03-30 15:02:56 +0200
commit fc5dd0c538a8cf33d007a01e168b3bfc0cdc9060 (patch)
tree 2b8b804b5fd0265f57ae87f3b0315c6c8d14c9ef /app
parent 60f9973f452100475874cd9bd0a8b6ee908bf8e0 (diff)
parent 8c7223f4eac80b5725485be742d3fa2c984f4670 (diff)
Merge branch 'main' into glitch-soc/merge-upstream
Diffstat (limited to 'app')
-rw-r--r-- app/lib/extractor.rb 6
-rw-r--r-- app/models/user.rb 4
-rw-r--r-- app/validators/status_length_validator.rb 50
3 files changed, 44 insertions, 16 deletions
diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
index ef9407864..aea60dae5 100644
--- a/app/lib/extractor.rb
+++ b/app/lib/extractor.rb
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module Extractor
+  MAX_DOMAIN_LENGTH = 253
+
   extend Twitter::TwitterText::Extractor
 
   module_function
@@ -30,6 +32,10 @@ module Extractor
       after      = $'
 
       unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
+        _, domain = screen_name.split('@')
+
+        next if domain.present? && domain.length > MAX_DOMAIN_LENGTH
+
         start_position = match_data.char_begin(1) - 1
         end_position   = match_data.char_end(1)
 
diff --git a/app/models/user.rb b/app/models/user.rb
index 7c9ced6ae..76ad7d1b2 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -91,11 +91,11 @@ class User < ApplicationRecord
   validates :invite_request, presence: true, on: :create, if: :invite_text_required?
 
   validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale?
-  validates_with BlacklistedEmailValidator, on: :create
+  validates_with BlacklistedEmailValidator, if: -> { !confirmed? }
   validates_with EmailMxValidator, if: :validate_email_dns?
   validates :agreement, acceptance: { allow_nil: false, accept: [true, 'true', '1'] }, on: :create
 
-  # Those are honeypot/antispam fields
+  # Honeypot/anti-spam fields
   attr_accessor :registration_form_time, :website, :confirm_password
 
   validates_with RegistrationFormTimeValidator, on: :create
diff --git a/app/validators/status_length_validator.rb b/app/validators/status_length_validator.rb
index 2a3ac8862..f93450ba6 100644
--- a/app/validators/status_length_validator.rb
+++ b/app/validators/status_length_validator.rb
@@ -3,35 +3,57 @@
 class StatusLengthValidator < ActiveModel::Validator
   MAX_CHARS = (ENV['MAX_TOOT_CHARS'] || 500).to_i
   URL_PLACEHOLDER_CHARS = 23
-  URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}"
+  URL_PLACEHOLDER = 'x' * 23
 
   def validate(status)
     return unless status.local? && !status.reblog?
 
-    @status = status
-    status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?
+    status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status)
   end
 
   private
 
-  def too_long?
-    countable_length > MAX_CHARS
+  def too_long?(status)
+    countable_length(combined_text(status)) > MAX_CHARS
   end
 
-  def countable_length
-    total_text.mb_chars.grapheme_length
+  def countable_length(str)
+    str.mb_chars.grapheme_length
   end
 
-  def total_text
-    [@status.spoiler_text, countable_text].join
+  def combined_text(status)
+    [status.spoiler_text, countable_text(status.text)].join
   end
 
-  def countable_text
-    return '' if @status.text.nil?
+  def countable_text(str)
+    return '' if str.blank?
 
-    @status.text.dup.tap do |new_text|
-      new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER)
-      new_text.gsub!(Account::MENTION_RE, '@\2')
+    # To ensure that we only give length concessions to entities that
+    # will be correctly parsed during formatting, we go through full
+    # entity extraction
+
+    entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str))
+
+    rewrite_entities(str, entities) do |entity|
+      if entity[:url]
+        URL_PLACEHOLDER
+      elsif entity[:screen_name]
+        "@#{entity[:screen_name].split('@').first}"
+      end
     end
   end
+
+  def rewrite_entities(str, entities)
+    entities.sort_by! { |entity| entity[:indices].first }
+    result = ''.dup
+
+    last_index = entities.reduce(0) do |index, entity|
+      result << str[index...entity[:indices].first]
+      result << yield(entity)
+      entity[:indices].last
+    end
+
+    result << str[last_index..-1]
+    result
+  end
 end