From bbc7afa2a24519ac238cbcd4e8aec310a002c40e Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 30 Mar 2022 14:46:03 +0200 Subject: Fix being able to post URLs longer than 4096 characters (#17908) --- app/lib/extractor.rb | 6 ++++ app/validators/status_length_validator.rb | 50 ++++++++++++++++++++++--------- 2 files changed, 42 insertions(+), 14 deletions(-) (limited to 'app') diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb index ef9407864..aea60dae5 100644 --- a/app/lib/extractor.rb +++ b/app/lib/extractor.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Extractor + MAX_DOMAIN_LENGTH = 253 + extend Twitter::TwitterText::Extractor module_function @@ -30,6 +32,10 @@ module Extractor after = $' unless Twitter::TwitterText::Regex[:end_mention_match].match?(after) + _, domain = screen_name.split('@') + + next if domain.present? && domain.length > MAX_DOMAIN_LENGTH + start_position = match_data.char_begin(1) - 1 end_position = match_data.char_end(1) diff --git a/app/validators/status_length_validator.rb b/app/validators/status_length_validator.rb index 4c258ec16..e107912b7 100644 --- a/app/validators/status_length_validator.rb +++ b/app/validators/status_length_validator.rb @@ -3,35 +3,57 @@ class StatusLengthValidator < ActiveModel::Validator MAX_CHARS = 500 URL_PLACEHOLDER_CHARS = 23 - URL_PLACEHOLDER = "\1#{'x' * URL_PLACEHOLDER_CHARS}" + URL_PLACEHOLDER = 'x' * 23 def validate(status) return unless status.local? && !status.reblog? - @status = status - status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long? + status.errors.add(:text, I18n.t('statuses.over_character_limit', max: MAX_CHARS)) if too_long?(status) end private - def too_long? - countable_length > MAX_CHARS + def too_long?(status) + countable_length(combined_text(status)) > MAX_CHARS end - def countable_length - total_text.mb_chars.grapheme_length + def countable_length(str) + str.mb_chars.grapheme_length end - def total_text - [@status.spoiler_text, countable_text].join + def combined_text(status) + [status.spoiler_text, countable_text(status.text)].join end - def countable_text - return '' if @status.text.nil? + def countable_text(str) + return '' if str.blank? - @status.text.dup.tap do |new_text| - new_text.gsub!(FetchLinkCardService::URL_PATTERN, URL_PLACEHOLDER) - new_text.gsub!(Account::MENTION_RE, '@\2') + # To ensure that we only give length concessions to entities that + # will be correctly parsed during formatting, we go through full + # entity extraction + + entities = Extractor.remove_overlapping_entities(Extractor.extract_urls_with_indices(str, extract_url_without_protocol: false) + Extractor.extract_mentions_or_lists_with_indices(str)) + + rewrite_entities(str, entities) do |entity| + if entity[:url] + URL_PLACEHOLDER + elsif entity[:screen_name] + "@#{entity[:screen_name].split('@').first}" + end end end + + def rewrite_entities(str, entities) + entities.sort_by! { |entity| entity[:indices].first } + result = ''.dup + + last_index = entities.reduce(0) do |index, entity| + result << str[index...entity[:indices].first] + result << yield(entity) + entity[:indices].last + end + + result << str[last_index..-1] + result + end end -- cgit