From cefa526c6d3a45df2d0fcb7643ced828e2e87dea Mon Sep 17 00:00:00 2001
From: Eugen Rochko <eugen@zeonfederated.com>
Date: Sat, 26 Mar 2022 02:53:34 +0100
Subject: Refactor formatter (#17828)

* Refactor formatter

* Move custom emoji pre-rendering logic to view helpers

* Move more methods out of Formatter

* Fix code style issues

* Remove Formatter

* Add inline poll options to RSS feeds

* Remove unused helper method

* Fix code style issues

* Various fixes and improvements

* Fix test
---
 app/lib/extractor.rb | 82 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 16 deletions(-)

(limited to 'app/lib/extractor.rb')

diff --git a/app/lib/extractor.rb b/app/lib/extractor.rb
index 8020aa916..ef9407864 100644
--- a/app/lib/extractor.rb
+++ b/app/lib/extractor.rb
@@ -5,18 +5,34 @@ module Extractor
 
   module_function
 
-  # :yields: username, list_slug, start, end
+  def extract_entities_with_indices(text, options = {}, &block)
+    entities = begin
+      extract_urls_with_indices(text, options) +
+        extract_hashtags_with_indices(text, check_url_overlap: false) +
+        extract_mentions_or_lists_with_indices(text) +
+        extract_extra_uris_with_indices(text)
+    end
+
+    return [] if entities.empty?
+
+    entities = remove_overlapping_entities(entities)
+    entities.each(&block) if block_given?
+    entities
+  end
+
   def extract_mentions_or_lists_with_indices(text)
-    return [] unless Twitter::TwitterText::Regex[:at_signs].match?(text)
+    return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text)
 
     possible_entries = []
 
-    text.to_s.scan(Account::MENTION_RE) do |screen_name, _|
+    text.scan(Account::MENTION_RE) do |screen_name, _|
       match_data = $LAST_MATCH_INFO
-      after = $'
+      after      = $'
+
       unless Twitter::TwitterText::Regex[:end_mention_match].match?(after)
         start_position = match_data.char_begin(1) - 1
-        end_position = match_data.char_end(1)
+        end_position   = match_data.char_end(1)
+
         possible_entries << {
           screen_name: screen_name,
           indices: [start_position, end_position],
@@ -29,36 +45,70 @@ module Extractor
         yield mention[:screen_name], mention[:indices].first, mention[:indices].last
       end
     end
+
     possible_entries
   end
 
-  def extract_hashtags_with_indices(text, **)
-    return [] unless /#/.match?(text)
+  def extract_hashtags_with_indices(text, _options = {})
+    return [] unless text&.index('#')
+
+    possible_entries = []
 
-    tags = []
     text.scan(Tag::HASHTAG_RE) do |hash_text, _|
-      match_data = $LAST_MATCH_INFO
+      match_data     = $LAST_MATCH_INFO
       start_position = match_data.char_begin(1) - 1
-      end_position = match_data.char_end(1)
-      after = $'
+      end_position   = match_data.char_end(1)
+      after          = $'
+
       if %r{\A://}.match?(after)
         hash_text.match(/(.+)(https?\Z)/) do |matched|
-          hash_text = matched[1]
+          hash_text     = matched[1]
           end_position -= matched[2].codepoint_length
         end
       end
 
-      tags << {
+      possible_entries << {
         hashtag: hash_text,
         indices: [start_position, end_position],
       }
     end
 
-    tags.each { |tag| yield tag[:hashtag], tag[:indices].first, tag[:indices].last } if block_given?
-    tags
+    if block_given?
+      possible_entries.each do |tag|
+        yield tag[:hashtag], tag[:indices].first, tag[:indices].last
+      end
+    end
+
+    possible_entries
   end
 
   def extract_cashtags_with_indices(_text)
-    [] # always returns empty array
+    []
+  end
+
+  def extract_extra_uris_with_indices(text)
+    return [] unless text&.index(':')
+
+    possible_entries = []
+
+    text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
+      valid_uri_match_data = $LAST_MATCH_INFO
+
+      start_position = valid_uri_match_data.char_begin(3)
+      end_position   = valid_uri_match_data.char_end(3)
+
+      possible_entries << {
+        url: valid_uri_match_data[3],
+        indices: [start_position, end_position],
+      }
+    end
+
+    if block_given?
+      possible_entries.each do |url|
+        yield url[:url], url[:indices].first, url[:indices].last
+      end
+    end
+
+    possible_entries
   end
 end
-- 
cgit