diff options
Diffstat (limited to 'config')
-rw-r--r-- | config/initializers/twitter_regex.rb | 51 |
1 files changed, 50 insertions, 1 deletions
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb index 0ddbbee98..87815d458 100644 --- a/config/initializers/twitter_regex.rb +++ b/config/initializers/twitter_regex.rb @@ -29,7 +29,7 @@ module Twitter ( # $1 total match (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character ( # $3 URL - ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional) + ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional) (#{REGEXEN[:valid_domain]}) # $5 Domain(s) (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor @@ -37,5 +37,54 @@ module Twitter ) ) }iox + REGEXEN[:validate_nodeid] = /(?: + #{REGEXEN[:validate_url_unreserved]}| + #{REGEXEN[:validate_url_pct_encoded]}| + [!$()*+,;=] + )/iox + REGEXEN[:validate_resid] = /(?: + #{REGEXEN[:validate_url_unreserved]}| + #{REGEXEN[:validate_url_pct_encoded]}| + #{REGEXEN[:validate_url_sub_delims]} + )/iox + REGEXEN[:valid_xmpp_uri] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character + ( # $3 URL + ((?:xmpp):) # $4 Protocol + (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional) + (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional) + (#{REGEXEN[:valid_domain]}) # $7 Domain in path + (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional) + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String + ) + ) + }iox + end + + module Extractor + # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along + # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no + # XMPP URIs an empty array will be returned. + # + # If a block is given then it will be called for each XMPP URI. + def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end + return [] unless text && text.index(":") + urls = [] + + text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do + valid_uri_match_data = $~ + + start_position = valid_uri_match_data.char_begin(3) + end_position = valid_uri_match_data.char_end(3) + + urls << { + :url => valid_uri_match_data[3], + :indices => [start_position, end_position] + } + end + urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given? + urls + end end end |