diff options
author | Thibaut Girka <thib@sitedethib.com> | 2020-01-12 15:57:34 +0100 |
---|---|---|
committer | Thibaut Girka <thib@sitedethib.com> | 2020-01-12 15:57:34 +0100 |
commit | 980c004f0601bbfaf440c0ea56de408ccfd79007 (patch) | |
tree | 8ed051d5e8b9c530b3282cfd84e34e50b3bcfa57 /config/initializers | |
parent | 180f1383943ad171d8394ef9af7c7861bfc08056 (diff) | |
parent | 24cd2126c6cfb80844ef9ffbf61647b3d9afdc68 (diff) |
Merge branch 'master' into glitch-soc/merge-upstream
Conflicts:
- `Gemfile.lock`: No real conflict, glitch-soc-only dependency (redcarpet) too close to an upstream one (rdf-normalize)
- `README.md`: we have different READMEs, discarded upstream's changes
- `app/views/admin/custom_emojis/index.html.haml`: No real conflict, different context because of glitch-soc theming
- `lib/mastodon/statuses_cli.rb`: Upstream added code to keep bookmarked statuses, we were already doing so with slightly different code. Discarded upstream's changes.
- `package.json`: No real conflict, glitch-soc-only dependency (favico.js) too close to an upstream one
Diffstat (limited to 'config/initializers')
-rw-r--r-- | config/initializers/twitter_regex.rb | 51 |
1 files changed, 50 insertions, 1 deletions
```diff
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
index 0ddbbee98..87815d458 100644
--- a/config/initializers/twitter_regex.rb
+++ b/config/initializers/twitter_regex.rb
@@ -29,7 +29,7 @@ module Twitter
       ( # $1 total match
         (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
         ( # $3 URL
-          ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
+          ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional)
           (#{REGEXEN[:valid_domain]}) # $5 Domain(s)
           (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
           (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor
@@ -37,5 +37,54 @@ module Twitter
         )
       )
     }iox
+    REGEXEN[:validate_nodeid] = /(?:
+      #{REGEXEN[:validate_url_unreserved]}|
+      #{REGEXEN[:validate_url_pct_encoded]}|
+      [!$()*+,;=]
+    )/iox
+    REGEXEN[:validate_resid] = /(?:
+      #{REGEXEN[:validate_url_unreserved]}|
+      #{REGEXEN[:validate_url_pct_encoded]}|
+      #{REGEXEN[:validate_url_sub_delims]}
+    )/iox
+    REGEXEN[:valid_xmpp_uri] = %r{
+      ( # $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
+        ( # $3 URL
+          ((?:xmpp):) # $4 Protocol
+          (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
+          (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
+          (#{REGEXEN[:valid_domain]}) # $7 Domain in path
+          (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
+          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
+        )
+      )
+    }iox
+  end
+
+  module Extractor
+    # Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
+    # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
+    # XMPP URIs an empty array will be returned.
+    #
+    # If a block is given then it will be called for each XMPP URI.
+    def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end
+      return [] unless text && text.index(":")
+      urls = []
+
+      text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do
+        valid_uri_match_data = $~
+
+        start_position = valid_uri_match_data.char_begin(3)
+        end_position = valid_uri_match_data.char_end(3)
+
+        urls << {
+          :url => valid_uri_match_data[3],
+          :indices => [start_position, end_position]
+        }
+      end
+      urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given?
+      urls
+    end
   end
 end
```