diff options
author | ふぁぼ原 <ko_kurihara@yahoo.co.jp> | 2017-09-15 01:03:20 +0900 |
---|---|---|
committer | Eugen Rochko <eugen@zeonfederated.com> | 2017-09-14 18:03:20 +0200 |
commit | 3816943e6b5e86b22c35f3c068521f7a9007deec (patch) | |
tree | 3a245238e9410a4dce71bba1accbb2bc0e456508 /config/initializers | |
parent | b39d512ade9f556ae29d60239102faf67ff6a89f (diff) |
Enable to recognize most kinds of characters as URL paths (#4941)
Diffstat (limited to 'config/initializers')
-rw-r--r-- | config/initializers/twitter_regex.rb | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb new file mode 100644 index 000000000..5a0723d24 --- /dev/null +++ b/config/initializers/twitter_regex.rb @@ -0,0 +1,42 @@ +module Twitter + class Regex + + REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou + REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou + REGEXEN[:valid_url_balanced_parens] = / + \( + (?: + #{REGEXEN[:valid_general_url_path_chars]}+ + | + # allow one nested level of balanced parentheses + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + \( + #{REGEXEN[:valid_general_url_path_chars]}+ + \) + #{REGEXEN[:valid_general_url_path_chars]}* + ) + ) + \) + /iox + REGEXEN[:valid_url_path] = /(?: + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)* + #{REGEXEN[:valid_url_path_ending_chars]} + )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/) + )/iox + REGEXEN[:valid_url] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter + ( # $3 URL + (https?:\/\/)? # $4 Protocol (optional) + (#{REGEXEN[:valid_domain]}) # $5 Domain(s) + (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) + (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String + ) + ) + }iox + end +end |