From 3816943e6b5e86b22c35f3c068521f7a9007deec Mon Sep 17 00:00:00 2001 From: ちぁぼ原 Date: Fri, 15 Sep 2017 01:03:20 +0900 Subject: Enable to recognize most kinds of characters as URL paths (#4941) --- config/initializers/twitter_regex.rb | 42 ++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 config/initializers/twitter_regex.rb (limited to 'config/initializers') diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb new file mode 100644 index 000000000..5a0723d24 --- /dev/null +++ b/config/initializers/twitter_regex.rb @@ -0,0 +1,42 @@ +module Twitter + class Regex + + REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou + REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou + REGEXEN[:valid_url_balanced_parens] = / + \( + (?: + #{REGEXEN[:valid_general_url_path_chars]}+ + | + # allow one nested level of balanced parentheses + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + \( + #{REGEXEN[:valid_general_url_path_chars]}+ + \) + #{REGEXEN[:valid_general_url_path_chars]}* + ) + ) + \) + /iox + REGEXEN[:valid_url_path] = /(?: + (?: + #{REGEXEN[:valid_general_url_path_chars]}* + (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)* + #{REGEXEN[:valid_url_path_ending_chars]} + )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/) + )/iox + REGEXEN[:valid_url] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceeding chracter + ( # $3 URL + (https?:\/\/)? # $4 Protocol (optional) + (#{REGEXEN[:valid_domain]}) # $5 Domain(s) + (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) + (/#{REGEXEN[:valid_url_path]}*)? # $7 URL Path and anchor + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String + ) + ) + }iox + end +end -- cgit