about summary refs log tree commit diff
path: root/config/initializers/twitter_regex.rb
diff options
context:
space:
mode:
authorふぁぼ原 <ko_kurihara@yahoo.co.jp>2017-09-15 01:03:20 +0900
committerEugen Rochko <eugen@zeonfederated.com>2017-09-14 18:03:20 +0200
commit3816943e6b5e86b22c35f3c068521f7a9007deec (patch)
tree3a245238e9410a4dce71bba1accbb2bc0e456508 /config/initializers/twitter_regex.rb
parentb39d512ade9f556ae29d60239102faf67ff6a89f (diff)
Enable to recognize most kinds of characters as URL paths (#4941)
Diffstat (limited to 'config/initializers/twitter_regex.rb')
-rw-r--r--config/initializers/twitter_regex.rb42
1 files changed, 42 insertions, 0 deletions
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
new file mode 100644
index 000000000..5a0723d24
--- /dev/null
+++ b/config/initializers/twitter_regex.rb
@@ -0,0 +1,42 @@
+module Twitter
+  class Regex
+
+    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou
+    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
+    REGEXEN[:valid_url_balanced_parens] = /
+      \(
+        (?:
+          #{REGEXEN[:valid_general_url_path_chars]}+
+          |
+          # allow one nested level of balanced parentheses
+          (?:
+            #{REGEXEN[:valid_general_url_path_chars]}*
+            \(
+              #{REGEXEN[:valid_general_url_path_chars]}+
+            \)
+            #{REGEXEN[:valid_general_url_path_chars]}*
+          )
+        )
+      \)
+    /iox
+    REGEXEN[:valid_url_path] = /(?:
+      (?:
+        #{REGEXEN[:valid_general_url_path_chars]}*
+        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
+        #{REGEXEN[:valid_url_path_ending_chars]}
+      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
+    )/iox
+    REGEXEN[:valid_url] = %r{
+      (                                                                                     #   $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceeding chracter
+        (                                                                                   #   $3 URL
+          (https?:\/\/)?                                                                    #   $4 Protocol (optional)
+          (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s)
+          (?::(#{REGEXEN[:valid_port_number]}))?                                            #   $6 Port number (optional)
+          (/#{REGEXEN[:valid_url_path]}*)?                                                  #   $7 URL Path and anchor
+          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $8 Query String
+        )
+      )
+    }iox
+  end
+end