about summary refs log tree commit diff
path: root/config/initializers/twitter_regex.rb
diff options
context:
space:
mode:
authorOndřej Hruška <ondra@ondrovo.com>2017-09-28 09:12:17 +0200
committerOndřej Hruška <ondra@ondrovo.com>2017-09-28 09:12:17 +0200
commit9330ea1f4d34b6ef4ce6e841f1aa931a7f10f749 (patch)
tree81e346b3d9ca532b3d252135d82ce20f61780a36 /config/initializers/twitter_regex.rb
parent06e299cef591b63bd96f320eadc873b047cd2664 (diff)
parent4aea3f88a6d30f102a79c2da7fcfac96465ba1a8 (diff)
Merge commit '4aea3f88a6d30f102a79c2da7fcfac96465ba1a8' into merging-upstream
Diffstat (limited to 'config/initializers/twitter_regex.rb')
-rw-r--r--config/initializers/twitter_regex.rb42
1 files changed, 42 insertions, 0 deletions
diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb
new file mode 100644
index 000000000..e924fac22
--- /dev/null
+++ b/config/initializers/twitter_regex.rb
@@ -0,0 +1,42 @@
+module Twitter
+  class Regex
+
+    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou
+    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou
+    REGEXEN[:valid_url_balanced_parens] = /
+      \(
+        (?:
+          #{REGEXEN[:valid_general_url_path_chars]}+
+          |
+          # allow one nested level of balanced parentheses
+          (?:
+            #{REGEXEN[:valid_general_url_path_chars]}*
+            \(
+              #{REGEXEN[:valid_general_url_path_chars]}+
+            \)
+            #{REGEXEN[:valid_general_url_path_chars]}*
+          )
+        )
+      \)
+    /iox
+    REGEXEN[:valid_url_path] = /(?:
+      (?:
+        #{REGEXEN[:valid_general_url_path_chars]}*
+        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
+        #{REGEXEN[:valid_url_path_ending_chars]}
+      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
+    )/iox
+    REGEXEN[:valid_url] = %r{
+      (                                                                                     #   $1 total match
+        (#{REGEXEN[:valid_url_preceding_chars]})                                            #   $2 Preceeding chracter
+        (                                                                                   #   $3 URL
+          (https?:\/\/)?                                                                    #   $4 Protocol (optional)
+          (#{REGEXEN[:valid_domain]})                                                       #   $5 Domain(s)
+          (?::(#{REGEXEN[:valid_port_number]}))?                                            #   $6 Port number (optional)
+          (/#{REGEXEN[:valid_url_path]}*)?                                                  #   $7 URL Path and anchor
+          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? #   $8 Query String
+        )
+      )
+    }iox
+  end
+end