Recognize most kinds of characters in URL paths (#4941)
parent
b39d512ade
commit
3816943e6b
@ -0,0 +1,42 @@ |
|||||||
|
# Overrides the URL-path related entries of Twitter::Regex::REGEXEN so that a
# URL path may contain almost any non-whitespace character, then rebuilds the
# composite :valid_url pattern on top of the new path patterns.
#
# REGEXEN itself, and the :valid_url_preceding_chars, :valid_domain,
# :valid_port_number, :valid_url_query_chars and :valid_url_query_ending_chars
# entries, are not defined here; they must already exist when this file is
# loaded (presumably provided by the twitter-text gem -- confirm load order).
#
# Ordering matters: "#{...}" inside a regexp literal is expanded when the
# literal is evaluated, so every entry must be assigned before any entry that
# interpolates it.
module Twitter
  class Regex
    # One character allowed inside a URL path: anything except whitespace,
    # "(", ")" and "?".
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou

    # A parenthesised run of path characters, optionally containing a single
    # nested parenthesised run.
    #
    # This is assigned BEFORE :valid_url_path_ending_chars on purpose. If it
    # came afterwards, the ending-chars pattern would interpolate whatever was
    # stored under this key earlier (or an empty group if nothing was), and a
    # path ending in "(...)" would not be matched with the character set
    # defined above.
    REGEXEN[:valid_url_balanced_parens] = /
      \(
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}+
          |
          # allow one nested level of balanced parentheses
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}*
            \(
              #{REGEXEN[:valid_general_url_path_chars]}+
            \)
            #{REGEXEN[:valid_general_url_path_chars]}*
          )
        )
      \)
    /iox

    # What a URL path may end with: either one character that is not
    # whitespace, a parenthesis, "?" or one of the listed punctuation
    # characters (including any Unicode dash, \p{Pd}), or a complete balanced
    # parenthesis group.
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

    # One path chunk: either path characters (with optional balanced
    # parenthesis groups) terminated by a valid ending character, or a run of
    # path characters terminated by "/".
    REGEXEN[:valid_url_path] = /(?:
      (?:
        #{REGEXEN[:valid_general_url_path_chars]}*
        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
        #{REGEXEN[:valid_url_path_ending_chars]}
      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
    )/iox

    # Full URL matcher rebuilt from the overridden path patterns; the capture
    # group numbering is described by the inline comments.
    REGEXEN[:valid_url] = %r{
      (                                                                                     # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
        (                                                                                   # $3 URL
          (https?:\/\/)?                                                                    # $4 Protocol (optional)
          (#{REGEXEN[:valid_domain]})                                                       # $5 Domain(s)
          (?::(#{REGEXEN[:valid_port_number]}))?                                            # $6 Port number (optional)
          (/#{REGEXEN[:valid_url_path]}*)?                                                  # $7 URL Path and anchor
          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
        )
      )
    }iox
  end
end
Loading…
Reference in new issue