Recognize most kinds of characters in URL paths (#4941)
parent
b39d512ade
commit
3816943e6b
@ -0,0 +1,42 @@ |
||||
module Twitter
  # Overrides the URL-related entries of the REGEXEN table so that URL paths
  # accept almost any character instead of a fixed set.
  #
  # NOTE(review): REGEXEN itself and the :valid_url_preceding_chars,
  # :valid_domain, :valid_port_number, :valid_url_query_chars and
  # :valid_url_query_ending_chars entries are not defined in this file;
  # presumably they come from the twitter-text gem, which must already be
  # loaded when this code runs -- confirm.
  class Regex
    # A path character is anything except whitespace, parentheses and the
    # question mark (which introduces the query string in :valid_url).
    REGEXEN[:valid_general_url_path_chars] = /[^\p{White_Space}\(\)\?]/iou

    # A parenthesised run of path characters, optionally containing one nested
    # parenthesised run.
    #
    # Ordering matters: this entry must be assigned BEFORE
    # :valid_url_path_ending_chars, which interpolates it. Interpolation happens
    # when a regexp literal is evaluated, so assigning this entry afterwards
    # would leave the ending pattern embedding whatever was stored under this
    # key earlier rather than the definition below.
    REGEXEN[:valid_url_balanced_parens] = /
      \(
        (?:
          #{REGEXEN[:valid_general_url_path_chars]}+
          |
          # allow one nested level of balanced parentheses
          (?:
            #{REGEXEN[:valid_general_url_path_chars]}*
            \(
              #{REGEXEN[:valid_general_url_path_chars]}+
            \)
            #{REGEXEN[:valid_general_url_path_chars]}*
          )
        )
      \)
    /iox

    # What a path may end with: either a single character that is not
    # whitespace, a parenthesis, a question mark or one of the listed
    # punctuation characters (including any dash, \p{Pd}), or a complete
    # balanced-parentheses group.
    REGEXEN[:valid_url_path_ending_chars] = /[^\p{White_Space}\(\)\?!\*';:=\,\.\$%\[\]\p{Pd}_~&\|@]|(?:#{REGEXEN[:valid_url_balanced_parens]})/iou

    # One path chunk. Either:
    #   * optional path characters, any number of balanced-parentheses groups
    #     each followed by optional path characters, then a valid ending; or
    #   * one or more path characters followed by a slash.
    REGEXEN[:valid_url_path] = /(?:
      (?:
        #{REGEXEN[:valid_general_url_path_chars]}*
        (?:#{REGEXEN[:valid_url_balanced_parens]} #{REGEXEN[:valid_general_url_path_chars]}*)*
        #{REGEXEN[:valid_url_path_ending_chars]}
      )|(?:#{REGEXEN[:valid_general_url_path_chars]}+\/)
    )/iox

    # Full URL matcher, rebuilt so that it picks up the path patterns above.
    # Capture groups are numbered as annotated inline.
    REGEXEN[:valid_url] = %r{
      (                                                                                     # $1 total match
        (#{REGEXEN[:valid_url_preceding_chars]})                                            # $2 Preceding character
        (                                                                                   # $3 URL
          (https?:\/\/)?                                                                    # $4 Protocol (optional)
          (#{REGEXEN[:valid_domain]})                                                       # $5 Domain(s)
          (?::(#{REGEXEN[:valid_port_number]}))?                                            # $6 Port number (optional)
          (/#{REGEXEN[:valid_url_path]}*)?                                                  # $7 URL Path and anchor
          (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $8 Query String
        )
      )
    }iox
  end
end
Loading…
Reference in new issue