|
|
|
@ -47,32 +47,39 @@ module Twitter |
|
|
|
|
#{REGEXEN[:validate_url_pct_encoded]}| |
|
|
|
|
#{REGEXEN[:validate_url_sub_delims]} |
|
|
|
|
)/iox |
|
|
|
|
REGEXEN[:valid_xmpp_uri] = %r{ |
|
|
|
|
REGEXEN[:xmpp_uri] = %r{ |
|
|
|
|
(xmpp:) # Protocol |
|
|
|
|
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional) |
|
|
|
|
(#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional) |
|
|
|
|
(#{REGEXEN[:valid_domain]}) # Domain in path |
|
|
|
|
(/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional) |
|
|
|
|
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String |
|
|
|
|
}iox |
|
|
|
|
REGEXEN[:magnet_uri] = %r{ |
|
|
|
|
(magnet:) # Protocol |
|
|
|
|
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String |
|
|
|
|
}iox |
|
|
|
|
REGEXEN[:valid_extended_uri] = %r{ |
|
|
|
|
( # $1 total match |
|
|
|
|
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character |
|
|
|
|
( # $3 URL |
|
|
|
|
((?:xmpp):) # $4 Protocol |
|
|
|
|
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional) |
|
|
|
|
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional) |
|
|
|
|
(#{REGEXEN[:valid_domain]}) # $7 Domain in path |
|
|
|
|
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional) |
|
|
|
|
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String |
|
|
|
|
(#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]}) |
|
|
|
|
) |
|
|
|
|
) |
|
|
|
|
}iox |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
module Extractor |
|
|
|
|
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along |
|
|
|
|
# Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along |
|
|
|
|
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no |
|
|
|
|
# XMPP URIs an empty array will be returned. |
|
|
|
|
# XMPP or magnet URIs an empty array will be returned. |
|
|
|
|
# |
|
|
|
|
# If a block is given then it will be called for each XMPP URI. |
|
|
|
|
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end |
|
|
|
|
def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end |
|
|
|
|
return [] unless text && text.index(":") |
|
|
|
|
urls = [] |
|
|
|
|
|
|
|
|
|
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do |
|
|
|
|
text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do |
|
|
|
|
valid_uri_match_data = $~ |
|
|
|
|
|
|
|
|
|
start_position = valid_uri_match_data.char_begin(3) |
|
|
|
|