@ -29,7 +29,7 @@ module Twitter
( # $1 total match
( # $1 total match
( #{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( #{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
( # $3 URL
( ( https? | dat | dweb | ipfs | ipns | ssb | gopher ) : \ / \ / ) ? # $4 Protocol (optional)
( ( ? : https?| dat | dweb | ipfs | ipns | ssb | gopher ) : \ / \ / ) ? # $4 Protocol (optional)
( #{REGEXEN[:valid_domain]}) # $5 Domain(s)
( #{REGEXEN[:valid_domain]}) # $5 Domain(s)
( ?: : ( #{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
( ?: : ( #{REGEXEN[:valid_port_number]}))? # $6 Port number (optional)
( / #{ REGEXEN [ :valid_url_path ] } *)? # $7 URL Path and anchor
( / #{ REGEXEN [ :valid_url_path ] } *)? # $7 URL Path and anchor
@ -37,5 +37,54 @@ module Twitter
)
)
)
)
} iox
} iox
REGEXEN [ :validate_nodeid ] = / (?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
[ ! $ ( ) * + , ; = ]
) / iox
REGEXEN [ :validate_resid ] = / (?:
#{REGEXEN[:validate_url_unreserved]}|
#{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]}
) / iox
REGEXEN [ :valid_xmpp_uri ] = %r{
( # $1 total match
( #{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL
( ( ? :xmpp ) : ) # $4 Protocol
( / / #{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
( #{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
( #{REGEXEN[:valid_domain]}) # $7 Domain in path
( / #{ REGEXEN [ :validate_resid ] } +)? # $8 Resource in path (optional)
( \ ?# { REGEXEN [ :valid_url_query_chars ] } * #{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
)
)
} iox
end
module Extractor
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP URIs an empty array will be returned.
#
# If a block is given then it will be called for each XMPP URI.
def extract_xmpp_uris_with_indices ( text , options = { } ) # :yields: uri, start, end
return [ ] unless text && text . index ( " : " )
urls = [ ]
text . to_s . scan ( Twitter :: Regex [ :valid_xmpp_uri ] ) do
valid_uri_match_data = $~
start_position = valid_uri_match_data . char_begin ( 3 )
end_position = valid_uri_match_data . char_end ( 3 )
urls << {
:url = > valid_uri_match_data [ 3 ] ,
:indices = > [ start_position , end_position ]
}
end
urls . each { | url | yield url [ :url ] , url [ :indices ] . first , url [ :indices ] . last } if block_given?
urls
end
end
end
end
end