diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index 6ba327614..c771dcaaa 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -245,8 +245,9 @@ class Formatter end standard = Extractor.extract_entities_with_indices(text, options) + xmpp = Extractor.extract_xmpp_uris_with_indices(text, options) - Extractor.remove_overlapping_entities(special + standard) + Extractor.remove_overlapping_entities(special + standard + xmpp) end def link_to_url(entity, options = {}) @@ -284,7 +285,7 @@ class Formatter def link_html(url) url = Addressable::URI.parse(url).to_s - prefix = url.match(/\Ahttps?:\/\/(www\.)?/).to_s + prefix = url.match(/\A(https?:\/\/(www\.)?|xmpp:)/).to_s text = url[prefix.length, 30] suffix = url[prefix.length + 30..-1] cutoff = url[prefix.length..-1].length > 30 diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb index 77045155e..e2480376e 100644 --- a/app/lib/sanitize_config.rb +++ b/app/lib/sanitize_config.rb @@ -2,7 +2,7 @@ class Sanitize module Config - HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', :relative].freeze + HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze CLASS_WHITELIST_TRANSFORMER = lambda do |env| node = env[:node] diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb index 0ddbbee98..87815d458 100644 --- a/config/initializers/twitter_regex.rb +++ b/config/initializers/twitter_regex.rb @@ -29,7 +29,7 @@ module Twitter ( # $1 total match (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character ( # $3 URL - ((https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional) + ((?:https?|dat|dweb|ipfs|ipns|ssb|gopher):\/\/)? # $4 Protocol (optional) (#{REGEXEN[:valid_domain]}) # $5 Domain(s) (?::(#{REGEXEN[:valid_port_number]}))? # $6 Port number (optional) (/#{REGEXEN[:valid_url_path]}*)? 
# $7 URL Path and anchor @@ -37,5 +37,54 @@ module Twitter ) ) }iox + REGEXEN[:validate_nodeid] = /(?: + #{REGEXEN[:validate_url_unreserved]}| + #{REGEXEN[:validate_url_pct_encoded]}| + [!$()*+,;=] + )/iox + REGEXEN[:validate_resid] = /(?: + #{REGEXEN[:validate_url_unreserved]}| + #{REGEXEN[:validate_url_pct_encoded]}| + #{REGEXEN[:validate_url_sub_delims]} + )/iox + REGEXEN[:valid_xmpp_uri] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character + ( # $3 URL + ((?:xmpp):) # $4 Protocol + (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional) + (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional) + (#{REGEXEN[:valid_domain]}) # $7 Domain in path + (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional) + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String + ) + ) + }iox + end + + module Extractor + # Extracts a list of all XMPP URIs included in the Tweet text along + # with the indices. If the text is nil or contains no + # XMPP URIs an empty array will be returned. + # + # If a block is given then it will be called for each XMPP URI. + def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end + return [] unless text && text.index(":") + urls = [] + + text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do + valid_uri_match_data = $~ + + start_position = valid_uri_match_data.char_begin(3) + end_position = valid_uri_match_data.char_end(3) + + urls << { + :url => valid_uri_match_data[3], + :indices => [start_position, end_position] + } + end + urls.each{|url| yield url[:url], url[:indices].first, url[:indices].last} if block_given? 
+ urls + end end end diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index b8108a247..83be0a588 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -242,6 +242,22 @@ RSpec.describe Formatter do is_expected.to include '/tags/hashtag%E3%82%BF%E3%82%B0" class="mention hashtag" rel="tag">#hashtagタグ' end end + + context 'given a stand-alone xmpp: URI' do + let(:text) { 'xmpp:user@instance.com' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:user@instance.com"' + end + end + + context 'given an xmpp: URI with a query-string' do + let(:text) { 'please join xmpp:muc@instance.com?join right now' } + + it 'matches the full URI' do + is_expected.to include 'href="xmpp:muc@instance.com?join"' + end + end end describe '#format_spoiler' do