diff --git a/app/lib/formatter.rb b/app/lib/formatter.rb index c771dcaaa..2c5674869 100644 --- a/app/lib/formatter.rb +++ b/app/lib/formatter.rb @@ -245,9 +245,9 @@ class Formatter end standard = Extractor.extract_entities_with_indices(text, options) - xmpp = Extractor.extract_xmpp_uris_with_indices(text, options) + extra = Extractor.extract_extra_uris_with_indices(text, options) - Extractor.remove_overlapping_entities(special + standard + xmpp) + Extractor.remove_overlapping_entities(special + standard + extra) end def link_to_url(entity, options = {}) diff --git a/app/lib/sanitize_config.rb b/app/lib/sanitize_config.rb index e2480376e..a82411127 100644 --- a/app/lib/sanitize_config.rb +++ b/app/lib/sanitize_config.rb @@ -2,7 +2,7 @@ class Sanitize module Config - HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze + HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze CLASS_WHITELIST_TRANSFORMER = lambda do |env| node = env[:node] diff --git a/config/initializers/twitter_regex.rb b/config/initializers/twitter_regex.rb index 87815d458..f84f7c0cb 100644 --- a/config/initializers/twitter_regex.rb +++ b/config/initializers/twitter_regex.rb @@ -47,32 +47,39 @@ module Twitter #{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_sub_delims]} )/iox - REGEXEN[:valid_xmpp_uri] = %r{ - ( # $1 total match - (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character - ( # $3 URL - ((?:xmpp):) # $4 Protocol - (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional) - (#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional) - (#{REGEXEN[:valid_domain]}) # $7 Domain in path - (/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional) - (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String + REGEXEN[:xmpp_uri] = %r{ + (xmpp:) # Protocol + (//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional) + (#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional) + (#{REGEXEN[:valid_domain]}) # Domain in path + (/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional) + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String + }iox + REGEXEN[:magnet_uri] = %r{ + (magnet:) # Protocol + (\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String + }iox + REGEXEN[:valid_extended_uri] = %r{ + ( # $1 total match + (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character + ( # $3 URL + (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]}) ) ) }iox end module Extractor - # Extracts a list of all XMPP URIs included in the Tweet text along + # Extracts a list of all XMPP and magnet URIs included in the Toot text along # with the indices. If the text is nil or contains no - # XMPP URIs an empty array will be returned. + # XMPP or magnet URIs an empty array will be returned. # # If a block is given then it will be called for each XMPP URI. - def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end + def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end return [] unless text && text.index(":") urls = [] - text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do + text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do valid_uri_match_data = $~ start_position = valid_uri_match_data.char_begin(3) diff --git a/spec/lib/formatter_spec.rb b/spec/lib/formatter_spec.rb index 83be0a588..633d59c2a 100644 --- a/spec/lib/formatter_spec.rb +++ b/spec/lib/formatter_spec.rb @@ -258,6 +258,14 @@ RSpec.describe Formatter do is_expected.to include 'href="xmpp:muc@instance.com?join"' end end + + context 'given text containing a magnet: URI' do + let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' } + + it 'matches the full URI' do + is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"' + end + end end describe '#format_spoiler' do