Add support for magnet: URIs (#12905)

master
ThibG 5 years ago committed by Eugen Rochko
parent c0006a004d
commit a8e46cf7a1
  1. 4
      app/lib/formatter.rb
  2. 2
      app/lib/sanitize_config.rb
  3. 29
      config/initializers/twitter_regex.rb
  4. 8
      spec/lib/formatter_spec.rb

@ -245,9 +245,9 @@ class Formatter
end end
standard = Extractor.extract_entities_with_indices(text, options) standard = Extractor.extract_entities_with_indices(text, options)
xmpp = Extractor.extract_xmpp_uris_with_indices(text, options) extra = Extractor.extract_extra_uris_with_indices(text, options)
Extractor.remove_overlapping_entities(special + standard + xmpp) Extractor.remove_overlapping_entities(special + standard + extra)
end end
def link_to_url(entity, options = {}) def link_to_url(entity, options = {})

@ -2,7 +2,7 @@
class Sanitize class Sanitize
module Config module Config
HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', :relative].freeze HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
CLASS_WHITELIST_TRANSFORMER = lambda do |env| CLASS_WHITELIST_TRANSFORMER = lambda do |env|
node = env[:node] node = env[:node]

@ -47,32 +47,39 @@ module Twitter
#{REGEXEN[:validate_url_pct_encoded]}| #{REGEXEN[:validate_url_pct_encoded]}|
#{REGEXEN[:validate_url_sub_delims]} #{REGEXEN[:validate_url_sub_delims]}
)/iox )/iox
REGEXEN[:valid_xmpp_uri] = %r{ REGEXEN[:xmpp_uri] = %r{
(xmpp:) # Protocol
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # Username in path (optional)
(#{REGEXEN[:valid_domain]}) # Domain in path
(/#{REGEXEN[:validate_resid]}+)? # Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # Query String
}iox
# Pattern for a magnet: URI: the literal "magnet:" scheme immediately followed
# by a query string. The query group carries no trailing "?" quantifier (unlike
# the query group of the xmpp pattern), so a bare "magnet:" with no query does
# not match.
REGEXEN[:magnet_uri] = %r{
(magnet:) # Protocol
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]}) # Query String
}iox
REGEXEN[:valid_extended_uri] = %r{
( # $1 total match ( # $1 total match
(#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character (#{REGEXEN[:valid_url_preceding_chars]}) # $2 Preceding character
( # $3 URL ( # $3 URL
((?:xmpp):) # $4 Protocol (#{REGEXEN[:xmpp_uri]}) | (#{REGEXEN[:magnet_uri]})
(//#{REGEXEN[:validate_nodeid]}+@#{REGEXEN[:valid_domain]}/)? # $5 Authority (optional)
(#{REGEXEN[:validate_nodeid]}+@)? # $6 Username in path (optional)
(#{REGEXEN[:valid_domain]}) # $7 Domain in path
(/#{REGEXEN[:validate_resid]}+)? # $8 Resource in path (optional)
(\?#{REGEXEN[:valid_url_query_chars]}*#{REGEXEN[:valid_url_query_ending_chars]})? # $9 Query String
) )
) )
}iox }iox
end end
module Extractor module Extractor
# Extracts a list of all XMPP URIs included in the Tweet <tt>text</tt> along # Extracts a list of all XMPP and magnet URIs included in the Toot <tt>text</tt> along
# with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no # with the indices. If the <tt>text</tt> is <tt>nil</tt> or contains no
# XMPP URIs an empty array will be returned. # XMPP or magnet URIs an empty array will be returned.
# #
# If a block is given then it will be called for each XMPP or magnet URI.
def extract_xmpp_uris_with_indices(text, options = {}) # :yields: uri, start, end def extract_extra_uris_with_indices(text, options = {}) # :yields: uri, start, end
return [] unless text && text.index(":") return [] unless text && text.index(":")
urls = [] urls = []
text.to_s.scan(Twitter::Regex[:valid_xmpp_uri]) do text.to_s.scan(Twitter::Regex[:valid_extended_uri]) do
valid_uri_match_data = $~ valid_uri_match_data = $~
start_position = valid_uri_match_data.char_begin(3) start_position = valid_uri_match_data.char_begin(3)

@ -258,6 +258,14 @@ RSpec.describe Formatter do
is_expected.to include 'href="xmpp:muc@instance.com?join"' is_expected.to include 'href="xmpp:muc@instance.com?join"'
end end
end end
# Checks that a magnet: URI embedded in plain text is linked in full: the
# expected href contains the whole URI, including its query string.
# NOTE(review): the subject under test is defined outside this hunk.
context 'given text containing a magnet: URI' do
let(:text) { 'wikipedia gives this example of a magnet uri: magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a' }
it 'matches the full URI' do
is_expected.to include 'href="magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a"'
end
end
end end
describe '#format_spoiler' do describe '#format_spoiler' do

Loading…
Cancel
Save