From 5d3feed191bcbe2769512119752b426108152fe9 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 10 Jul 2019 18:59:28 +0200 Subject: [PATCH] Refactor fetching of remote resources (#11251) --- app/helpers/jsonld_helper.rb | 47 +++++----- app/lib/request.rb | 2 +- .../fetch_remote_status_service.rb | 20 ++-- app/services/fetch_atom_service.rb | 93 ------------------- app/services/fetch_link_card_service.rb | 2 +- app/services/fetch_remote_account_service.rb | 2 +- app/services/fetch_remote_status_service.rb | 2 +- app/services/fetch_resource_service.rb | 68 ++++++++++++++ app/services/resolve_url_service.rb | 47 ++++------ app/workers/activitypub/delivery_worker.rb | 16 +--- .../fetch_remote_account_service_spec.rb | 1 + ...spec.rb => fetch_resource_service_spec.rb} | 14 +-- spec/services/resolve_url_service_spec.rb | 44 +-------- 13 files changed, 142 insertions(+), 216 deletions(-) delete mode 100644 app/services/fetch_atom_service.rb create mode 100644 app/services/fetch_resource_service.rb rename spec/services/{fetch_atom_service_spec.rb => fetch_resource_service_spec.rb} (84%) diff --git a/app/helpers/jsonld_helper.rb b/app/helpers/jsonld_helper.rb index 5b4011275..34a657e06 100644 --- a/app/helpers/jsonld_helper.rb +++ b/app/helpers/jsonld_helper.rb @@ -16,13 +16,15 @@ module JsonLdHelper # The url attribute can be a string, an array of strings, or an array of objects. # The objects could include a mimeType. Not-included mimeType means it's text/html. def url_to_href(value, preferred_type = nil) - single_value = if value.is_a?(Array) && !value.first.is_a?(String) - value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) } - elsif value.is_a?(Array) - value.first - else - value - end + single_value = begin + if value.is_a?(Array) && !value.first.is_a?(String) + value.find { |link| preferred_type.nil? || ((link['mimeType'].presence || 'text/html') == preferred_type) } + elsif value.is_a?(Array) + value.first + else + value + end + end if single_value.nil? || single_value.is_a?(String) single_value @@ -64,7 +66,9 @@ module JsonLdHelper def fetch_resource(uri, id, on_behalf_of = nil) unless id json = fetch_resource_without_id_validation(uri, on_behalf_of) + return unless json + uri = json['id'] end @@ -74,24 +78,26 @@ module JsonLdHelper def fetch_resource_without_id_validation(uri, on_behalf_of = nil, raise_on_temporary_error = false) build_request(uri, on_behalf_of).perform do |response| - unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error - raise Mastodon::UnexpectedResponseError, response - end + raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error + return body_to_json(response.body_with_limit) if response.code == 200 end + # If request failed, retry without doing it on behalf of a user return if on_behalf_of.nil? + build_request(uri).perform do |response| - unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error - raise Mastodon::UnexpectedResponseError, response - end + raise Mastodon::UnexpectedResponseError, response unless response_successful?(response) || response_error_unsalvageable?(response) || !raise_on_temporary_error + response.code == 200 ? body_to_json(response.body_with_limit) : nil end end def body_to_json(body, compare_id: nil) json = body.is_a?(String) ? Oj.load(body, mode: :strict) : body + return if compare_id.present? && json['id'] != compare_id + json rescue Oj::ParseError nil @@ -105,35 +111,34 @@ module JsonLdHelper end end - private - def response_successful?(response) (200...300).cover?(response.code) end def response_error_unsalvageable?(response) - (400...500).cover?(response.code) && response.code != 429 + response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code)) end def build_request(uri, on_behalf_of = nil) - request = Request.new(:get, uri) - request.on_behalf_of(on_behalf_of) if on_behalf_of - request.add_headers('Accept' => 'application/activity+json, application/ld+json') - request + Request.new(:get, uri).tap do |request| + request.on_behalf_of(on_behalf_of) if on_behalf_of + request.add_headers('Accept' => 'application/activity+json, application/ld+json') + end end def load_jsonld_context(url, _options = {}, &_block) json = Rails.cache.fetch("jsonld:context:#{url}", expires_in: 30.days, raw: true) do request = Request.new(:get, url) request.add_headers('Accept' => 'application/ld+json') - request.perform do |res| raise JSON::LD::JsonLdError::LoadingDocumentFailed unless res.code == 200 && res.mime_type == 'application/ld+json' + res.body_with_limit end end doc = JSON::LD::API::RemoteDocument.new(url, json) + block_given? ? yield(doc) : doc end end diff --git a/app/lib/request.rb b/app/lib/request.rb index 322457ad7..1fd3f5190 100644 --- a/app/lib/request.rb +++ b/app/lib/request.rb @@ -41,7 +41,7 @@ class Request end def on_behalf_of(account, key_id_format = :acct, sign_with: nil) - raise ArgumentError unless account.local? + raise ArgumentError, 'account must be local' unless account&.local? @account = account @keypair = sign_with.present? ? OpenSSL::PKey::RSA.new(sign_with) : @account.keypair diff --git a/app/services/activitypub/fetch_remote_status_service.rb b/app/services/activitypub/fetch_remote_status_service.rb index 469821032..cf4f62899 100644 --- a/app/services/activitypub/fetch_remote_status_service.rb +++ b/app/services/activitypub/fetch_remote_status_service.rb @@ -5,18 +5,18 @@ class ActivityPub::FetchRemoteStatusService < BaseService # Should be called when uri has already been checked for locality def call(uri, id: true, prefetched_body: nil, on_behalf_of: nil) - @json = if prefetched_body.nil? - fetch_resource(uri, id, on_behalf_of) - else - body_to_json(prefetched_body, compare_id: id ? uri : nil) - end + @json = begin + if prefetched_body.nil? + fetch_resource(uri, id, on_behalf_of) + else + body_to_json(prefetched_body, compare_id: id ? uri : nil) + end + end - return unless supported_context? && expected_type? - - return if actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id) + return if !(supported_context? && expected_type?) || actor_id.nil? || !trustworthy_attribution?(@json['id'], actor_id) actor = ActivityPub::TagManager.instance.uri_to_resource(actor_id, Account) - actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update(actor) + actor = ActivityPub::FetchRemoteAccountService.new.call(actor_id, id: true) if actor.nil? || needs_update?(actor) return if actor.nil? || actor.suspended? @@ -46,7 +46,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService equals_or_includes_any?(@json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) end - def needs_update(actor) + def needs_update?(actor) actor.possibly_stale? end end diff --git a/app/services/fetch_atom_service.rb b/app/services/fetch_atom_service.rb deleted file mode 100644 index d6508a988..000000000 --- a/app/services/fetch_atom_service.rb +++ /dev/null @@ -1,93 +0,0 @@ -# frozen_string_literal: true - -class FetchAtomService < BaseService - include JsonLdHelper - - def call(url) - return if url.blank? - - result = process(url) - - # retry without ActivityPub - result ||= process(url) if @unsupported_activity - - result - rescue OpenSSL::SSL::SSLError => e - Rails.logger.debug "SSL error: #{e}" - nil - rescue HTTP::ConnectionError => e - Rails.logger.debug "HTTP ConnectionError: #{e}" - nil - end - - private - - def process(url, terminal = false) - @url = url - perform_request { |response| process_response(response, terminal) } - end - - def perform_request(&block) - accept = 'text/html' - accept = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", application/atom+xml, ' + accept unless @unsupported_activity - - Request.new(:get, @url).add_headers('Accept' => accept).perform(&block) - end - - def process_response(response, terminal = false) - return nil if response.code != 200 - - if response.mime_type == 'application/atom+xml' - [@url, { prefetched_body: response.body_with_limit }, :ostatus] - elsif ['application/activity+json', 'application/ld+json'].include?(response.mime_type) - body = response.body_with_limit - json = body_to_json(body) - if supported_context?(json) && equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) && json['inbox'].present? - [json['id'], { prefetched_body: body, id: true }, :activitypub] - elsif supported_context?(json) && expected_type?(json) - [json['id'], { prefetched_body: body, id: true }, :activitypub] - else - @unsupported_activity = true - nil - end - elsif !terminal - link_header = response['Link'] && parse_link_header(response) - - if link_header&.find_link(%w(rel alternate)) - process_link_headers(link_header) - elsif response.mime_type == 'text/html' - process_html(response) - end - end - end - - def expected_type?(json) - equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) - end - - def process_html(response) - page = Nokogiri::HTML(response.body_with_limit) - - json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) } - atom_link = page.xpath('//link[@rel="alternate"]').find { |link| link['type'] == 'application/atom+xml' } - - result ||= process(json_link['href'], terminal: true) unless json_link.nil? || @unsupported_activity - result ||= process(atom_link['href'], terminal: true) unless atom_link.nil? - - result - end - - def process_link_headers(link_header) - json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"']) - atom_link = link_header.find_link(%w(rel alternate), %w(type application/atom+xml)) - - result ||= process(json_link.href, terminal: true) unless json_link.nil? || @unsupported_activity - result ||= process(atom_link.href, terminal: true) unless atom_link.nil? - - result - end - - def parse_link_header(response) - LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link']) - end -end diff --git a/app/services/fetch_link_card_service.rb b/app/services/fetch_link_card_service.rb index 75fbd0e8c..4e75c370f 100644 --- a/app/services/fetch_link_card_service.rb +++ b/app/services/fetch_link_card_service.rb @@ -29,7 +29,7 @@ class FetchLinkCardService < BaseService end attach_card if @card&.persisted? - rescue HTTP::Error, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e + rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e Rails.logger.debug "Error fetching link #{@url}: #{e}" nil end diff --git a/app/services/fetch_remote_account_service.rb b/app/services/fetch_remote_account_service.rb index a7f95603d..3cd06e30f 100644 --- a/app/services/fetch_remote_account_service.rb +++ b/app/services/fetch_remote_account_service.rb @@ -3,7 +3,7 @@ class FetchRemoteAccountService < BaseService def call(url, prefetched_body = nil, protocol = :ostatus) if prefetched_body.nil? - resource_url, resource_options, protocol = FetchAtomService.new.call(url) + resource_url, resource_options, protocol = FetchResourceService.new.call(url) else resource_url = url resource_options = { prefetched_body: prefetched_body } diff --git a/app/services/fetch_remote_status_service.rb b/app/services/fetch_remote_status_service.rb index aac39dfd5..208dc7809 100644 --- a/app/services/fetch_remote_status_service.rb +++ b/app/services/fetch_remote_status_service.rb @@ -3,7 +3,7 @@ class FetchRemoteStatusService < BaseService def call(url, prefetched_body = nil, protocol = :ostatus) if prefetched_body.nil? - resource_url, resource_options, protocol = FetchAtomService.new.call(url) + resource_url, resource_options, protocol = FetchResourceService.new.call(url) else resource_url = url resource_options = { prefetched_body: prefetched_body } diff --git a/app/services/fetch_resource_service.rb b/app/services/fetch_resource_service.rb new file mode 100644 index 000000000..c0473f3ad --- /dev/null +++ b/app/services/fetch_resource_service.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +class FetchResourceService < BaseService + include JsonLdHelper + + ACCEPT_HEADER = 'application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams", text/html' + + def call(url) + return if url.blank? + + process(url) + rescue HTTP::Error, OpenSSL::SSL::SSLError, Addressable::URI::InvalidURIError, Mastodon::HostValidationError, Mastodon::LengthValidationError => e + Rails.logger.debug "Error fetching resource #{@url}: #{e}" + nil + end + + private + + def process(url, terminal = false) + @url = url + + perform_request { |response| process_response(response, terminal) } + end + + def perform_request(&block) + Request.new(:get, @url).add_headers('Accept' => ACCEPT_HEADER).perform(&block) + end + + def process_response(response, terminal = false) + return nil if response.code != 200 + + if ['application/activity+json', 'application/ld+json'].include?(response.mime_type) + body = response.body_with_limit + json = body_to_json(body) + + [json['id'], { prefetched_body: body, id: true }, :activitypub] if supported_context?(json) && (equals_or_includes_any?(json['type'], ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) || expected_type?(json)) + elsif !terminal + link_header = response['Link'] && parse_link_header(response) + + if link_header&.find_link(%w(rel alternate)) + process_link_headers(link_header) + elsif response.mime_type == 'text/html' + process_html(response) + end + end + end + + def expected_type?(json) + equals_or_includes_any?(json['type'], ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) + end + + def process_html(response) + page = Nokogiri::HTML(response.body_with_limit) + json_link = page.xpath('//link[@rel="alternate"]').find { |link| ['application/activity+json', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"'].include?(link['type']) } + + process(json_link['href'], terminal: true) unless json_link.nil? + end + + def process_link_headers(link_header) + json_link = link_header.find_link(%w(rel alternate), %w(type application/activity+json)) || link_header.find_link(%w(rel alternate), ['type', 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"']) + + process(json_link.href, terminal: true) unless json_link.nil? + end + + def parse_link_header(response) + LinkHeader.parse(response['Link'].is_a?(Array) ? response['Link'].first : response['Link']) + end +end diff --git a/app/services/resolve_url_service.rb b/app/services/resolve_url_service.rb index f941b489a..80381c16b 100644 --- a/app/services/resolve_url_service.rb +++ b/app/services/resolve_url_service.rb @@ -4,64 +4,49 @@ class ResolveURLService < BaseService include JsonLdHelper include Authorization - attr_reader :url - def call(url, on_behalf_of: nil) - @url = url + @url = url @on_behalf_of = on_behalf_of - return process_local_url if local_url? - - process_url unless fetched_atom_feed.nil? + if local_url? + process_local_url + elsif !fetched_resource.nil? + process_url + end end private def process_url if equals_or_includes_any?(type, ActivityPub::FetchRemoteAccountService::SUPPORTED_TYPES) - FetchRemoteAccountService.new.call(atom_url, body, protocol) + FetchRemoteAccountService.new.call(resource_url, body, protocol) elsif equals_or_includes_any?(type, ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES) - FetchRemoteStatusService.new.call(atom_url, body, protocol) + FetchRemoteStatusService.new.call(resource_url, body, protocol) end end - def fetched_atom_feed - @_fetched_atom_feed ||= FetchAtomService.new.call(url) + def fetched_resource + @fetched_resource ||= FetchResourceService.new.call(@url) end - def atom_url - fetched_atom_feed.first + def resource_url + fetched_resource.first end def body - fetched_atom_feed.second[:prefetched_body] + fetched_resource.second[:prefetched_body] end def protocol - fetched_atom_feed.third + fetched_resource.third end def type return json_data['type'] if protocol == :activitypub - - case xml_root - when 'feed' - 'Person' - when 'entry' - 'Note' - end end def json_data - @_json_data ||= body_to_json(body) - end - - def xml_root - xml_data.root.name - end - - def xml_data - @_xml_data ||= Nokogiri::XML(body, nil, 'utf-8') + @json_data ||= body_to_json(body) end def local_url? @@ -83,10 +68,10 @@ class ResolveURLService < BaseService def check_local_status(status) return if status.nil? + authorize_with @on_behalf_of, status, :show? status rescue Mastodon::NotPermittedError - # Do not disclose the existence of status the user is not authorized to see nil end end diff --git a/app/workers/activitypub/delivery_worker.rb b/app/workers/activitypub/delivery_worker.rb index 8b52b8e49..5457d9d4b 100644 --- a/app/workers/activitypub/delivery_worker.rb +++ b/app/workers/activitypub/delivery_worker.rb @@ -2,6 +2,7 @@ class ActivityPub::DeliveryWorker include Sidekiq::Worker + include JsonLdHelper STOPLIGHT_FAILURE_THRESHOLD = 10 STOPLIGHT_COOLDOWN = 60 @@ -32,9 +33,10 @@ class ActivityPub::DeliveryWorker private def build_request(http_client) - request = Request.new(:post, @inbox_url, body: @json, http_client: http_client) - request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with]) - request.add_headers(HEADERS) + Request.new(:post, @inbox_url, body: @json, http_client: http_client).tap do |request| + request.on_behalf_of(@source_account, :uri, sign_with: @options[:sign_with]) + request.add_headers(HEADERS) + end end def perform_request @@ -53,14 +55,6 @@ class ActivityPub::DeliveryWorker .run end - def response_successful?(response) - (200...300).cover?(response.code) - end - - def response_error_unsalvageable?(response) - response.code == 501 || ((400...500).cover?(response.code) && ![401, 408, 429].include?(response.code)) - end - def failure_tracker @failure_tracker ||= DeliveryFailureTracker.new(@inbox_url) end diff --git a/spec/services/fetch_remote_account_service_spec.rb b/spec/services/fetch_remote_account_service_spec.rb index 37e9910d4..ee7325be2 100644 --- a/spec/services/fetch_remote_account_service_spec.rb +++ b/spec/services/fetch_remote_account_service_spec.rb @@ -4,6 +4,7 @@ RSpec.describe FetchRemoteAccountService, type: :service do let(:url) { 'https://example.com/alice' } let(:prefetched_body) { nil } let(:protocol) { :ostatus } + subject { FetchRemoteAccountService.new.call(url, prefetched_body, protocol) } let(:actor) do diff --git a/spec/services/fetch_atom_service_spec.rb b/spec/services/fetch_resource_service_spec.rb similarity index 84% rename from spec/services/fetch_atom_service_spec.rb rename to spec/services/fetch_resource_service_spec.rb index 495540004..17c192c44 100644 --- a/spec/services/fetch_atom_service_spec.rb +++ b/spec/services/fetch_resource_service_spec.rb @@ -1,9 +1,11 @@ require 'rails_helper' -RSpec.describe FetchAtomService, type: :service do +RSpec.describe FetchResourceService, type: :service do + let!(:representative) { Fabricate(:account) } + describe '#call' do let(:url) { 'http://example.com' } - subject { FetchAtomService.new.call(url) } + subject { described_class.new.call(url) } context 'url is blank' do let(:url) { '' } @@ -23,8 +25,7 @@ RSpec.describe FetchAtomService, type: :service do allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(OpenSSL::SSL::SSLError) end - it 'output log and return nil' do - expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('SSL error: OpenSSL::SSL::SSLError') + it 'return nil' do is_expected.to be_nil end end @@ -34,8 +35,7 @@ RSpec.describe FetchAtomService, type: :service do allow(Request).to receive_message_chain(:new, :add_headers, :perform).and_raise(HTTP::ConnectionError) end - it 'output log and return nil' do - expect_any_instance_of(ActiveSupport::Logger).to receive(:debug).with('HTTP ConnectionError: HTTP::ConnectionError') + it 'return nil' do is_expected.to be_nil end end @@ -57,7 +57,7 @@ RSpec.describe FetchAtomService, type: :service do context 'content type is application/atom+xml' do let(:content_type) { 'application/atom+xml' } - it { is_expected.to eq [url, { :prefetched_body => "" }, :ostatus] } + it { is_expected.to eq nil } end context 'content_type is activity+json' do diff --git a/spec/services/resolve_url_service_spec.rb b/spec/services/resolve_url_service_spec.rb index 7bb5d1940..aa4204637 100644 --- a/spec/services/resolve_url_service_spec.rb +++ b/spec/services/resolve_url_service_spec.rb @@ -6,48 +6,14 @@ describe ResolveURLService, type: :service do subject { described_class.new } describe '#call' do - it 'returns nil when there is no atom url' do - url = 'http://example.com/missing-atom' + it 'returns nil when there is no resource url' do + url = 'http://example.com/missing-resource' service = double - allow(FetchAtomService).to receive(:new).and_return service - allow(service).to receive(:call).with(url).and_return(nil) - - result = subject.call(url) - expect(result).to be_nil - end - - it 'fetches remote accounts for feed types' do - url = 'http://example.com/atom-feed' - service = double - allow(FetchAtomService).to receive(:new).and_return service - feed_url = 'http://feed-url' - feed_content = 'contents' - allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }]) - - account_service = double - allow(FetchRemoteAccountService).to receive(:new).and_return(account_service) - allow(account_service).to receive(:call) - - _result = subject.call(url) - expect(account_service).to have_received(:call).with(feed_url, feed_content, nil) - end - - it 'fetches remote statuses for entry types' do - url = 'http://example.com/atom-entry' - service = double - allow(FetchAtomService).to receive(:new).and_return service - feed_url = 'http://feed-url' - feed_content = 'contents' - allow(service).to receive(:call).with(url).and_return([feed_url, { prefetched_body: feed_content }]) - - account_service = double - allow(FetchRemoteStatusService).to receive(:new).and_return(account_service) - allow(account_service).to receive(:call) - - _result = subject.call(url) + allow(FetchResourceService).to receive(:new).and_return service + allow(service).to receive(:call).with(url).and_return(nil) - expect(account_service).to have_received(:call).with(feed_url, feed_content, nil) + expect(subject.call(url)).to be_nil end end end