Account archive download (#6460)

* Fix #201: Account archive download

* Export actor and private key in the archive

* Optimize BackupService

- Add conversation to cached associations of status, because
  somehow it was forgotten and is source of N+1 queries
- Explicitly call GC between batches of records being fetched
  (Model class allocations are the worst offender)
- Stream media files into the tar in 1MB chunks
  (Do not allocate media file (up to 8MB) as string into memory)
- Use #bytesize instead of #size to calculate file size for JSON
  (Fix FileOverflow error)
- Segment media into subfolders by status ID because apparently
  GIF-to-MP4 media are all named "media.mp4" for some reason

* Keep uniquely generated filename in Paperclip::GifTranscoder

* Ensure dumped files do not overwrite each other by maintaining directory partitions

* Give tar archives a good name

* Add scheduler to remove week-old backups

* Fix code style issue
master
Eugen Rochko 7 years ago committed by GitHub
parent c1e77b56a9
commit 61ed133fea
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 1
      Gemfile
  2. 2
      Gemfile.lock
  3. 12
      app/controllers/settings/exports_controller.rb
  4. 4
      app/javascript/images/icon_file_download.svg
  5. BIN
      app/javascript/images/mailer/icon_file_download.png
  6. 12
      app/mailers/user_mailer.rb
  7. 22
      app/models/backup.rb
  8. 2
      app/models/status.rb
  9. 1
      app/models/user.rb
  10. 4
      app/policies/application_policy.rb
  11. 9
      app/policies/backup_policy.rb
  12. 4
      app/serializers/activitypub/collection_serializer.rb
  13. 128
      app/services/backup_service.rb
  14. 23
      app/views/settings/exports/show.html.haml
  15. 59
      app/views/user_mailer/backup_ready.html.haml
  16. 7
      app/views/user_mailer/backup_ready.text.erb
  17. 17
      app/workers/backup_worker.rb
  18. 16
      app/workers/scheduler/backup_cleanup_scheduler.rb
  19. 11
      config/locales/en.yml
  20. 2
      config/routes.rb
  21. 3
      config/sidekiq.yml
  22. 11
      db/migrate/20180211015820_create_backups.rb
  23. 14
      db/schema.rb
  24. 2
      lib/paperclip/gif_transcoder.rb
  25. 3
      spec/fabricators/backup_fabricator.rb
  26. 5
      spec/mailers/previews/user_mailer_preview.rb
  27. 5
      spec/models/backup_spec.rb

@ -116,6 +116,7 @@ group :development do
gem 'bullet', '~> 5.5' gem 'bullet', '~> 5.5'
gem 'letter_opener', '~> 1.4' gem 'letter_opener', '~> 1.4'
gem 'letter_opener_web', '~> 1.3' gem 'letter_opener_web', '~> 1.3'
gem 'memory_profiler'
gem 'rubocop', require: false gem 'rubocop', require: false
gem 'brakeman', '~> 4.0', require: false gem 'brakeman', '~> 4.0', require: false
gem 'bundler-audit', '~> 0.6', require: false gem 'bundler-audit', '~> 0.6', require: false

@ -301,6 +301,7 @@ GEM
mini_mime (>= 0.1.1) mini_mime (>= 0.1.1)
mario-redis-lock (1.2.0) mario-redis-lock (1.2.0)
redis (~> 3, >= 3.0.5) redis (~> 3, >= 3.0.5)
memory_profiler (0.9.10)
method_source (0.9.0) method_source (0.9.0)
microformats (4.0.7) microformats (4.0.7)
json json
@ -664,6 +665,7 @@ DEPENDENCIES
link_header (~> 0.0) link_header (~> 0.0)
lograge (~> 0.7) lograge (~> 0.7)
mario-redis-lock (~> 1.2) mario-redis-lock (~> 1.2)
memory_profiler
microformats (~> 4.0) microformats (~> 4.0)
mime-types (~> 3.1) mime-types (~> 3.1)
nokogiri (~> 1.8) nokogiri (~> 1.8)

@ -1,11 +1,23 @@
# frozen_string_literal: true # frozen_string_literal: true
class Settings::ExportsController < ApplicationController class Settings::ExportsController < ApplicationController
include Authorization
layout 'admin' layout 'admin'
before_action :authenticate_user! before_action :authenticate_user!
def show def show
@export = Export.new(current_account) @export = Export.new(current_account)
@backups = current_user.backups
end
def create
authorize :backup, :create?
backup = current_user.backups.create!
BackupWorker.perform_async(backup.id)
redirect_to settings_export_path
end end
end end

@ -0,0 +1,4 @@
<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
<path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
<path d="M0 0h24v24H0z" fill="none"/>
</svg>

After

Width:  |  Height:  |  Size: 205 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 271 B

@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer
mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject') mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject')
end end
end end
# Builds the "backup ready" notification for +user+ about +backup+.
#
# Stores the user, the configured local domain and the backup in instance
# variables, then builds the mail inside the user's locale (falling back to
# the default locale). Returns nil without building a mail when the user is
# disabled.
def backup_ready(user, backup)
  @resource = user
  @instance = Rails.configuration.x.local_domain
  @backup   = backup

  return if @resource.disabled?

  locale = @resource.locale || I18n.default_locale

  I18n.with_locale(locale) do
    mail(to: @resource.email, subject: I18n.t('user_mailer.backup_ready.subject'))
  end
end
end end

@ -0,0 +1,22 @@
# frozen_string_literal: true
# == Schema Information
#
# Table name: backups
#
# id :integer not null, primary key
# user_id :integer
# dump_file_name :string
# dump_content_type :string
# dump_file_size :integer
# dump_updated_at :datetime
# processed :boolean default(FALSE), not null
# created_at :datetime not null
# updated_at :datetime not null
#
# One archive export belonging to a user. The columns are listed in the
# schema comment above; `processed` defaults to false there.
class Backup < ApplicationRecord
# Owning user; paired with the `backups` association on the user side.
belongs_to :user, inverse_of: :backups
# File attachment named `dump` (presumably backed by the dump_* columns
# listed in the schema comment — confirm against the migration).
has_attached_file :dump
# Explicitly opts out of content-type validation for the attachment.
do_not_validate_attachment_file_type :dump
end

@ -76,7 +76,7 @@ class Status < ApplicationRecord
scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) } scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) }
scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account
delegate :domain, to: :account, prefix: true delegate :domain, to: :account, prefix: true

@ -60,6 +60,7 @@ class User < ApplicationRecord
accepts_nested_attributes_for :account accepts_nested_attributes_for :account
has_many :applications, class_name: 'Doorkeeper::Application', as: :owner has_many :applications, class_name: 'Doorkeeper::Application', as: :owner
has_many :backups, inverse_of: :user
validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale? validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale?
validates_with BlacklistedEmailValidator, if: :email_changed? validates_with BlacklistedEmailValidator, if: :email_changed?

@ -15,4 +15,8 @@ class ApplicationPolicy
def current_user def current_user
current_account&.user current_account&.user
end end
# True when a current user is available, i.e. `current_user` is not nil.
def user_signed_in?
  return false if current_user.nil?

  true
end
end end

@ -0,0 +1,9 @@
# frozen_string_literal: true
# Policy deciding whether the current user may request a new backup.
class BackupPolicy < ApplicationPolicy
  # Minimum time that must separate two backup requests by the same user.
  MIN_AGE = 1.week

  # A signed-in user may create a backup only when none of their backups
  # was created within the last MIN_AGE.
  def create?
    user_signed_in? && !recent_backup?
  end

  private

  # True when the current user has at least one backup newer than MIN_AGE.
  # Uses an existence check instead of counting every matching row, since
  # only emptiness matters here.
  def recent_backup?
    current_user.backups.where('created_at >= ?', MIN_AGE.ago).exists?
  end
end

@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
attribute :part_of, if: -> { object.part_of.present? } attribute :part_of, if: -> { object.part_of.present? }
has_one :first, if: -> { object.first.present? } has_one :first, if: -> { object.first.present? }
has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? } has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? }
has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? } has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? }
def type def type
if page? if page?

@ -0,0 +1,128 @@
# frozen_string_literal: true
require 'rubygems/package'
# Builds the downloadable archive for a Backup record: a gzipped tar holding
# the account's media attachments, its outbox as ActivityPub JSON, the actor
# document, avatar/header images and the account's private key.
class BackupService < BaseService
  # Media files are copied into the tar in pieces of this size rather than
  # being read into memory as a single string.
  CHUNK_SIZE = 1.megabyte

  attr_reader :account, :backup, :collection

  # Generates the archive for +backup+, attaches it to the record and marks
  # the record as processed.
  def call(backup)
    @backup  = backup
    @account = backup.user.account

    build_json!
    build_archive!
  end

  private

  # Serializes every status of the account into the in-memory outbox
  # collection. GC is triggered explicitly after each batch of records.
  def build_json!
    @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)

    account.statuses.with_includes.find_in_batches do |batch|
      batch.each { |status| @collection[:orderedItems] << serialize_status(status) }

      GC.start
    end
  end

  # Serializes one status as an activity without its '@context' entry.
  # Attachment URLs are reduced to their path with the leading "/system/"
  # removed. Announce activities are left untouched; the type check must run
  # first so that [:attachment] is never looked up on them.
  def serialize_status(status)
    activity = serialize(status, ActivityPub::ActivitySerializer)
    activity.delete(:'@context')

    if activity[:type] != 'Announce' && activity[:object][:attachment].present?
      activity[:object][:attachment].each do |attachment|
        attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '')
      end
    end

    activity
  end

  # Writes the tar.gz into a temporary file, attaches it to the backup and
  # saves the record as processed. The temporary file is always closed and
  # removed afterwards.
  def build_archive!
    tmp_file = Tempfile.new(%w(archive .tar.gz))

    File.open(tmp_file, 'wb') do |file|
      Zlib::GzipWriter.wrap(file) do |gz|
        Gem::Package::TarWriter.new(gz) do |tar|
          # Entry order inside the archive: media, outbox, actor data.
          dump_media_attachments!(tar)
          dump_outbox!(tar)
          dump_actor!(tar)
        end
      end
    end

    timestamp        = Time.now.utc.strftime('%Y%m%d%H%M%S')
    name_parts       = ['archive', timestamp, SecureRandom.hex(2)]
    archive_filename = name_parts.join('-') + '.tar.gz'

    @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
    @backup.processed = true
    @backup.save!
  ensure
    tmp_file.close
    tmp_file.unlink
  end

  # Adds every attached media file of the account to the tar under its own
  # storage path, batch by batch, running GC after each batch.
  def dump_media_attachments!(tar)
    MediaAttachment.attached.where(account: account).find_in_batches do |batch|
      batch.each { |media| download_to_tar(tar, media.file, media.file.path) }

      GC.start
    end
  end

  # Adds the collected outbox to the tar as outbox.json.
  def dump_outbox!(tar)
    write_entry(tar, 'outbox.json', Oj.dump(collection))
  end

  # Adds the actor document, the avatar and header files (when they exist)
  # and the private key to the tar. Icon and image URLs in the actor document
  # are rewritten to the archive-local file names.
  def dump_actor!(tar)
    actor = serialize(account, ActivityPub::ActorSerializer)

    { icon: 'avatar', image: 'header' }.each do |field, basename|
      next unless actor[field]

      actor[field][:url] = basename + File.extname(actor[field][:url])
    end

    dump_profile_image!(tar, account.avatar, 'avatar')
    dump_profile_image!(tar, account.header, 'header')

    write_entry(tar, 'actor.json', Oj.dump(actor))
    write_entry(tar, 'key.pem', account.private_key)
  end

  # Copies +attachment+ into the tar as +basename+ plus the original file
  # extension, skipping attachments that do not exist.
  def dump_profile_image!(tar, attachment, basename)
    return unless attachment.exists?

    download_to_tar(tar, attachment, basename + File.extname(attachment.path))
  end

  # Writes the string +payload+ into the tar as the read-only entry +name+.
  # The entry size is the byte size of the payload, not its character count.
  def write_entry(tar, name, payload)
    tar.add_file_simple(name, 0o444, payload.bytesize) { |io| io.write(payload) }
  end

  # Presenter for the ordered outbox collection, starting with no items.
  def collection_presenter
    ActivityPub::CollectionPresenter.new(
      id: account_outbox_url(account),
      type: :ordered,
      size: account.statuses_count,
      items: []
    )
  end

  # Renders +object+ with +serializer+ through the ActivityPub adapter.
  def serialize(object, serializer)
    resource = ActiveModelSerializers::SerializableResource.new(
      object,
      serializer: serializer,
      adapter: ActivityPub::Adapter
    )

    resource.as_json
  end

  # Copies +attachment+ into the tar as +filename+, reading it in
  # CHUNK_SIZE pieces until the adapter returns nothing more.
  def download_to_tar(tar, attachment, filename)
    adapter = Paperclip.io_adapters.for(attachment)

    tar.add_file_simple(filename, 0o444, adapter.size) do |io|
      while (chunk = adapter.read(CHUNK_SIZE))
        io.write(chunk)
      end
    end
  end
end

@ -20,3 +20,26 @@
%th= t('exports.mutes') %th= t('exports.mutes')
%td= @export.total_mutes %td= @export.total_mutes
%td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv) %td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv)
%p.muted-hint= t('exports.archive_takeout.hint_html')
- if policy(:backup).create?
%p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post
- unless @backups.empty?
.table-wrapper
%table.table
%thead
%tr
%th= t('exports.archive_takeout.date')
%th= t('exports.archive_takeout.size')
%th
%tbody
- @backups.each do |backup|
%tr
%td= l backup.created_at
- if backup.processed?
%td= number_to_human_size backup.dump_file_size
%td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url
- else
%td{ colspan: 2 }= t('exports.archive_takeout.in_progress')

@ -0,0 +1,59 @@
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell.hero
.email-row
.col-6
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.text-center.padded
%table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td
= image_tag full_pack_url('icon_file_download.png'), alt: ''
%h1= t 'user_mailer.backup_ready.title'
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell.content-start
.email-row
.col-6
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.text-center
%p= t 'user_mailer.backup_ready.explanation'
%table.email-table{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.email-body
.email-container
%table.content-section{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.content-cell
%table.column{ cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.column-cell.button-cell
%table.button{ align: 'center', cellspacing: 0, cellpadding: 0 }
%tbody
%tr
%td.button-primary
= link_to full_asset_url(@backup.dump.url) do
%span= t 'exports.archive_takeout.download'

@ -0,0 +1,7 @@
<%= t 'user_mailer.backup_ready.title' %>
===
<%= t 'user_mailer.backup_ready.explanation' %>
=> <%= full_asset_url(@backup.dump.url) %>

@ -0,0 +1,17 @@
# frozen_string_literal: true
# Sidekiq worker (queue 'pull') that builds the archive for one backup,
# removes the user's other backups and then notifies the user.
class BackupWorker
  include Sidekiq::Worker

  sidekiq_options queue: 'pull'

  # Runs the export for the backup identified by +backup_id+.
  def perform(backup_id)
    backup = Backup.find(backup_id)
    owner  = backup.user

    BackupService.new.call(backup)
    destroy_other_backups(owner, backup)

    UserMailer.backup_ready(owner, backup).deliver_later
  end

  private

  # Destroys every backup of +owner+ except +keep+.
  def destroy_other_backups(owner, keep)
    owner.backups.where.not(id: keep.id).destroy_all
  end
end

@ -0,0 +1,16 @@
# frozen_string_literal: true
require 'sidekiq-scheduler'
# Scheduled Sidekiq worker that removes old backups.
class Scheduler::BackupCleanupScheduler
  include Sidekiq::Worker

  # Destroys, one record at a time, every backup created more than seven
  # days ago.
  def perform
    cutoff = 7.days.ago

    Backup.where('created_at < ?', cutoff).find_each do |backup|
      backup.destroy!
    end
  end
end

@ -421,6 +421,13 @@ en:
title: This page is not correct title: This page is not correct
noscript_html: To use the Mastodon web application, please enable JavaScript. Alternatively, try one of the <a href="https://github.com/tootsuite/documentation/blob/master/Using-Mastodon/Apps.md">native apps</a> for Mastodon for your platform. noscript_html: To use the Mastodon web application, please enable JavaScript. Alternatively, try one of the <a href="https://github.com/tootsuite/documentation/blob/master/Using-Mastodon/Apps.md">native apps</a> for Mastodon for your platform.
exports: exports:
archive_takeout:
date: Date
download: Download your archive
hint_html: You can request an archive of your <strong>toots and uploaded media</strong>. The exported data will be in ActivityPub format, readable by any compliant software.
in_progress: Compiling your archive...
request: Request your archive
size: Size
blocks: You block blocks: You block
csv: CSV csv: CSV
follows: You follow follows: You follow
@ -733,6 +740,10 @@ en:
setup: Set up setup: Set up
wrong_code: The entered code was invalid! Are server time and device time correct? wrong_code: The entered code was invalid! Are server time and device time correct?
user_mailer: user_mailer:
backup_ready:
explanation: You requested a full backup of your Mastodon account. It's now ready for download!
subject: Your archive is ready for download
title: Archive takeout
welcome: welcome:
edit_profile_action: Setup profile edit_profile_action: Setup profile
edit_profile_step: You can customize your profile by uploading an avatar, header, changing your display name and more. If you’d like to review new followers before they’re allowed to follow you, you can lock your account. edit_profile_step: You can customize your profile by uploading an avatar, header, changing your display name and more. If you’d like to review new followers before they’re allowed to follow you, you can lock your account.

@ -76,7 +76,7 @@ Rails.application.routes.draw do
resource :notifications, only: [:show, :update] resource :notifications, only: [:show, :update]
resource :import, only: [:show, :create] resource :import, only: [:show, :create]
resource :export, only: [:show] resource :export, only: [:show, :create]
namespace :exports, constraints: { format: :csv } do namespace :exports, constraints: { format: :csv } do
resources :follows, only: :index, controller: :following_accounts resources :follows, only: :index, controller: :following_accounts
resources :blocks, only: :index, controller: :blocked_accounts resources :blocks, only: :index, controller: :blocked_accounts

@ -30,3 +30,6 @@
email_scheduler: email_scheduler:
cron: '0 10 * * 2' cron: '0 10 * * 2'
class: Scheduler::EmailScheduler class: Scheduler::EmailScheduler
backup_cleanup_scheduler:
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
class: Scheduler::BackupCleanupScheduler

@ -0,0 +1,11 @@
# Creates the `backups` table.
class CreateBackups < ActiveRecord::Migration[5.1]
def change
create_table :backups do |t|
# Reference to the owning user; the foreign key is declared with
# on_delete: :nullify.
t.references :user, foreign_key: { on_delete: :nullify }
# Attachment columns for the `dump` file.
t.attachment :dump
# Non-null flag that starts out false.
t.boolean :processed, null: false, default: false
t.timestamps
end
end
end

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180206000000) do ActiveRecord::Schema.define(version: 20180211015820) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
@ -92,6 +92,18 @@ ActiveRecord::Schema.define(version: 20180206000000) do
t.index ["target_type", "target_id"], name: "index_admin_action_logs_on_target_type_and_target_id" t.index ["target_type", "target_id"], name: "index_admin_action_logs_on_target_type_and_target_id"
end end
create_table "backups", force: :cascade do |t|
t.bigint "user_id"
t.string "dump_file_name"
t.string "dump_content_type"
t.integer "dump_file_size"
t.datetime "dump_updated_at"
t.boolean "processed", default: false, null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["user_id"], name: "index_backups_on_user_id"
end
create_table "blocks", force: :cascade do |t| create_table "blocks", force: :cascade do |t|
t.datetime "created_at", null: false t.datetime "created_at", null: false
t.datetime "updated_at", null: false t.datetime "updated_at", null: false

@ -16,7 +16,7 @@ module Paperclip
final_file = Paperclip::Transcoder.make(file, options, attachment) final_file = Paperclip::Transcoder.make(file, options, attachment)
attachment.instance.file_file_name = 'media.mp4' attachment.instance.file_file_name = File.basename(attachment.instance.file_file_name, '.*') + '.mp4'
attachment.instance.file_content_type = 'video/mp4' attachment.instance.file_content_type = 'video/mp4'
attachment.instance.type = MediaAttachment.types[:gifv] attachment.instance.type = MediaAttachment.types[:gifv]

@ -0,0 +1,3 @@
# Test fabricator for backups. Only the `user` attribute is declared
# (presumably filled from the :user fabricator — confirm).
Fabricator(:backup) do
user
end

@ -34,4 +34,9 @@ class UserMailerPreview < ActionMailer::Preview
def welcome def welcome
UserMailer.welcome(User.first) UserMailer.welcome(User.first)
end end
# Preview this email at http://localhost:3000/rails/mailers/user_mailer/backup_ready
def backup_ready
  # The preview is built from the first User and the first Backup.
  recipient = User.first
  archive   = Backup.first

  UserMailer.backup_ready(recipient, archive)
end
end end

@ -0,0 +1,5 @@
require 'rails_helper'
# Model spec for Backup; no examples are defined yet.
RSpec.describe Backup, type: :model do
end
Loading…
Cancel
Save