diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2018-02-21 23:21:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-21 23:21:32 +0100 |
commit | 61ed133fea80041b354c78b043cec72dd8644101 (patch) | |
tree | fa3c4aec521f5a0004a5d2277b1421fd983256d1 | |
parent | c1e77b56a92fc075f000af9c263c72ba6bdbe5f7 (diff) |
Account archive download (#6460)
* Fix #201: Account archive download * Export actor and private key in the archive * Optimize BackupService - Add conversation to cached associations of status, because somehow it was forgotten and is source of N+1 queries - Explicitly call GC between batches of records being fetched (Model class allocations are the worst offender) - Stream media files into the tar in 1MB chunks (Do not allocate media file (up to 8MB) as string into memory) - Use #bytesize instead of #size to calculate file size for JSON (Fix FileOverflow error) - Segment media into subfolders by status ID because apparently GIF-to-MP4 media are all named "media.mp4" for some reason * Keep uniquely generated filename in Paperclip::GifTranscoder * Ensure dumped files do not overwrite each other by maintaing directory partitions * Give tar archives a good name * Add scheduler to remove week-old backups * Fix code style issue
27 files changed, 374 insertions, 7 deletions
diff --git a/Gemfile b/Gemfile index ad1598af3..da5fc2f38 100644 --- a/Gemfile +++ b/Gemfile @@ -116,6 +116,7 @@ group :development do gem 'bullet', '~> 5.5' gem 'letter_opener', '~> 1.4' gem 'letter_opener_web', '~> 1.3' + gem 'memory_profiler' gem 'rubocop', require: false gem 'brakeman', '~> 4.0', require: false gem 'bundler-audit', '~> 0.6', require: false diff --git a/Gemfile.lock b/Gemfile.lock index 920262ede..65a0dfabf 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -301,6 +301,7 @@ GEM mini_mime (>= 0.1.1) mario-redis-lock (1.2.0) redis (~> 3, >= 3.0.5) + memory_profiler (0.9.10) method_source (0.9.0) microformats (4.0.7) json @@ -664,6 +665,7 @@ DEPENDENCIES link_header (~> 0.0) lograge (~> 0.7) mario-redis-lock (~> 1.2) + memory_profiler microformats (~> 4.0) mime-types (~> 3.1) nokogiri (~> 1.8) diff --git a/app/controllers/settings/exports_controller.rb b/app/controllers/settings/exports_controller.rb index ae62f00c1..869e11d3b 100644 --- a/app/controllers/settings/exports_controller.rb +++ b/app/controllers/settings/exports_controller.rb @@ -1,11 +1,23 @@ # frozen_string_literal: true class Settings::ExportsController < ApplicationController + include Authorization + layout 'admin' before_action :authenticate_user! def show - @export = Export.new(current_account) + @export = Export.new(current_account) + @backups = current_user.backups + end + + def create + authorize :backup, :create? + + backup = current_user.backups.create! + BackupWorker.perform_async(backup.id) + + redirect_to settings_export_path end end diff --git a/app/javascript/images/icon_file_download.svg b/app/javascript/images/icon_file_download.svg new file mode 100644 index 000000000..53e97e4f8 --- /dev/null +++ b/app/javascript/images/icon_file_download.svg @@ -0,0 +1,4 @@ +<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"> + <path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/> + <path d="M0 0h24v24H0z" fill="none"/> +</svg> \ No newline at end of file diff --git a/app/javascript/images/mailer/icon_file_download.png b/app/javascript/images/mailer/icon_file_download.png new file mode 100644 index 000000000..8a6a8673b --- /dev/null +++ b/app/javascript/images/mailer/icon_file_download.png Binary files differdiff --git a/app/mailers/user_mailer.rb b/app/mailers/user_mailer.rb index 2fc9caba3..9848c34a2 100644 --- a/app/mailers/user_mailer.rb +++ b/app/mailers/user_mailer.rb @@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject') end end + + def backup_ready(user, backup) + @resource = user + @instance = Rails.configuration.x.local_domain + @backup = backup + + return if @resource.disabled? + + I18n.with_locale(@resource.locale || I18n.default_locale) do + mail to: @resource.email, subject: I18n.t('user_mailer.backup_ready.subject') + end + end end diff --git a/app/models/backup.rb b/app/models/backup.rb new file mode 100644 index 000000000..5a7e6a14d --- /dev/null +++ b/app/models/backup.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: backups +# +# id :integer not null, primary key +# user_id :integer +# dump_file_name :string +# dump_content_type :string +# dump_file_size :integer +# dump_updated_at :datetime +# processed :boolean default(FALSE), not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class Backup < ApplicationRecord + belongs_to :user, inverse_of: :backups + + has_attached_file :dump + do_not_validate_attachment_file_type :dump +end diff --git a/app/models/status.rb b/app/models/status.rb index 8186f4784..f806a59fc 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -76,7 +76,7 @@ class Status < ApplicationRecord scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) } - cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account + cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account delegate :domain, to: :account, prefix: true diff --git a/app/models/user.rb b/app/models/user.rb index fd153912e..fcd574f8b 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -60,6 +60,7 @@ class User < ApplicationRecord accepts_nested_attributes_for :account has_many :applications, class_name: 'Doorkeeper::Application', as: :owner + has_many :backups, inverse_of: :user validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale? validates_with BlacklistedEmailValidator, if: :email_changed? diff --git a/app/policies/application_policy.rb b/app/policies/application_policy.rb index 3e617001f..d1de5e81a 100644 --- a/app/policies/application_policy.rb +++ b/app/policies/application_policy.rb @@ -15,4 +15,8 @@ class ApplicationPolicy def current_user current_account&.user end + + def user_signed_in? + !current_user.nil? + end end diff --git a/app/policies/backup_policy.rb b/app/policies/backup_policy.rb new file mode 100644 index 000000000..0ef89a8d0 --- /dev/null +++ b/app/policies/backup_policy.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class BackupPolicy < ApplicationPolicy + MIN_AGE = 1.week + + def create? + user_signed_in? && current_user.backups.where('created_at >= ?', MIN_AGE.ago).count.zero? + end +end diff --git a/app/serializers/activitypub/collection_serializer.rb b/app/serializers/activitypub/collection_serializer.rb index 9832133fc..d43af3f8e 100644 --- a/app/serializers/activitypub/collection_serializer.rb +++ b/app/serializers/activitypub/collection_serializer.rb @@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer attribute :part_of, if: -> { object.part_of.present? } has_one :first, if: -> { object.first.present? } - has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? } - has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? } + has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? } + has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? } def type if page? diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb new file mode 100644 index 000000000..fadc24a82 --- /dev/null +++ b/app/services/backup_service.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +require 'rubygems/package' + +class BackupService < BaseService + attr_reader :account, :backup, :collection + + def call(backup) + @backup = backup + @account = backup.user.account + + build_json! + build_archive! + end + + private + + def build_json! + @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer) + + account.statuses.with_includes.find_in_batches do |statuses| + statuses.each do |status| + item = serialize(status, ActivityPub::ActivitySerializer) + item.delete(:'@context') + + unless item[:type] == 'Announce' || item[:object][:attachment].blank? + item[:object][:attachment].each do |attachment| + attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '') + end + end + + @collection[:orderedItems] << item + end + + GC.start + end + end + + def build_archive! + tmp_file = Tempfile.new(%w(archive .tar.gz)) + + File.open(tmp_file, 'wb') do |file| + Zlib::GzipWriter.wrap(file) do |gz| + Gem::Package::TarWriter.new(gz) do |tar| + dump_media_attachments!(tar) + dump_outbox!(tar) + dump_actor!(tar) + end + end + end + + archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz' + + @backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) + @backup.processed = true + @backup.save! + ensure + tmp_file.close + tmp_file.unlink + end + + def dump_media_attachments!(tar) + MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments| + media_attachments.each do |m| + download_to_tar(tar, m.file, m.file.path) + end + + GC.start + end + end + + def dump_outbox!(tar) + json = Oj.dump(collection) + + tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io| + io.write(json) + end + end + + def dump_actor!(tar) + actor = serialize(account, ActivityPub::ActorSerializer) + + actor[:icon][:url] = 'avatar' + File.extname(actor[:icon][:url]) if actor[:icon] + actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image] + + download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists? + download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists? + + json = Oj.dump(actor) + + tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io| + io.write(json) + end + + tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io| + io.write(account.private_key) + end + end + + def collection_presenter + ActivityPub::CollectionPresenter.new( + id: account_outbox_url(account), + type: :ordered, + size: account.statuses_count, + items: [] + ) + end + + def serialize(object, serializer) + ActiveModelSerializers::SerializableResource.new( + object, + serializer: serializer, + adapter: ActivityPub::Adapter + ).as_json + end + + CHUNK_SIZE = 1.megabyte + + def download_to_tar(tar, attachment, filename) + adapter = Paperclip.io_adapters.for(attachment) + + tar.add_file_simple(filename, 0o444, adapter.size) do |io| + while (buffer = adapter.read(CHUNK_SIZE)) + io.write(buffer) + end + end + end +end diff --git a/app/views/settings/exports/show.html.haml b/app/views/settings/exports/show.html.haml index e0df1c480..89d768d3f 100644 --- a/app/views/settings/exports/show.html.haml +++ b/app/views/settings/exports/show.html.haml @@ -20,3 +20,26 @@ %th= t('exports.mutes') %td= @export.total_mutes %td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv) + +%p.muted-hint= t('exports.archive_takeout.hint_html') + +- if policy(:backup).create? + %p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post + +- unless @backups.empty? + .table-wrapper + %table.table + %thead + %tr + %th= t('exports.archive_takeout.date') + %th= t('exports.archive_takeout.size') + %th + %tbody + - @backups.each do |backup| + %tr + %td= l backup.created_at + - if backup.processed? + %td= number_to_human_size backup.dump_file_size + %td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url + - else + %td{ colspan: 2 }= t('exports.archive_takeout.in_progress') diff --git a/app/views/user_mailer/backup_ready.html.haml b/app/views/user_mailer/backup_ready.html.haml new file mode 100644 index 000000000..d5a4b8b48 --- /dev/null +++ b/app/views/user_mailer/backup_ready.html.haml @@ -0,0 +1,59 @@ +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell.hero + .email-row + .col-6 + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.text-center.padded + %table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td + = image_tag full_pack_url('icon_file_download.png'), alt: '' + + %h1= t 'user_mailer.backup_ready.title' + +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell.content-start + .email-row + .col-6 + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.text-center + %p= t 'user_mailer.backup_ready.explanation' + +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.button-cell + %table.button{ align: 'center', cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.button-primary + = link_to full_asset_url(@backup.dump.url) do + %span= t 'exports.archive_takeout.download' diff --git a/app/views/user_mailer/backup_ready.text.erb b/app/views/user_mailer/backup_ready.text.erb new file mode 100644 index 000000000..eb89e7d74 --- /dev/null +++ b/app/views/user_mailer/backup_ready.text.erb @@ -0,0 +1,7 @@ +<%= t 'user_mailer.backup_ready.title' %> + +=== + +<%= t 'user_mailer.backup_ready.explanation' %> + +=> <%= full_asset_url(@backup.dump.url) %> diff --git a/app/workers/backup_worker.rb b/app/workers/backup_worker.rb new file mode 100644 index 000000000..ec6db4e9e --- /dev/null +++ b/app/workers/backup_worker.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class BackupWorker + include Sidekiq::Worker + + sidekiq_options queue: 'pull' + + def perform(backup_id) + backup = Backup.find(backup_id) + user = backup.user + + BackupService.new.call(backup) + + user.backups.where.not(id: backup.id).destroy_all + UserMailer.backup_ready(user, backup).deliver_later + end +end diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb new file mode 100644 index 000000000..7a9d4f894 --- /dev/null +++ b/app/workers/scheduler/backup_cleanup_scheduler.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true +require 'sidekiq-scheduler' + +class Scheduler::BackupCleanupScheduler + include Sidekiq::Worker + + def perform + old_backups.find_each(&:destroy!) + end + + private + + def old_backups + Backup.where('created_at < ?', 7.days.ago) + end +end diff --git a/config/locales/en.yml b/config/locales/en.yml index 5cd3b08cf..b9dd5bd51 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -421,6 +421,13 @@ en: title: This page is not correct noscript_html: To use the Mastodon web application, please enable JavaScript. Alternatively, try one of the <a href="https://github.com/tootsuite/documentation/blob/master/Using-Mastodon/Apps.md">native apps</a> for Mastodon for your platform. exports: + archive_takeout: + date: Date + download: Download your archive + hint_html: You can request an archive of your <strong>toots and uploaded media</strong>. The exported data will be in ActivityPub format, readable by any compliant software. + in_progress: Compiling your archive... + request: Request your archive + size: Size blocks: You block csv: CSV follows: You follow @@ -733,6 +740,10 @@ en: setup: Set up wrong_code: The entered code was invalid! Are server time and device time correct? user_mailer: + backup_ready: + explanation: You requested a full backup of your Mastodon account. It's now ready for download! + subject: Your archive is ready for download + title: Archive takeout welcome: edit_profile_action: Setup profile edit_profile_step: You can customize your profile by uploading an avatar, header, changing your display name and more. If you’d like to review new followers before they’re allowed to follow you, you can lock your account. diff --git a/config/routes.rb b/config/routes.rb index 9f541200a..7ed2d61f1 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -76,7 +76,7 @@ Rails.application.routes.draw do resource :notifications, only: [:show, :update] resource :import, only: [:show, :create] - resource :export, only: [:show] + resource :export, only: [:show, :create] namespace :exports, constraints: { format: :csv } do resources :follows, only: :index, controller: :following_accounts resources :blocks, only: :index, controller: :blocked_accounts diff --git a/config/sidekiq.yml b/config/sidekiq.yml index bfe29b8f8..244e9ea48 100644 --- a/config/sidekiq.yml +++ b/config/sidekiq.yml @@ -30,3 +30,6 @@ email_scheduler: cron: '0 10 * * 2' class: Scheduler::EmailScheduler + backup_cleanup_scheduler: + cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *' + class: Scheduler::BackupCleanupScheduler diff --git a/db/migrate/20180211015820_create_backups.rb b/db/migrate/20180211015820_create_backups.rb new file mode 100644 index 000000000..9725a3e9f --- /dev/null +++ b/db/migrate/20180211015820_create_backups.rb @@ -0,0 +1,11 @@ +class CreateBackups < ActiveRecord::Migration[5.1] + def change + create_table :backups do |t| + t.references :user, foreign_key: { on_delete: :nullify } + t.attachment :dump + t.boolean :processed, null: false, default: false + + t.timestamps + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 281110124..213fbc8d9 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20180206000000) do +ActiveRecord::Schema.define(version: 20180211015820) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -92,6 +92,18 @@ ActiveRecord::Schema.define(version: 20180206000000) do t.index ["target_type", "target_id"], name: "index_admin_action_logs_on_target_type_and_target_id" end + create_table "backups", force: :cascade do |t| + t.bigint "user_id" + t.string "dump_file_name" + t.string "dump_content_type" + t.integer "dump_file_size" + t.datetime "dump_updated_at" + t.boolean "processed", default: false, null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["user_id"], name: "index_backups_on_user_id" + end + create_table "blocks", force: :cascade do |t| t.datetime "created_at", null: false t.datetime "updated_at", null: false diff --git a/lib/paperclip/gif_transcoder.rb b/lib/paperclip/gif_transcoder.rb index 629e37581..62787983c 100644 --- a/lib/paperclip/gif_transcoder.rb +++ b/lib/paperclip/gif_transcoder.rb @@ -16,7 +16,7 @@ module Paperclip final_file = Paperclip::Transcoder.make(file, options, attachment) - attachment.instance.file_file_name = 'media.mp4' + attachment.instance.file_file_name = File.basename(attachment.instance.file_file_name, '.*') + '.mp4' attachment.instance.file_content_type = 'video/mp4' attachment.instance.type = MediaAttachment.types[:gifv] diff --git a/spec/fabricators/backup_fabricator.rb b/spec/fabricators/backup_fabricator.rb new file mode 100644 index 000000000..99a5bdcda --- /dev/null +++ b/spec/fabricators/backup_fabricator.rb @@ -0,0 +1,3 @@ +Fabricator(:backup) do + user +end diff --git a/spec/mailers/previews/user_mailer_preview.rb b/spec/mailers/previews/user_mailer_preview.rb index 8d2a9368d..d9cdb9264 100644 --- a/spec/mailers/previews/user_mailer_preview.rb +++ b/spec/mailers/previews/user_mailer_preview.rb @@ -34,4 +34,9 @@ class UserMailerPreview < ActionMailer::Preview def welcome UserMailer.welcome(User.first) end + + # Preview this email at http://localhost:3000/rails/mailers/user_mailer/backup_ready + def backup_ready + UserMailer.backup_ready(User.first, Backup.first) + end end diff --git a/spec/models/backup_spec.rb b/spec/models/backup_spec.rb new file mode 100644 index 000000000..fabcdc845 --- /dev/null +++ b/spec/models/backup_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe Backup, type: :model do + +end |