diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2018-02-21 23:21:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-02-21 23:21:32 +0100 |
commit | 61ed133fea80041b354c78b043cec72dd8644101 (patch) | |
tree | fa3c4aec521f5a0004a5d2277b1421fd983256d1 /app | |
parent | c1e77b56a92fc075f000af9c263c72ba6bdbe5f7 (diff) |
Account archive download (#6460)
* Fix #201: Account archive download * Export actor and private key in the archive * Optimize BackupService - Add conversation to cached associations of status, because somehow it was forgotten and is source of N+1 queries - Explicitly call GC between batches of records being fetched (Model class allocations are the worst offender) - Stream media files into the tar in 1MB chunks (Do not allocate media file (up to 8MB) as string into memory) - Use #bytesize instead of #size to calculate file size for JSON (Fix FileOverflow error) - Segment media into subfolders by status ID because apparently GIF-to-MP4 media are all named "media.mp4" for some reason * Keep uniquely generated filename in Paperclip::GifTranscoder * Ensure dumped files do not overwrite each other by maintaing directory partitions * Give tar archives a good name * Add scheduler to remove week-old backups * Fix code style issue
Diffstat (limited to 'app')
-rw-r--r-- | app/controllers/settings/exports_controller.rb | 14 | ||||
-rw-r--r-- | app/javascript/images/icon_file_download.svg | 4 | ||||
-rw-r--r-- | app/javascript/images/mailer/icon_file_download.png | bin | 0 -> 271 bytes | |||
-rw-r--r-- | app/mailers/user_mailer.rb | 12 | ||||
-rw-r--r-- | app/models/backup.rb | 22 | ||||
-rw-r--r-- | app/models/status.rb | 2 | ||||
-rw-r--r-- | app/models/user.rb | 1 | ||||
-rw-r--r-- | app/policies/application_policy.rb | 4 | ||||
-rw-r--r-- | app/policies/backup_policy.rb | 9 | ||||
-rw-r--r-- | app/serializers/activitypub/collection_serializer.rb | 4 | ||||
-rw-r--r-- | app/services/backup_service.rb | 128 | ||||
-rw-r--r-- | app/views/settings/exports/show.html.haml | 23 | ||||
-rw-r--r-- | app/views/user_mailer/backup_ready.html.haml | 59 | ||||
-rw-r--r-- | app/views/user_mailer/backup_ready.text.erb | 7 | ||||
-rw-r--r-- | app/workers/backup_worker.rb | 17 | ||||
-rw-r--r-- | app/workers/scheduler/backup_cleanup_scheduler.rb | 16 |
16 files changed, 318 insertions, 4 deletions
diff --git a/app/controllers/settings/exports_controller.rb b/app/controllers/settings/exports_controller.rb index ae62f00c1..869e11d3b 100644 --- a/app/controllers/settings/exports_controller.rb +++ b/app/controllers/settings/exports_controller.rb @@ -1,11 +1,23 @@ # frozen_string_literal: true class Settings::ExportsController < ApplicationController + include Authorization + layout 'admin' before_action :authenticate_user! def show - @export = Export.new(current_account) + @export = Export.new(current_account) + @backups = current_user.backups + end + + def create + authorize :backup, :create? + + backup = current_user.backups.create! + BackupWorker.perform_async(backup.id) + + redirect_to settings_export_path end end diff --git a/app/javascript/images/icon_file_download.svg b/app/javascript/images/icon_file_download.svg new file mode 100644 index 000000000..53e97e4f8 --- /dev/null +++ b/app/javascript/images/icon_file_download.svg @@ -0,0 +1,4 @@ +<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg"> + <path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/> + <path d="M0 0h24v24H0z" fill="none"/> +</svg> \ No newline at end of file diff --git a/app/javascript/images/mailer/icon_file_download.png b/app/javascript/images/mailer/icon_file_download.png new file mode 100644 index 000000000..8a6a8673b --- /dev/null +++ b/app/javascript/images/mailer/icon_file_download.png Binary files differdiff --git a/app/mailers/user_mailer.rb b/app/mailers/user_mailer.rb index 2fc9caba3..9848c34a2 100644 --- a/app/mailers/user_mailer.rb +++ b/app/mailers/user_mailer.rb @@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject') end end + + def backup_ready(user, backup) + @resource = user + @instance = Rails.configuration.x.local_domain + @backup = backup + + return if @resource.disabled? + + I18n.with_locale(@resource.locale || I18n.default_locale) do + mail to: @resource.email, subject: I18n.t('user_mailer.backup_ready.subject') + end + end end diff --git a/app/models/backup.rb b/app/models/backup.rb new file mode 100644 index 000000000..5a7e6a14d --- /dev/null +++ b/app/models/backup.rb @@ -0,0 +1,22 @@ +# frozen_string_literal: true +# == Schema Information +# +# Table name: backups +# +# id :integer not null, primary key +# user_id :integer +# dump_file_name :string +# dump_content_type :string +# dump_file_size :integer +# dump_updated_at :datetime +# processed :boolean default(FALSE), not null +# created_at :datetime not null +# updated_at :datetime not null +# + +class Backup < ApplicationRecord + belongs_to :user, inverse_of: :backups + + has_attached_file :dump + do_not_validate_attachment_file_type :dump +end diff --git a/app/models/status.rb b/app/models/status.rb index 8186f4784..f806a59fc 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -76,7 +76,7 @@ class Status < ApplicationRecord scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) } - cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account + cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account delegate :domain, to: :account, prefix: true diff --git a/app/models/user.rb b/app/models/user.rb index fd153912e..fcd574f8b 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -60,6 +60,7 @@ class User < ApplicationRecord accepts_nested_attributes_for :account has_many :applications, class_name: 'Doorkeeper::Application', as: :owner + has_many :backups, inverse_of: :user validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale? validates_with BlacklistedEmailValidator, if: :email_changed? diff --git a/app/policies/application_policy.rb b/app/policies/application_policy.rb index 3e617001f..d1de5e81a 100644 --- a/app/policies/application_policy.rb +++ b/app/policies/application_policy.rb @@ -15,4 +15,8 @@ class ApplicationPolicy def current_user current_account&.user end + + def user_signed_in? + !current_user.nil? + end end diff --git a/app/policies/backup_policy.rb b/app/policies/backup_policy.rb new file mode 100644 index 000000000..0ef89a8d0 --- /dev/null +++ b/app/policies/backup_policy.rb @@ -0,0 +1,9 @@ +# frozen_string_literal: true + +class BackupPolicy < ApplicationPolicy + MIN_AGE = 1.week + + def create? + user_signed_in? && current_user.backups.where('created_at >= ?', MIN_AGE.ago).count.zero? + end +end diff --git a/app/serializers/activitypub/collection_serializer.rb b/app/serializers/activitypub/collection_serializer.rb index 9832133fc..d43af3f8e 100644 --- a/app/serializers/activitypub/collection_serializer.rb +++ b/app/serializers/activitypub/collection_serializer.rb @@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer attribute :part_of, if: -> { object.part_of.present? } has_one :first, if: -> { object.first.present? } - has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? } - has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? } + has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? } + has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? } def type if page? diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb new file mode 100644 index 000000000..fadc24a82 --- /dev/null +++ b/app/services/backup_service.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +require 'rubygems/package' + +class BackupService < BaseService + attr_reader :account, :backup, :collection + + def call(backup) + @backup = backup + @account = backup.user.account + + build_json! + build_archive! + end + + private + + def build_json! + @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer) + + account.statuses.with_includes.find_in_batches do |statuses| + statuses.each do |status| + item = serialize(status, ActivityPub::ActivitySerializer) + item.delete(:'@context') + + unless item[:type] == 'Announce' || item[:object][:attachment].blank? + item[:object][:attachment].each do |attachment| + attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '') + end + end + + @collection[:orderedItems] << item + end + + GC.start + end + end + + def build_archive! + tmp_file = Tempfile.new(%w(archive .tar.gz)) + + File.open(tmp_file, 'wb') do |file| + Zlib::GzipWriter.wrap(file) do |gz| + Gem::Package::TarWriter.new(gz) do |tar| + dump_media_attachments!(tar) + dump_outbox!(tar) + dump_actor!(tar) + end + end + end + + archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz' + + @backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) + @backup.processed = true + @backup.save! + ensure + tmp_file.close + tmp_file.unlink + end + + def dump_media_attachments!(tar) + MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments| + media_attachments.each do |m| + download_to_tar(tar, m.file, m.file.path) + end + + GC.start + end + end + + def dump_outbox!(tar) + json = Oj.dump(collection) + + tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io| + io.write(json) + end + end + + def dump_actor!(tar) + actor = serialize(account, ActivityPub::ActorSerializer) + + actor[:icon][:url] = 'avatar' + File.extname(actor[:icon][:url]) if actor[:icon] + actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image] + + download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists? + download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists? + + json = Oj.dump(actor) + + tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io| + io.write(json) + end + + tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io| + io.write(account.private_key) + end + end + + def collection_presenter + ActivityPub::CollectionPresenter.new( + id: account_outbox_url(account), + type: :ordered, + size: account.statuses_count, + items: [] + ) + end + + def serialize(object, serializer) + ActiveModelSerializers::SerializableResource.new( + object, + serializer: serializer, + adapter: ActivityPub::Adapter + ).as_json + end + + CHUNK_SIZE = 1.megabyte + + def download_to_tar(tar, attachment, filename) + adapter = Paperclip.io_adapters.for(attachment) + + tar.add_file_simple(filename, 0o444, adapter.size) do |io| + while (buffer = adapter.read(CHUNK_SIZE)) + io.write(buffer) + end + end + end +end diff --git a/app/views/settings/exports/show.html.haml b/app/views/settings/exports/show.html.haml index e0df1c480..89d768d3f 100644 --- a/app/views/settings/exports/show.html.haml +++ b/app/views/settings/exports/show.html.haml @@ -20,3 +20,26 @@ %th= t('exports.mutes') %td= @export.total_mutes %td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv) + +%p.muted-hint= t('exports.archive_takeout.hint_html') + +- if policy(:backup).create? + %p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post + +- unless @backups.empty? + .table-wrapper + %table.table + %thead + %tr + %th= t('exports.archive_takeout.date') + %th= t('exports.archive_takeout.size') + %th + %tbody + - @backups.each do |backup| + %tr + %td= l backup.created_at + - if backup.processed? + %td= number_to_human_size backup.dump_file_size + %td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url + - else + %td{ colspan: 2 }= t('exports.archive_takeout.in_progress') diff --git a/app/views/user_mailer/backup_ready.html.haml b/app/views/user_mailer/backup_ready.html.haml new file mode 100644 index 000000000..d5a4b8b48 --- /dev/null +++ b/app/views/user_mailer/backup_ready.html.haml @@ -0,0 +1,59 @@ +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell.hero + .email-row + .col-6 + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.text-center.padded + %table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td + = image_tag full_pack_url('icon_file_download.png'), alt: '' + + %h1= t 'user_mailer.backup_ready.title' + +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell.content-start + .email-row + .col-6 + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.text-center + %p= t 'user_mailer.backup_ready.explanation' + +%table.email-table{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.email-body + .email-container + %table.content-section{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.content-cell + %table.column{ cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.column-cell.button-cell + %table.button{ align: 'center', cellspacing: 0, cellpadding: 0 } + %tbody + %tr + %td.button-primary + = link_to full_asset_url(@backup.dump.url) do + %span= t 'exports.archive_takeout.download' diff --git a/app/views/user_mailer/backup_ready.text.erb b/app/views/user_mailer/backup_ready.text.erb new file mode 100644 index 000000000..eb89e7d74 --- /dev/null +++ b/app/views/user_mailer/backup_ready.text.erb @@ -0,0 +1,7 @@ +<%= t 'user_mailer.backup_ready.title' %> + +=== + +<%= t 'user_mailer.backup_ready.explanation' %> + +=> <%= full_asset_url(@backup.dump.url) %> diff --git a/app/workers/backup_worker.rb b/app/workers/backup_worker.rb new file mode 100644 index 000000000..ec6db4e9e --- /dev/null +++ b/app/workers/backup_worker.rb @@ -0,0 +1,17 @@ +# frozen_string_literal: true + +class BackupWorker + include Sidekiq::Worker + + sidekiq_options queue: 'pull' + + def perform(backup_id) + backup = Backup.find(backup_id) + user = backup.user + + BackupService.new.call(backup) + + user.backups.where.not(id: backup.id).destroy_all + UserMailer.backup_ready(user, backup).deliver_later + end +end diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb new file mode 100644 index 000000000..7a9d4f894 --- /dev/null +++ b/app/workers/scheduler/backup_cleanup_scheduler.rb @@ -0,0 +1,16 @@ +# frozen_string_literal: true +require 'sidekiq-scheduler' + +class Scheduler::BackupCleanupScheduler + include Sidekiq::Worker + + def perform + old_backups.find_each(&:destroy!) + end + + private + + def old_backups + Backup.where('created_at < ?', 7.days.ago) + end +end |