From 61ed133fea80041b354c78b043cec72dd8644101 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 21 Feb 2018 23:21:32 +0100 Subject: Account archive download (#6460) * Fix #201: Account archive download * Export actor and private key in the archive * Optimize BackupService - Add conversation to cached associations of status, because somehow it was forgotten and is source of N+1 queries - Explicitly call GC between batches of records being fetched (Model class allocations are the worst offender) - Stream media files into the tar in 1MB chunks (Do not allocate media file (up to 8MB) as string into memory) - Use #bytesize instead of #size to calculate file size for JSON (Fix FileOverflow error) - Segment media into subfolders by status ID because apparently GIF-to-MP4 media are all named "media.mp4" for some reason * Keep uniquely generated filename in Paperclip::GifTranscoder * Ensure dumped files do not overwrite each other by maintaing directory partitions * Give tar archives a good name * Add scheduler to remove week-old backups * Fix code style issue --- app/services/backup_service.rb | 128 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 app/services/backup_service.rb (limited to 'app/services/backup_service.rb') diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb new file mode 100644 index 000000000..fadc24a82 --- /dev/null +++ b/app/services/backup_service.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +require 'rubygems/package' + +class BackupService < BaseService + attr_reader :account, :backup, :collection + + def call(backup) + @backup = backup + @account = backup.user.account + + build_json! + build_archive! + end + + private + + def build_json! + @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer) + + account.statuses.with_includes.find_in_batches do |statuses| + statuses.each do |status| + item = serialize(status, ActivityPub::ActivitySerializer) + item.delete(:'@context') + + unless item[:type] == 'Announce' || item[:object][:attachment].blank? + item[:object][:attachment].each do |attachment| + attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '') + end + end + + @collection[:orderedItems] << item + end + + GC.start + end + end + + def build_archive! + tmp_file = Tempfile.new(%w(archive .tar.gz)) + + File.open(tmp_file, 'wb') do |file| + Zlib::GzipWriter.wrap(file) do |gz| + Gem::Package::TarWriter.new(gz) do |tar| + dump_media_attachments!(tar) + dump_outbox!(tar) + dump_actor!(tar) + end + end + end + + archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz' + + @backup.dump = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename) + @backup.processed = true + @backup.save! + ensure + tmp_file.close + tmp_file.unlink + end + + def dump_media_attachments!(tar) + MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments| + media_attachments.each do |m| + download_to_tar(tar, m.file, m.file.path) + end + + GC.start + end + end + + def dump_outbox!(tar) + json = Oj.dump(collection) + + tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io| + io.write(json) + end + end + + def dump_actor!(tar) + actor = serialize(account, ActivityPub::ActorSerializer) + + actor[:icon][:url] = 'avatar' + File.extname(actor[:icon][:url]) if actor[:icon] + actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image] + + download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists? + download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists? + + json = Oj.dump(actor) + + tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io| + io.write(json) + end + + tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io| + io.write(account.private_key) + end + end + + def collection_presenter + ActivityPub::CollectionPresenter.new( + id: account_outbox_url(account), + type: :ordered, + size: account.statuses_count, + items: [] + ) + end + + def serialize(object, serializer) + ActiveModelSerializers::SerializableResource.new( + object, + serializer: serializer, + adapter: ActivityPub::Adapter + ).as_json + end + + CHUNK_SIZE = 1.megabyte + + def download_to_tar(tar, attachment, filename) + adapter = Paperclip.io_adapters.for(attachment) + + tar.add_file_simple(filename, 0o444, adapter.size) do |io| + while (buffer = adapter.read(CHUNK_SIZE)) + io.write(buffer) + end + end + end +end -- cgit