about summary refs log tree commit diff
path: root/app/services/backup_service.rb
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2018-02-21 23:21:32 +0100
committerGitHub <noreply@github.com>2018-02-21 23:21:32 +0100
commit61ed133fea80041b354c78b043cec72dd8644101 (patch)
treefa3c4aec521f5a0004a5d2277b1421fd983256d1 /app/services/backup_service.rb
parentc1e77b56a92fc075f000af9c263c72ba6bdbe5f7 (diff)
Account archive download (#6460)
* Fix #201: Account archive download

* Export actor and private key in the archive

* Optimize BackupService

- Add conversation to cached associations of status, because
  somehow it was forgotten and is source of N+1 queries
- Explicitly call GC between batches of records being fetched
  (Model class allocations are the worst offender)
- Stream media files into the tar in 1MB chunks
  (Do not allocate media file (up to 8MB) as string into memory)
- Use #bytesize instead of #size to calculate file size for JSON
  (Fix FileOverflow error)
- Segment media into subfolders by status ID because apparently
  GIF-to-MP4 media are all named "media.mp4" for some reason

* Keep uniquely generated filename in Paperclip::GifTranscoder

* Ensure dumped files do not overwrite each other by maintaing directory partitions

* Give tar archives a good name

* Add scheduler to remove week-old backups

* Fix code style issue
Diffstat (limited to 'app/services/backup_service.rb')
-rw-r--r--app/services/backup_service.rb128
1 files changed, 128 insertions, 0 deletions
diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb
new file mode 100644
index 000000000..fadc24a82
--- /dev/null
+++ b/app/services/backup_service.rb
@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+
+require 'rubygems/package'
+
+class BackupService < BaseService
+  attr_reader :account, :backup, :collection
+
+  def call(backup)
+    @backup  = backup
+    @account = backup.user.account
+
+    build_json!
+    build_archive!
+  end
+
+  private
+
+  def build_json!
+    @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)
+
+    account.statuses.with_includes.find_in_batches do |statuses|
+      statuses.each do |status|
+        item = serialize(status, ActivityPub::ActivitySerializer)
+        item.delete(:'@context')
+
+        unless item[:type] == 'Announce' || item[:object][:attachment].blank?
+          item[:object][:attachment].each do |attachment|
+            attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '')
+          end
+        end
+
+        @collection[:orderedItems] << item
+      end
+
+      GC.start
+    end
+  end
+
+  def build_archive!
+    tmp_file = Tempfile.new(%w(archive .tar.gz))
+
+    File.open(tmp_file, 'wb') do |file|
+      Zlib::GzipWriter.wrap(file) do |gz|
+        Gem::Package::TarWriter.new(gz) do |tar|
+          dump_media_attachments!(tar)
+          dump_outbox!(tar)
+          dump_actor!(tar)
+        end
+      end
+    end
+
+    archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz'
+
+    @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
+    @backup.processed = true
+    @backup.save!
+  ensure
+    tmp_file.close
+    tmp_file.unlink
+  end
+
+  def dump_media_attachments!(tar)
+    MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments|
+      media_attachments.each do |m|
+        download_to_tar(tar, m.file, m.file.path)
+      end
+
+      GC.start
+    end
+  end
+
+  def dump_outbox!(tar)
+    json = Oj.dump(collection)
+
+    tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io|
+      io.write(json)
+    end
+  end
+
+  def dump_actor!(tar)
+    actor = serialize(account, ActivityPub::ActorSerializer)
+
+    actor[:icon][:url]  = 'avatar' + File.extname(actor[:icon][:url])  if actor[:icon]
+    actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image]
+
+    download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists?
+    download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists?
+
+    json = Oj.dump(actor)
+
+    tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io|
+      io.write(json)
+    end
+
+    tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io|
+      io.write(account.private_key)
+    end
+  end
+
+  def collection_presenter
+    ActivityPub::CollectionPresenter.new(
+      id: account_outbox_url(account),
+      type: :ordered,
+      size: account.statuses_count,
+      items: []
+    )
+  end
+
+  def serialize(object, serializer)
+    ActiveModelSerializers::SerializableResource.new(
+      object,
+      serializer: serializer,
+      adapter: ActivityPub::Adapter
+    ).as_json
+  end
+
+  CHUNK_SIZE = 1.megabyte
+
+  def download_to_tar(tar, attachment, filename)
+    adapter = Paperclip.io_adapters.for(attachment)
+
+    tar.add_file_simple(filename, 0o444, adapter.size) do |io|
+      while (buffer = adapter.read(CHUNK_SIZE))
+        io.write(buffer)
+      end
+    end
+  end
+end