about summary refs log tree commit diff
path: root/app
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2018-02-21 23:21:32 +0100
committer GitHub <noreply@github.com> 2018-02-21 23:21:32 +0100
commit 61ed133fea80041b354c78b043cec72dd8644101 (patch)
tree fa3c4aec521f5a0004a5d2277b1421fd983256d1 /app
parent c1e77b56a92fc075f000af9c263c72ba6bdbe5f7 (diff)
Account archive download (#6460)
* Fix #201: Account archive download

* Export actor and private key in the archive

* Optimize BackupService

- Add conversation to cached associations of status, because
  somehow it was forgotten and is source of N+1 queries
- Explicitly call GC between batches of records being fetched
  (Model class allocations are the worst offender)
- Stream media files into the tar in 1MB chunks
  (Do not allocate media file (up to 8MB) as string into memory)
- Use #bytesize instead of #size to calculate file size for JSON
  (Fix FileOverflow error)
- Segment media into subfolders by status ID because apparently
  GIF-to-MP4 media are all named "media.mp4" for some reason

* Keep uniquely generated filename in Paperclip::GifTranscoder

* Ensure dumped files do not overwrite each other by maintaining directory partitions

* Give tar archives a good name

* Add scheduler to remove week-old backups

* Fix code style issue
Diffstat (limited to 'app')
-rw-r--r-- app/controllers/settings/exports_controller.rb 14
-rw-r--r-- app/javascript/images/icon_file_download.svg 4
-rw-r--r-- app/javascript/images/mailer/icon_file_download.png bin 0 -> 271 bytes
-rw-r--r-- app/mailers/user_mailer.rb 12
-rw-r--r-- app/models/backup.rb 22
-rw-r--r-- app/models/status.rb 2
-rw-r--r-- app/models/user.rb 1
-rw-r--r-- app/policies/application_policy.rb 4
-rw-r--r-- app/policies/backup_policy.rb 9
-rw-r--r-- app/serializers/activitypub/collection_serializer.rb 4
-rw-r--r-- app/services/backup_service.rb 128
-rw-r--r-- app/views/settings/exports/show.html.haml 23
-rw-r--r-- app/views/user_mailer/backup_ready.html.haml 59
-rw-r--r-- app/views/user_mailer/backup_ready.text.erb 7
-rw-r--r-- app/workers/backup_worker.rb 17
-rw-r--r-- app/workers/scheduler/backup_cleanup_scheduler.rb 16
16 files changed, 318 insertions, 4 deletions
diff --git a/app/controllers/settings/exports_controller.rb b/app/controllers/settings/exports_controller.rb
index ae62f00c1..869e11d3b 100644
--- a/app/controllers/settings/exports_controller.rb
+++ b/app/controllers/settings/exports_controller.rb
@@ -1,11 +1,23 @@
 # frozen_string_literal: true
 
 class Settings::ExportsController < ApplicationController
+  include Authorization
+
   layout 'admin'
 
   before_action :authenticate_user!
 
   def show
-    @export = Export.new(current_account)
+    @export  = Export.new(current_account)
+    @backups = current_user.backups
+  end
+
+  def create
+    authorize :backup, :create?
+
+    backup = current_user.backups.create!
+    BackupWorker.perform_async(backup.id)
+
+    redirect_to settings_export_path
   end
 end
diff --git a/app/javascript/images/icon_file_download.svg b/app/javascript/images/icon_file_download.svg
new file mode 100644
index 000000000..53e97e4f8
--- /dev/null
+++ b/app/javascript/images/icon_file_download.svg
@@ -0,0 +1,4 @@
+<svg fill="#FFFFFF" height="24" viewBox="0 0 24 24" width="24" xmlns="http://www.w3.org/2000/svg">
+    <path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
+    <path d="M0 0h24v24H0z" fill="none"/>
+</svg>
\ No newline at end of file
diff --git a/app/javascript/images/mailer/icon_file_download.png b/app/javascript/images/mailer/icon_file_download.png
new file mode 100644
index 000000000..8a6a8673b
--- /dev/null
+++ b/app/javascript/images/mailer/icon_file_download.png
Binary files differ
diff --git a/app/mailers/user_mailer.rb b/app/mailers/user_mailer.rb
index 2fc9caba3..9848c34a2 100644
--- a/app/mailers/user_mailer.rb
+++ b/app/mailers/user_mailer.rb
@@ -66,4 +66,16 @@ class UserMailer < Devise::Mailer
       mail to: @resource.email, subject: I18n.t('user_mailer.welcome.subject')
     end
   end
+
+  def backup_ready(user, backup)
+    @resource = user
+    @instance = Rails.configuration.x.local_domain
+    @backup   = backup
+
+    return if @resource.disabled?
+
+    I18n.with_locale(@resource.locale || I18n.default_locale) do
+      mail to: @resource.email, subject: I18n.t('user_mailer.backup_ready.subject')
+    end
+  end
 end
diff --git a/app/models/backup.rb b/app/models/backup.rb
new file mode 100644
index 000000000..5a7e6a14d
--- /dev/null
+++ b/app/models/backup.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+# == Schema Information
+#
+# Table name: backups
+#
+#  id                :integer          not null, primary key
+#  user_id           :integer
+#  dump_file_name    :string
+#  dump_content_type :string
+#  dump_file_size    :integer
+#  dump_updated_at   :datetime
+#  processed         :boolean          default(FALSE), not null
+#  created_at        :datetime         not null
+#  updated_at        :datetime         not null
+#
+
+class Backup < ApplicationRecord
+  belongs_to :user, inverse_of: :backups
+
+  has_attached_file :dump
+  do_not_validate_attachment_file_type :dump
+end
diff --git a/app/models/status.rb b/app/models/status.rb
index 8186f4784..f806a59fc 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -76,7 +76,7 @@ class Status < ApplicationRecord
   scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) }
   scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) }
 
-  cache_associated :account, :application, :media_attachments, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, mentions: :account], thread: :account
+  cache_associated :account, :application, :media_attachments, :conversation, :tags, :stream_entry, mentions: :account, reblog: [:account, :application, :stream_entry, :tags, :media_attachments, :conversation, mentions: :account], thread: :account
 
   delegate :domain, to: :account, prefix: true
 
diff --git a/app/models/user.rb b/app/models/user.rb
index fd153912e..fcd574f8b 100644
--- a/app/models/user.rb
+++ b/app/models/user.rb
@@ -60,6 +60,7 @@ class User < ApplicationRecord
   accepts_nested_attributes_for :account
 
   has_many :applications, class_name: 'Doorkeeper::Application', as: :owner
+  has_many :backups, inverse_of: :user
 
   validates :locale, inclusion: I18n.available_locales.map(&:to_s), if: :locale?
   validates_with BlacklistedEmailValidator, if: :email_changed?
diff --git a/app/policies/application_policy.rb b/app/policies/application_policy.rb
index 3e617001f..d1de5e81a 100644
--- a/app/policies/application_policy.rb
+++ b/app/policies/application_policy.rb
@@ -15,4 +15,8 @@ class ApplicationPolicy
   def current_user
     current_account&.user
   end
+
+  def user_signed_in?
+    !current_user.nil?
+  end
 end
diff --git a/app/policies/backup_policy.rb b/app/policies/backup_policy.rb
new file mode 100644
index 000000000..0ef89a8d0
--- /dev/null
+++ b/app/policies/backup_policy.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+class BackupPolicy < ApplicationPolicy
+  MIN_AGE = 1.week
+
+  def create?
+    user_signed_in? && current_user.backups.where('created_at >= ?', MIN_AGE.ago).count.zero?
+  end
+end
diff --git a/app/serializers/activitypub/collection_serializer.rb b/app/serializers/activitypub/collection_serializer.rb
index 9832133fc..d43af3f8e 100644
--- a/app/serializers/activitypub/collection_serializer.rb
+++ b/app/serializers/activitypub/collection_serializer.rb
@@ -13,8 +13,8 @@ class ActivityPub::CollectionSerializer < ActiveModel::Serializer
   attribute :part_of, if: -> { object.part_of.present? }
 
   has_one :first, if: -> { object.first.present? }
-  has_many :items, key: :items, if: -> { (object.items.present? || page?) && !ordered? }
-  has_many :items, key: :ordered_items, if: -> { (object.items.present? || page?) && ordered? }
+  has_many :items, key: :items, if: -> { (!object.items.nil? || page?) && !ordered? }
+  has_many :items, key: :ordered_items, if: -> { (!object.items.nil? || page?) && ordered? }
 
   def type
     if page?
diff --git a/app/services/backup_service.rb b/app/services/backup_service.rb
new file mode 100644
index 000000000..fadc24a82
--- /dev/null
+++ b/app/services/backup_service.rb
@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+
+require 'rubygems/package'
+
+class BackupService < BaseService
+  attr_reader :account, :backup, :collection
+
+  def call(backup)
+    @backup  = backup
+    @account = backup.user.account
+
+    build_json!
+    build_archive!
+  end
+
+  private
+
+  def build_json!
+    @collection = serialize(collection_presenter, ActivityPub::CollectionSerializer)
+
+    account.statuses.with_includes.find_in_batches do |statuses|
+      statuses.each do |status|
+        item = serialize(status, ActivityPub::ActivitySerializer)
+        item.delete(:'@context')
+
+        unless item[:type] == 'Announce' || item[:object][:attachment].blank?
+          item[:object][:attachment].each do |attachment|
+            attachment[:url] = Addressable::URI.parse(attachment[:url]).path.gsub(/\A\/system\//, '')
+          end
+        end
+
+        @collection[:orderedItems] << item
+      end
+
+      GC.start
+    end
+  end
+
+  def build_archive!
+    tmp_file = Tempfile.new(%w(archive .tar.gz))
+
+    File.open(tmp_file, 'wb') do |file|
+      Zlib::GzipWriter.wrap(file) do |gz|
+        Gem::Package::TarWriter.new(gz) do |tar|
+          dump_media_attachments!(tar)
+          dump_outbox!(tar)
+          dump_actor!(tar)
+        end
+      end
+    end
+
+    archive_filename = ['archive', Time.now.utc.strftime('%Y%m%d%H%M%S'), SecureRandom.hex(2)].join('-') + '.tar.gz'
+
+    @backup.dump      = ActionDispatch::Http::UploadedFile.new(tempfile: tmp_file, filename: archive_filename)
+    @backup.processed = true
+    @backup.save!
+  ensure
+    tmp_file.close
+    tmp_file.unlink
+  end
+
+  def dump_media_attachments!(tar)
+    MediaAttachment.attached.where(account: account).find_in_batches do |media_attachments|
+      media_attachments.each do |m|
+        download_to_tar(tar, m.file, m.file.path)
+      end
+
+      GC.start
+    end
+  end
+
+  def dump_outbox!(tar)
+    json = Oj.dump(collection)
+
+    tar.add_file_simple('outbox.json', 0o444, json.bytesize) do |io|
+      io.write(json)
+    end
+  end
+
+  def dump_actor!(tar)
+    actor = serialize(account, ActivityPub::ActorSerializer)
+
+    actor[:icon][:url]  = 'avatar' + File.extname(actor[:icon][:url])  if actor[:icon]
+    actor[:image][:url] = 'header' + File.extname(actor[:image][:url]) if actor[:image]
+
+    download_to_tar(tar, account.avatar, 'avatar' + File.extname(account.avatar.path)) if account.avatar.exists?
+    download_to_tar(tar, account.header, 'header' + File.extname(account.header.path)) if account.header.exists?
+
+    json = Oj.dump(actor)
+
+    tar.add_file_simple('actor.json', 0o444, json.bytesize) do |io|
+      io.write(json)
+    end
+
+    tar.add_file_simple('key.pem', 0o444, account.private_key.bytesize) do |io|
+      io.write(account.private_key)
+    end
+  end
+
+  def collection_presenter
+    ActivityPub::CollectionPresenter.new(
+      id: account_outbox_url(account),
+      type: :ordered,
+      size: account.statuses_count,
+      items: []
+    )
+  end
+
+  def serialize(object, serializer)
+    ActiveModelSerializers::SerializableResource.new(
+      object,
+      serializer: serializer,
+      adapter: ActivityPub::Adapter
+    ).as_json
+  end
+
+  CHUNK_SIZE = 1.megabyte
+
+  def download_to_tar(tar, attachment, filename)
+    adapter = Paperclip.io_adapters.for(attachment)
+
+    tar.add_file_simple(filename, 0o444, adapter.size) do |io|
+      while (buffer = adapter.read(CHUNK_SIZE))
+        io.write(buffer)
+      end
+    end
+  end
+end
diff --git a/app/views/settings/exports/show.html.haml b/app/views/settings/exports/show.html.haml
index e0df1c480..89d768d3f 100644
--- a/app/views/settings/exports/show.html.haml
+++ b/app/views/settings/exports/show.html.haml
@@ -20,3 +20,26 @@
         %th= t('exports.mutes')
         %td= @export.total_mutes
         %td= table_link_to 'download', t('exports.csv'), settings_exports_mutes_path(format: :csv)
+
+%p.muted-hint= t('exports.archive_takeout.hint_html')
+
+- if policy(:backup).create?
+  %p= link_to t('exports.archive_takeout.request'), settings_export_path, class: 'button', method: :post
+
+- unless @backups.empty?
+  .table-wrapper
+    %table.table
+      %thead
+        %tr
+          %th= t('exports.archive_takeout.date')
+          %th= t('exports.archive_takeout.size')
+          %th
+      %tbody
+        - @backups.each do |backup|
+          %tr
+            %td= l backup.created_at
+            - if backup.processed?
+              %td= number_to_human_size backup.dump_file_size
+              %td= table_link_to 'download', t('exports.archive_takeout.download'), backup.dump.url
+            - else
+              %td{ colspan: 2 }= t('exports.archive_takeout.in_progress')
diff --git a/app/views/user_mailer/backup_ready.html.haml b/app/views/user_mailer/backup_ready.html.haml
new file mode 100644
index 000000000..d5a4b8b48
--- /dev/null
+++ b/app/views/user_mailer/backup_ready.html.haml
@@ -0,0 +1,59 @@
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+  %tbody
+    %tr
+      %td.email-body
+        .email-container
+          %table.content-section{ cellspacing: 0, cellpadding: 0 }
+            %tbody
+              %tr
+                %td.content-cell.hero
+                  .email-row
+                    .col-6
+                      %table.column{ cellspacing: 0, cellpadding: 0 }
+                        %tbody
+                          %tr
+                            %td.column-cell.text-center.padded
+                              %table.hero-icon{ align: 'center', cellspacing: 0, cellpadding: 0 }
+                                %tbody
+                                  %tr
+                                    %td
+                                      = image_tag full_pack_url('icon_file_download.png'), alt: ''
+
+                              %h1= t 'user_mailer.backup_ready.title'
+
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+  %tbody
+    %tr
+      %td.email-body
+        .email-container
+          %table.content-section{ cellspacing: 0, cellpadding: 0 }
+            %tbody
+              %tr
+                %td.content-cell.content-start
+                  .email-row
+                    .col-6
+                      %table.column{ cellspacing: 0, cellpadding: 0 }
+                        %tbody
+                          %tr
+                            %td.column-cell.text-center
+                              %p= t 'user_mailer.backup_ready.explanation'
+
+%table.email-table{ cellspacing: 0, cellpadding: 0 }
+  %tbody
+    %tr
+      %td.email-body
+        .email-container
+          %table.content-section{ cellspacing: 0, cellpadding: 0 }
+            %tbody
+              %tr
+                %td.content-cell
+                  %table.column{ cellspacing: 0, cellpadding: 0 }
+                    %tbody
+                      %tr
+                        %td.column-cell.button-cell
+                          %table.button{ align: 'center', cellspacing: 0, cellpadding: 0 }
+                            %tbody
+                              %tr
+                                %td.button-primary
+                                  = link_to full_asset_url(@backup.dump.url) do
+                                    %span= t 'exports.archive_takeout.download'
diff --git a/app/views/user_mailer/backup_ready.text.erb b/app/views/user_mailer/backup_ready.text.erb
new file mode 100644
index 000000000..eb89e7d74
--- /dev/null
+++ b/app/views/user_mailer/backup_ready.text.erb
@@ -0,0 +1,7 @@
+<%= t 'user_mailer.backup_ready.title' %>
+
+===
+
+<%= t 'user_mailer.backup_ready.explanation' %>
+
+=> <%= full_asset_url(@backup.dump.url) %>
diff --git a/app/workers/backup_worker.rb b/app/workers/backup_worker.rb
new file mode 100644
index 000000000..ec6db4e9e
--- /dev/null
+++ b/app/workers/backup_worker.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+class BackupWorker
+  include Sidekiq::Worker
+
+  sidekiq_options queue: 'pull'
+
+  def perform(backup_id)
+    backup = Backup.find(backup_id)
+    user   = backup.user
+
+    BackupService.new.call(backup)
+
+    user.backups.where.not(id: backup.id).destroy_all
+    UserMailer.backup_ready(user, backup).deliver_later
+  end
+end
diff --git a/app/workers/scheduler/backup_cleanup_scheduler.rb b/app/workers/scheduler/backup_cleanup_scheduler.rb
new file mode 100644
index 000000000..7a9d4f894
--- /dev/null
+++ b/app/workers/scheduler/backup_cleanup_scheduler.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+require 'sidekiq-scheduler'
+
+class Scheduler::BackupCleanupScheduler
+  include Sidekiq::Worker
+
+  def perform
+    old_backups.find_each(&:destroy!)
+  end
+
+  private
+
+  def old_backups
+    Backup.where('created_at < ?', 7.days.ago)
+  end
+end