about summary refs log tree commit diff
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2017-10-08 17:34:34 +0200
committerGitHub <noreply@github.com>2017-10-08 17:34:34 +0200
commit0717d9b3e6904a4dcd5d2dc9e680cc5b21c50e51 (patch)
treefc95b8a715b8035231a6aa009bc82b3662ab236c
parent6e4046fc3f3973ba0b6994930a8b58726e507003 (diff)
Set snowflake IDs for backdated statuses (#5260)
- Rename Mastodon::TimestampIds into Mastodon::Snowflake for clarity
- Skip for statuses coming from inbox, aka delivered in real-time
- Skip for statuses that claim to be from the future
-rw-r--r--app/lib/activitypub/activity.rb7
-rw-r--r--app/lib/activitypub/activity/announce.rb3
-rw-r--r--app/lib/activitypub/activity/create.rb2
-rw-r--r--app/lib/ostatus/activity/base.rb5
-rw-r--r--app/lib/ostatus/activity/creation.rb2
-rw-r--r--app/lib/ostatus/activity/general.rb2
-rw-r--r--app/models/status.rb2
-rw-r--r--app/services/activitypub/process_collection_service.rb5
-rw-r--r--app/services/process_feed_service.rb6
-rw-r--r--app/workers/activitypub/processing_worker.rb2
-rw-r--r--app/workers/processing_worker.rb2
-rw-r--r--config/application.rb1
-rw-r--r--config/brakeman.ignore44
-rw-r--r--lib/mastodon/snowflake.rb (renamed from lib/mastodon/timestamp_ids.rb)33
-rw-r--r--lib/tasks/db.rake6
-rw-r--r--spec/services/activitypub/process_collection_service_spec.rb4
16 files changed, 83 insertions, 43 deletions
diff --git a/app/lib/activitypub/activity.rb b/app/lib/activitypub/activity.rb
index b06dd6194..9688f57a6 100644
--- a/app/lib/activitypub/activity.rb
+++ b/app/lib/activitypub/activity.rb
@@ -3,10 +3,11 @@
 class ActivityPub::Activity
   include JsonLdHelper
 
-  def initialize(json, account)
+  def initialize(json, account, options = {})
     @json    = json
     @account = account
     @object  = @json['object']
+    @options = options
   end
 
   def perform
@@ -14,9 +15,9 @@ class ActivityPub::Activity
   end
 
   class << self
-    def factory(json, account)
+    def factory(json, account, options = {})
       @json = json
-      klass&.new(json, account)
+      klass&.new(json, account, options)
     end
 
     private
diff --git a/app/lib/activitypub/activity/announce.rb b/app/lib/activitypub/activity/announce.rb
index 1cf844281..b84098933 100644
--- a/app/lib/activitypub/activity/announce.rb
+++ b/app/lib/activitypub/activity/announce.rb
@@ -15,8 +15,9 @@ class ActivityPub::Activity::Announce < ActivityPub::Activity
       account: @account,
       reblog: original_status,
       uri: @json['id'],
-      created_at: @json['published'] || Time.now.utc
+      created_at: @options[:override_timestamps] ? nil : @json['published']
     )
+
     distribute(status)
     status
   end
diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb
index 9421a0aa7..d6e9bc1de 100644
--- a/app/lib/activitypub/activity/create.rb
+++ b/app/lib/activitypub/activity/create.rb
@@ -43,7 +43,7 @@ class ActivityPub::Activity::Create < ActivityPub::Activity
       text: text_from_content || '',
       language: language_from_content,
       spoiler_text: @object['summary'] || '',
-      created_at: @object['published'] || Time.now.utc,
+      created_at: @options[:override_timestamps] ? nil : @object['published'],
       reply: @object['inReplyTo'].present?,
       sensitive: @object['sensitive'] || false,
       visibility: visibility_from_audience,
diff --git a/app/lib/ostatus/activity/base.rb b/app/lib/ostatus/activity/base.rb
index 039381397..8b27b124f 100644
--- a/app/lib/ostatus/activity/base.rb
+++ b/app/lib/ostatus/activity/base.rb
@@ -1,9 +1,10 @@
 # frozen_string_literal: true
 
 class OStatus::Activity::Base
-  def initialize(xml, account = nil)
-    @xml = xml
+  def initialize(xml, account = nil, options = {})
+    @xml     = xml
     @account = account
+    @options = options
   end
 
   def status?
diff --git a/app/lib/ostatus/activity/creation.rb b/app/lib/ostatus/activity/creation.rb
index 511c462d4..a1ab522e2 100644
--- a/app/lib/ostatus/activity/creation.rb
+++ b/app/lib/ostatus/activity/creation.rb
@@ -34,7 +34,7 @@ class OStatus::Activity::Creation < OStatus::Activity::Base
         reblog: cached_reblog,
         text: content,
         spoiler_text: content_warning,
-        created_at: published,
+        created_at: @options[:override_timestamps] ? nil : published,
         reply: thread?,
         language: content_language,
         visibility: visibility_scope,
diff --git a/app/lib/ostatus/activity/general.rb b/app/lib/ostatus/activity/general.rb
index b3bef9861..8a6aabc33 100644
--- a/app/lib/ostatus/activity/general.rb
+++ b/app/lib/ostatus/activity/general.rb
@@ -2,7 +2,7 @@
 
 class OStatus::Activity::General < OStatus::Activity::Base
   def specialize
-    special_class&.new(@xml, @account)
+    special_class&.new(@xml, @account, @options)
   end
 
   private
diff --git a/app/models/status.rb b/app/models/status.rb
index ea4c097bf..0d249244f 100644
--- a/app/models/status.rb
+++ b/app/models/status.rb
@@ -136,6 +136,8 @@ class Status < ApplicationRecord
 
   after_create :store_uri, if: :local?
 
+  around_create Mastodon::Snowflake::Callbacks
+
   before_validation :prepare_contents, if: :local?
   before_validation :set_reblog
   before_validation :set_visibility
diff --git a/app/services/activitypub/process_collection_service.rb b/app/services/activitypub/process_collection_service.rb
index 59cb65c65..db4d1b4bc 100644
--- a/app/services/activitypub/process_collection_service.rb
+++ b/app/services/activitypub/process_collection_service.rb
@@ -3,9 +3,10 @@
 class ActivityPub::ProcessCollectionService < BaseService
   include JsonLdHelper
 
-  def call(body, account)
+  def call(body, account, options = {})
     @account = account
     @json    = Oj.load(body, mode: :strict)
+    @options = options
 
     return unless supported_context?
     return if different_actor? && verify_account!.nil?
@@ -38,7 +39,7 @@ class ActivityPub::ProcessCollectionService < BaseService
   end
 
   def process_item(item)
-    activity = ActivityPub::Activity.factory(item, @account)
+    activity = ActivityPub::Activity.factory(item, @account, @options)
     activity&.perform
   end
 
diff --git a/app/services/process_feed_service.rb b/app/services/process_feed_service.rb
index 2a5f1e2bc..60eff135e 100644
--- a/app/services/process_feed_service.rb
+++ b/app/services/process_feed_service.rb
@@ -1,7 +1,9 @@
 # frozen_string_literal: true
 
 class ProcessFeedService < BaseService
-  def call(body, account)
+  def call(body, account, options = {})
+    @options = options
+
     xml = Nokogiri::XML(body)
     xml.encoding = 'utf-8'
 
@@ -20,7 +22,7 @@ class ProcessFeedService < BaseService
   end
 
   def process_entry(xml, account)
-    activity = OStatus::Activity::General.new(xml, account)
+    activity = OStatus::Activity::General.new(xml, account, @options)
     activity.specialize&.perform if activity.status?
   rescue ActiveRecord::RecordInvalid => e
     Rails.logger.debug "Nothing was saved for #{activity.id} because: #{e}"
diff --git a/app/workers/activitypub/processing_worker.rb b/app/workers/activitypub/processing_worker.rb
index bb9adf64b..0e2e0eddd 100644
--- a/app/workers/activitypub/processing_worker.rb
+++ b/app/workers/activitypub/processing_worker.rb
@@ -6,6 +6,6 @@ class ActivityPub::ProcessingWorker
   sidekiq_options backtrace: true
 
   def perform(account_id, body)
-    ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id))
+    ActivityPub::ProcessCollectionService.new.call(body, Account.find(account_id), override_timestamps: true)
   end
 end
diff --git a/app/workers/processing_worker.rb b/app/workers/processing_worker.rb
index 5df404bcc..978c3aba2 100644
--- a/app/workers/processing_worker.rb
+++ b/app/workers/processing_worker.rb
@@ -6,6 +6,6 @@ class ProcessingWorker
   sidekiq_options backtrace: true
 
   def perform(account_id, body)
-    ProcessFeedService.new.call(body, Account.find(account_id))
+    ProcessFeedService.new.call(body, Account.find(account_id), override_timestamps: true)
   end
 end
diff --git a/config/application.rb b/config/application.rb
index b6ce74147..4860a08a1 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -9,6 +9,7 @@ Bundler.require(*Rails.groups)
 require_relative '../app/lib/exceptions'
 require_relative '../lib/paperclip/gif_transcoder'
 require_relative '../lib/paperclip/video_transcoder'
+require_relative '../lib/mastodon/snowflake'
 require_relative '../lib/mastodon/version'
 
 Dotenv::Railtie.load
diff --git a/config/brakeman.ignore b/config/brakeman.ignore
index 2a1bc1997..f198eebac 100644
--- a/config/brakeman.ignore
+++ b/config/brakeman.ignore
@@ -58,26 +58,6 @@
       "note": ""
     },
     {
-      "warning_type": "SQL Injection",
-      "warning_code": 0,
-      "fingerprint": "34efc76883080f8b1110a30c34ec4f903946ee56651aae46c62477f45d4fc412",
-      "check_name": "SQL",
-      "message": "Possible SQL injection",
-      "file": "lib/mastodon/timestamp_ids.rb",
-      "line": 63,
-      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
-      "code": "connection.execute(\"        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n        RETURNS bigint AS\\n        $$\\n          DECLARE\\n            time_part bigint;\\n            sequence_base bigint;\\n            tail bigint;\\n          BEGIN\\n            time_part := (\\n              -- Get the time in milliseconds\\n              ((date_part('epoch', now()) * 1000))::bigint\\n              -- And shift it over two bytes\\n              << 16);\\n\\n            sequence_base := (\\n              'x' ||\\n              -- Take the first two bytes (four hex characters)\\n              substr(\\n                -- Of the MD5 hash of the data we documented\\n                md5(table_name ||\\n                  '#{SecureRandom.hex(16)}' ||\\n                  time_part::text\\n                ),\\n                1, 4\\n              )\\n            -- And turn it into a bigint\\n            )::bit(16)::bigint;\\n\\n            -- Finally, add our sequence number to our base, and chop\\n            -- it to the last two bytes\\n            tail := (\\n              (sequence_base + nextval(table_name || '_id_seq'))\\n              & 65535);\\n\\n            -- Return the time part and the sequence part. OR appears\\n            -- faster here than addition, but they're equivalent:\\n            -- time_part has no trailing two bytes, and tail is only\\n            -- the last two bytes.\\n            RETURN time_part | tail;\\n          END\\n        $$ LANGUAGE plpgsql VOLATILE;\\n\")",
-      "render_path": null,
-      "location": {
-        "type": "method",
-        "class": "Mastodon::TimestampIds",
-        "method": "define_timestamp_id"
-      },
-      "user_input": "SecureRandom.hex(16)",
-      "confidence": "Medium",
-      "note": ""
-    },
-    {
       "warning_type": "Dynamic Render Path",
       "warning_code": 15,
       "fingerprint": "3b0a20b08aef13cf8cf865384fae0cfd3324d8200a83262bf4abbc8091b5fec5",
@@ -106,7 +86,7 @@
       "line": 3,
       "link": "http://brakemanscanner.org/docs/warning_types/dynamic_render_path/",
       "code": "render(action => \"stream_entries/#{Account.find_local!(params[:account_username]).statuses.find(params[:id]).stream_entry.activity_type.downcase}\", { Account.find_local!(params[:account_username]).statuses.find(params[:id]).stream_entry.activity_type.downcase.to_sym => Account.find_local!(params[:account_username]).statuses.find(params[:id]).stream_entry.activity, :centered => true })",
-      "render_path": [{"type":"controller","class":"StatusesController","method":"embed","line":35,"file":"app/controllers/statuses_controller.rb"}],
+      "render_path": [{"type":"controller","class":"StatusesController","method":"embed","line":41,"file":"app/controllers/statuses_controller.rb"}],
       "location": {
         "type": "template",
         "template": "stream_entries/embed"
@@ -154,6 +134,26 @@
       "note": ""
     },
     {
+      "warning_type": "SQL Injection",
+      "warning_code": 0,
+      "fingerprint": "9ccb9ba6a6947400e187d515e0bf719d22993d37cfc123c824d7fafa6caa9ac3",
+      "check_name": "SQL",
+      "message": "Possible SQL injection",
+      "file": "lib/mastodon/snowflake.rb",
+      "line": 86,
+      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
+      "code": "connection.execute(\"        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n        RETURNS bigint AS\\n        $$\\n          DECLARE\\n            time_part bigint;\\n            sequence_base bigint;\\n            tail bigint;\\n          BEGIN\\n            time_part := (\\n              -- Get the time in milliseconds\\n              ((date_part('epoch', now()) * 1000))::bigint\\n              -- And shift it over two bytes\\n              << 16);\\n\\n            sequence_base := (\\n              'x' ||\\n              -- Take the first two bytes (four hex characters)\\n              substr(\\n                -- Of the MD5 hash of the data we documented\\n                md5(table_name ||\\n                  '#{SecureRandom.hex(16)}' ||\\n                  time_part::text\\n                ),\\n                1, 4\\n              )\\n            -- And turn it into a bigint\\n            )::bit(16)::bigint;\\n\\n            -- Finally, add our sequence number to our base, and chop\\n            -- it to the last two bytes\\n            tail := (\\n              (sequence_base + nextval(table_name || '_id_seq'))\\n              & 65535);\\n\\n            -- Return the time part and the sequence part. OR appears\\n            -- faster here than addition, but they're equivalent:\\n            -- time_part has no trailing two bytes, and tail is only\\n            -- the last two bytes.\\n            RETURN time_part | tail;\\n          END\\n        $$ LANGUAGE plpgsql VOLATILE;\\n\")",
+      "render_path": null,
+      "location": {
+        "type": "method",
+        "class": "Mastodon::Snowflake",
+        "method": "define_timestamp_id"
+      },
+      "user_input": "SecureRandom.hex(16)",
+      "confidence": "Medium",
+      "note": ""
+    },
+    {
       "warning_type": "Dynamic Render Path",
       "warning_code": 15,
       "fingerprint": "9f31d941f3910dba2e9bfcd81aef4513249bd24c02d0f98e13ad44fdeeccd0e8",
@@ -269,6 +269,6 @@
       "note": ""
     }
   ],
-  "updated": "2017-10-06 03:27:46 +0200",
+  "updated": "2017-10-07 19:24:02 +0200",
   "brakeman_version": "4.0.1"
 }
diff --git a/lib/mastodon/timestamp_ids.rb b/lib/mastodon/snowflake.rb
index 3b048a50c..219e323d4 100644
--- a/lib/mastodon/timestamp_ids.rb
+++ b/lib/mastodon/snowflake.rb
@@ -1,8 +1,32 @@
 # frozen_string_literal: true
 
-module Mastodon::TimestampIds
+module Mastodon::Snowflake
   DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/
 
+  class Callbacks
+    def self.around_create(record)
+      now = Time.now.utc
+
+      if record.created_at.nil? || record.created_at >= now || record.created_at == record.updated_at
+        yield
+      else
+        record.id = Mastodon::Snowflake.id_at(record.created_at)
+        tries     = 0
+
+        begin
+          yield
+        rescue ActiveRecord::RecordNotUnique
+          raise if tries > 100
+
+          tries     += 1
+          record.id += rand(100)
+
+          retry
+        end
+      end
+    end
+  end
+
   class << self
     # Our ID will be composed of the following:
     # 6 bytes (48 bits) of millisecond-level timestamp
@@ -114,6 +138,13 @@ module Mastodon::TimestampIds
       end
     end
 
+    def id_at(timestamp)
+      id  = timestamp.to_i * 1000 + rand(1000)
+      id  = id << 16
+      id += rand(2**16)
+      id
+    end
+
     private
 
     def already_defined?
diff --git a/lib/tasks/db.rake b/lib/tasks/db.rake
index 6af6bb6fb..32039c31d 100644
--- a/lib/tasks/db.rake
+++ b/lib/tasks/db.rake
@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-require Rails.root.join('lib', 'mastodon', 'timestamp_ids')
+require_relative '../mastodon/snowflake'
 
 def each_schema_load_environment
   # If we're in development, also run this for the test environment.
@@ -63,13 +63,13 @@ namespace :db do
 
   task :define_timestamp_id do
     each_schema_load_environment do
-      Mastodon::TimestampIds.define_timestamp_id
+      Mastodon::Snowflake.define_timestamp_id
     end
   end
 
   task :ensure_id_sequences_exist do
     each_schema_load_environment do
-      Mastodon::TimestampIds.ensure_id_sequences_exist
+      Mastodon::Snowflake.ensure_id_sequences_exist
     end
   end
 end
diff --git a/spec/services/activitypub/process_collection_service_spec.rb b/spec/services/activitypub/process_collection_service_spec.rb
index c1cc22523..3cea970cf 100644
--- a/spec/services/activitypub/process_collection_service_spec.rb
+++ b/spec/services/activitypub/process_collection_service_spec.rb
@@ -28,7 +28,7 @@ RSpec.describe ActivityPub::ProcessCollectionService do
 
       it 'processes payload with sender if no signature exists' do
         expect_any_instance_of(ActivityPub::LinkedDataSignature).not_to receive(:verify_account!)
-        expect(ActivityPub::Activity).to receive(:factory).with(instance_of(Hash), forwarder)
+        expect(ActivityPub::Activity).to receive(:factory).with(instance_of(Hash), forwarder, instance_of(Hash))
 
         subject.call(json, forwarder)
       end
@@ -37,7 +37,7 @@ RSpec.describe ActivityPub::ProcessCollectionService do
         payload['signature'] = {'type' => 'RsaSignature2017'}
 
         expect_any_instance_of(ActivityPub::LinkedDataSignature).to receive(:verify_account!).and_return(actor)
-        expect(ActivityPub::Activity).to receive(:factory).with(instance_of(Hash), actor)
+        expect(ActivityPub::Activity).to receive(:factory).with(instance_of(Hash), actor, instance_of(Hash))
 
         subject.call(json, forwarder)
       end