about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--config/brakeman.ignore42
-rw-r--r--lib/mastodon/timestamp_ids.rb201
-rw-r--r--lib/tasks/db.rake2
3 files changed, 125 insertions, 120 deletions
diff --git a/config/brakeman.ignore b/config/brakeman.ignore
index ed6e121d2..2a1bc1997 100644
--- a/config/brakeman.ignore
+++ b/config/brakeman.ignore
@@ -58,6 +58,26 @@
       "note": ""
     },
     {
+      "warning_type": "SQL Injection",
+      "warning_code": 0,
+      "fingerprint": "34efc76883080f8b1110a30c34ec4f903946ee56651aae46c62477f45d4fc412",
+      "check_name": "SQL",
+      "message": "Possible SQL injection",
+      "file": "lib/mastodon/timestamp_ids.rb",
+      "line": 63,
+      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
+      "code": "connection.execute(\"        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n        RETURNS bigint AS\\n        $$\\n          DECLARE\\n            time_part bigint;\\n            sequence_base bigint;\\n            tail bigint;\\n          BEGIN\\n            time_part := (\\n              -- Get the time in milliseconds\\n              ((date_part('epoch', now()) * 1000))::bigint\\n              -- And shift it over two bytes\\n              << 16);\\n\\n            sequence_base := (\\n              'x' ||\\n              -- Take the first two bytes (four hex characters)\\n              substr(\\n                -- Of the MD5 hash of the data we documented\\n                md5(table_name ||\\n                  '#{SecureRandom.hex(16)}' ||\\n                  time_part::text\\n                ),\\n                1, 4\\n              )\\n            -- And turn it into a bigint\\n            )::bit(16)::bigint;\\n\\n            -- Finally, add our sequence number to our base, and chop\\n            -- it to the last two bytes\\n            tail := (\\n              (sequence_base + nextval(table_name || '_id_seq'))\\n              & 65535);\\n\\n            -- Return the time part and the sequence part. OR appears\\n            -- faster here than addition, but they're equivalent:\\n            -- time_part has no trailing two bytes, and tail is only\\n            -- the last two bytes.\\n            RETURN time_part | tail;\\n          END\\n        $$ LANGUAGE plpgsql VOLATILE;\\n\")",
+      "render_path": null,
+      "location": {
+        "type": "method",
+        "class": "Mastodon::TimestampIds",
+        "method": "define_timestamp_id"
+      },
+      "user_input": "SecureRandom.hex(16)",
+      "confidence": "Medium",
+      "note": ""
+    },
+    {
       "warning_type": "Dynamic Render Path",
       "warning_code": 15,
       "fingerprint": "3b0a20b08aef13cf8cf865384fae0cfd3324d8200a83262bf4abbc8091b5fec5",
@@ -211,26 +231,6 @@
       "note": ""
     },
     {
-      "warning_type": "SQL Injection",
-      "warning_code": 0,
-      "fingerprint": "cd440d9d0bcb76225f4142030cec0bdec6ad119c537c108c9d514bf87bc34d29",
-      "check_name": "SQL",
-      "message": "Possible SQL injection",
-      "file": "lib/mastodon/timestamp_ids.rb",
-      "line": 69,
-      "link": "http://brakemanscanner.org/docs/warning_types/sql_injection/",
-      "code": "ActiveRecord::Base.connection.execute(\"          CREATE OR REPLACE FUNCTION timestamp_id(table_name text)\\n          RETURNS bigint AS\\n          $$\\n            DECLARE\\n              time_part bigint;\\n              sequence_base bigint;\\n              tail bigint;\\n            BEGIN\\n              -- Our ID will be composed of the following:\\n              -- 6 bytes (48 bits) of millisecond-level timestamp\\n              -- 2 bytes (16 bits) of sequence data\\n\\n              -- The 'sequence data' is intended to be unique within a\\n              -- given millisecond, yet obscure the 'serial number' of\\n              -- this row.\\n\\n              -- To do this, we hash the following data:\\n              -- * Table name (if provided, skipped if not)\\n              -- * Secret salt (should not be guessable)\\n              -- * Timestamp (again, millisecond-level granularity)\\n\\n              -- We then take the first two bytes of that value, and add\\n              -- the lowest two bytes of the table ID sequence number\\n              -- (`table_name`_id_seq). This means that even if we insert\\n              -- two rows at the same millisecond, they will have\\n              -- distinct 'sequence data' portions.\\n\\n              -- If this happens, and an attacker can see both such IDs,\\n              -- they can determine which of the two entries was inserted\\n              -- first, but not the total number of entries in the table\\n              -- (even mod 2**16).\\n\\n              -- The table name is included in the hash to ensure that\\n              -- different tables derive separate sequence bases so rows\\n              -- inserted in the same millisecond in different tables do\\n              -- not reveal the table ID sequence number for one another.\\n\\n              -- The secret salt is included in the hash to ensure that\\n              -- external users cannot derive the sequence base given the\\n              -- timestamp and table name, which would allow them to\\n              -- compute the table ID sequence number.\\n\\n              time_part := (\\n                -- Get the time in milliseconds\\n                ((date_part('epoch', now()) * 1000))::bigint\\n                -- And shift it over two bytes\\n                << 16);\\n\\n              sequence_base := (\\n                'x' ||\\n                -- Take the first two bytes (four hex characters)\\n                substr(\\n                  -- Of the MD5 hash of the data we documented\\n                  md5(table_name ||\\n                    '#{SecureRandom.hex(16)}' ||\\n                    time_part::text\\n                  ),\\n                  1, 4\\n                )\\n              -- And turn it into a bigint\\n              )::bit(16)::bigint;\\n\\n              -- Finally, add our sequence number to our base, and chop\\n              -- it to the last two bytes\\n              tail := (\\n                (sequence_base + nextval(table_name || '_id_seq'))\\n                & 65535);\\n\\n              -- Return the time part and the sequence part. OR appears\\n              -- faster here than addition, but they're equivalent:\\n              -- time_part has no trailing two bytes, and tail is only\\n              -- the last two bytes.\\n              RETURN time_part | tail;\\n            END\\n          $$ LANGUAGE plpgsql VOLATILE;\\n\")",
-      "render_path": null,
-      "location": {
-        "type": "method",
-        "class": "Mastodon::TimestampIds",
-        "method": "s(:self).define_timestamp_id"
-      },
-      "user_input": "SecureRandom.hex(16)",
-      "confidence": "Medium",
-      "note": ""
-    },
-    {
       "warning_type": "Cross-Site Scripting",
       "warning_code": 4,
       "fingerprint": "e04aafe1e06cf8317fb6ac0a7f35783e45aa1274272ee6eaf28d39adfdad489b",
@@ -269,6 +269,6 @@
       "note": ""
     }
   ],
-  "updated": "2017-10-05 20:06:40 +0200",
+  "updated": "2017-10-06 03:27:46 +0200",
   "brakeman_version": "4.0.1"
 }
diff --git a/lib/mastodon/timestamp_ids.rb b/lib/mastodon/timestamp_ids.rb
index d49b5c1b5..3b048a50c 100644
--- a/lib/mastodon/timestamp_ids.rb
+++ b/lib/mastodon/timestamp_ids.rb
@@ -1,120 +1,111 @@
 # frozen_string_literal: true
 
-module Mastodon
-  module TimestampIds
-    def self.define_timestamp_id
-      conn = ActiveRecord::Base.connection
-
-      # Make sure we don't already have a `timestamp_id` function.
-      unless conn.execute(<<~SQL).values.first.first
-        SELECT EXISTS(
-          SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
-        );
+module Mastodon::TimestampIds
+  DEFAULT_REGEX = /timestamp_id\('(?<seq_prefix>\w+)'/
+
+  class << self
+    # Our ID will be composed of the following:
+    # 6 bytes (48 bits) of millisecond-level timestamp
+    # 2 bytes (16 bits) of sequence data
+    #
+    # The 'sequence data' is intended to be unique within a
+    # given millisecond, yet obscure the 'serial number' of
+    # this row.
+    #
+    # To do this, we hash the following data:
+    # * Table name (if provided, skipped if not)
+    # * Secret salt (should not be guessable)
+    # * Timestamp (again, millisecond-level granularity)
+    #
+    # We then take the first two bytes of that value, and add
+    # the lowest two bytes of the table ID sequence number
+    # (`table_name`_id_seq). This means that even if we insert
+    # two rows at the same millisecond, they will have
+    # distinct 'sequence data' portions.
+    #
+    # If this happens, and an attacker can see both such IDs,
+    # they can determine which of the two entries was inserted
+    # first, but not the total number of entries in the table
+    # (even mod 2**16).
+    #
+    # The table name is included in the hash to ensure that
+    # different tables derive separate sequence bases so rows
+    # inserted in the same millisecond in different tables do
+    # not reveal the table ID sequence number for one another.
+    #
+    # The secret salt is included in the hash to ensure that
+    # external users cannot derive the sequence base given the
+    # timestamp and table name, which would allow them to
+    # compute the table ID sequence number.
+    def define_timestamp_id
+      return if already_defined?
+
+      connection.execute(<<~SQL)
+        CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
+        RETURNS bigint AS
+        $$
+          DECLARE
+            time_part bigint;
+            sequence_base bigint;
+            tail bigint;
+          BEGIN
+            time_part := (
+              -- Get the time in milliseconds
+              ((date_part('epoch', now()) * 1000))::bigint
+              -- And shift it over two bytes
+              << 16);
+
+            sequence_base := (
+              'x' ||
+              -- Take the first two bytes (four hex characters)
+              substr(
+                -- Of the MD5 hash of the data we documented
+                md5(table_name ||
+                  '#{SecureRandom.hex(16)}' ||
+                  time_part::text
+                ),
+                1, 4
+              )
+            -- And turn it into a bigint
+            )::bit(16)::bigint;
+
+            -- Finally, add our sequence number to our base, and chop
+            -- it to the last two bytes
+            tail := (
+              (sequence_base + nextval(table_name || '_id_seq'))
+              & 65535);
+
+            -- Return the time part and the sequence part. OR appears
+            -- faster here than addition, but they're equivalent:
+            -- time_part has no trailing two bytes, and tail is only
+            -- the last two bytes.
+            RETURN time_part | tail;
+          END
+        $$ LANGUAGE plpgsql VOLATILE;
       SQL
-        # The function doesn't exist, so we'll define it.
-        conn.execute(<<~SQL)
-          CREATE OR REPLACE FUNCTION timestamp_id(table_name text)
-          RETURNS bigint AS
-          $$
-            DECLARE
-              time_part bigint;
-              sequence_base bigint;
-              tail bigint;
-            BEGIN
-              -- Our ID will be composed of the following:
-              -- 6 bytes (48 bits) of millisecond-level timestamp
-              -- 2 bytes (16 bits) of sequence data
-
-              -- The 'sequence data' is intended to be unique within a
-              -- given millisecond, yet obscure the 'serial number' of
-              -- this row.
-
-              -- To do this, we hash the following data:
-              -- * Table name (if provided, skipped if not)
-              -- * Secret salt (should not be guessable)
-              -- * Timestamp (again, millisecond-level granularity)
-
-              -- We then take the first two bytes of that value, and add
-              -- the lowest two bytes of the table ID sequence number
-              -- (`table_name`_id_seq). This means that even if we insert
-              -- two rows at the same millisecond, they will have
-              -- distinct 'sequence data' portions.
-
-              -- If this happens, and an attacker can see both such IDs,
-              -- they can determine which of the two entries was inserted
-              -- first, but not the total number of entries in the table
-              -- (even mod 2**16).
-
-              -- The table name is included in the hash to ensure that
-              -- different tables derive separate sequence bases so rows
-              -- inserted in the same millisecond in different tables do
-              -- not reveal the table ID sequence number for one another.
-
-              -- The secret salt is included in the hash to ensure that
-              -- external users cannot derive the sequence base given the
-              -- timestamp and table name, which would allow them to
-              -- compute the table ID sequence number.
-
-              time_part := (
-                -- Get the time in milliseconds
-                ((date_part('epoch', now()) * 1000))::bigint
-                -- And shift it over two bytes
-                << 16);
-
-              sequence_base := (
-                'x' ||
-                -- Take the first two bytes (four hex characters)
-                substr(
-                  -- Of the MD5 hash of the data we documented
-                  md5(table_name ||
-                    '#{SecureRandom.hex(16)}' ||
-                    time_part::text
-                  ),
-                  1, 4
-                )
-              -- And turn it into a bigint
-              )::bit(16)::bigint;
-
-              -- Finally, add our sequence number to our base, and chop
-              -- it to the last two bytes
-              tail := (
-                (sequence_base + nextval(table_name || '_id_seq'))
-                & 65535);
-
-              -- Return the time part and the sequence part. OR appears
-              -- faster here than addition, but they're equivalent:
-              -- time_part has no trailing two bytes, and tail is only
-              -- the last two bytes.
-              RETURN time_part | tail;
-            END
-          $$ LANGUAGE plpgsql VOLATILE;
-        SQL
-      end
     end
 
-    def self.ensure_id_sequences_exist
-      conn = ActiveRecord::Base.connection
-
+    def ensure_id_sequences_exist
       # Find tables using timestamp IDs.
-      default_regex = /timestamp_id\('(?<seq_prefix>\w+)'/
-      conn.tables.each do |table|
+      connection.tables.each do |table|
         # We're only concerned with "id" columns.
-        next unless (id_col = conn.columns(table).find { |col| col.name == 'id' })
+        next unless (id_col = connection.columns(table).find { |col| col.name == 'id' })
 
         # And only those that are using timestamp_id.
-        next unless (data = default_regex.match(id_col.default_function))
+        next unless (data = DEFAULT_REGEX.match(id_col.default_function))
 
         seq_name = data[:seq_prefix] + '_id_seq'
+
         # If we were on Postgres 9.5+, we could do CREATE SEQUENCE IF
         # NOT EXISTS, but we can't depend on that. Instead, catch the
         # possible exception and ignore it.
         # Note that seq_name isn't a column name, but it's a
         # relation, like a column, and follows the same quoting rules
         # in Postgres.
-        conn.execute(<<~SQL)
+        connection.execute(<<~SQL)
           DO $$
             BEGIN
-              CREATE SEQUENCE #{conn.quote_column_name(seq_name)};
+              CREATE SEQUENCE #{connection.quote_column_name(seq_name)};
             EXCEPTION WHEN duplicate_table THEN
               -- Do nothing, we have the sequence already.
             END
@@ -122,5 +113,19 @@ module Mastodon
         SQL
       end
     end
+
+    private
+
+    def already_defined?
+      connection.execute(<<~SQL).values.first.first
+        SELECT EXISTS(
+          SELECT * FROM pg_proc WHERE proname = 'timestamp_id'
+        );
+      SQL
+    end
+
+    def connection
+      ActiveRecord::Base.connection
+    end
   end
 end
diff --git a/lib/tasks/db.rake b/lib/tasks/db.rake
index 66468d999..6af6bb6fb 100644
--- a/lib/tasks/db.rake
+++ b/lib/tasks/db.rake
@@ -20,10 +20,10 @@ def each_schema_load_environment
 
   if Rails.env == 'development'
     test_conf = ActiveRecord::Base.configurations['test']
+
     if test_conf['database']&.present?
       ActiveRecord::Base.establish_connection(:test)
       yield
-
       ActiveRecord::Base.establish_connection(Rails.env.to_sym)
     end
   end