From 9a435494c2efdd2ca8fc7f5fa3dbb81bf88633a1 Mon Sep 17 00:00:00 2001 From: multiple creatures Date: Wed, 11 Dec 2019 20:04:53 -0600 Subject: move normalized text into own table --- app/helpers/filter_helper.rb | 2 +- app/lib/bangtags.rb | 2 +- app/models/normalized_status.rb | 12 +++++++++++ app/models/status.rb | 25 ++++++++++++++-------- .../20191118084127_migrate_to_new_search_impl.rb | 3 +-- .../20191211235208_create_normalized_statuses.rb | 23 ++++++++++++++++++++ ...91212002705_add_index_to_normalized_statuses.rb | 13 +++++++++++ db/schema.rb | 12 ++++++++--- spec/fabricators/normalized_status_fabricator.rb | 4 ++++ spec/models/normalized_status_spec.rb | 5 +++++ streaming/index.js | 2 +- 11 files changed, 86 insertions(+), 17 deletions(-) create mode 100644 app/models/normalized_status.rb create mode 100644 db/migrate/20191211235208_create_normalized_statuses.rb create mode 100644 db/migrate/20191212002705_add_index_to_normalized_statuses.rb create mode 100644 spec/fabricators/normalized_status_fabricator.rb create mode 100644 spec/models/normalized_status_spec.rb diff --git a/app/helpers/filter_helper.rb b/app/helpers/filter_helper.rb index e5144b981..db2652557 100644 --- a/app/helpers/filter_helper.rb +++ b/app/helpers/filter_helper.rb @@ -7,7 +7,7 @@ module FilterHelper status = status.reblog if status.reblog? - if Status.where(id: status.id).where("statuses.normalized_text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))", receiver_id).exists? + if Status.where(id: status.id).regex_filtered_by_account(receiver_id).exists? redis.sadd("filtered_statuses:#{receiver_id}", status.id) return true end diff --git a/app/lib/bangtags.rb b/app/lib/bangtags.rb index 1b27f712e..5d8389892 100644 --- a/app/lib/bangtags.rb +++ b/app/lib/bangtags.rb @@ -717,7 +717,7 @@ class Bangtags q = cmd[1..-1].join.strip next if q.blank? begin - data = @account.statuses.where('normalized_text ~ ?', expand_search_query(q)) + data = @account.statuses.regex(expand_search_query(q)) .reorder(:created_at) .pluck(:created_at) .map { |d| d.strftime('%Y-%m') } diff --git a/app/models/normalized_status.rb b/app/models/normalized_status.rb new file mode 100644 index 000000000..10c2bf788 --- /dev/null +++ b/app/models/normalized_status.rb @@ -0,0 +1,12 @@ +# == Schema Information +# +# Table name: normalized_statuses +# +# id :bigint(8) not null, primary key +# status_id :bigint(8) +# text :text +# + +class NormalizedStatus < ApplicationRecord + belongs_to :status, inverse_of: :normalized_status +end diff --git a/app/models/status.rb b/app/models/status.rb index 8fdfd0737..27184591b 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -33,7 +33,6 @@ # origin :string # boostable :boolean # reject_replies :boolean -# normalized_text :text default(""), not null # class Status < ApplicationRecord @@ -86,6 +85,7 @@ class Status < ApplicationRecord has_one :status_stat, inverse_of: :status has_one :poll, inverse_of: :status, dependent: :destroy has_one :destructing_status, inverse_of: :status, dependent: :destroy + has_one :normalized_status, inverse_of: :status, dependent: :destroy validates :uri, uniqueness: true, presence: true, unless: :local? validates :text, presence: true, unless: -> { with_media? || reblog? } @@ -119,7 +119,11 @@ class Status < ApplicationRecord scope :mention_not_excluded_by_account, ->(account) { left_outer_joins(:mentions).where('mentions.account_id IS NULL OR mentions.account_id NOT IN (?)', account.excluded_from_timeline_account_ids) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) } - scope :not_string_filtered_by_account, ->(account) { where("statuses.normalized_text !~ ALL(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))", account.id) } + scope :like, ->(needle) { joins(:normalized_status).where('normalized_statuses.text LIKE f_normalize(?)', needle) } + scope :regex, ->(needle) { joins(:normalized_status).where('normalized_statuses.text ~ f_normalize(?)', needle) } + scope :regex_filtered_by_account, ->(account) { joins(:normalized_status).where('normalized_statuses.text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account.id) } + scope :regex_not_filtered_by_account, ->(account) { joins(:normalized_status).where('normalized_statuses.text !~ ALL(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = ?))', account.id) } + scope :not_missing_media_desc, -> { left_outer_joins(:media_attachments).where('media_attachments.id IS NULL OR media_attachments.description IS NOT NULL') } scope :tagged_with_all, ->(tags) { @@ -340,10 +344,9 @@ class Status < ApplicationRecord before_validation :infer_reject_replies after_create :set_poll_id + after_create :update_normalized_text after_create :process_bangtags, if: :local? - before_save :update_normalized_text - class << self include SearchHelper @@ -359,7 +362,7 @@ class Status < ApplicationRecord end return none if term.blank? || term.length < 3 query = query.without_reblogs - .where('normalized_text ~ ?', expand_search_query(term)) + .regex(expand_search_query(term)) .offset(offset).limit(limit) apply_timeline_filters(query, account, true) rescue ActiveRecord::StatementInvalid @@ -554,7 +557,7 @@ class Status < ApplicationRecord query = query.in_chosen_languages(account) if account.chosen_languages.present? query = query.reply_not_excluded_by_account(account) unless tag_timeline query = query.mention_not_excluded_by_account(account) - query = query.not_string_filtered_by_account(account) if account.custom_filters.present? + query = query.regex_not_filtered_by_account(account) if account.custom_filters.present? query = query.not_missing_media_desc if account.filter_undescribed? query.merge(account_silencing_filter(account)) end @@ -630,9 +633,13 @@ class Status < ApplicationRecord end def update_normalized_text - return if destroyed? - return unless (normalized_text.blank? && !text.blank?) || saved_change_to_text? - self.normalized_text = normalize_status(self) + return if destroyed? || text.blank? || !(text_changed? || saved_change_to_text?) + normalized_text = normalize_status(self) + if self.normalized_status.nil? + self.create_normalized_status(text: normalized_text) + else + self.normalized_status.update_attributes(text: normalized_text) + end end def set_conversation diff --git a/db/migrate/20191118084127_migrate_to_new_search_impl.rb b/db/migrate/20191118084127_migrate_to_new_search_impl.rb index b068443ca..52a50af36 100644 --- a/db/migrate/20191118084127_migrate_to_new_search_impl.rb +++ b/db/migrate/20191118084127_migrate_to_new_search_impl.rb @@ -22,7 +22,6 @@ class MigrateToNewSearchImpl < ActiveRecord::Migration[5.2] end def down - #raise ActiveRecord::IrreversibleMigration - true + raise ActiveRecord::IrreversibleMigration end end diff --git a/db/migrate/20191211235208_create_normalized_statuses.rb b/db/migrate/20191211235208_create_normalized_statuses.rb new file mode 100644 index 000000000..9baaa3f62 --- /dev/null +++ b/db/migrate/20191211235208_create_normalized_statuses.rb @@ -0,0 +1,23 @@ +class CreateNormalizedStatuses < ActiveRecord::Migration[5.2] + def up + create_table :normalized_statuses do |t| + t.references :status, foreign_key: true + t.text :text + end + + safety_assured do + remove_index :statuses, name: 'index_statuses_on_normalized_text_trgm' + execute 'INSERT INTO normalized_statuses (status_id, text) SELECT id, normalized_text FROM statuses' + remove_column :statuses, :normalized_text + end + end + + def down + safety_assured do + execute 'UPDATE statuses SET normalized_text = s.text FROM (SELECT status_id, text FROM normalized_statuses) AS s WHERE statuses.id = s.id' + remove_index :normalized_statuses, name: 'index_statuses_on_normalized_text_trgm' + drop_table :normalized_statuses + add_column :statuses, :normalized_text, :text, null: false, default: '' + end + end +end diff --git a/db/migrate/20191212002705_add_index_to_normalized_statuses.rb b/db/migrate/20191212002705_add_index_to_normalized_statuses.rb new file mode 100644 index 000000000..7a9dce39a --- /dev/null +++ b/db/migrate/20191212002705_add_index_to_normalized_statuses.rb @@ -0,0 +1,13 @@ +class AddIndexToNormalizedStatuses < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def up + safety_assured do + execute 'CREATE INDEX CONCURRENTLY IF NOT EXISTS index_statuses_on_normalized_text_trgm ON normalized_statuses USING GIN (text gin_trgm_ops)' + end + end + + def down + remove_index :normalized_statuses, name: 'index_statuses_on_normalized_text_trgm' + end +end diff --git a/db/schema.rb b/db/schema.rb index 63897a0e7..96e9148ed 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_11_18_102858) do +ActiveRecord::Schema.define(version: 2019_12_12_002705) do # These are extensions that must be enabled in order to support this database enable_extension "pg_trgm" @@ -432,6 +432,13 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do t.index ["target_account_id"], name: "index_mutes_on_target_account_id" end + create_table "normalized_statuses", force: :cascade do |t| + t.bigint "status_id" + t.text "text" + t.index ["status_id"], name: "index_normalized_statuses_on_status_id" + t.index ["text"], name: "index_statuses_on_normalized_text_trgm", opclass: :gin_trgm_ops, using: :gin + end + create_table "notifications", force: :cascade do |t| t.bigint "activity_id", null: false t.string "activity_type", null: false @@ -689,13 +696,11 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do t.string "origin" t.boolean "boostable" t.boolean "reject_replies" - t.text "normalized_text", default: "", null: false t.index ["account_id", "id", "visibility", "updated_at"], name: "index_statuses_20180106", order: { id: :desc } t.index ["account_id", "id", "visibility"], name: "index_statuses_on_account_id_and_id_and_visibility", order: { id: :desc }, where: "(visibility = ANY (ARRAY[0, 1, 2, 4]))" t.index ["in_reply_to_account_id"], name: "index_statuses_on_in_reply_to_account_id" t.index ["in_reply_to_id"], name: "index_statuses_on_in_reply_to_id" t.index ["network"], name: "index_statuses_on_network", where: "network" - t.index ["normalized_text"], name: "index_statuses_on_normalized_text_trgm", opclass: :gin_trgm_ops, using: :gin t.index ["origin"], name: "index_statuses_on_origin", unique: true t.index ["reblog_of_id", "account_id"], name: "index_statuses_on_reblog_of_id_and_account_id" t.index ["uri"], name: "index_statuses_on_uri", unique: true @@ -860,6 +865,7 @@ ActiveRecord::Schema.define(version: 2019_11_18_102858) do add_foreign_key "mentions", "statuses", on_delete: :cascade add_foreign_key "mutes", "accounts", column: "target_account_id", name: "fk_eecff219ea", on_delete: :cascade add_foreign_key "mutes", "accounts", name: "fk_b8d8daf315", on_delete: :cascade + add_foreign_key "normalized_statuses", "statuses" add_foreign_key "notifications", "accounts", column: "from_account_id", name: "fk_fbd6b0bf9e", on_delete: :cascade add_foreign_key "notifications", "accounts", name: "fk_c141c8ee55", on_delete: :cascade add_foreign_key "oauth_access_grants", "oauth_applications", column: "application_id", name: "fk_34d54b0a33", on_delete: :cascade diff --git a/spec/fabricators/normalized_status_fabricator.rb b/spec/fabricators/normalized_status_fabricator.rb new file mode 100644 index 000000000..a77857175 --- /dev/null +++ b/spec/fabricators/normalized_status_fabricator.rb @@ -0,0 +1,4 @@ +Fabricator(:normalized_status) do + status nil + text "MyText" +end diff --git a/spec/models/normalized_status_spec.rb b/spec/models/normalized_status_spec.rb new file mode 100644 index 000000000..b7b241f8e --- /dev/null +++ b/spec/models/normalized_status_spec.rb @@ -0,0 +1,5 @@ +require 'rails_helper' + +RSpec.describe NormalizedStatus, type: :model do + pending "add some examples to (or delete) #{__FILE__}" +end diff --git a/streaming/index.js b/streaming/index.js index dd04d070a..f3d75f443 100644 --- a/streaming/index.js +++ b/streaming/index.js @@ -417,7 +417,7 @@ const startWorker = (workerId) => { } const queries = [ - client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM statuses WHERE id = $3 AND normalized_text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = $1)) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)), + client.query(`SELECT 1 FROM blocks WHERE (account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)})) OR (account_id = $2 AND target_account_id = $1) UNION SELECT 1 FROM mutes WHERE account_id = $1 AND target_account_id IN (${placeholders(targetAccountIds, 3)}) UNION SELECT 1 FROM normalized_statuses WHERE status_id = $3 AND text ~ ANY(ARRAY(SELECT f_normalize(phrase) FROM custom_filters WHERE account_id = $1)) UNION SELECT 1 FROM media_attachments WHERE (1 = (SELECT 1 FROM accounts WHERE id = $1 AND filter_undescribed)) AND status_id = $3 AND description IS NULL LIMIT 1`, [req.accountId, unpackedPayload.account.id, unpackedPayload.id].concat(targetAccountIds)), ]; if (accountDomain) { -- cgit