diff options
author | Claire <claire.github-309c@sitedethib.com> | 2022-01-23 18:10:10 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-23 18:10:10 +0100 |
commit | 0a120d86d28e3f2e20455f56c1656f5d5f2f4af6 (patch) | |
tree | 36b2184e4a61f655240880c1f50cd004cf6ec27f /app | |
parent | a63495230a3a28e022504f36356cd75b17b635ba (diff) |
Fix error-prone SQL queries (#15828)
* Fix error-prone SQL queries in Account search While this code seems to not present an actual vulnerability, one could easily be introduced by mistake due to how the query is built. This PR parameterises the `to_tsquery` input to make the query more robust. * Harden code for Status#tagged_with_all and Status#tagged_with_none Those two scopes aren't used in a way that could be vulnerable to an SQL injection, but keeping them unchanged might be a hazard. * Remove unneeded spaces surrounding tsquery term * Please CodeClimate * Move advanced_search_for SQL template to its own function This avoids one level of indentation while making clearer that the SQL template isn't build from all the dynamic parameters of advanced_search_for. * Add tests covering tagged_with, tagged_with_all and tagged_with_none * Rewrite tagged_with_none to avoid multiple joins and make it more robust * Remove obsolete brakeman warnings * Revert "Remove unneeded spaces surrounding tsquery term" The two queries are not strictly equivalent. This reverts commit 86f16c537e06c6ba4a8b250f25dcce9f049023ff.
Diffstat (limited to 'app')
-rw-r--r-- | app/models/account.rb | 105 | ||||
-rw-r--r-- | app/models/status.rb | 7 |
2 files changed, 57 insertions, 55 deletions
diff --git a/app/models/account.rb b/app/models/account.rb index c459125c7..771cc0b1b 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -427,6 +427,9 @@ class Account < ApplicationRecord end class << self + DISALLOWED_TSQUERY_CHARACTERS = /['?\\:‘’]/.freeze + TEXTSEARCH = "(setweight(to_tsvector('simple', accounts.display_name), 'A') || setweight(to_tsvector('simple', accounts.username), 'B') || setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C'))" + def readonly_attributes super - %w(statuses_count following_count followers_count) end @@ -437,98 +440,100 @@ class Account < ApplicationRecord end def search_for(terms, limit = 10, offset = 0) - textsearch, query = generate_query_for_search(terms) + tsquery = generate_query_for_search(terms) sql = <<-SQL.squish SELECT accounts.*, - ts_rank_cd(#{textsearch}, #{query}, 32) AS rank + ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank FROM accounts - WHERE #{query} @@ #{textsearch} + WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} AND accounts.suspended_at IS NULL AND accounts.moved_to_account_id IS NULL ORDER BY rank DESC - LIMIT ? OFFSET ? + LIMIT :limit OFFSET :offset SQL - records = find_by_sql([sql, limit, offset]) + records = find_by_sql([sql, limit: limit, offset: offset, tsquery: tsquery]) ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) records end def advanced_search_for(terms, account, limit = 10, following = false, offset = 0) - textsearch, query = generate_query_for_search(terms) + tsquery = generate_query_for_search(terms) + sql = advanced_search_for_sql_template(following) + records = find_by_sql([sql, id: account.id, limit: limit, offset: offset, tsquery: tsquery]) + ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) + records + end + + def from_text(text) + return [] if text.blank? + text.scan(MENTION_RE).map { |match| match.first.split('@', 2) }.uniq.filter_map do |(username, domain)| + domain = begin + if TagManager.instance.local_domain?(domain) + nil + else + TagManager.instance.normalize_domain(domain) + end + end + EntityCache.instance.mention(username, domain) + end + end + + private + + def generate_query_for_search(unsanitized_terms) + terms = unsanitized_terms.gsub(DISALLOWED_TSQUERY_CHARACTERS, ' ') + + # The final ":*" is for prefix search. + # The trailing space does not seem to fit any purpose, but `to_tsquery` + # behaves differently with and without a leading space if the terms start + # with `./`, `../`, or `.. `. I don't understand why, so, in doubt, keep + # the same query. + "' #{terms} ':*" + end + + def advanced_search_for_sql_template(following) if following - sql = <<-SQL.squish + <<-SQL.squish WITH first_degree AS ( SELECT target_account_id FROM follows - WHERE account_id = ? + WHERE account_id = :id UNION ALL - SELECT ? + SELECT :id ) SELECT accounts.*, - (count(f.id) + 1) * ts_rank_cd(#{textsearch}, #{query}, 32) AS rank + (count(f.id) + 1) * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank FROM accounts - LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = ?) + LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) WHERE accounts.id IN (SELECT * FROM first_degree) - AND #{query} @@ #{textsearch} + AND to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} AND accounts.suspended_at IS NULL AND accounts.moved_to_account_id IS NULL GROUP BY accounts.id ORDER BY rank DESC - LIMIT ? OFFSET ? + LIMIT :limit OFFSET :offset SQL - - records = find_by_sql([sql, account.id, account.id, account.id, limit, offset]) else - sql = <<-SQL.squish + <<-SQL.squish SELECT accounts.*, - (count(f.id) + 1) * ts_rank_cd(#{textsearch}, #{query}, 32) AS rank + (count(f.id) + 1) * ts_rank_cd(#{TEXTSEARCH}, to_tsquery('simple', :tsquery), 32) AS rank FROM accounts - LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = ?) OR (accounts.id = f.target_account_id AND f.account_id = ?) - WHERE #{query} @@ #{textsearch} + LEFT OUTER JOIN follows AS f ON (accounts.id = f.account_id AND f.target_account_id = :id) OR (accounts.id = f.target_account_id AND f.account_id = :id) + WHERE to_tsquery('simple', :tsquery) @@ #{TEXTSEARCH} AND accounts.suspended_at IS NULL AND accounts.moved_to_account_id IS NULL GROUP BY accounts.id ORDER BY rank DESC - LIMIT ? OFFSET ? + LIMIT :limit OFFSET :offset SQL - - records = find_by_sql([sql, account.id, account.id, limit, offset]) - end - - ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) - records - end - - def from_text(text) - return [] if text.blank? - - text.scan(MENTION_RE).map { |match| match.first.split('@', 2) }.uniq.filter_map do |(username, domain)| - domain = begin - if TagManager.instance.local_domain?(domain) - nil - else - TagManager.instance.normalize_domain(domain) - end - end - EntityCache.instance.mention(username, domain) end end - - private - - def generate_query_for_search(terms) - terms = Arel.sql(connection.quote(terms.gsub(/['?\\:]/, ' '))) - textsearch = "(setweight(to_tsvector('simple', accounts.display_name), 'A') || setweight(to_tsvector('simple', accounts.username), 'B') || setweight(to_tsvector('simple', coalesce(accounts.domain, '')), 'C'))" - query = "to_tsquery('simple', ''' ' || #{terms} || ' ''' || ':*')" - - [textsearch, query] - end end def emojis diff --git a/app/models/status.rb b/app/models/status.rb index 3358d6891..47671c0f5 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -99,15 +99,12 @@ class Status < ApplicationRecord scope :not_excluded_by_account, ->(account) { where.not(account_id: account.excluded_from_timeline_account_ids) } scope :not_domain_blocked_by_account, ->(account) { account.excluded_from_timeline_domains.blank? ? left_outer_joins(:account) : left_outer_joins(:account).where('accounts.domain IS NULL OR accounts.domain NOT IN (?)', account.excluded_from_timeline_domains) } scope :tagged_with_all, ->(tag_ids) { - Array(tag_ids).reduce(self) do |result, id| + Array(tag_ids).map(&:to_i).reduce(self) do |result, id| result.joins("INNER JOIN statuses_tags t#{id} ON t#{id}.status_id = statuses.id AND t#{id}.tag_id = #{id}") end } scope :tagged_with_none, ->(tag_ids) { - Array(tag_ids).reduce(self) do |result, id| - result.joins("LEFT OUTER JOIN statuses_tags t#{id} ON t#{id}.status_id = statuses.id AND t#{id}.tag_id = #{id}") - .where("t#{id}.tag_id IS NULL") - end + where('NOT EXISTS (SELECT * FROM statuses_tags forbidden WHERE forbidden.status_id = statuses.id AND forbidden.tag_id IN (?))', tag_ids) } cache_associated :application, |