From 47ef4a6c7a74072daff8b23c4af3e300bb75ba1a Mon Sep 17 00:00:00 2001 From: ThibG Date: Tue, 25 Jun 2019 14:45:14 +0200 Subject: Apply filters to poll options (#11174) * Apply filters to poll options in WebUI Fixes #11128 * Apply filters to poll options server-side * Add poll options to searchable text --- app/chewy/statuses_index.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/chewy') diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index 8ce413f8a..f5983a5a5 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -51,7 +51,7 @@ class StatusesIndex < Chewy::Index field :id, type: 'long' field :account_id, type: 'long' - field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).join("\n\n") } do + field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status_preloadable_poll.options : []).join("\n\n") } do field :stemmed, type: 'text', analyzer: 'content' end -- cgit From a02f4b7cd47783bb1938e7e7e81e966657b1c5e6 Mon Sep 17 00:00:00 2001 From: mayaeh Date: Thu, 27 Jun 2019 16:16:55 +0900 Subject: Fix NameError (#11192) --- app/chewy/statuses_index.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'app/chewy') diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index f5983a5a5..b7365d5ca 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -51,7 +51,7 @@ class StatusesIndex < Chewy::Index field :id, type: 'long' field :account_id, type: 'long' - field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status_preloadable_poll.options : []).join("\n\n") } do + field :text, type: 'text', value: ->(status) { [status.spoiler_text, Formatter.instance.plaintext(status)].concat(status.media_attachments.map(&:description)).concat(status.preloadable_poll ? status.preloadable_poll.options : []).join("\n\n") } do field :stemmed, type: 'text', analyzer: 'content' end -- cgit From 8fdff2748ffbc4ce3d36c75f2bd33182b641e895 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 16 Aug 2019 01:24:03 +0200 Subject: Add more accurate account search (#11537) * Add more accurate account search When ElasticSearch is available, a more accurate search is implemented: - Using edge n-gram index for acct and display name - Using asciifolding and cjk width normalization on display names - Using Gaussian decay on account activity for additional scoring (recency) - Using followers/friends ratio for additional scoring (spamminess) - Using followers number for additional scoring (size) The exact match precedence only takes effect when the input conforms to the username format and the username part of it is complete, i.e. when the user started typing the domain part. * Support single-letter usernames * Fix tests * Fix not picking up account updates * Add weights and normalization for scores, skip zero terms queries * Use local counts for accounts index, adjust search parameters * Fix mistakes * Using updated_at of accounts is inadequate for remote accounts --- app/chewy/accounts_index.rb | 36 +++++ app/models/account.rb | 6 + app/models/account_stat.rb | 2 + app/services/account_search_service.rb | 189 ++++++++++++++++++--------- spec/services/account_search_service_spec.rb | 122 ++++------------- 5 files changed, 194 insertions(+), 161 deletions(-) create mode 100644 app/chewy/accounts_index.rb (limited to 'app/chewy') diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb new file mode 100644 index 000000000..e11b80039 --- /dev/null +++ b/app/chewy/accounts_index.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: true + +class AccountsIndex < Chewy::Index + settings index: { refresh_interval: '5m' }, analysis: { + analyzer: { + content: { + tokenizer: 'whitespace', + filter: %w(lowercase asciifolding cjk_width), + }, + + edge_ngram: { + tokenizer: 'edge_ngram', + filter: %w(lowercase asciifolding cjk_width), + }, + }, + + tokenizer: { + edge_ngram: { + type: 'edge_ngram', + min_gram: 1, + max_gram: 15, + }, + }, + } + + define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do + root date_detection: false do + field :id, type: 'long' + field :display_name, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + field :acct, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } + field :following_count, type: 'long', value: ->(account) { account.active_relationships.count } + field :followers_count, type: 'long', value: ->(account) { account.passive_relationships.count } + field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at } + end + end +end diff --git a/app/models/account.rb b/app/models/account.rb index 60c06aaf0..392cc625f 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -127,6 +127,8 @@ class Account < ApplicationRecord delegate :chosen_languages, to: :user, prefix: false, allow_nil: true + update_index('accounts#account', :self) if Chewy.enabled? + def local? domain.nil? end @@ -169,6 +171,10 @@ class Account < ApplicationRecord subscription_expires_at.present? end + def searchable? + !(suspended? || moved?) + end + def possibly_stale? last_webfingered_at.nil? || last_webfingered_at <= 1.day.ago end diff --git a/app/models/account_stat.rb b/app/models/account_stat.rb index 9813aa84f..6d1097cec 100644 --- a/app/models/account_stat.rb +++ b/app/models/account_stat.rb @@ -16,6 +16,8 @@ class AccountStat < ApplicationRecord belongs_to :account, inverse_of: :account_stat + update_index('accounts#account', :account) if Chewy.enabled? + def increment_count!(key) update(attributes_for_increment(key)) end diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb index e1874d045..2d602a31d 100644 --- a/app/services/account_search_service.rb +++ b/app/services/account_search_service.rb @@ -4,105 +4,134 @@ class AccountSearchService < BaseService attr_reader :query, :limit, :offset, :options, :account def call(query, account = nil, options = {}) - @query = query.strip - @limit = options[:limit].to_i - @offset = options[:offset].to_i - @options = options - @account = account + @acct_hint = query.start_with?('@') + @query = query.strip.gsub(/\A@/, '') + @limit = options[:limit].to_i + @offset = options[:offset].to_i + @options = options + @account = account - search_service_results + search_service_results.compact.uniq end private def search_service_results - return [] if query_blank_or_hashtag? || limit < 1 + return [] if query.blank? || limit < 1 - if resolving_non_matching_remote_account? - [ResolveAccountService.new.call("#{query_username}@#{query_domain}")].compact - else - search_results_and_exact_match.compact.uniq - end + [exact_match] + search_results end - def resolving_non_matching_remote_account? - offset.zero? && options[:resolve] && !exact_match? && !domain_is_local? - end + def exact_match + return unless offset.zero? && username_complete? - def search_results_and_exact_match - return search_results.to_a unless offset.zero? + return @exact_match if defined?(@exact_match) - results = [exact_match] + @exact_match = begin + if options[:resolve] + ResolveAccountService.new.call(query) + elsif domain_is_local? + Account.find_local(query_username) + else + Account.find_remote(query_username, query_domain) + end + end + end - return results if exact_match? && limit == 1 + def search_results + return [] if limit_for_non_exact_results.zero? - results + search_results.to_a + @search_results ||= begin + if Chewy.enabled? + from_elasticsearch + else + from_database + end + end end - def query_blank_or_hashtag? - query.blank? || query.start_with?('#') + def from_database + if account + advanced_search_results + else + simple_search_results + end end - def split_query_string - @split_query_string ||= query.gsub(/\A@/, '').split('@') + def advanced_search_results + Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset) end - def query_username - @query_username ||= split_query_string.first || '' + def simple_search_results + Account.search_for(terms_for_query, limit_for_non_exact_results, offset) end - def query_domain - @query_domain ||= query_without_split? ? nil : split_query_string.last - end + def from_elasticsearch + must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct) : %w(acct^2 display_name), type: 'best_fields' } }] + should_clauses = [] - def query_without_split? - split_query_string.size == 1 - end + if account + return [] if options[:following] && following_ids.empty? - def domain_is_local? - @domain_is_local ||= TagManager.instance.local_domain?(query_domain) - end + if options[:following] + must_clauses << { terms: { id: following_ids } } + elsif following_ids.any? + should_clauses << { terms: { id: following_ids, boost: 100 } } + end + end - def search_from - options[:following] && account ? account.following : Account - end + query = { bool: { must: must_clauses, should: should_clauses } } + functions = [reputation_score_function, followers_score_function, time_distance_function] - def exact_match? - exact_match.present? - end + records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' }) + .limit(limit_for_non_exact_results) + .offset(offset) + .objects + .compact - def exact_match - return @exact_match if defined?(@exact_match) + ActiveRecord::Associations::Preloader.new.preload(records, :account_stat) - @exact_match = begin - if domain_is_local? - search_from.without_suspended.find_local(query_username) - else - search_from.without_suspended.find_remote(query_username, query_domain) - end - end + records end - def search_results - @search_results ||= begin - if account - advanced_search_results - else - simple_search_results - end - end + def reputation_score_function + { + script_score: { + script: { + source: "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)", + }, + }, + } end - def advanced_search_results - Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset) + def followers_score_function + { + field_value_factor: { + field: 'followers_count', + modifier: 'log2p', + missing: 1, + }, + } end - def simple_search_results - Account.search_for(terms_for_query, limit_for_non_exact_results, offset) + def time_distance_function + { + gauss: { + last_status_at: { + scale: '30d', + offset: '30d', + decay: 0.3, + }, + }, + } + end + + def following_ids + @following_ids ||= account.active_relationships.pluck(:target_account_id) end def limit_for_non_exact_results - if offset.zero? && exact_match? + if exact_match? limit - 1 else limit @@ -113,7 +142,39 @@ class AccountSearchService < BaseService if domain_is_local? query_username else - "#{query_username} #{query_domain}" + query end end + + def split_query_string + @split_query_string ||= query.split('@') + end + + def query_username + @query_username ||= split_query_string.first || '' + end + + def query_domain + @query_domain ||= query_without_split? ? nil : split_query_string.last + end + + def query_without_split? + split_query_string.size == 1 + end + + def domain_is_local? + @domain_is_local ||= TagManager.instance.local_domain?(query_domain) + end + + def exact_match? + exact_match.present? + end + + def username_complete? + query.include?('@') && "@#{query}" =~ Account::MENTION_RE + end + + def likely_acct? + @acct_hint || username_complete? + end end diff --git a/spec/services/account_search_service_spec.rb b/spec/services/account_search_service_spec.rb index 7b071b378..5b7182586 100644 --- a/spec/services/account_search_service_spec.rb +++ b/spec/services/account_search_service_spec.rb @@ -1,126 +1,56 @@ require 'rails_helper' describe AccountSearchService, type: :service do - describe '.call' do - describe 'with a query to ignore' do + describe '#call' do + context 'with a query to ignore' do it 'returns empty array for missing query' do results = subject.call('', nil, limit: 10) expect(results).to eq [] end - it 'returns empty array for hashtag query' do - results = subject.call('#tag', nil, limit: 10) - expect(results).to eq [] - end it 'returns empty array for limit zero' do Fabricate(:account, username: 'match') + results = subject.call('match', nil, limit: 0) expect(results).to eq [] end end - describe 'searching for a simple term that is not an exact match' do + context 'searching for a simple term that is not an exact match' do it 'does not return a nil entry in the array for the exact match' do - match = Fabricate(:account, username: 'matchingusername') - + account = Fabricate(:account, username: 'matchingusername') results = subject.call('match', nil, limit: 5) - expect(results).to eq [match] - end - end - describe 'searching local and remote users' do - describe "when only '@'" do - before do - allow(Account).to receive(:find_local) - allow(Account).to receive(:search_for) - subject.call('@', nil, limit: 10) - end - - it 'uses find_local with empty query to look for local accounts' do - expect(Account).to have_received(:find_local).with('') - end - end - - describe 'when no domain' do - before do - allow(Account).to receive(:find_local) - allow(Account).to receive(:search_for) - subject.call('one', nil, limit: 10) - end - - it 'uses find_local to look for local accounts' do - expect(Account).to have_received(:find_local).with('one') - end - - it 'uses search_for to find matches' do - expect(Account).to have_received(:search_for).with('one', 10, 0) - end - end - - describe 'when there is a domain' do - before do - allow(Account).to receive(:find_remote) - end - - it 'uses find_remote to look for remote accounts' do - subject.call('two@example.com', nil, limit: 10) - expect(Account).to have_received(:find_remote).with('two', 'example.com') - end - - describe 'and there is no account provided' do - it 'uses search_for to find matches' do - allow(Account).to receive(:search_for) - subject.call('two@example.com', nil, limit: 10, resolve: false) - - expect(Account).to have_received(:search_for).with('two example.com', 10, 0) - end - end - - describe 'and there is an account provided' do - it 'uses advanced_search_for to find matches' do - account = Fabricate(:account) - allow(Account).to receive(:advanced_search_for) - subject.call('two@example.com', account, limit: 10, resolve: false) - - expect(Account).to have_received(:advanced_search_for).with('two example.com', account, 10, nil, 0) - end - end + expect(results).to eq [account] end end - describe 'with an exact match' do - it 'returns exact match first, and does not return duplicates' do - partial = Fabricate(:account, username: 'exactness') - exact = Fabricate(:account, username: 'exact') - - results = subject.call('exact', nil, limit: 10) - expect(results.size).to eq 2 - expect(results).to eq [exact, partial] - end - end - - describe 'when there is a local domain' do + context 'when there is a local domain' do around do |example| before = Rails.configuration.x.local_domain + example.run + Rails.configuration.x.local_domain = before end it 'returns exact match first' do remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e') remote_too = Fabricate(:account, username: 'b', domain: 'remote', display_name: 'e') - exact = Fabricate(:account, username: 'e') + exact = Fabricate(:account, username: 'e') + Rails.configuration.x.local_domain = 'example.com' results = subject.call('e@example.com', nil, limit: 2) + expect(results.size).to eq 2 expect(results).to eq([exact, remote]).or eq([exact, remote_too]) end end - describe 'when there is a domain but no exact match' do + context 'when there is a domain but no exact match' do it 'follows the remote account when resolve is true' do service = double(call: nil) allow(ResolveAccountService).to receive(:new).and_return(service) @@ -138,23 +68,21 @@ describe AccountSearchService, type: :service do end end - describe 'should not include suspended accounts' do - it 'returns the fuzzy match first, and does not return suspended exacts' do - partial = Fabricate(:account, username: 'exactness') - exact = Fabricate(:account, username: 'exact', suspended: true) + it 'returns the fuzzy match first, and does not return suspended exacts' do + partial = Fabricate(:account, username: 'exactness') + exact = Fabricate(:account, username: 'exact', suspended: true) + results = subject.call('exact', nil, limit: 10) - results = subject.call('exact', nil, limit: 10) - expect(results.size).to eq 1 - expect(results).to eq [partial] - end + expect(results.size).to eq 1 + expect(results).to eq [partial] + end - it "does not return suspended remote accounts" do - remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e', suspended: true) + it "does not return suspended remote accounts" do + remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e', suspended: true) + results = subject.call('a@example.com', nil, limit: 2) - results = subject.call('a@example.com', nil, limit: 2) - expect(results.size).to eq 0 - expect(results).to eq [] - end + expect(results.size).to eq 0 + expect(results).to eq [] end end end -- cgit From 70da6d663078fb7d04aed387ac085afb2e9e2cd2 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Fri, 16 Aug 2019 13:00:30 +0200 Subject: Fix accounts search by full/partial display name and others (#11580) - Restrict followers counts to local users to minimize local advantage - Fix emoji shortcodes causing error in search - Fix search syntax parse errors not being caught --- app/chewy/accounts_index.rb | 15 +++++++++++---- app/lib/search_query_parser.rb | 19 ++++++++++--------- app/lib/search_query_transformer.rb | 2 ++ app/services/account_search_service.rb | 2 +- app/services/search_service.rb | 2 +- 5 files changed, 25 insertions(+), 15 deletions(-) (limited to 'app/chewy') diff --git a/app/chewy/accounts_index.rb b/app/chewy/accounts_index.rb index e11b80039..b814e009e 100644 --- a/app/chewy/accounts_index.rb +++ b/app/chewy/accounts_index.rb @@ -26,10 +26,17 @@ class AccountsIndex < Chewy::Index define_type ::Account.searchable.includes(:account_stat), delete_if: ->(account) { account.destroyed? || !account.searchable? } do root date_detection: false do field :id, type: 'long' - field :display_name, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' - field :acct, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } - field :following_count, type: 'long', value: ->(account) { account.active_relationships.count } - field :followers_count, type: 'long', value: ->(account) { account.passive_relationships.count } + + field :display_name, type: 'text', analyzer: 'content' do + field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + end + + field :acct, type: 'text', analyzer: 'content', value: ->(account) { [account.username, account.domain].compact.join('@') } do + field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + end + + field :following_count, type: 'long', value: ->(account) { account.following.local.count } + field :followers_count, type: 'long', value: ->(account) { account.followers.local.count } field :last_status_at, type: 'date', value: ->(account) { account.last_status_at || account.created_at } end end diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb index 405ad15b8..15956d4cf 100644 --- a/app/lib/search_query_parser.rb +++ b/app/lib/search_query_parser.rb @@ -1,14 +1,15 @@ # frozen_string_literal: true class SearchQueryParser < Parslet::Parser - rule(:term) { match('[^\s":]').repeat(1).as(:term) } - rule(:quote) { str('"') } - rule(:colon) { str(':') } - rule(:space) { match('\s').repeat(1) } - rule(:operator) { (str('+') | str('-')).as(:operator) } - rule(:prefix) { (term >> colon).as(:prefix) } - rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) } - rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term)).as(:clause) } - rule(:query) { (clause >> space.maybe).repeat.as(:query) } + rule(:term) { match('[^\s":]').repeat(1).as(:term) } + rule(:quote) { str('"') } + rule(:colon) { str(':') } + rule(:space) { match('\s').repeat(1) } + rule(:operator) { (str('+') | str('-')).as(:operator) } + rule(:prefix) { (term >> colon).as(:prefix) } + rule(:shortcode) { (colon >> term >> colon.maybe).as(:shortcode) } + rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) } + rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term | shortcode)).as(:clause) } + rule(:query) { (clause >> space.maybe).repeat.as(:query) } root(:query) end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb index 2c4144790..6a299f59d 100644 --- a/app/lib/search_query_transformer.rb +++ b/app/lib/search_query_transformer.rb @@ -75,6 +75,8 @@ class SearchQueryTransformer < Parslet::Transform if clause[:term] TermClause.new(prefix, operator, clause[:term].to_s) + elsif clause[:shortcode] + TermClause.new(prefix, operator, ":#{clause[:term]}:") elsif clause[:phrase] PhraseClause.new(prefix, operator, clause[:phrase].map { |p| p[:term].to_s }.join(' ')) else diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb index 2d602a31d..d7bccdfe0 100644 --- a/app/services/account_search_service.rb +++ b/app/services/account_search_service.rb @@ -67,7 +67,7 @@ class AccountSearchService < BaseService end def from_elasticsearch - must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct) : %w(acct^2 display_name), type: 'best_fields' } }] + must_clauses = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct.edge_ngram acct) : %w(acct.edge_ngram acct display_name.edge_ngram display_name), type: 'most_fields', operator: 'and' } }] should_clauses = [] if account diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 769d1ac7a..786d34b15 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -52,7 +52,7 @@ class SearchService < BaseService preloaded_relations = relations_map_for_account(@account, account_ids, account_domains) results.reject { |status| StatusFilter.new(status, @account, preloaded_relations).filtered? } - rescue Faraday::ConnectionFailed + rescue Faraday::ConnectionFailed, Parslet::ParseFailed [] end -- cgit From cc0a55cf9aead00e1cb044649f84c2187e0e4a35 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 18 Aug 2019 03:45:51 +0200 Subject: Add more accurate hashtag search (#11579) * Add more accurate hashtag search Using ElasticSearch to index hashtags with edge n-grams and score them by usage within the last 7 days since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly * Fix search analyzer dropping non-ascii characters --- app/chewy/tags_index.rb | 37 ++++++++++ app/models/tag.rb | 14 ++-- app/models/trending_tags.rb | 3 + app/services/account_search_service.rb | 2 +- app/services/search_service.rb | 8 +-- app/services/tag_search_service.rb | 82 ++++++++++++++++++++++ config/locales/simple_form.en.yml | 2 +- .../20190815225426_add_last_status_at_to_tags.rb | 6 ++ db/schema.rb | 4 +- spec/models/tag_spec.rb | 4 +- 10 files changed, 149 insertions(+), 13 deletions(-) create mode 100644 app/chewy/tags_index.rb create mode 100644 app/services/tag_search_service.rb create mode 100644 db/migrate/20190815225426_add_last_status_at_to_tags.rb (limited to 'app/chewy') diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb new file mode 100644 index 000000000..300fc128f --- /dev/null +++ b/app/chewy/tags_index.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +class TagsIndex < Chewy::Index + settings index: { refresh_interval: '15m' }, analysis: { + analyzer: { + content: { + tokenizer: 'keyword', + filter: %w(lowercase asciifolding cjk_width), + }, + + edge_ngram: { + tokenizer: 'edge_ngram', + filter: %w(lowercase asciifolding cjk_width), + }, + }, + + tokenizer: { + edge_ngram: { + type: 'edge_ngram', + min_gram: 2, + max_gram: 15, + }, + }, + } + + define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do + root date_detection: false do + field :name, type: 'text', analyzer: 'content' do + field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + end + + field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? } + field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } } + field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } + end + end +end diff --git a/app/models/tag.rb b/app/models/tag.rb index 1364d1dba..5094d973d 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -13,6 +13,8 @@ # listable :boolean # reviewed_at :datetime # requested_review_at :datetime +# last_status_at :datetime +# last_trend_at :datetime # class Tag < ApplicationRecord @@ -33,7 +35,8 @@ class Tag < ApplicationRecord scope :unreviewed, -> { where(reviewed_at: nil) } scope :pending_review, -> { unreviewed.where.not(requested_review_at: nil) } scope :usable, -> { where(usable: [true, nil]) } - scope :discoverable, -> { where(listable: [true, nil]).joins(:account_tag_stat).where(AccountTagStat.arel_table[:accounts_count].gt(0)).order(Arel.sql('account_tag_stats.accounts_count desc')) } + scope :listable, -> { where(listable: [true, nil]) } + scope :discoverable, -> { listable.joins(:account_tag_stat).where(AccountTagStat.arel_table[:accounts_count].gt(0)).order(Arel.sql('account_tag_stats.accounts_count desc')) } scope :most_used, ->(account) { joins(:statuses).where(statuses: { account: account }).group(:id).order(Arel.sql('count(*) desc')) } delegate :accounts_count, @@ -44,6 +47,8 @@ class Tag < ApplicationRecord after_save :save_account_tag_stat + update_index('tags#tag', :self) if Chewy.enabled? + def account_tag_stat super || build_account_tag_stat end @@ -121,9 +126,10 @@ class Tag < ApplicationRecord normalized_term = normalize(term.strip).mb_chars.downcase.to_s pattern = sanitize_sql_like(normalized_term) + '%' - Tag.where(arel_table[:name].lower.matches(pattern)) - .where(arel_table[:score].gt(0).or(arel_table[:name].lower.eq(normalized_term))) - .order(Arel.sql('length(name) ASC, score DESC, name ASC')) + Tag.listable + .where(arel_table[:name].lower.matches(pattern)) + .where(arel_table[:name].lower.eq(normalized_term).or(arel_table[:reviewed_at].not_eq(nil))) + .order(Arel.sql('length(name) ASC, name ASC')) .limit(limit) .offset(offset) end diff --git a/app/models/trending_tags.rb b/app/models/trending_tags.rb index 3d60a7fea..e4ce988c1 100644 --- a/app/models/trending_tags.rb +++ b/app/models/trending_tags.rb @@ -17,6 +17,9 @@ class TrendingTags increment_historical_use!(tag.id, at_time) increment_unique_use!(tag.id, account.id, at_time) increment_vote!(tag, at_time) + + tag.update(last_status_at: Time.now.utc) if tag.last_status_at.nil? || tag.last_status_at < 12.hours.ago + tag.update(last_trend_at: Time.now.utc) if trending?(tag) && (tag.last_trend_at.nil? || tag.last_trend_at < 12.hours.ago) end def get(limit, filtered: true) diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb index d7bccdfe0..01caaefa9 100644 --- a/app/services/account_search_service.rb +++ b/app/services/account_search_service.rb @@ -109,7 +109,7 @@ class AccountSearchService < BaseService field_value_factor: { field: 'followers_count', modifier: 'log2p', - missing: 1, + missing: 0, }, } end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 786d34b15..fe601bbf4 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -57,10 +57,10 @@ class SearchService < BaseService end def perform_hashtags_search! - Tag.search_for( - @query.gsub(/\A#/, ''), - @limit, - @offset + TagSearchService.new.call( + @query, + limit: @limit, + offset: @offset ) end diff --git a/app/services/tag_search_service.rb b/app/services/tag_search_service.rb new file mode 100644 index 000000000..64dd76bb7 --- /dev/null +++ b/app/services/tag_search_service.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +class TagSearchService < BaseService + def call(query, options = {}) + @query = query.strip.gsub(/\A#/, '') + @offset = options[:offset].to_i + @limit = options[:limit].to_i + + if Chewy.enabled? + from_elasticsearch + else + from_database + end + end + + private + + def from_elasticsearch + query = { + function_score: { + query: { + multi_match: { + query: @query, + fields: %w(name.edge_ngram name), + type: 'most_fields', + operator: 'and', + }, + }, + + functions: [ + { + field_value_factor: { + field: 'usage', + modifier: 'log2p', + missing: 0, + }, + }, + + { + gauss: { + last_status_at: { + scale: '7d', + offset: '14d', + decay: 0.5, + }, + }, + }, + ], + + boost_mode: 'multiply', + }, + } + + filter = { + bool: { + should: [ + { + term: { + reviewed: { + value: true, + }, + }, + }, + + { + term: { + name: { + value: @query, + }, + }, + }, + ], + }, + } + + TagsIndex.query(query).filter(filter).limit(@limit).offset(@offset).objects.compact + end + + def from_database + Tag.search_for(@query, @limit, @offset) + end +end diff --git a/config/locales/simple_form.en.yml b/config/locales/simple_form.en.yml index e15d5904f..98f0843d0 100644 --- a/config/locales/simple_form.en.yml +++ b/config/locales/simple_form.en.yml @@ -142,7 +142,7 @@ en: report: Send e-mail when a new report is submitted trending_tag: Send e-mail when an unreviewed hashtag is trending tag: - listable: Allow this hashtag to appear on the profile directory + listable: Allow this hashtag to appear in searches and on the profile directory trendable: Allow this hashtag to appear under trends usable: Allow toots to use this hashtag 'no': 'No' diff --git a/db/migrate/20190815225426_add_last_status_at_to_tags.rb b/db/migrate/20190815225426_add_last_status_at_to_tags.rb new file mode 100644 index 000000000..d83537c47 --- /dev/null +++ b/db/migrate/20190815225426_add_last_status_at_to_tags.rb @@ -0,0 +1,6 @@ +class AddLastStatusAtToTags < ActiveRecord::Migration[5.2] + def change + add_column :tags, :last_status_at, :datetime + add_column :tags, :last_trend_at, :datetime + end +end diff --git a/db/schema.rb b/db/schema.rb index f8fc6a821..0da20d62f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_08_07_135426) do +ActiveRecord::Schema.define(version: 2019_08_15_225426) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -667,6 +667,8 @@ ActiveRecord::Schema.define(version: 2019_08_07_135426) do t.boolean "listable" t.datetime "reviewed_at" t.datetime "requested_review_at" + t.datetime "last_status_at" + t.datetime "last_trend_at" t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true end diff --git a/spec/models/tag_spec.rb b/spec/models/tag_spec.rb index 9d700849b..2bb30fb57 100644 --- a/spec/models/tag_spec.rb +++ b/spec/models/tag_spec.rb @@ -136,8 +136,8 @@ RSpec.describe Tag, type: :model do end it 'finds the exact matching tag as the first item' do - similar_tag = Fabricate(:tag, name: "matchlater", score: 1) - tag = Fabricate(:tag, name: "match", score: 1) + similar_tag = Fabricate(:tag, name: "matchlater", reviewed_at: Time.now.utc) + tag = Fabricate(:tag, name: "match", reviewed_at: Time.now.utc) results = Tag.search_for("match") -- cgit From 575dc11cb2045c32b8fc0325de7bc321bd4728aa Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Wed, 2 Oct 2019 20:04:46 +0200 Subject: Fix needlessly indexing unsearchable statuses into ElasticSearch (#12041) --- app/chewy/statuses_index.rb | 8 ++++---- app/models/status.rb | 10 ++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'app/chewy') diff --git a/app/chewy/statuses_index.rb b/app/chewy/statuses_index.rb index b7365d5ca..f5735421c 100644 --- a/app/chewy/statuses_index.rb +++ b/app/chewy/statuses_index.rb @@ -31,19 +31,19 @@ class StatusesIndex < Chewy::Index }, } - define_type ::Status.unscoped.without_reblogs.includes(:media_attachments) do + define_type ::Status.unscoped.kept.without_reblogs.includes(:media_attachments), delete_if: ->(status) { status.searchable_by.empty? } do crutch :mentions do |collection| - data = ::Mention.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id) + data = ::Mention.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id) data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } end crutch :favourites do |collection| - data = ::Favourite.where(status_id: collection.map(&:id)).pluck(:status_id, :account_id) + data = ::Favourite.where(status_id: collection.map(&:id)).where(account: Account.local).pluck(:status_id, :account_id) data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } end crutch :reblogs do |collection| - data = ::Status.where(reblog_of_id: collection.map(&:id)).pluck(:reblog_of_id, :account_id) + data = ::Status.where(reblog_of_id: collection.map(&:id)).where(account: Account.local).pluck(:reblog_of_id, :account_id) data.each.with_object({}) { |(id, name), result| (result[id] ||= []).push(name) } end diff --git a/app/models/status.rb b/app/models/status.rb index 078a64566..3504ac370 100644 --- a/app/models/status.rb +++ b/app/models/status.rb @@ -129,12 +129,14 @@ class Status < ApplicationRecord REAL_TIME_WINDOW = 6.hours def searchable_by(preloaded = nil) - ids = [account_id] + ids = [] + + ids << account_id if local? if preloaded.nil? - ids += mentions.pluck(:account_id) - ids += favourites.pluck(:account_id) - ids += reblogs.pluck(:account_id) + ids += mentions.where(account: Account.local).pluck(:account_id) + ids += favourites.where(account: Account.local).pluck(:account_id) + ids += reblogs.where(account: Account.local).pluck(:account_id) else ids += preloaded.mentions[id] || [] ids += preloaded.favourites[id] || [] -- cgit