about summary refs log tree commit diff
path: root/app/services/account_search_service.rb
diff options
context:
space:
mode:
authorEugen Rochko <eugen@zeonfederated.com>2019-08-16 01:24:03 +0200
committerGitHub <noreply@github.com>2019-08-16 01:24:03 +0200
commit8fdff2748ffbc4ce3d36c75f2bd33182b641e895 (patch)
treeec8d1455c492e9b454ddf55dcb914bbe37007d6b /app/services/account_search_service.rb
parent2ca6b2bb6c9e811ad98e3df23e70efbf22882e42 (diff)
Add more accurate account search (#11537)
* Add more accurate account search

When ElasticSearch is available, a more accurate search is implemented:

- Using edge n-gram index for acct and display name
- Using asciifolding and cjk width normalization on display names
- Using Gaussian decay on account activity for additional scoring (recency)
- Using followers/friends ratio for additional scoring (spamminess)
- Using followers number for additional scoring (size)

The exact match precedence only takes effect when the input conforms
to the username format and the username part of it is complete, i.e.
when the user started typing the domain part.

* Support single-letter usernames

* Fix tests

* Fix not picking up account updates

* Add weights and normalization for scores, skip zero terms queries

* Use local counts for accounts index, adjust search parameters

* Fix mistakes

* Using updated_at of accounts is inadequate for remote accounts
Diffstat (limited to 'app/services/account_search_service.rb')
-rw-r--r--app/services/account_search_service.rb189
1 files changed, 125 insertions, 64 deletions
diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb
index e1874d045..2d602a31d 100644
--- a/app/services/account_search_service.rb
+++ b/app/services/account_search_service.rb
@@ -4,105 +4,134 @@ class AccountSearchService < BaseService
   attr_reader :query, :limit, :offset, :options, :account
 
   def call(query, account = nil, options = {})
-    @query   = query.strip
-    @limit   = options[:limit].to_i
-    @offset  = options[:offset].to_i
-    @options = options
-    @account = account
+    @acct_hint = query.start_with?('@')
+    @query     = query.strip.gsub(/\A@/, '')
+    @limit     = options[:limit].to_i
+    @offset    = options[:offset].to_i
+    @options   = options
+    @account   = account
 
-    search_service_results
+    search_service_results.compact.uniq
   end
 
   private
 
   def search_service_results
-    return [] if query_blank_or_hashtag? || limit < 1
+    return [] if query.blank? || limit < 1
 
-    if resolving_non_matching_remote_account?
-      [ResolveAccountService.new.call("#{query_username}@#{query_domain}")].compact
-    else
-      search_results_and_exact_match.compact.uniq
-    end
+    [exact_match] + search_results
   end
 
-  def resolving_non_matching_remote_account?
-    offset.zero? && options[:resolve] && !exact_match? && !domain_is_local?
-  end
+  def exact_match
+    return unless offset.zero? && username_complete?
 
-  def search_results_and_exact_match
-    return search_results.to_a unless offset.zero?
+    return @exact_match if defined?(@exact_match)
 
-    results = [exact_match]
+    @exact_match = begin
+      if options[:resolve]
+        ResolveAccountService.new.call(query)
+      elsif domain_is_local?
+        Account.find_local(query_username)
+      else
+        Account.find_remote(query_username, query_domain)
+      end
+    end
+  end
 
-    return results if exact_match? && limit == 1
+  def search_results
+    return [] if limit_for_non_exact_results.zero?
 
-    results + search_results.to_a
+    @search_results ||= begin
+      if Chewy.enabled?
+        from_elasticsearch
+      else
+        from_database
+      end
+    end
   end
 
-  def query_blank_or_hashtag?
-    query.blank? || query.start_with?('#')
+  def from_database
+    if account
+      advanced_search_results
+    else
+      simple_search_results
+    end
   end
 
-  def split_query_string
-    @split_query_string ||= query.gsub(/\A@/, '').split('@')
+  def advanced_search_results
+    Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
   end
 
-  def query_username
-    @query_username ||= split_query_string.first || ''
+  def simple_search_results
+    Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
   end
 
-  def query_domain
-    @query_domain ||= query_without_split? ? nil : split_query_string.last
-  end
+  def from_elasticsearch
+    must_clauses   = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct) : %w(acct^2 display_name), type: 'best_fields' } }]
+    should_clauses = []
 
-  def query_without_split?
-    split_query_string.size == 1
-  end
+    if account
+      return [] if options[:following] && following_ids.empty?
 
-  def domain_is_local?
-    @domain_is_local ||= TagManager.instance.local_domain?(query_domain)
-  end
+      if options[:following]
+        must_clauses << { terms: { id: following_ids } }
+      elsif following_ids.any?
+        should_clauses << { terms: { id: following_ids, boost: 100 } }
+      end
+    end
 
-  def search_from
-    options[:following] && account ? account.following : Account
-  end
+    query     = { bool: { must: must_clauses, should: should_clauses } }
+    functions = [reputation_score_function, followers_score_function, time_distance_function]
 
-  def exact_match?
-    exact_match.present?
-  end
+    records = AccountsIndex.query(function_score: { query: query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
+                           .limit(limit_for_non_exact_results)
+                           .offset(offset)
+                           .objects
+                           .compact
 
-  def exact_match
-    return @exact_match if defined?(@exact_match)
+    ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)
 
-    @exact_match = begin
-      if domain_is_local?
-        search_from.without_suspended.find_local(query_username)
-      else
-        search_from.without_suspended.find_remote(query_username, query_domain)
-      end
-    end
+    records
   end
 
-  def search_results
-    @search_results ||= begin
-      if account
-        advanced_search_results
-      else
-        simple_search_results
-      end
-    end
+  def reputation_score_function
+    {
+      script_score: {
+        script: {
+          source: "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)",
+        },
+      },
+    }
   end
 
-  def advanced_search_results
-    Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
+  def followers_score_function
+    {
+      field_value_factor: {
+        field: 'followers_count',
+        modifier: 'log2p',
+        missing: 1,
+      },
+    }
   end
 
-  def simple_search_results
-    Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
+  def time_distance_function
+    {
+      gauss: {
+        last_status_at: {
+          scale: '30d',
+          offset: '30d',
+          decay: 0.3,
+        },
+      },
+    }
+  end
+
+  def following_ids
+    @following_ids ||= account.active_relationships.pluck(:target_account_id)
   end
 
   def limit_for_non_exact_results
-    if offset.zero? && exact_match?
+    if exact_match?
       limit - 1
     else
       limit
@@ -113,7 +142,39 @@ class AccountSearchService < BaseService
     if domain_is_local?
       query_username
     else
-      "#{query_username} #{query_domain}"
+      query
     end
   end
+
+  def split_query_string
+    @split_query_string ||= query.split('@')
+  end
+
+  def query_username
+    @query_username ||= split_query_string.first || ''
+  end
+
+  def query_domain
+    @query_domain ||= query_without_split? ? nil : split_query_string.last
+  end
+
+  def query_without_split?
+    split_query_string.size == 1
+  end
+
+  def domain_is_local?
+    @domain_is_local ||= TagManager.instance.local_domain?(query_domain)
+  end
+
+  def exact_match?
+    exact_match.present?
+  end
+
+  def username_complete?
+    query.include?('@') && "@#{query}" =~ Account::MENTION_RE
+  end
+
+  def likely_acct?
+    @acct_hint || username_complete?
+  end
 end