diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2019-08-18 03:45:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-18 03:45:51 +0200 |
commit | cc0a55cf9aead00e1cb044649f84c2187e0e4a35 (patch) | |
tree | 57c0920e813d7c8df9a0f483f111d2808f5045d3 /app/services | |
parent | 3a77090d015203a1ae20376ed69ca699eed3976d (diff) |
Add more accurate hashtag search (#11579)
* Add more accurate hashtag search: use Elasticsearch to index hashtags with edge n-grams and score them by usage within the last 7 days since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly. * Fix search analyzer dropping non-ASCII characters.
Diffstat (limited to 'app/services')
-rw-r--r-- | app/services/account_search_service.rb | 2 | ||||
-rw-r--r-- | app/services/search_service.rb | 8 | ||||
-rw-r--r-- | app/services/tag_search_service.rb | 82 |
3 files changed, 87 insertions, 5 deletions
diff --git a/app/services/account_search_service.rb b/app/services/account_search_service.rb index d7bccdfe0..01caaefa9 100644 --- a/app/services/account_search_service.rb +++ b/app/services/account_search_service.rb @@ -109,7 +109,7 @@ class AccountSearchService < BaseService field_value_factor: { field: 'followers_count', modifier: 'log2p', - missing: 1, + missing: 0, }, } end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index 786d34b15..fe601bbf4 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -57,10 +57,10 @@ class SearchService < BaseService end def perform_hashtags_search! - Tag.search_for( - @query.gsub(/\A#/, ''), - @limit, - @offset + TagSearchService.new.call( + @query, + limit: @limit, + offset: @offset ) end diff --git a/app/services/tag_search_service.rb b/app/services/tag_search_service.rb new file mode 100644 index 000000000..64dd76bb7 --- /dev/null +++ b/app/services/tag_search_service.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +class TagSearchService < BaseService + def call(query, options = {}) + @query = query.strip.gsub(/\A#/, '') + @offset = options[:offset].to_i + @limit = options[:limit].to_i + + if Chewy.enabled? 
+ from_elasticsearch + else + from_database + end + end + + private + + def from_elasticsearch + query = { + function_score: { + query: { + multi_match: { + query: @query, + fields: %w(name.edge_ngram name), + type: 'most_fields', + operator: 'and', + }, + }, + + functions: [ + { + field_value_factor: { + field: 'usage', + modifier: 'log2p', + missing: 0, + }, + }, + + { + gauss: { + last_status_at: { + scale: '7d', + offset: '14d', + decay: 0.5, + }, + }, + }, + ], + + boost_mode: 'multiply', + }, + } + + filter = { + bool: { + should: [ + { + term: { + reviewed: { + value: true, + }, + }, + }, + + { + term: { + name: { + value: @query, + }, + }, + }, + ], + }, + } + + TagsIndex.query(query).filter(filter).limit(@limit).offset(@offset).objects.compact + end + + def from_database + Tag.search_for(@query, @limit, @offset) + end +end |