diff options
author | Eugen Rochko <eugen@zeonfederated.com> | 2019-08-18 03:45:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-18 03:45:51 +0200 |
commit | cc0a55cf9aead00e1cb044649f84c2187e0e4a35 (patch) | |
tree | 57c0920e813d7c8df9a0f483f111d2808f5045d3 /app/chewy | |
parent | 3a77090d015203a1ae20376ed69ca699eed3976d (diff) |
Add more accurate hashtag search (#11579)
* Add more accurate hashtag search Using ElasticSearch to index hashtags with edge n-grams and score them by usage within the last 7 days since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly * Fix search analyzer dropping non-ascii characters
Diffstat (limited to 'app/chewy')
-rw-r--r-- | app/chewy/tags_index.rb | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb new file mode 100644 index 000000000..300fc128f --- /dev/null +++ b/app/chewy/tags_index.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +class TagsIndex < Chewy::Index + settings index: { refresh_interval: '15m' }, analysis: { + analyzer: { + content: { + tokenizer: 'keyword', + filter: %w(lowercase asciifolding cjk_width), + }, + + edge_ngram: { + tokenizer: 'edge_ngram', + filter: %w(lowercase asciifolding cjk_width), + }, + }, + + tokenizer: { + edge_ngram: { + type: 'edge_ngram', + min_gram: 2, + max_gram: 15, + }, + }, + } + + define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do + root date_detection: false do + field :name, type: 'text', analyzer: 'content' do + field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + end + + field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? } + field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } } + field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } + end + end +end |