From cc0a55cf9aead00e1cb044649f84c2187e0e4a35 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 18 Aug 2019 03:45:51 +0200 Subject: Add more accurate hashtag search (#11579) * Add more accurate hashtag search Using ElasticSearch to index hashtags with edge n-grams and score them by usage within the last 7 days since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly * Fix search analyzer dropping non-ascii characters --- app/chewy/tags_index.rb | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 app/chewy/tags_index.rb (limited to 'app/chewy') diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb new file mode 100644 index 000000000..300fc128f --- /dev/null +++ b/app/chewy/tags_index.rb @@ -0,0 +1,37 @@ +# frozen_string_literal: true + +class TagsIndex < Chewy::Index + settings index: { refresh_interval: '15m' }, analysis: { + analyzer: { + content: { + tokenizer: 'keyword', + filter: %w(lowercase asciifolding cjk_width), + }, + + edge_ngram: { + tokenizer: 'edge_ngram', + filter: %w(lowercase asciifolding cjk_width), + }, + }, + + tokenizer: { + edge_ngram: { + type: 'edge_ngram', + min_gram: 2, + max_gram: 15, + }, + }, + } + + define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do + root date_detection: false do + field :name, type: 'text', analyzer: 'content' do + field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' + end + + field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? } + field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } } + field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } + end + end +end -- cgit