about summary refs log tree commit diff
path: root/app/chewy
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2019-08-18 03:45:51 +0200
committer GitHub <noreply@github.com> 2019-08-18 03:45:51 +0200
commit cc0a55cf9aead00e1cb044649f84c2187e0e4a35 (patch)
tree 57c0920e813d7c8df9a0f483f111d2808f5045d3 /app/chewy
parent 3a77090d015203a1ae20376ed69ca699eed3976d (diff)
Add more accurate hashtag search (#11579)
* Add more accurate hashtag search

Using ElasticSearch to index hashtags with edge n-grams and score
them by usage within the last 7 days since last activity. Only
hashtags that have been reviewed and are listable can appear in
searches, unless they match the query exactly

* Fix search analyzer dropping non-ascii characters
Diffstat (limited to 'app/chewy')
-rw-r--r-- app/chewy/tags_index.rb 37
1 files changed, 37 insertions, 0 deletions
diff --git a/app/chewy/tags_index.rb b/app/chewy/tags_index.rb
new file mode 100644
index 000000000..300fc128f
--- /dev/null
+++ b/app/chewy/tags_index.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+class TagsIndex < Chewy::Index # Chewy index definition for hashtag (Tag) records
+  settings index: { refresh_interval: '15m' }, analysis: { # index refresh interval plus the custom analysis config used by the fields below
+    analyzer: {
+      content: { # keyword tokenizer, then lowercase / asciifolding / cjk_width token filters
+        tokenizer: 'keyword',
+        filter: %w(lowercase asciifolding cjk_width),
+      },
+
+      edge_ngram: { # same filters as `content`, but tokenized by the custom `edge_ngram` tokenizer defined below
+        tokenizer: 'edge_ngram',
+        filter: %w(lowercase asciifolding cjk_width),
+      },
+    },
+
+    tokenizer: {
+      edge_ngram: { # custom tokenizer of type edge_ngram with gram lengths from 2 to 15
+        type: 'edge_ngram',
+        min_gram: 2,
+        max_gram: 15,
+      },
+    },
+  }
+
+  define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do # built from the Tag.listable scope; the delete_if lambda is true for destroyed or non-listable tags
+    root date_detection: false do
+      field :name, type: 'text', analyzer: 'content' do
+        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' # sub-field of :name: indexed with `edge_ngram`, queried with `content`
+      end
+
+      field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }
+      field :usage, type: 'long', value: ->(tag) { tag.history.reduce(0) { |total, day| total + day[:accounts].to_i } } # sum of day[:accounts] (coerced with to_i) over every entry of tag.history
+      field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at } # falls back to created_at when last_status_at is falsy (e.g. nil)
+    end
+  end
+end