about summary refs log tree commit diff
path: root/app/services/tag_search_service.rb
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2019-08-18 03:45:51 +0200
committer GitHub <noreply@github.com> 2019-08-18 03:45:51 +0200
commit cc0a55cf9aead00e1cb044649f84c2187e0e4a35 (patch)
tree 57c0920e813d7c8df9a0f483f111d2808f5045d3 /app/services/tag_search_service.rb
parent 3a77090d015203a1ae20376ed69ca699eed3976d (diff)
Add more accurate hashtag search (#11579)
* Add more accurate hashtag search

Use Elasticsearch to index hashtags with edge n-grams and score
them by usage within the last 7 days since last activity. Only
hashtags that have been reviewed and are listable can appear in
searches, unless they match the query exactly.

* Fix search analyzer dropping non-ascii characters
Diffstat (limited to 'app/services/tag_search_service.rb')
-rw-r--r-- app/services/tag_search_service.rb 82
1 files changed, 82 insertions, 0 deletions
diff --git a/app/services/tag_search_service.rb b/app/services/tag_search_service.rb
new file mode 100644
index 000000000..64dd76bb7
--- /dev/null
+++ b/app/services/tag_search_service.rb
@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+
+class TagSearchService < BaseService
+  def call(query, options = {})
+    @query  = query.strip.gsub(/\A#/, '')
+    @offset = options[:offset].to_i
+    @limit  = options[:limit].to_i
+
+    if Chewy.enabled?
+      from_elasticsearch
+    else
+      from_database
+    end
+  end
+
+  private
+
+  def from_elasticsearch
+    query = {
+      function_score: {
+        query: {
+          multi_match: {
+            query: @query,
+            fields: %w(name.edge_ngram name),
+            type: 'most_fields',
+            operator: 'and',
+          },
+        },
+
+        functions: [
+          {
+            field_value_factor: {
+              field: 'usage',
+              modifier: 'log2p',
+              missing: 0,
+            },
+          },
+
+          {
+            gauss: {
+              last_status_at: {
+                scale: '7d',
+                offset: '14d',
+                decay: 0.5,
+              },
+            },
+          },
+        ],
+
+        boost_mode: 'multiply',
+      },
+    }
+
+    filter = {
+      bool: {
+        should: [
+          {
+            term: {
+              reviewed: {
+                value: true,
+              },
+            },
+          },
+
+          {
+            term: {
+              name: {
+                value: @query,
+              },
+            },
+          },
+        ],
+      },
+    }
+
+    TagsIndex.query(query).filter(filter).limit(@limit).offset(@offset).objects.compact
+  end
+
+  def from_database
+    Tag.search_for(@query, @limit, @offset)
+  end
+end