From b9fbcbfe4e0a15fcf8a457ce17ea080f0eb939fc Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sat, 27 Jul 2019 04:42:08 +0200 Subject: Add search syntax for operators and phrases (#11411) --- app/lib/search_query_parser.rb | 14 ++++++ app/lib/search_query_transformer.rb | 86 +++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 app/lib/search_query_parser.rb create mode 100644 app/lib/search_query_transformer.rb (limited to 'app/lib') diff --git a/app/lib/search_query_parser.rb b/app/lib/search_query_parser.rb new file mode 100644 index 000000000..405ad15b8 --- /dev/null +++ b/app/lib/search_query_parser.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +class SearchQueryParser < Parslet::Parser + rule(:term) { match('[^\s":]').repeat(1).as(:term) } + rule(:quote) { str('"') } + rule(:colon) { str(':') } + rule(:space) { match('\s').repeat(1) } + rule(:operator) { (str('+') | str('-')).as(:operator) } + rule(:prefix) { (term >> colon).as(:prefix) } + rule(:phrase) { (quote >> (term >> space.maybe).repeat >> quote).as(:phrase) } + rule(:clause) { (prefix.maybe >> operator.maybe >> (phrase | term)).as(:clause) } + rule(:query) { (clause >> space.maybe).repeat.as(:query) } + root(:query) +end diff --git a/app/lib/search_query_transformer.rb b/app/lib/search_query_transformer.rb new file mode 100644 index 000000000..2c4144790 --- /dev/null +++ b/app/lib/search_query_transformer.rb @@ -0,0 +1,86 @@ +# frozen_string_literal: true + +class SearchQueryTransformer < Parslet::Transform + class Query + attr_reader :should_clauses, :must_not_clauses, :must_clauses + + def initialize(clauses) + grouped = clauses.chunk(&:operator).to_h + @should_clauses = grouped.fetch(:should, []) + @must_not_clauses = grouped.fetch(:must_not, []) + @must_clauses = grouped.fetch(:must, []) + end + + def apply(search) + should_clauses.each { |clause| search = search.query.should(clause_to_query(clause)) } + must_clauses.each { |clause| search = search.query.must(clause_to_query(clause)) } + must_not_clauses.each { |clause| search = search.query.must_not(clause_to_query(clause)) } + search.query.minimum_should_match(1) + end + + private + + def clause_to_query(clause) + case clause + when TermClause + { multi_match: { type: 'most_fields', query: clause.term, fields: ['text', 'text.stemmed'] } } + when PhraseClause + { match_phrase: { text: { query: clause.phrase } } } + else + raise "Unexpected clause type: #{clause}" + end + end + end + + class Operator + class << self + def symbol(str) + case str + when '+' + :must + when '-' + :must_not + when nil + :should + else + raise "Unknown operator: #{str}" + end + end + end + end + + class TermClause + attr_reader :prefix, :operator, :term + + def initialize(prefix, operator, term) + @prefix = prefix + @operator = Operator.symbol(operator) + @term = term + end + end + + class PhraseClause + attr_reader :prefix, :operator, :phrase + + def initialize(prefix, operator, phrase) + @prefix = prefix + @operator = Operator.symbol(operator) + @phrase = phrase + end + end + + rule(clause: subtree(:clause)) do + prefix = clause[:prefix][:term].to_s if clause[:prefix] + operator = clause[:operator]&.to_s + + if clause[:term] + TermClause.new(prefix, operator, clause[:term].to_s) + elsif clause[:phrase] + PhraseClause.new(prefix, operator, clause[:phrase].map { |p| p[:term].to_s }.join(' ')) + else + raise "Unexpected clause type: #{clause}" + end + end + + rule(query: sequence(:clauses)) { Query.new(clauses) } +end -- cgit From f371b32137ccd7e74ca29d25af2072fb79654b15 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Sun, 28 Jul 2019 05:59:51 +0200 Subject: Change hashtags to preserve first-used casing (#11416) --- app/lib/activitypub/activity/create.rb | 9 ++---- app/models/tag.rb | 34 +++++++++++++++++++--- app/services/hashtag_query_service.rb | 4 +-- app/services/process_hashtags_service.rb | 4 +-- ...726175042_add_case_insensitive_index_to_tags.rb | 15 ++++++++++ db/schema.rb | 5 ++-- 6 files changed, 53 insertions(+), 18 deletions(-) create mode 100644 db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb (limited to 'app/lib') diff --git a/app/lib/activitypub/activity/create.rb b/app/lib/activitypub/activity/create.rb index 56c24680a..000b77df5 100644 --- a/app/lib/activitypub/activity/create.rb +++ b/app/lib/activitypub/activity/create.rb @@ -148,12 +148,9 @@ class ActivityPub::Activity::Create < ActivityPub::Activity def process_hashtag(tag) return if tag['name'].blank? - hashtag = tag['name'].gsub(/\A#/, '').mb_chars.downcase - hashtag = Tag.where(name: hashtag).first_or_create!(name: hashtag) - - return if @tags.include?(hashtag) - - @tags << hashtag + Tag.find_or_create_by_names(tag['name']) do |hashtag| + @tags << hashtag unless @tags.include?(hashtag) + end rescue ActiveRecord::RecordInvalid nil end diff --git a/app/models/tag.rb b/app/models/tag.rb index b371d59c1..972242064 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -20,7 +20,7 @@ class Tag < ApplicationRecord HASHTAG_NAME_RE = '([[:word:]_][[:word:]_·]*[[:alpha:]_·][[:word:]_·]*[[:word:]_])|([[:word:]_]*[[:alpha:]][[:word:]_]*)' HASHTAG_RE = /(?:^|[^\/\)\w])#(#{HASHTAG_NAME_RE})/i - validates :name, presence: true, uniqueness: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i } + validates :name, presence: true, format: { with: /\A(#{HASHTAG_NAME_RE})\z/i } scope :discoverable, -> { joins(:account_tag_stat).where(AccountTagStat.arel_table[:accounts_count].gt(0)).where(account_tag_stats: { hidden: false }).order(Arel.sql('account_tag_stats.accounts_count desc')) } scope :hidden, -> { where(account_tag_stats: { hidden: true }) } @@ -64,22 +64,48 @@ class Tag < ApplicationRecord end class << self + def find_or_create_by_names(name_or_names) + Array(name_or_names).map(&method(:normalize)).uniq.map do |normalized_name| + tag = matching_name(normalized_name).first || create(name: normalized_name) + + yield tag if block_given? + + tag + end + end + def search_for(term, limit = 5, offset = 0) - pattern = sanitize_sql_like(term.strip) + '%' + pattern = sanitize_sql_like(normalize(term.strip)) + '%' - Tag.where('lower(name) like lower(?)', pattern) + Tag.where(arel_table[:name].lower.matches(pattern.downcase)) .order(:name) .limit(limit) .offset(offset) end def find_normalized(name) - find_by(name: name.mb_chars.downcase.to_s) + matching_name(name).first end def find_normalized!(name) find_normalized(name) || raise(ActiveRecord::RecordNotFound) end + + def matching_name(name_or_names) + names = Array(name_or_names).map { |name| normalize(name).downcase } + + if names.size == 1 + where(arel_table[:name].lower.eq(names.first)) + else + where(arel_table[:name].lower.in(names)) + end + end + + private + + def normalize(str) + str.gsub(/\A#/, '').mb_chars.to_s + end end private diff --git a/app/services/hashtag_query_service.rb b/app/services/hashtag_query_service.rb index 5773d78c6..282821710 100644 --- a/app/services/hashtag_query_service.rb +++ b/app/services/hashtag_query_service.rb @@ -14,7 +14,7 @@ class HashtagQueryService < BaseService private - def tags_for(tags) - Tag.where(name: tags.map(&:downcase)) if tags.presence + def tags_for(names) + Tag.matching_name(names) if names.presence end end diff --git a/app/services/process_hashtags_service.rb b/app/services/process_hashtags_service.rb index b6974e598..e8e139b05 100644 --- a/app/services/process_hashtags_service.rb +++ b/app/services/process_hashtags_service.rb @@ -5,9 +5,7 @@ class ProcessHashtagsService < BaseService tags = Extractor.extract_hashtags(status.text) if status.local? records = [] - tags.map { |str| str.mb_chars.downcase }.uniq(&:to_s).each do |name| - tag = Tag.where(name: name).first_or_create(name: name) - + Tag.find_or_create_by_names(tags) do |tag| status.tags << tag records << tag diff --git a/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb new file mode 100644 index 000000000..6fa8c0ec4 --- /dev/null +++ b/db/migrate/20190726175042_add_case_insensitive_index_to_tags.rb @@ -0,0 +1,15 @@ +class AddCaseInsensitiveIndexToTags < ActiveRecord::Migration[5.2] + disable_ddl_transaction! + + def up + safety_assured { execute 'CREATE UNIQUE INDEX CONCURRENTLY index_tags_on_name_lower ON tags (lower(name))' } + remove_index :tags, name: 'index_tags_on_name' + remove_index :tags, name: 'hashtag_search_index' + end + + def down + add_index :tags, :name, unique: true, algorithm: :concurrently + safety_assured { execute 'CREATE INDEX CONCURRENTLY hashtag_search_index ON tags (name text_pattern_ops)' } + remove_index :tags, name: 'index_tags_on_name_lower' + end +end diff --git a/db/schema.rb b/db/schema.rb index 6319dd932..1847305c7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_07_15_164535) do +ActiveRecord::Schema.define(version: 2019_07_26_175042) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -652,8 +652,7 @@ ActiveRecord::Schema.define(version: 2019_07_15_164535) do t.string "name", default: "", null: false t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index "lower((name)::text) text_pattern_ops", name: "hashtag_search_index" - t.index ["name"], name: "index_tags_on_name", unique: true + t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true end create_table "tombstones", force: :cascade do |t| -- cgit