Add more accurate hashtag search (#11579)
* Add more accurate hashtag search

  Using Elasticsearch to index hashtags with edge n-grams and score them by usage within the last 7 days and by time since last activity. Only hashtags that have been reviewed and are listable can appear in searches, unless they match the query exactly.

* Fix search analyzer dropping non-ASCII characters

master
parent
3a77090d01
commit
cc0a55cf9a
@ -0,0 +1,37 @@ |
||||
# frozen_string_literal: true |
||||
|
||||
# Chewy index definition for hashtags.
#
# Each indexed tag exposes its name (whole-name and edge n-gram variants),
# its review state, a usage counter and the time of its last activity.
class TagsIndex < Chewy::Index
  # Both analyzers apply the same token filters; they differ only in tokenizer.
  token_filters = %w(lowercase asciifolding cjk_width)

  analyzers = {
    # `keyword` tokenizer: the tag name is treated as one single token.
    content: {
      tokenizer: 'keyword',
      filter: token_filters,
    },

    # Uses the custom `edge_ngram` tokenizer declared below.
    edge_ngram: {
      tokenizer: 'edge_ngram',
      filter: token_filters,
    },
  }

  tokenizers = {
    edge_ngram: {
      type: 'edge_ngram',
      min_gram: 2,
      max_gram: 15,
    },
  }

  settings index: { refresh_interval: '15m' },
           analysis: { analyzer: analyzers, tokenizer: tokenizers }

  # Only listable tags are indexed; a tag is removed from the index once it
  # has been destroyed or is no longer listable.
  define_type ::Tag.listable, delete_if: ->(tag) { tag.destroyed? || !tag.listable? } do
    root date_detection: false do
      # `name` is indexed as a whole; the `name.edge_ngram` sub-field indexes
      # n-grams but analyzes search input with the `content` analyzer.
      field :name, type: 'text', analyzer: 'content' do
        field :edge_ngram, type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content'
      end

      field :reviewed, type: 'boolean', value: ->(tag) { tag.reviewed? }

      # Total of the `:accounts` counts across every entry of the tag history.
      field :usage, type: 'long', value: ->(tag) { tag.history.sum { |day| day[:accounts].to_i } }

      # Falls back to the creation time for tags that have no recorded status yet.
      field :last_status_at, type: 'date', value: ->(tag) { tag.last_status_at || tag.created_at }
    end
  end
end
@ -0,0 +1,82 @@ |
||||
# frozen_string_literal: true |
||||
|
||||
# Searches hashtags by name.
#
# Uses the Elasticsearch-backed TagsIndex when Chewy is enabled and falls back
# to Tag.search_for otherwise.
class TagSearchService < BaseService
  # @param query [String] search text; surrounding whitespace and a single
  #   leading "#" are ignored
  # @param options [Hash] recognizes :offset and :limit; both are coerced with
  #   `to_i`, so a missing value becomes 0
  # @return the matching tags, from Elasticsearch or from the database
  def call(query, options = {})
    # The pattern is anchored with \A, so at most one "#" is ever removed.
    @query  = query.strip.sub(/\A#/, '')
    @offset = options[:offset].to_i
    @limit  = options[:limit].to_i

    if Chewy.enabled?
      from_elasticsearch
    else
      from_database
    end
  end

  private

  # Runs the scored query against TagsIndex and loads the matching records.
  # `compact` drops hits whose record could not be loaded.
  def from_elasticsearch
    TagsIndex.query(elastic_search_query)
             .filter(elastic_search_filter)
             .limit(@limit)
             .offset(@offset)
             .objects
             .compact
  end

  # Text match on the tag name, with the relevance score multiplied by
  # usage and recency factors.
  def elastic_search_query
    {
      function_score: {
        query: {
          multi_match: {
            query: @query,
            fields: %w(name.edge_ngram name),
            type: 'most_fields',
            operator: 'and',
          },
        },

        functions: [
          # More heavily used tags score higher; log2p dampens large counts.
          {
            field_value_factor: {
              field: 'usage',
              modifier: 'log2p',
              missing: 0,
            },
          },

          # Tags without recent activity are scored down: no penalty inside
          # the 14-day offset, then the factor halves after a further 7 days.
          {
            gauss: {
              last_status_at: {
                scale: '7d',
                offset: '14d',
                decay: 0.5,
              },
            },
          },
        ],

        boost_mode: 'multiply',
      },
    }
  end

  # Restricts results to reviewed tags, plus any tag whose name matches the
  # query exactly.
  def elastic_search_filter
    {
      bool: {
        should: [
          {
            term: {
              reviewed: {
                value: true,
              },
            },
          },

          # `name` is an analyzed text field (lowercased and folded at index
          # time). A `term` query would compare the raw user input against the
          # normalized token and miss e.g. "Ruby" vs "ruby". `match` applies
          # the field's analyzer to the query; because that analyzer keeps the
          # name as a single token, this is still a whole-name match.
          {
            match: {
              name: {
                query: @query,
              },
            },
          },
        ],
      },
    }
  end

  def from_database
    Tag.search_for(@query, @limit, @offset)
  end
end
@ -0,0 +1,6 @@ |
||||
# Adds two nullable datetime columns to the tags table:
# last_status_at and last_trend_at.
class AddLastStatusAtToTags < ActiveRecord::Migration[5.2]
  def change
    # Columns are added in this order, one add_column call each.
    %i(last_status_at last_trend_at).each do |column_name|
      add_column :tags, column_name, :datetime
    end
  end
end
Loading…
Reference in new issue