# frozen_string_literal: true

# Chewy index definition for accounts.
#
# Documents are built from `Account.searchable` records (with `account_stat`
# eager-loaded). `display_name` and `acct` are indexed with an edge n-gram
# analyzer and searched with the whitespace-based `content` analyzer.
class AccountsIndex < Chewy::Index
  # Builds one analyzer definition; every analyzer shares the same token
  # filters and differs only in its tokenizer. A fresh hash/array is returned
  # on each call so the two analyzers do not share objects.
  analyzer_for = lambda do |tokenizer|
    { tokenizer: tokenizer, filter: %w(lowercase asciifolding cjk_width) }
  end

  settings(
    index: { refresh_interval: '5m' },
    analysis: {
      analyzer: {
        content: analyzer_for.call('whitespace'),
        edge_ngram: analyzer_for.call('edge_ngram'),
      },

      tokenizer: {
        edge_ngram: { type: 'edge_ngram', min_gram: 1, max_gram: 15 },
      },
    }
  )

  # An account's document is removed once the record is destroyed or stops
  # being searchable.
  removable = ->(account) { account.destroyed? || !account.searchable? }

  # "username" for accounts without a domain, "username@domain" otherwise.
  acct_value = ->(account) { [account.username, account.domain].compact.join('@') }

  # Relationship counts are computed from the relationship associations at
  # indexing time.
  following_total = ->(account) { account.active_relationships.count }
  followers_total = ->(account) { account.passive_relationships.count }

  # Falls back to the creation time when no last-status time is available.
  last_activity = ->(account) { account.last_status_at || account.created_at }

  # Mapping options shared by the two prefix-searchable text fields.
  prefix_text = { type: 'text', analyzer: 'edge_ngram', search_analyzer: 'content' }

  define_type ::Account.searchable.includes(:account_stat), delete_if: removable do
    root date_detection: false do
      field :id, type: 'long'
      field :display_name, **prefix_text
      field :acct, **prefix_text, value: acct_value
      field :following_count, type: 'long', value: following_total
      field :followers_count, type: 'long', value: followers_total
      field :last_status_at, type: 'date', value: last_activity
    end
  end
end
# Finds accounts matching a free-text query.
#
# The result is an optional exact match (only on the first page, and only when
# the query is a complete "user@domain" handle) followed by ranked results from
# Elasticsearch when Chewy is enabled, or from the database otherwise.
class AccountSearchService < BaseService
  attr_reader :query, :limit, :offset, :options, :account

  # @param query [String, nil] raw search text; a leading "@" marks it as a handle
  # @param account [Account, nil] the account performing the search, if any
  # @param options [Hash] recognised keys: :limit, :offset, :resolve, :following
  # @return [Array] matching accounts, without nils or duplicates
  def call(query, account = nil, options = {})
    # Normalise once so the "@" hint is detected even after leading whitespace
    # and a nil query yields an empty result instead of raising.
    normalized = query.to_s.strip

    @acct_hint = normalized.start_with?('@')
    @query     = normalized.sub(/\A@/, '')
    @limit     = options[:limit].to_i
    @offset    = options[:offset].to_i
    @options   = options
    @account   = account

    search_service_results.compact.uniq
  end

  private

  def search_service_results
    return [] if query.blank? || limit < 1

    [exact_match] + search_results
  end

  # Looks up (or, with options[:resolve], resolves) the account named exactly
  # by the query. Only attempted on the first page for complete handles.
  # Memoised with `defined?` because nil is a valid cached outcome.
  def exact_match
    return unless offset.zero? && username_complete?
    return @exact_match if defined?(@exact_match)

    candidate = if options[:resolve]
                  ResolveAccountService.new.call(query)
                elsif domain_is_local?
                  Account.find_local(query_username)
                else
                  Account.find_remote(query_username, query_domain)
                end

    @exact_match = eligible_exact_match?(candidate) ? candidate : nil
  end

  # An exact match must obey the same restrictions as the ranked results:
  # never a suspended account, and only a followed account when the caller
  # asked for options[:following].
  def eligible_exact_match?(candidate)
    return false if candidate.nil? || candidate.suspended?
    return true unless account && options[:following]

    following_ids.include?(candidate.id)
  end

  def search_results
    return [] if limit_for_non_exact_results.zero?

    @search_results ||= Chewy.enabled? ? from_elasticsearch : from_database
  end

  def from_database
    if account
      advanced_search_results
    else
      simple_search_results
    end
  end

  def advanced_search_results
    Account.advanced_search_for(terms_for_query, account, limit_for_non_exact_results, options[:following], offset)
  end

  def simple_search_results
    Account.search_for(terms_for_query, limit_for_non_exact_results, offset)
  end

  # Queries AccountsIndex with a function_score query. Handle-like queries
  # match on `acct` only; other queries match `acct` (boosted) and
  # `display_name`. When a searching account is present, followed accounts are
  # either required (options[:following]) or boosted.
  def from_elasticsearch
    must_clauses   = [{ multi_match: { query: terms_for_query, fields: likely_acct? ? %w(acct) : %w(acct^2 display_name), type: 'best_fields' } }]
    should_clauses = []

    if account
      return [] if options[:following] && following_ids.empty?

      if options[:following]
        must_clauses << { terms: { id: following_ids } }
      elsif following_ids.any?
        should_clauses << { terms: { id: following_ids, boost: 100 } }
      end
    end

    # Named es_query so the local does not shadow the `query` reader.
    es_query  = { bool: { must: must_clauses, should: should_clauses } }
    functions = [reputation_score_function, followers_score_function, time_distance_function]

    records = AccountsIndex.query(function_score: { query: es_query, functions: functions, boost_mode: 'multiply', score_mode: 'avg' })
                           .limit(limit_for_non_exact_results)
                           .offset(offset)
                           .objects
                           .compact

    ActiveRecord::Associations::Preloader.new.preload(records, :account_stat)

    records
  end

  # Score: followers / (followers + following + 1).
  def reputation_score_function
    {
      script_score: {
        script: {
          source: "(doc['followers_count'].value + 0.0) / (doc['followers_count'].value + doc['following_count'].value + 1)",
        },
      },
    }
  end

  # Score: log2p of the follower count, using 1 when the field is missing.
  def followers_score_function
    {
      field_value_factor: {
        field: 'followers_count',
        modifier: 'log2p',
        missing: 1,
      },
    }
  end

  # Score: gaussian decay over `last_status_at`.
  def time_distance_function
    {
      gauss: {
        last_status_at: {
          scale: '30d',
          offset: '30d',
          decay: 0.3,
        },
      },
    }
  end

  def following_ids
    @following_ids ||= account.active_relationships.pluck(:target_account_id)
  end

  # Leaves room for the exact match, which takes one slot of the limit.
  def limit_for_non_exact_results
    if exact_match?
      limit - 1
    else
      limit
    end
  end

  def terms_for_query
    if domain_is_local?
      query_username
    else
      query
    end
  end

  def split_query_string
    @split_query_string ||= query.split('@')
  end

  def query_username
    @query_username ||= split_query_string.first || ''
  end

  # nil when the query has no "@domain" part; `defined?` keeps that nil cached.
  def query_domain
    return @query_domain if defined?(@query_domain)

    @query_domain = query_without_split? ? nil : split_query_string.last
  end

  def query_without_split?
    split_query_string.size == 1
  end

  # `defined?` memoisation so a false answer is cached too.
  def domain_is_local?
    return @domain_is_local if defined?(@domain_is_local)

    @domain_is_local = TagManager.instance.local_domain?(query_domain)
  end

  def exact_match?
    exact_match.present?
  end

  # True when the query contains "@" and, prefixed with "@", matches
  # Account::MENTION_RE.
  def username_complete?
    query.include?('@') && Account::MENTION_RE.match?("@#{query}")
  end

  def likely_acct?
    @acct_hint || username_complete?
  end
end
- describe 'searching local and remote users' do - describe "when only '@'" do - before do - allow(Account).to receive(:find_local) - allow(Account).to receive(:search_for) - subject.call('@', nil, limit: 10) - end - - it 'uses find_local with empty query to look for local accounts' do - expect(Account).to have_received(:find_local).with('') - end - end - - describe 'when no domain' do - before do - allow(Account).to receive(:find_local) - allow(Account).to receive(:search_for) - subject.call('one', nil, limit: 10) - end - - it 'uses find_local to look for local accounts' do - expect(Account).to have_received(:find_local).with('one') - end - - it 'uses search_for to find matches' do - expect(Account).to have_received(:search_for).with('one', 10, 0) - end - end - - describe 'when there is a domain' do - before do - allow(Account).to receive(:find_remote) - end - - it 'uses find_remote to look for remote accounts' do - subject.call('two@example.com', nil, limit: 10) - expect(Account).to have_received(:find_remote).with('two', 'example.com') - end - - describe 'and there is no account provided' do - it 'uses search_for to find matches' do - allow(Account).to receive(:search_for) - subject.call('two@example.com', nil, limit: 10, resolve: false) - - expect(Account).to have_received(:search_for).with('two example.com', 10, 0) - end - end - - describe 'and there is an account provided' do - it 'uses advanced_search_for to find matches' do - account = Fabricate(:account) - allow(Account).to receive(:advanced_search_for) - subject.call('two@example.com', account, limit: 10, resolve: false) - - expect(Account).to have_received(:advanced_search_for).with('two example.com', account, 10, nil, 0) - end - end + expect(results).to eq [account] end end - describe 'with an exact match' do - it 'returns exact match first, and does not return duplicates' do - partial = Fabricate(:account, username: 'exactness') - exact = Fabricate(:account, username: 'exact') - - results = 
subject.call('exact', nil, limit: 10) - expect(results.size).to eq 2 - expect(results).to eq [exact, partial] - end - end - - describe 'when there is a local domain' do + context 'when there is a local domain' do around do |example| before = Rails.configuration.x.local_domain + example.run + Rails.configuration.x.local_domain = before end it 'returns exact match first' do remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e') remote_too = Fabricate(:account, username: 'b', domain: 'remote', display_name: 'e') - exact = Fabricate(:account, username: 'e') + exact = Fabricate(:account, username: 'e') + Rails.configuration.x.local_domain = 'example.com' results = subject.call('e@example.com', nil, limit: 2) + expect(results.size).to eq 2 expect(results).to eq([exact, remote]).or eq([exact, remote_too]) end end - describe 'when there is a domain but no exact match' do + context 'when there is a domain but no exact match' do it 'follows the remote account when resolve is true' do service = double(call: nil) allow(ResolveAccountService).to receive(:new).and_return(service) @@ -138,23 +68,21 @@ describe AccountSearchService, type: :service do end end - describe 'should not include suspended accounts' do - it 'returns the fuzzy match first, and does not return suspended exacts' do - partial = Fabricate(:account, username: 'exactness') - exact = Fabricate(:account, username: 'exact', suspended: true) + it 'returns the fuzzy match first, and does not return suspended exacts' do + partial = Fabricate(:account, username: 'exactness') + exact = Fabricate(:account, username: 'exact', suspended: true) + results = subject.call('exact', nil, limit: 10) - results = subject.call('exact', nil, limit: 10) - expect(results.size).to eq 1 - expect(results).to eq [partial] - end + expect(results.size).to eq 1 + expect(results).to eq [partial] + end - it "does not return suspended remote accounts" do - remote = Fabricate(:account, username: 'a', domain: 'remote', 
display_name: 'e', suspended: true) + it "does not return suspended remote accounts" do + remote = Fabricate(:account, username: 'a', domain: 'remote', display_name: 'e', suspended: true) + results = subject.call('a@example.com', nil, limit: 2) - results = subject.call('a@example.com', nil, limit: 2) - expect(results.size).to eq 0 - expect(results).to eq [] - end + expect(results.size).to eq 0 + expect(results).to eq [] end end end