Fix search on description with a new config

This commit is contained in:
Chocobozzz 2022-06-03 10:10:14 +02:00
parent 375c521986
commit 316adda0db
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
4 changed files with 93 additions and 31 deletions

View File

@ -59,44 +59,70 @@ videos-search:
boost-languages: boost-languages:
enabled: true enabled: true
# Add ability to change videos search fields boost value # Add ability to change videos search fields boost and match value
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information # See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost == 0, the field will not be part of the search # If boost == 0, the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields: search-fields:
name: name:
boost: 5 boost: 5
match_type: 'default'
description: description:
boost: 1 boost: 1
match_type: 'phrase'
tags: tags:
boost: 2 boost: 2
match_type: 'default'
account-display-name: account-display-name:
boost: 2 boost: 2
match_type: 'default'
channel-display-name: channel-display-name:
boost: 2 boost: 2
match_type: 'default'
channels-search: channels-search:
# Add ability to change channels search fields boost value # Add ability to change channels search fields boost and match value
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information # See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost == 0, the field will not be part of the search # If boost == 0, the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields: search-fields:
name: name:
boost: 5 boost: 5
match_type: 'default'
description: description:
boost: 1 boost: 1
match_type: 'phrase'
display-name: display-name:
boost: 3 boost: 3
match_type: 'default'
account-display-name: account-display-name:
boost: 2 boost: 2
match_type: 'default'
playlists-search: playlists-search:
# Add ability to change playlists search fields boost value # Add ability to change playlists search fields boost and match value
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information # See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost == 0, the field will not be part of the search # If boost == 0, the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields: search-fields:
display-name: display-name:
boost: 5 boost: 5
match_type: 'default'
description: description:
boost: 1 boost: 1
match_type: 'phrase'
api: api:
# Blacklist hosts that will not be returned by the search API # Blacklist hosts that will not be returned by the search API

View File

@ -28,6 +28,7 @@ instances-index:
- 'tube.nah.re' - 'tube.nah.re'
- 'peertube.parleur.net' - 'peertube.parleur.net'
- 'video.passageenseine.fr' - 'video.passageenseine.fr'
- 'exode.me'
api: api:
blacklist: blacklist:

View File

@ -47,23 +47,28 @@ const CONFIG = {
SEARCH_FIELDS: { SEARCH_FIELDS: {
NAME: { NAME: {
FIELD_NAME: 'name', FIELD_NAME: 'name',
BOOST: config.get<number>('videos-search.search-fields.name.boost') BOOST: config.get<number>('videos-search.search-fields.name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.name.match_type')
}, },
DESCRIPTION: { DESCRIPTION: {
FIELD_NAME: 'description', FIELD_NAME: 'description',
BOOST: config.get<number>('videos-search.search-fields.description.boost') BOOST: config.get<number>('videos-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.description.match_type')
}, },
TAGS: { TAGS: {
FIELD_NAME: 'tags', FIELD_NAME: 'tags',
BOOST: config.get<number>('videos-search.search-fields.tags.boost') BOOST: config.get<number>('videos-search.search-fields.tags.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.tags.match_type')
}, },
ACCOUNT_DISPLAY_NAME: { ACCOUNT_DISPLAY_NAME: {
FIELD_NAME: 'account.displayName', FIELD_NAME: 'account.displayName',
BOOST: config.get<number>('videos-search.search-fields.account-display-name.boost') BOOST: config.get<number>('videos-search.search-fields.account-display-name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.account-display-name.match_type')
}, },
CHANNEL_DISPLAY_NAME: { CHANNEL_DISPLAY_NAME: {
FIELD_NAME: 'channel.displayName', FIELD_NAME: 'channel.displayName',
BOOST: config.get<number>('videos-search.search-fields.channel-display-name.boost') BOOST: config.get<number>('videos-search.search-fields.channel-display-name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.channel-display-name.match_type')
} }
} }
}, },
@ -71,19 +76,23 @@ const CONFIG = {
SEARCH_FIELDS: { SEARCH_FIELDS: {
NAME: { NAME: {
FIELD_NAME: 'name', FIELD_NAME: 'name',
BOOST: config.get<number>('channels-search.search-fields.name.boost') BOOST: config.get<number>('channels-search.search-fields.name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.name.match_type')
}, },
DESCRIPTION: { DESCRIPTION: {
FIELD_NAME: 'description', FIELD_NAME: 'description',
BOOST: config.get<number>('channels-search.search-fields.description.boost') BOOST: config.get<number>('channels-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.description.match_type')
}, },
DISPLAY_NAME: { DISPLAY_NAME: {
FIELD_NAME: 'displayName', FIELD_NAME: 'displayName',
BOOST: config.get<number>('channels-search.search-fields.display-name.boost') BOOST: config.get<number>('channels-search.search-fields.display-name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.display-name.match_type')
}, },
ACCOUNT_DISPLAY_NAME: { ACCOUNT_DISPLAY_NAME: {
FIELD_NAME: 'ownerAccount.displayName', FIELD_NAME: 'ownerAccount.displayName',
BOOST: config.get<number>('channels-search.search-fields.account-display-name.boost') BOOST: config.get<number>('channels-search.search-fields.account-display-name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.account-display-name.match_type')
} }
} }
}, },
@ -91,11 +100,13 @@ const CONFIG = {
SEARCH_FIELDS: { SEARCH_FIELDS: {
DISPLAY_NAME: { DISPLAY_NAME: {
FIELD_NAME: 'displayName', FIELD_NAME: 'displayName',
BOOST: config.get<number>('playlists-search.search-fields.display-name.boost') BOOST: config.get<number>('playlists-search.search-fields.display-name.boost'),
MATCH_TYPE: config.get<string>('playlists-search.search-fields.display-name.match_type')
}, },
DESCRIPTION: { DESCRIPTION: {
FIELD_NAME: 'description', FIELD_NAME: 'description',
BOOST: config.get<number>('playlists-search.search-fields.description.boost') BOOST: config.get<number>('playlists-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('playlists-search.search-fields.description.match_type')
} }
} }
}, },
@ -151,9 +162,9 @@ const ELASTIC_SEARCH_QUERY = {
MINIMUM_SHOULD_MATCH: '3<75%', MINIMUM_SHOULD_MATCH: '3<75%',
BOOST_LANGUAGE_VALUE: 1, BOOST_LANGUAGE_VALUE: 1,
MALUS_LANGUAGE_VALUE: 0.5, MALUS_LANGUAGE_VALUE: 0.5,
VIDEOS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.VIDEOS_SEARCH.SEARCH_FIELDS), VIDEOS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.VIDEOS_SEARCH.SEARCH_FIELDS),
CHANNELS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS), CHANNELS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS),
PLAYLISTS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.PLAYLISTS_SEARCH.SEARCH_FIELDS) PLAYLISTS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.PLAYLISTS_SEARCH.SEARCH_FIELDS)
} }
function getWebserverUrl () { function getWebserverUrl () {
@ -164,15 +175,26 @@ function getWebserverUrl () {
return CONFIG.WEBSERVER.SCHEME + '://' + CONFIG.WEBSERVER.HOSTNAME + ':' + CONFIG.WEBSERVER.PORT return CONFIG.WEBSERVER.SCHEME + '://' + CONFIG.WEBSERVER.HOSTNAME + ':' + CONFIG.WEBSERVER.PORT
} }
function buildMultiMatchFields (fields: { [name: string]: { BOOST: number, FIELD_NAME: string } }) { function buildMatchFieldConfig (fields: { [name: string]: { BOOST: number, FIELD_NAME: string, MATCH_TYPE: string } }) {
return Object.keys(fields) const selectFields = (matchType: 'phrase' | 'default') => {
.map(id => { return Object.keys(fields)
const obj = fields[id] .filter(fieldName => fields[fieldName].MATCH_TYPE === matchType)
if (obj.BOOST <= 0) return '' .map(fieldName => fields[fieldName])
}
return `${obj.FIELD_NAME}^${obj.BOOST}` const buildMultiMatch = (fields: { BOOST: number, FIELD_NAME: string }[]) => {
return fields.map(fieldObj => {
if (fieldObj.BOOST <= 0) return ''
return `${fieldObj.FIELD_NAME}^${fieldObj.BOOST}`
}) })
.filter(v => !!v) .filter(v => !!v)
}
return {
default: buildMultiMatch(selectFields('default')),
phrase: buildMultiMatch(selectFields('phrase'))
}
} }
if (isTestInstance()) { if (isTestInstance()) {

View File

@ -32,16 +32,29 @@ function addUUIDFilters (filters: any[], uuids: string[]) {
}) })
} }
function buildMultiMatchBool (search: string, fields: string[]) { function buildMultiMatchBool (search: string, fieldsObject: { default: string[], phrase: string[] }) {
return { return {
must: [ must: [
{ {
multi_match: { bool: {
query: search, should: [
fields, {
fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS, multi_match: {
operator: ELASTIC_SEARCH_QUERY.OPERATOR, query: search,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH fields: fieldsObject.default,
fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS,
operator: ELASTIC_SEARCH_QUERY.OPERATOR,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH
}
},
{
multi_match: {
query: search,
fields: fieldsObject.phrase,
type: 'phrase'
}
}
]
} }
} }
], ],
@ -50,7 +63,7 @@ function buildMultiMatchBool (search: string, fields: string[]) {
{ {
multi_match: { multi_match: {
query: search, query: search,
fields, fields: [ ...fieldsObject.default, ...fieldsObject.phrase ],
operator: ELASTIC_SEARCH_QUERY.OPERATOR, operator: ELASTIC_SEARCH_QUERY.OPERATOR,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH
} }