Fix search on description with a new config

This commit is contained in:
Chocobozzz 2022-06-03 10:10:14 +02:00
parent 375c521986
commit 316adda0db
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
4 changed files with 93 additions and 31 deletions

View File

@ -59,44 +59,70 @@ videos-search:
boost-languages:
enabled: true
# Add ability to change videos search fields boost value
# Configure the boost and match type of each videos search field
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost is 0 (or negative), the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields:
name:
boost: 5
match_type: 'default'
description:
boost: 1
match_type: 'phrase'
tags:
boost: 2
match_type: 'default'
account-display-name:
boost: 2
match_type: 'default'
channel-display-name:
boost: 2
match_type: 'default'
channels-search:
# Add ability to change channels search fields boost value
# Configure the boost and match type of each channels search field
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost is 0 (or negative), the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields:
name:
boost: 5
match_type: 'default'
description:
boost: 1
match_type: 'phrase'
display-name:
boost: 3
match_type: 'default'
account-display-name:
boost: 2
match_type: 'default'
playlists-search:
# Add ability to change playlists search fields boost value
# Configure the boost and match type of each playlists search field
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
#
# If boost is 0 (or negative), the field will not be part of the search
#
# match_type can be 'default' or 'phrase'
# * default: use the default Elasticsearch match query, including fuzziness
# * phrase: use the Elasticsearch phrase match query
search-fields:
display-name:
boost: 5
match_type: 'default'
description:
boost: 1
match_type: 'phrase'
api:
# Blacklist hosts that will not be returned by the search API

View File

@ -28,6 +28,7 @@ instances-index:
- 'tube.nah.re'
- 'peertube.parleur.net'
- 'video.passageenseine.fr'
- 'exode.me'
api:
blacklist:

View File

@ -47,23 +47,28 @@ const CONFIG = {
SEARCH_FIELDS: {
NAME: {
FIELD_NAME: 'name',
BOOST: config.get<number>('videos-search.search-fields.name.boost')
BOOST: config.get<number>('videos-search.search-fields.name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.name.match_type')
},
DESCRIPTION: {
FIELD_NAME: 'description',
BOOST: config.get<number>('videos-search.search-fields.description.boost')
BOOST: config.get<number>('videos-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.description.match_type')
},
TAGS: {
FIELD_NAME: 'tags',
BOOST: config.get<number>('videos-search.search-fields.tags.boost')
BOOST: config.get<number>('videos-search.search-fields.tags.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.tags.match_type')
},
ACCOUNT_DISPLAY_NAME: {
FIELD_NAME: 'account.displayName',
BOOST: config.get<number>('videos-search.search-fields.account-display-name.boost')
BOOST: config.get<number>('videos-search.search-fields.account-display-name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.account-display-name.match_type')
},
CHANNEL_DISPLAY_NAME: {
FIELD_NAME: 'channel.displayName',
BOOST: config.get<number>('videos-search.search-fields.channel-display-name.boost')
BOOST: config.get<number>('videos-search.search-fields.channel-display-name.boost'),
MATCH_TYPE: config.get<string>('videos-search.search-fields.channel-display-name.match_type')
}
}
},
@ -71,19 +76,23 @@ const CONFIG = {
SEARCH_FIELDS: {
NAME: {
FIELD_NAME: 'name',
BOOST: config.get<number>('channels-search.search-fields.name.boost')
BOOST: config.get<number>('channels-search.search-fields.name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.name.match_type')
},
DESCRIPTION: {
FIELD_NAME: 'description',
BOOST: config.get<number>('channels-search.search-fields.description.boost')
BOOST: config.get<number>('channels-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.description.match_type')
},
DISPLAY_NAME: {
FIELD_NAME: 'displayName',
BOOST: config.get<number>('channels-search.search-fields.display-name.boost')
BOOST: config.get<number>('channels-search.search-fields.display-name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.display-name.match_type')
},
ACCOUNT_DISPLAY_NAME: {
FIELD_NAME: 'ownerAccount.displayName',
BOOST: config.get<number>('channels-search.search-fields.account-display-name.boost')
BOOST: config.get<number>('channels-search.search-fields.account-display-name.boost'),
MATCH_TYPE: config.get<string>('channels-search.search-fields.account-display-name.match_type')
}
}
},
@ -91,11 +100,13 @@ const CONFIG = {
SEARCH_FIELDS: {
DISPLAY_NAME: {
FIELD_NAME: 'displayName',
BOOST: config.get<number>('playlists-search.search-fields.display-name.boost')
BOOST: config.get<number>('playlists-search.search-fields.display-name.boost'),
MATCH_TYPE: config.get<string>('playlists-search.search-fields.display-name.match_type')
},
DESCRIPTION: {
FIELD_NAME: 'description',
BOOST: config.get<number>('playlists-search.search-fields.description.boost')
BOOST: config.get<number>('playlists-search.search-fields.description.boost'),
MATCH_TYPE: config.get<string>('playlists-search.search-fields.description.match_type')
}
}
},
@ -151,9 +162,9 @@ const ELASTIC_SEARCH_QUERY = {
MINIMUM_SHOULD_MATCH: '3<75%',
BOOST_LANGUAGE_VALUE: 1,
MALUS_LANGUAGE_VALUE: 0.5,
VIDEOS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.VIDEOS_SEARCH.SEARCH_FIELDS),
CHANNELS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS),
PLAYLISTS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.PLAYLISTS_SEARCH.SEARCH_FIELDS)
VIDEOS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.VIDEOS_SEARCH.SEARCH_FIELDS),
CHANNELS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS),
PLAYLISTS_MULTI_MATCH_FIELDS: buildMatchFieldConfig(CONFIG.PLAYLISTS_SEARCH.SEARCH_FIELDS)
}
function getWebserverUrl () {
@ -164,15 +175,26 @@ function getWebserverUrl () {
return CONFIG.WEBSERVER.SCHEME + '://' + CONFIG.WEBSERVER.HOSTNAME + ':' + CONFIG.WEBSERVER.PORT
}
/**
 * Build the multi-match field lists for a search fields configuration,
 * grouped by match type.
 *
 * Each selected field is rendered as `FIELD_NAME^BOOST`.
 * Fields with a boost of 0 or less are left out of the result.
 *
 * NOTE: a field whose MATCH_TYPE is neither 'default' nor 'phrase' ends up in
 * neither list, so it is not searched at all.
 *
 * @param fields - search fields configuration, indexed by an arbitrary identifier
 * @returns the boosted field expressions to use for the default match query
 *   and for the phrase match query
 */
function buildMatchFieldConfig (
  fields: { [name: string]: { BOOST: number, FIELD_NAME: string, MATCH_TYPE: string } }
): { default: string[], phrase: string[] } {
  const buildMultiMatch = (matchType: 'phrase' | 'default') => {
    return Object.keys(fields)
      .map(fieldName => fields[fieldName])
      // Keep only the fields of the requested match type that are enabled (positive boost)
      .filter(fieldObj => fieldObj.MATCH_TYPE === matchType && fieldObj.BOOST > 0)
      .map(fieldObj => `${fieldObj.FIELD_NAME}^${fieldObj.BOOST}`)
  }

  return {
    default: buildMultiMatch('default'),
    phrase: buildMultiMatch('phrase')
  }
}
if (isTestInstance()) {

View File

@ -32,16 +32,29 @@ function addUUIDFilters (filters: any[], uuids: string[]) {
})
}
function buildMultiMatchBool (search: string, fields: string[]) {
function buildMultiMatchBool (search: string, fieldsObject: { default: string[], phrase: string[] }) {
return {
must: [
{
multi_match: {
query: search,
fields,
fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS,
operator: ELASTIC_SEARCH_QUERY.OPERATOR,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH
bool: {
should: [
{
multi_match: {
query: search,
fields: fieldsObject.default,
fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS,
operator: ELASTIC_SEARCH_QUERY.OPERATOR,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH
}
},
{
multi_match: {
query: search,
fields: fieldsObject.phrase,
type: 'phrase'
}
}
]
}
}
],
@ -50,7 +63,7 @@ function buildMultiMatchBool (search: string, fields: string[]) {
{
multi_match: {
query: search,
fields,
fields: [ ...fieldsObject.default, ...fieldsObject.phrase ],
operator: ELASTIC_SEARCH_QUERY.OPERATOR,
minimum_should_match: ELASTIC_SEARCH_QUERY.MINIMUM_SHOULD_MATCH
}