sepia-search-motore-di-rice.../server/lib/elastic-search-videos.ts

641 lines
12 KiB
TypeScript

import { difference } from 'lodash'
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
import { logger } from '../helpers/logger'
import { buildUrl } from '../helpers/utils'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../initializers/constants'
import { VideosSearchQuery } from '../types/video-search.model'
import { DBVideo, DBVideoDetails, EnhancedVideo, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
function initVideosIndex () {
return buildIndex(CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS, buildVideosMapping())
}
async function indexVideos (videos: IndexableVideo[], replace = false) {
return indexDocuments({
objects: videos,
formatter: v => formatVideoForDB(v),
replace,
index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
})
}
function refreshVideosIndex () {
return elasticSearch.indices.refresh({ index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS })
}
function removeVideosFromHosts (hosts: string[]) {
if (hosts.length === 0) return
logger.info({ hosts }, 'Will remove videos from hosts.')
return elasticSearch.delete_by_query({
index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
body: {
query: {
bool: {
filter: {
terms: {
host: hosts
}
}
}
}
}
})
}
async function removeNotExistingVideos (host: string, existingVideos: Set<number>) {
const idsFromDB = await getVideoIdsOf(host)
const idsToRemove = difference(idsFromDB, Array.from(existingVideos))
logger.info({ idsToRemove }, 'Will remove %d videos from %s.', idsToRemove.length, host)
return elasticSearch.delete_by_query({
index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
body: {
query: {
bool: {
filter: [
{
terms: {
id: idsToRemove
}
},
{
term: {
host
}
}
]
}
}
}
})
}
async function getVideoIdsOf (host: string) {
const res = await elasticSearch.search({
index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
body: {
size: 0,
aggs: {
ids: {
terms: {
size: 500000,
field: 'id'
}
}
},
query: {
bool: {
filter: [
{
term: {
host
}
}
]
}
}
}
})
return res.body.aggregations.ids.buckets.map(b => b.key)
}
async function queryVideos (search: VideosSearchQuery) {
const bool: any = {}
const filter: any[] = []
const mustNot: any[] = []
if (search.search) {
Object.assign(bool, {
must: [
{
multi_match: {
query: search.search,
fields: [ 'name^5', 'description', 'tags^3' ],
fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS
}
}
]
})
}
if (search.blockedAccounts) {
mustNot.push({
terms: {
'account.handle': search.blockedAccounts
}
})
}
if (search.blockedHosts) {
mustNot.push({
terms: {
host: search.blockedHosts
}
})
}
if (search.startDate) {
filter.push({
range: {
publishedAt: {
gte: search.startDate
}
}
})
}
if (search.endDate) {
filter.push({
range: {
publishedAt: {
lte: search.endDate
}
}
})
}
if (search.originallyPublishedStartDate) {
filter.push({
range: {
originallyPublishedAt: {
gte: search.startDate
}
}
})
}
if (search.originallyPublishedEndDate) {
filter.push({
range: {
originallyPublishedAt: {
lte: search.endDate
}
}
})
}
if (search.nsfw && search.nsfw !== 'both') {
filter.push({
term: {
nsfw: (search.nsfw + '') === 'true'
}
})
}
if (search.categoryOneOf) {
filter.push({
terms: {
'category.id': search.categoryOneOf
}
})
}
if (search.licenceOneOf) {
filter.push({
terms: {
'licence.id': search.licenceOneOf
}
})
}
if (search.languageOneOf) {
filter.push({
terms: {
'language.id': search.languageOneOf
}
})
}
if (search.tagsOneOf) {
filter.push({
terms: {
tags: search.tagsOneOf
}
})
}
if (search.tagsAllOf) {
for (const t of search.tagsAllOf) {
filter.push({
term: {
tags: t
}
})
}
}
if (search.durationMin) {
filter.push({
range: {
duration: {
gte: search.durationMin
}
}
})
}
if (search.durationMax) {
filter.push({
range: {
duration: {
lte: search.durationMax
}
}
})
}
Object.assign(bool, { filter })
if (mustNot.length !== 0) {
Object.assign(bool, { must_not: mustNot })
}
const body = {
from: search.start,
size: search.count,
sort: buildSort(search.sort)
}
// Allow to boost results depending on query languages
if (
CONFIG.VIDEOS_SEARCH.BOOST_LANGUAGES.ENABLED &&
Array.isArray(search.boostLanguages) &&
search.boostLanguages.length !== 0
) {
const boostScript = `
if (doc['language.id'].size() == 0) {
return _score;
}
String language = doc['language.id'].value;
for (String docLang: params.boostLanguages) {
if (docLang == language) return _score * params.boost;
}
return _score;
`
Object.assign(body, {
query: {
script_score: {
query: { bool },
script: {
source: boostScript,
params: {
boostLanguages: search.boostLanguages,
boost: ELASTIC_SEARCH_QUERY.BOOST_LANGUAGE_VALUE
}
}
}
}
})
} else {
Object.assign(body, { query: { bool } })
}
logger.debug({ body }, 'Will query Elastic Search for videos.')
const res = await elasticSearch.search({
index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
body
})
return extractQueryResult(res)
}
export {
indexVideos,
removeNotExistingVideos,
queryVideos,
refreshVideosIndex,
removeVideosFromHosts,
initVideosIndex,
formatVideoForAPI
}
// ############################################################################
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
return {
id: v.id,
uuid: v.uuid,
indexedAt: new Date(),
createdAt: v.createdAt,
updatedAt: v.updatedAt,
publishedAt: v.publishedAt,
originallyPublishedAt: v.originallyPublishedAt,
category: {
id: v.category.id,
label: v.category.label
},
licence: {
id: v.licence.id,
label: v.licence.label
},
language: {
id: v.language.id,
label: v.language.label
},
privacy: {
id: v.privacy.id,
label: v.privacy.label
},
name: v.name,
description: v.description,
duration: v.duration,
thumbnailPath: v.thumbnailPath,
previewPath: v.previewPath,
embedPath: v.embedPath,
views: v.views,
likes: v.likes,
dislikes: v.dislikes,
nsfw: v.nsfw,
host: v.host,
url: v.url,
tags: (v as IndexableVideoDetails).tags ? (v as IndexableVideoDetails).tags : undefined,
account: {
id: v.account.id,
name: v.account.name,
displayName: v.account.displayName,
url: v.account.url,
host: v.account.host,
handle: `${v.account.name}@${v.account.host}`,
avatar: formatAvatarForDB(v.account)
},
channel: {
id: v.channel.id,
name: v.channel.name,
displayName: v.channel.displayName,
url: v.channel.url,
host: v.channel.host,
handle: `${v.channel.name}@${v.channel.host}`,
avatar: formatAvatarForDB(v.channel)
}
}
}
function formatVideoForAPI (v: DBVideoDetails, fromHost?: string): EnhancedVideo {
return {
id: v.id,
uuid: v.uuid,
score: v.score,
createdAt: new Date(v.createdAt),
updatedAt: new Date(v.updatedAt),
publishedAt: new Date(v.publishedAt),
originallyPublishedAt: v.originallyPublishedAt,
category: {
id: v.category.id,
label: v.category.label
},
licence: {
id: v.licence.id,
label: v.licence.label
},
language: {
id: v.language.id,
label: v.language.label
},
privacy: {
id: v.privacy.id,
label: v.privacy.label
},
name: v.name,
description: v.description,
duration: v.duration,
tags: v.tags,
thumbnailPath: v.thumbnailPath,
thumbnailUrl: buildUrl(v.host, v.thumbnailPath),
previewPath: v.previewPath,
previewUrl: buildUrl(v.host, v.previewPath),
embedPath: v.embedPath,
embedUrl: buildUrl(v.host, v.embedPath),
url: v.url,
isLocal: fromHost && fromHost === v.host,
views: v.views,
likes: v.likes,
dislikes: v.dislikes,
nsfw: v.nsfw,
account: {
id: v.account.id,
name: v.account.name,
displayName: v.account.displayName,
url: v.account.url,
host: v.account.host,
avatar: formatAvatarForAPI(v.account)
},
channel: {
id: v.channel.id,
name: v.channel.name,
displayName: v.channel.displayName,
url: v.channel.url,
host: v.channel.host,
avatar: formatAvatarForAPI(v.channel)
}
}
}
function buildChannelOrAccountMapping () {
return {
id: {
type: 'long'
},
name: {
type: 'text',
fields: {
raw: {
type: 'keyword'
}
}
},
displayName: {
type: 'text'
},
url: {
type: 'keyword'
},
host: {
type: 'keyword'
},
handle: {
type: 'keyword'
},
avatar: {
properties: buildAvatarMapping()
}
}
}
function buildVideosMapping () {
return {
id: {
type: 'long'
},
uuid: {
type: 'keyword'
},
createdAt: {
type: 'date',
format: 'date_optional_time'
},
updatedAt: {
type: 'date',
format: 'date_optional_time'
},
publishedAt: {
type: 'date',
format: 'date_optional_time'
},
originallyPublishedAt: {
type: 'date',
format: 'date_optional_time'
},
indexedAt: {
type: 'date',
format: 'date_optional_time'
},
category: {
properties: {
id: {
type: 'keyword'
},
label: {
type: 'text'
}
}
},
licence: {
properties: {
id: {
type: 'keyword'
},
label: {
type: 'text'
}
}
},
language: {
properties: {
id: {
type: 'keyword'
},
label: {
type: 'text'
}
}
},
privacy: {
properties: {
id: {
type: 'keyword'
},
label: {
type: 'text'
}
}
},
name: {
type: 'text'
},
description: {
type: 'text'
},
tags: {
type: 'text',
fields: {
raw: {
type: 'keyword'
}
}
},
duration: {
type: 'long'
},
thumbnailPath: {
type: 'keyword'
},
previewPath: {
type: 'keyword'
},
embedPath: {
type: 'keyword'
},
url: {
type: 'keyword'
},
views: {
type: 'long'
},
likes: {
type: 'long'
},
dislikes: {
type: 'long'
},
nsfw: {
type: 'boolean'
},
host: {
type: 'keyword'
},
account: {
properties: buildChannelOrAccountMapping()
},
channel: {
properties: buildChannelOrAccountMapping()
}
}
}