sepia-search-motore-di-rice.../server/lib/elastic-search-videos.ts

604 lines
11 KiB
TypeScript
Raw Normal View History

2020-05-29 16:16:55 +02:00
import { difference } from 'lodash'
2020-02-19 15:39:35 +01:00
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
2020-02-18 15:33:21 +01:00
import { logger } from '../helpers/logger'
2020-08-27 14:44:21 +02:00
import { buildUrl } from '../helpers/utils'
2020-05-29 16:16:55 +02:00
import { CONFIG } from '../initializers/constants'
import { VideosSearchQuery } from '../types/video-search.model'
2020-08-27 14:44:21 +02:00
import { DBVideo, DBVideoDetails, EnhancedVideo, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
2020-02-19 15:39:35 +01:00
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
2020-02-14 14:09:31 +01:00
/**
 * Sets up the videos index by delegating to `buildIndex`.
 *
 * The index name comes from `CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS` and the
 * field mapping from `buildVideosMapping()`.
 *
 * @returns whatever `buildIndex` returns for this index/mapping pair
 */
function initVideosIndex () {
  const indexName = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  const mapping = buildVideosMapping()

  return buildIndex(indexName, mapping)
}
2020-02-18 15:33:21 +01:00
/**
 * Hands a batch of videos over to `indexDocuments`, targeting the videos index.
 * Every video goes through `formatVideoForDB` before it is stored.
 *
 * @param videos videos to index
 * @param replace forwarded untouched to `indexDocuments` (defaults to `false`)
 * @returns the result of `indexDocuments`
 */
async function indexVideos (videos: IndexableVideo[], replace = false) {
  const options = {
    objects: videos,
    formatter: (video: IndexableVideo) => formatVideoForDB(video),
    replace,
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  }

  return indexDocuments(options)
}
/**
 * Calls `elasticSearch.indices.refresh` on the videos index.
 *
 * @returns the value returned by the Elastic Search client
 */
function refreshVideosIndex () {
  const index = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS

  return elasticSearch.indices.refresh({ index })
}
2020-02-19 15:39:35 +01:00
/**
 * Issues a `delete_by_query` on the videos index for every document whose
 * `host` field is one of the given hosts.
 *
 * @param hosts hosts whose videos must be removed
 * @returns `undefined` when `hosts` is empty (nothing is sent), otherwise the
 *          value returned by `elasticSearch.delete_by_query`
 */
function removeVideosFromHosts (hosts: string[]) {
  // Nothing to delete: do not log and do not hit Elastic Search.
  if (hosts.length === 0) return

  logger.info({ hosts }, 'Will remove videos from hosts.')

  const hostsFilter = {
    terms: { host: hosts }
  }

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      query: {
        bool: { filter: hostsFilter }
      }
    }
  })
}
2020-02-20 14:16:55 +01:00
/**
 * Deletes from the videos index the videos of `host` that are indexed but are
 * not part of `existingVideos`.
 *
 * The ids currently indexed for the host are read with `getVideoIdsOf`; those
 * absent from `existingVideos` are removed with a `delete_by_query` restricted
 * to that same host.
 *
 * @param host host whose videos are being reconciled
 * @param existingVideos ids of the videos that must be kept
 * @returns the value returned by `elasticSearch.delete_by_query`
 */
async function removeNotExistingVideos (host: string, existingVideos: Set<number>) {
  const indexedIds = await getVideoIdsOf(host)
  const keptIds = Array.from(existingVideos)
  const staleIds = difference(indexedIds, keptIds)

  logger.info({ idsToRemove: staleIds }, 'Will remove %d videos from %s.', staleIds.length, host)

  // Both conditions must hold: the id is stale AND the document belongs to this host.
  const filter = [
    { terms: { id: staleIds } },
    { term: { host } }
  ]

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * Collects the `id` values indexed for a host by running a `terms` aggregation
 * on the videos index (no hits are requested, only the aggregation).
 *
 * @param host host whose video ids are wanted
 * @returns the `key` of every bucket of the `ids` aggregation
 */
async function getVideoIdsOf (host: string) {
  // The aggregation asks for at most 500000 buckets.
  const idsAggregation = {
    terms: {
      size: 500000,
      field: 'id'
    }
  }

  const hostQuery = {
    bool: {
      filter: [
        { term: { host } }
      ]
    }
  }

  const { body } = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      size: 0,
      aggs: { ids: idsAggregation },
      query: hostQuery
    }
  })

  return body.aggregations.ids.buckets.map(bucket => bucket.key)
}
2020-02-14 16:14:45 +01:00
/**
 * Searches the videos index with the criteria found in `search`.
 *
 * A `bool` query is assembled from the criteria that are set (falsy criteria
 * are skipped):
 *  - `search`: `multi_match` on `name`, `description` and `tags` (`must`)
 *  - `blockedAccounts` / `blockedHosts`: excluded through `must_not`
 *  - every other criterion: added to `filter`
 *
 * Paging (`start`, `count`) and sorting (`sort`, through `buildSort`) are taken
 * from `search` as well.
 *
 * @param search search criteria, paging and sort options
 * @returns the Elastic Search response processed by `extractQueryResult`
 */
async function queryVideos (search: VideosSearchQuery) {
  const bool: any = {}
  const filter: any[] = []
  const mustNot: any[] = []

  if (search.search) {
    Object.assign(bool, {
      must: [
        {
          multi_match: {
            query: search.search,
            fields: [ 'name', 'description', 'tags' ],
            fuzziness: 'AUTO'
          }
        }
      ]
    })
  }

  if (search.blockedAccounts) {
    mustNot.push({
      terms: {
        'account.handle': search.blockedAccounts
      }
    })
  }

  if (search.blockedHosts) {
    mustNot.push({
      terms: {
        host: search.blockedHosts
      }
    })
  }

  if (search.startDate) {
    filter.push({
      range: {
        publishedAt: {
          gte: search.startDate
        }
      }
    })
  }

  if (search.endDate) {
    filter.push({
      range: {
        publishedAt: {
          lte: search.endDate
        }
      }
    })
  }

  // The originallyPublishedAt bounds must come from their own criteria, not
  // from startDate/endDate (which only apply to publishedAt).
  if (search.originallyPublishedStartDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          gte: search.originallyPublishedStartDate
        }
      }
    })
  }

  if (search.originallyPublishedEndDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          lte: search.originallyPublishedEndDate
        }
      }
    })
  }

  // 'both' means "do not filter on nsfw at all"
  if (search.nsfw && search.nsfw !== 'both') {
    filter.push({
      term: {
        // Stringify first so that both `true` and 'true' select nsfw videos
        nsfw: (search.nsfw + '') === 'true'
      }
    })
  }

  if (search.categoryOneOf) {
    filter.push({
      terms: {
        'category.id': search.categoryOneOf
      }
    })
  }

  if (search.licenceOneOf) {
    filter.push({
      terms: {
        'licence.id': search.licenceOneOf
      }
    })
  }

  if (search.languageOneOf) {
    filter.push({
      terms: {
        'language.id': search.languageOneOf
      }
    })
  }

  if (search.tagsOneOf) {
    filter.push({
      terms: {
        tags: search.tagsOneOf
      }
    })
  }

  if (search.tagsAllOf) {
    // One `term` filter per tag: all of them have to match
    for (const t of search.tagsAllOf) {
      filter.push({
        term: {
          tags: t
        }
      })
    }
  }

  if (search.durationMin) {
    filter.push({
      range: {
        duration: {
          gte: search.durationMin
        }
      }
    })
  }

  if (search.durationMax) {
    filter.push({
      range: {
        duration: {
          lte: search.durationMax
        }
      }
    })
  }

  // `filter` is always set (possibly empty); `must_not` only when needed
  Object.assign(bool, { filter })

  if (mustNot.length !== 0) {
    Object.assign(bool, { must_not: mustNot })
  }

  const body = {
    from: search.start,
    size: search.count,
    sort: buildSort(search.sort),
    query: { bool }
  }

  logger.debug({ body }, 'Will query Elastic Search for videos.')

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body
  })

  return extractQueryResult(res)
}
// Public API of this module
export {
  // Index lifecycle
  initVideosIndex,
  refreshVideosIndex,

  // Writes
  indexVideos,
  removeVideosFromHosts,
  removeNotExistingVideos,

  // Reads
  queryVideos,
  formatVideoForAPI
}
// ############################################################################
2020-02-18 15:33:21 +01:00
/**
 * Converts an indexable video into the document stored in the videos index.
 *
 * Besides copying the video fields, it:
 *  - stamps `indexedAt` with the current date
 *  - adds a `handle` (`name@host`) to the account and to the channel
 *  - formats both avatars with `formatAvatarForDB`
 *  - sets `tags` only when the input carries a truthy `tags` value
 *    (otherwise `undefined`)
 *
 * @param v video to convert
 * @returns the document to index
 */
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  const { category, licence, language, privacy, account, channel } = v

  // Only the "details" flavour of the video has tags
  const tags = (v as IndexableVideoDetails).tags

  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: { id: category.id, label: category.label },
    licence: { id: licence.id, label: licence.label },
    language: { id: language.id, label: language.label },
    privacy: { id: privacy.id, label: privacy.label },

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,
    host: v.host,
    url: v.url,

    tags: tags || undefined,

    account: {
      id: account.id,
      name: account.name,
      displayName: account.displayName,
      url: account.url,
      host: account.host,
      handle: `${account.name}@${account.host}`,
      avatar: formatAvatarForDB(account)
    },

    channel: {
      id: channel.id,
      name: channel.name,
      displayName: channel.displayName,
      url: channel.url,
      host: channel.host,
      handle: `${channel.name}@${channel.host}`,
      avatar: formatAvatarForDB(channel)
    }
  }
}
2020-08-27 14:44:21 +02:00
/**
 * Converts a stored video document into the object returned by the API.
 *
 * Compared with the stored document:
 *  - `createdAt`, `updatedAt` and `publishedAt` are wrapped in `Date` objects
 *    (`originallyPublishedAt` is passed through untouched)
 *  - `thumbnailUrl`, `previewUrl` and `embedUrl` are built with `buildUrl` from
 *    the video host and the matching path
 *  - `isLocal` is the result of `fromHost && fromHost === v.host`, hence falsy
 *    whenever `fromHost` is not provided
 *  - avatars are formatted with `formatAvatarForAPI`
 *
 * @param v stored video document
 * @param fromHost optional host the video host is compared with
 * @returns the video as exposed by the API
 */
function formatVideoForAPI (v: DBVideoDetails, fromHost?: string): EnhancedVideo {
  const { host, category, licence, language, privacy, account, channel } = v

  return {
    id: v.id,
    uuid: v.uuid,

    score: v.score,

    createdAt: new Date(v.createdAt),
    updatedAt: new Date(v.updatedAt),
    publishedAt: new Date(v.publishedAt),
    originallyPublishedAt: v.originallyPublishedAt,

    category: { id: category.id, label: category.label },
    licence: { id: licence.id, label: licence.label },
    language: { id: language.id, label: language.label },
    privacy: { id: privacy.id, label: privacy.label },

    name: v.name,
    description: v.description,
    duration: v.duration,

    tags: v.tags,

    thumbnailPath: v.thumbnailPath,
    thumbnailUrl: buildUrl(host, v.thumbnailPath),

    previewPath: v.previewPath,
    previewUrl: buildUrl(host, v.previewPath),

    embedPath: v.embedPath,
    embedUrl: buildUrl(host, v.embedPath),

    url: v.url,

    isLocal: fromHost && fromHost === host,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,

    account: {
      id: account.id,
      name: account.name,
      displayName: account.displayName,
      url: account.url,
      host: account.host,
      avatar: formatAvatarForAPI(account)
    },

    channel: {
      id: channel.id,
      name: channel.name,
      displayName: channel.displayName,
      url: channel.url,
      host: channel.host,
      avatar: formatAvatarForAPI(channel)
    }
  }
}
/**
 * Builds the field mapping shared by the `account` and `channel` sub-objects
 * of a video document.
 *
 * `name` is a `text` field with an extra `raw` keyword sub-field; the avatar
 * properties come from `buildAvatarMapping()`.
 *
 * @returns a new mapping object on every call
 */
function buildChannelOrAccountMapping () {
  // Factory (not a shared constant) so every field gets its own object
  const keyword = () => ({ type: 'keyword' })

  return {
    id: { type: 'long' },

    name: {
      type: 'text',
      fields: { raw: keyword() }
    },

    displayName: { type: 'text' },

    url: keyword(),
    host: keyword(),
    handle: keyword(),

    avatar: {
      properties: buildAvatarMapping()
    }
  }
}
/**
 * Builds the field mapping of the videos index.
 *
 * Notable choices visible here:
 *  - all dates use the `date_optional_time` format
 *  - `category`, `licence`, `language` and `privacy` share the same shape:
 *    a keyword `id` and a text `label`
 *  - `tags` is a `text` field with an extra `raw` keyword sub-field
 *  - `account` and `channel` both use `buildChannelOrAccountMapping()`
 *
 * @returns a new mapping object on every call
 */
function buildVideosMapping () {
  // Factories (not shared constants) so every field gets its own object
  const long = () => ({ type: 'long' })
  const keyword = () => ({ type: 'keyword' })
  const text = () => ({ type: 'text' })
  const date = () => ({ type: 'date', format: 'date_optional_time' })
  const idAndLabel = () => ({ properties: { id: keyword(), label: text() } })
  const actor = () => ({ properties: buildChannelOrAccountMapping() })

  return {
    id: long(),
    uuid: keyword(),

    createdAt: date(),
    updatedAt: date(),
    publishedAt: date(),
    originallyPublishedAt: date(),
    indexedAt: date(),

    category: idAndLabel(),
    licence: idAndLabel(),
    language: idAndLabel(),
    privacy: idAndLabel(),

    name: text(),
    description: text(),

    tags: {
      type: 'text',
      fields: { raw: keyword() }
    },

    duration: long(),

    thumbnailPath: keyword(),
    previewPath: keyword(),
    embedPath: keyword(),
    url: keyword(),

    views: long(),
    likes: long(),
    dislikes: long(),

    nsfw: { type: 'boolean' },
    host: keyword(),

    account: actor(),
    channel: actor()
  }
}