sepia-search-motore-di-rice.../server/lib/elastic-search-videos.ts

596 lines
11 KiB
TypeScript
Raw Normal View History

2020-02-14 14:09:31 +01:00
import { CONFIG } from '../initializers/constants'
2020-02-14 16:14:45 +01:00
import { DBVideo, DBVideoDetails, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
2020-02-14 14:09:31 +01:00
import { flatMap } from 'lodash'
2020-02-18 15:33:21 +01:00
import { Avatar, Video } from '@shared/models'
2020-02-14 16:14:45 +01:00
import { buildSort, elasticSearch } from '../helpers/elastic-search'
import { VideosSearchQuery } from '../types/video-search.model'
2020-02-18 15:33:21 +01:00
import { logger } from '../helpers/logger'
2020-02-14 14:09:31 +01:00
/**
 * Create the videos index with its shard/replica settings and field mapping.
 *
 * When Elastic Search answers with a `ResponseError` whose first root cause is
 * `resource_already_exists_exception`, the error is swallowed and the returned
 * promise resolves to `undefined`. Every other failure is re-thrown untouched.
 */
function initVideosIndex () {
  logger.info('Initialize %s Elastic Search index.', CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS)

  return elasticSearch.indices.create({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      settings: {
        number_of_shards: 1,
        number_of_replicas: 1
      },
      mappings: {
        properties: buildVideosMapping()
      }
    }
  }).catch(err => {
    // Optional-chain the whole path: an error whose body carries no
    // `error.root_cause` must be re-thrown as itself, not replaced by a
    // TypeError raised while inspecting it.
    const rootCauseType = err?.meta?.body?.error?.root_cause?.[0]?.type
    if (err?.name === 'ResponseError' && rootCauseType === 'resource_already_exists_exception') return

    throw err
  })
}
2020-02-18 15:33:21 +01:00
/**
 * Write a batch of videos to the videos index with a single bulk request.
 *
 * @param videos  videos to write; each document is addressed by its `elasticSearchId`
 * @param replace when true each document is overwritten (`index` action);
 *                otherwise the fields are merged into the stored document, which
 *                is inserted if missing (`update` action with `doc_as_upsert`)
 * @returns `created`: the uuids of the videos whose bulk item result is `created`
 * @throws Error when the bulk response reports `errors === true`
 */
async function indexVideos (videos: IndexableVideo[], replace = false) {
  // Nothing to send: do not issue a bulk request with an empty body
  if (videos.length === 0) {
    const created: string[] = []
    return { created }
  }

  // Maps Elastic Search document ids back to video uuids for the result below
  const elIdIndex: { [elId: string]: string } = {}
  for (const video of videos) {
    elIdIndex[video.elasticSearchId] = video.uuid
  }

  const method = replace ? 'index' : 'update'

  // The bulk body alternates one action line and one payload line per video
  const body = flatMap(videos, v => {
    const doc = formatVideoForDB(v)
    const options = replace
      ? doc
      : { doc, doc_as_upsert: true }

    return [
      {
        [method]: {
          _id: v.elasticSearchId,
          _index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
        }
      },
      options
    ]
  })

  const result = await elasticSearch.bulk({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body
  })

  const resultBody = result.body
  if (resultBody.errors === true) {
    const msg = 'Cannot insert data in elastic search.'
    logger.error(msg, { err: resultBody })
    throw new Error(msg)
  }

  const created: string[] = result.body.items
    .map(i => i[method])
    .filter(i => i.result === 'created')
    .map(i => elIdIndex[i._id])

  return { created }
}
/**
 * Ask Elastic Search to refresh the videos index.
 *
 * @returns whatever `elasticSearch.indices.refresh` returns for that index
 */
function refreshVideosIndex () {
  const params = { index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS }

  return elasticSearch.indices.refresh(params)
}
/**
 * List the distinct `host` values found in the videos index.
 *
 * Uses a `terms` aggregation on `host` and asks for no search hits
 * (`size: 0` at the top level).
 *
 * @returns the key of each aggregation bucket
 */
async function listIndexInstances () {
  // A `terms` aggregation returns only 10 buckets unless told otherwise, which
  // would silently truncate the list: request an explicit, much larger limit.
  const maxHosts = 5000

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      size: 0,
      aggs: {
        hosts: {
          terms: {
            size: maxHosts,
            field: 'host'
          }
        }
      }
    }
  })

  return res.body.aggregations.hosts.buckets.map(b => b.key)
}
2020-02-14 16:14:45 +01:00
/**
 * Search the videos index.
 *
 * A non-empty `search.search` becomes a fuzzy `multi_match` on `name`,
 * `description` and `tags`. Every other criterion that is set is appended to
 * the `bool.filter` list. Paging comes from `search.start` / `search.count`
 * and ordering from `buildVideosSort(search.sort)`.
 *
 * @returns `total`: the `hits.total.value` of the response;
 *          `data`: the `_source` of each returned hit
 */
async function queryVideos (search: VideosSearchQuery) {
  const bool: any = {}
  const filter: any[] = []

  if (search.search) {
    Object.assign(bool, {
      must: [
        {
          multi_match: {
            query: search.search,
            fields: [ 'name', 'description', 'tags' ],
            fuzziness: 'AUTO'
          }
        }
      ]
    })
  }

  if (search.startDate) {
    filter.push({
      range: {
        publishedAt: {
          gte: search.startDate
        }
      }
    })
  }

  if (search.endDate) {
    filter.push({
      range: {
        publishedAt: {
          lte: search.endDate
        }
      }
    })
  }

  // The two filters below must use their own bounds, not the publishedAt ones
  if (search.originallyPublishedStartDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          gte: search.originallyPublishedStartDate
        }
      }
    })
  }

  if (search.originallyPublishedEndDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          lte: search.originallyPublishedEndDate
        }
      }
    })
  }

  // 'both' (or no value) means no NSFW filtering at all
  if (search.nsfw && search.nsfw !== 'both') {
    filter.push({
      term: {
        // Stringify first so that a boolean true and the string 'true' behave alike
        nsfw: (search.nsfw + '') === 'true'
      }
    })
  }

  if (search.categoryOneOf) {
    filter.push({
      terms: {
        'category.id': search.categoryOneOf
      }
    })
  }

  if (search.licenceOneOf) {
    filter.push({
      terms: {
        'licence.id': search.licenceOneOf
      }
    })
  }

  if (search.languageOneOf) {
    filter.push({
      terms: {
        'language.id': search.languageOneOf
      }
    })
  }

  // NOTE(review): tag filters target the `tags` field itself; confirm against
  // the index mapping that this gives the intended exact-match behaviour.
  if (search.tagsOneOf) {
    filter.push({
      terms: {
        tags: search.tagsOneOf
      }
    })
  }

  if (search.tagsAllOf) {
    // One `term` filter per tag: all of them have to match
    for (const t of search.tagsAllOf) {
      filter.push({
        term: {
          tags: t
        }
      })
    }
  }

  if (search.durationMin) {
    filter.push({
      range: {
        duration: {
          gte: search.durationMin
        }
      }
    })
  }

  if (search.durationMax) {
    filter.push({
      range: {
        duration: {
          lte: search.durationMax
        }
      }
    })
  }

  Object.assign(bool, { filter })

  const body = {
    from: search.start,
    size: search.count,
    sort: buildVideosSort(search.sort),
    query: { bool }
  }

  logger.debug('Will query Elastic Search for videos.', { body })

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body
  })

  const hits = res.body.hits

  return { total: hits.total.value, data: hits.hits.map(h => h._source) }
}
export {
indexVideos,
queryVideos,
refreshVideosIndex,
initVideosIndex,
2020-02-18 15:33:21 +01:00
listIndexInstances,
formatVideoForAPI
2020-02-14 14:09:31 +01:00
}
// ############################################################################
2020-02-14 16:14:45 +01:00
/**
 * Turn a sort string into the `sort` clause of a videos search.
 *
 * The string is parsed by `buildSort`; a parsed field named 'match' is
 * translated to Elastic Search's `_score`, any other field is used as is.
 *
 * @returns a one-element array: `{ <field>: { order: <direction> } }`
 */
function buildVideosSort (sort: string) {
  const parsed = buildSort(sort)

  let esField = parsed.field
  if (esField === 'match') esField = '_score'

  const clause = { [esField]: { order: parsed.direction } }

  return [ clause ]
}
2020-02-18 15:33:21 +01:00
/**
 * Build the document stored in the videos index from an indexable video.
 *
 * Only the fields listed here are kept. `indexedAt` is set to the current
 * date at call time. `tags` is read through the details type and is left
 * `undefined` when the input has none.
 */
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: {
      id: v.category.id,
      label: v.category.label
    },
    licence: {
      id: v.licence.id,
      label: v.licence.label
    },
    language: {
      id: v.language.id,
      label: v.language.label
    },
    privacy: {
      id: v.privacy.id,
      label: v.privacy.label
    },

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,
    nsfw: v.nsfw,
    host: v.host,

    // Any falsy value is normalised to undefined
    tags: (v as IndexableVideoDetails).tags || undefined,

    account: {
      id: v.account.id,
      name: v.account.name,
      displayName: v.account.displayName,
      url: v.account.url,
      host: v.account.host,
      avatar: formatAvatarForDB(v.account)
    },

    channel: {
      id: v.channel.id,
      name: v.channel.name,
      displayName: v.channel.displayName,
      url: v.channel.url,
      host: v.channel.host,
      avatar: formatAvatarForDB(v.channel)
    }
  }
}
/**
 * Reduce the avatar of an account or channel to the fields kept in the index.
 *
 * @returns `{ path, createdAt, updatedAt }`, or `null` when `obj` has no avatar
 */
function formatAvatarForDB (obj: { avatar?: Avatar }) {
  const avatar = obj.avatar

  if (avatar) {
    const { path, createdAt, updatedAt } = avatar

    return { path, createdAt, updatedAt }
  }

  return null
}
/**
 * Convert a document read from the videos index into the API `Video` shape.
 *
 * @param v        stored video document
 * @param fromHost optional host to compare with the video host
 * @returns the video; `isLocal` is always a boolean, true only when
 *          `fromHost` is provided and equals `v.host`
 */
function formatVideoForAPI (v: DBVideo, fromHost?: string): Video {
  return {
    id: v.id,
    uuid: v.uuid,

    createdAt: new Date(v.createdAt),
    updatedAt: new Date(v.updatedAt),
    publishedAt: new Date(v.publishedAt),
    // Passed through as stored, unlike the three dates above
    originallyPublishedAt: v.originallyPublishedAt,

    category: {
      id: v.category.id,
      label: v.category.label
    },
    licence: {
      id: v.licence.id,
      label: v.licence.label
    },
    language: {
      id: v.language.id,
      label: v.language.label
    },
    privacy: {
      id: v.privacy.id,
      label: v.privacy.label
    },

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    // Coerce to a real boolean: a bare `fromHost && …` evaluates to
    // undefined or '' when no host is given
    isLocal: Boolean(fromHost) && fromHost === v.host,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,

    account: {
      id: v.account.id,
      name: v.account.name,
      displayName: v.account.displayName,
      url: v.account.url,
      host: v.account.host,
      avatar: formatAvatarForAPI(v.account)
    },

    channel: {
      id: v.channel.id,
      name: v.channel.name,
      displayName: v.channel.displayName,
      url: v.channel.url,
      host: v.channel.host,
      avatar: formatAvatarForAPI(v.channel)
    }
  }
}
2020-02-18 15:33:21 +01:00
/**
 * Reduce the avatar of a stored account or channel to the fields exposed by the API.
 *
 * @returns `{ path, createdAt, updatedAt }`, or `null` when `obj` has no avatar
 */
function formatAvatarForAPI (obj: { avatar?: Avatar }) {
  if (!obj.avatar) return null

  return {
    path: obj.avatar.path,
    createdAt: obj.avatar.createdAt,
    updatedAt: obj.avatar.updatedAt
  }
}
/**
 * Build the field mapping shared by the `account` and `channel` sub-objects
 * of a video document.
 *
 * A new object tree is created on every call.
 */
function buildChannelOrAccountMapping () {
  // Fresh object per date field so no mapping node is shared between fields
  const dateField = () => ({
    type: 'date',
    format: 'date_optional_time'
  })

  return {
    id: {
      type: 'long'
    },

    name: {
      type: 'text',
      // `name.raw` keeps an additional keyword version of the name
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },
    displayName: {
      type: 'text'
    },

    url: {
      type: 'keyword'
    },
    host: {
      type: 'keyword'
    },

    avatar: {
      properties: {
        path: {
          type: 'keyword'
        },
        createdAt: dateField(),
        updatedAt: dateField()
      }
    }
  }
}
/**
 * Build the `properties` mapping of the videos index.
 *
 * `account` and `channel` both use the mapping returned by
 * `buildChannelOrAccountMapping()`. A new object tree is created on every call.
 */
function buildVideosMapping () {
  // Each helper returns a fresh object so no mapping node is shared between fields
  const dateField = () => ({
    type: 'date',
    format: 'date_optional_time'
  })

  // Shape shared by category, licence, language and privacy
  const idAndLabel = () => ({
    properties: {
      id: {
        type: 'keyword'
      },
      label: {
        type: 'text'
      }
    }
  })

  return {
    id: {
      type: 'long'
    },
    uuid: {
      type: 'keyword'
    },

    createdAt: dateField(),
    updatedAt: dateField(),
    publishedAt: dateField(),
    originallyPublishedAt: dateField(),
    indexedAt: dateField(),

    category: idAndLabel(),
    licence: idAndLabel(),
    language: idAndLabel(),
    privacy: idAndLabel(),

    name: {
      type: 'text'
    },
    description: {
      type: 'text'
    },

    tags: {
      type: 'text',
      // `tags.raw` keeps an additional keyword version of each tag
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },

    duration: {
      type: 'long'
    },

    thumbnailPath: {
      type: 'keyword'
    },
    previewPath: {
      type: 'keyword'
    },
    embedPath: {
      type: 'keyword'
    },

    views: {
      type: 'long'
    },
    likes: {
      type: 'long'
    },
    dislikes: {
      type: 'long'
    },

    nsfw: {
      type: 'boolean'
    },
    host: {
      type: 'keyword'
    },

    account: {
      properties: buildChannelOrAccountMapping()
    },
    channel: {
      properties: buildChannelOrAccountMapping()
    }
  }
}