sepia-search-motore-di-rice.../server/lib/elastic-search-videos.ts

496 lines
8.6 KiB
TypeScript
Raw Normal View History

2020-02-14 14:09:31 +01:00
import { CONFIG } from '../initializers/constants'
2020-02-14 16:14:45 +01:00
import { DBVideo, DBVideoDetails, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
2020-02-14 14:09:31 +01:00
import { flatMap } from 'lodash'
import { Avatar } from '@shared/models'
2020-02-14 16:14:45 +01:00
import { buildSort, elasticSearch } from '../helpers/elastic-search'
import { VideosSearchQuery } from '../types/video-search.model'
import { inspect } from 'util'
2020-02-14 14:09:31 +01:00
/**
 * Create the Elasticsearch index that stores videos, with 1 shard, 1 replica
 * and the field mappings returned by `buildVideosMapping()`.
 *
 * A "resource_already_exists_exception" response is swallowed, so calling this
 * again once the index exists is not an error. Any other failure is rethrown
 * unchanged.
 *
 * @returns the promise of the `indices.create` call; it resolves to `undefined`
 * when the index already existed
 */
function initVideosIndex () {
  return elasticSearch.indices.create({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      settings: {
        number_of_shards: 1,
        number_of_replicas: 1
      },
      mappings: {
        properties: buildVideosMapping()
      }
    }
  }).catch(err => {
    // Every step is optional on purpose: an error body that does not have the
    // expected shape must not raise a TypeError that would hide the real error
    const rootCauseType = err?.meta?.body?.error?.root_cause?.[0]?.type

    if (err?.name === 'ResponseError' && rootCauseType === 'resource_already_exists_exception') return

    throw err
  })
}
2020-02-14 16:14:45 +01:00
/**
 * Upsert the given videos into the videos index with a single bulk request.
 *
 * Every video is sent as an `update` action addressed by its
 * `elasticSearchId`, with `doc_as_upsert` so that the document produced by
 * `formatVideo()` is inserted when it does not exist yet.
 *
 * @param videos videos to index
 * @returns `created`: the uuids of the videos whose bulk item result is
 * "created" (videos that were only updated are not listed)
 */
async function indexVideos (videos: IndexableVideo[]): Promise<{ created: string[] }> {
  // A bulk request without any action is rejected by Elasticsearch, so skip the round trip
  if (videos.length === 0) {
    const created: string[] = []
    return { created }
  }

  // Bulk items only carry the Elasticsearch id: keep a way back to the video uuid
  const uuidByElasticSearchId: { [elId: string]: string } = {}
  for (const video of videos) {
    uuidByElasticSearchId[video.elasticSearchId] = video.uuid
  }

  // The bulk body alternates an action line and its payload line
  const body = flatMap(videos, v => {
    return [
      {
        update: {
          _id: v.elasticSearchId,
          _index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
        }
      },
      {
        doc: formatVideo(v),
        doc_as_upsert: true
      }
    ]
  })

  const result = await elasticSearch.bulk({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body
  })

  const created: string[] = result.body.items
    .map(i => i.update)
    .filter(i => i.result === 'created')
    .map(i => uuidByElasticSearchId[i._id])

  return { created }
}
/**
 * Ask Elasticsearch to refresh the videos index.
 *
 * @returns whatever `elasticSearch.indices.refresh` returns for that index
 */
function refreshVideosIndex () {
  const videosIndex = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  const refreshParams = { index: videosIndex }

  return elasticSearch.indices.refresh(refreshParams)
}
/**
 * List the distinct `host` values found in the videos index.
 *
 * Uses a terms aggregation on the `host` field and asks for no search hit
 * (`size: 0`), since only the aggregation buckets are read.
 *
 * @returns the key of every returned host bucket
 */
async function listIndexInstances () {
  // Without an explicit size a terms aggregation only returns its 10 biggest
  // buckets, which would silently truncate the list of hosts
  const maxInstances = 5000

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      size: 0,
      aggs: {
        hosts: {
          terms: {
            field: 'host',
            size: maxInstances
          }
        }
      }
    }
  })

  return res.body.aggregations.hosts.buckets.map(b => b.key)
}
2020-02-14 16:14:45 +01:00
/**
 * Search the videos index.
 *
 * The free text (`search.search`) is matched against `name` and `description`
 * with a `multi_match` query; every other criterion is added as a non scoring
 * `filter` clause of the same `bool` query. Pagination comes from
 * `search.start` / `search.count` and ordering from `buildVideosSort()`.
 *
 * Exact-value filters target the keyword (sub)fields declared by the index
 * mapping: `category.id`, `licence.id`, `language.id` and `tags.raw`.
 *
 * @param search search criteria; criteria that are not set are ignored
 * @returns `total`: number of matching documents as reported by Elasticsearch,
 * `data`: the `_source` of the hits of the requested page
 */
async function queryVideos (search: VideosSearchQuery) {
  const bool: any = {}
  const filter: any[] = []

  if (search.search) {
    bool.must = [
      {
        multi_match: {
          query: search.search,
          fields: [ 'name', 'description' ]
        }
      }
    ]
  }

  if (search.startDate) {
    filter.push({ range: { publishedAt: { gte: search.startDate } } })
  }

  if (search.endDate) {
    filter.push({ range: { publishedAt: { lte: search.endDate } } })
  }

  // The original publication bounds have their own criteria: they must not reuse startDate/endDate
  if (search.originallyPublishedStartDate) {
    filter.push({ range: { originallyPublishedAt: { gte: search.originallyPublishedStartDate } } })
  }

  if (search.originallyPublishedEndDate) {
    filter.push({ range: { originallyPublishedAt: { lte: search.originallyPublishedEndDate } } })
  }

  // NOTE(review): only a truthy nsfw value adds a filter, so a boolean `false` cannot
  // be used to exclude NSFW videos; confirm the type of VideosSearchQuery.nsfw
  if (search.nsfw) {
    filter.push({ term: { nsfw: search.nsfw } })
  }

  // category, licence and language are objects in the mapping: the value to match is their keyword `id`
  if (search.categoryOneOf) {
    filter.push({ terms: { 'category.id': search.categoryOneOf } })
  }

  if (search.licenceOneOf) {
    filter.push({ terms: { 'licence.id': search.licenceOneOf } })
  }

  if (search.languageOneOf) {
    filter.push({ terms: { 'language.id': search.languageOneOf } })
  }

  // Tags are matched on the non analyzed `raw` subfield. A terms query already means
  // "at least one of" and does not accept a minimum_should_match option
  if (search.tagsOneOf) {
    filter.push({ terms: { 'tags.raw': search.tagsOneOf } })
  }

  // "All of": one term filter per tag, since filter clauses are ANDed together
  if (search.tagsAllOf) {
    for (const tag of search.tagsAllOf) {
      filter.push({ term: { 'tags.raw': tag } })
    }
  }

  if (search.durationMin) {
    filter.push({ range: { duration: { gte: search.durationMin } } })
  }

  if (search.durationMax) {
    filter.push({ range: { duration: { lte: search.durationMax } } })
  }

  bool.filter = filter

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      from: search.start,
      size: search.count,
      sort: buildVideosSort(search.sort),
      query: { bool }
    }
  })

  const hits = res.body.hits

  return { total: hits.total.value, data: hits.hits.map(h => h._source) }
}
// Public API of this module; the functions declared after this list are internal helpers
export {
indexVideos,
queryVideos,
refreshVideosIndex,
initVideosIndex,
listIndexInstances
}
// ############################################################################
2020-02-14 16:14:45 +01:00
/**
 * Translate a sort string into the `sort` section of an Elasticsearch search body.
 *
 * The string is parsed by `buildSort()`; the special field name "match" is
 * replaced by `_score`, any other field name is used as is.
 *
 * @param sort sort string, in the format understood by `buildSort()`
 * @returns a one element array: `{ <field>: { order: <direction> } }`
 */
function buildVideosSort (sort: string) {
  const parsed = buildSort(sort)

  let esField = parsed.field
  if (esField === 'match') esField = '_score'

  const sortClause = { [esField]: { order: parsed.direction } }

  return [ sortClause ]
}
/**
 * Build the document stored in the videos index from an indexable video.
 *
 * Only the listed fields are copied. `category`, `licence`, `language` and
 * `privacy` are reduced to `{ id, label }`, `account` and `channel` to their
 * identity fields plus the avatar built by `formatAvatar()`.
 *
 * @param v video to convert
 * @returns the document to index; `indexedAt` is the time of this call and
 * `tags` is an empty array when the video has no (truthy) `tags` property
 */
function formatVideo (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: {
      id: v.category.id,
      label: v.category.label
    },
    licence: {
      id: v.licence.id,
      label: v.licence.label
    },
    language: {
      id: v.language.id,
      label: v.language.label
    },
    privacy: {
      id: v.privacy.id,
      label: v.privacy.label
    },

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,
    host: v.host,

    // `tags` is read through `any` because it is not accessible on the union type of `v`
    tags: (v as any).tags ? (v as any).tags : [],

    account: {
      id: v.account.id,
      name: v.account.name,
      displayName: v.account.displayName,
      url: v.account.url,
      host: v.account.host,

      avatar: formatAvatar(v.account)
    },

    channel: {
      id: v.channel.id,
      name: v.channel.name,
      displayName: v.channel.displayName,
      url: v.channel.url,
      host: v.channel.host,

      avatar: formatAvatar(v.channel)
    }
  }
}
/**
 * Extract the indexed avatar fields of an account or a channel.
 *
 * @param obj object that may carry an `avatar`
 * @returns `{ path, createdAt, updatedAt }` copied from the avatar, or `null`
 * when `obj.avatar` is not set
 */
function formatAvatar (obj: { avatar?: Avatar }) {
  const { avatar } = obj

  if (avatar) {
    const { path, createdAt, updatedAt } = avatar

    return { path, createdAt, updatedAt }
  }

  return null
}
/**
 * Build the Elasticsearch field mappings shared by the `account` and `channel`
 * sub-objects of a video document.
 *
 * `name` is indexed as text with an additional `raw` keyword subfield;
 * `url`, `host` and the avatar `path` are keywords.
 *
 * @returns a new `properties` object on every call
 */
function buildChannelOrAccountMapping () {
  return {
    id: {
      type: 'long'
    },

    name: {
      type: 'text',
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },

    displayName: {
      type: 'text'
    },

    url: {
      type: 'keyword'
    },

    host: {
      type: 'keyword'
    },

    avatar: {
      properties: {
        path: {
          type: 'keyword'
        },
        createdAt: {
          type: 'date'
        },
        updatedAt: {
          type: 'date'
        }
      }
    }
  }
}
/**
 * Mapping of an `{ id, label }` constant (category, licence, language, privacy):
 * the id is an exact-match keyword, the label is full text.
 */
function buildIdLabelMapping () {
  return {
    properties: {
      id: {
        type: 'keyword'
      },
      label: {
        type: 'text'
      }
    }
  }
}

/**
 * Build the Elasticsearch field mappings (`mappings.properties`) of a video
 * document.
 *
 * `tags` is indexed as text with an additional `raw` keyword subfield;
 * `account` and `channel` both use `buildChannelOrAccountMapping()`.
 *
 * @returns a new `properties` object on every call
 */
function buildVideosMapping () {
  return {
    id: {
      type: 'long'
    },

    uuid: {
      type: 'keyword'
    },

    createdAt: {
      type: 'date'
    },
    updatedAt: {
      type: 'date'
    },
    publishedAt: {
      type: 'date'
    },
    originallyPublishedAt: {
      type: 'date'
    },
    indexedAt: {
      type: 'date'
    },

    category: buildIdLabelMapping(),
    licence: buildIdLabelMapping(),
    language: buildIdLabelMapping(),
    privacy: buildIdLabelMapping(),

    name: {
      type: 'text'
    },
    description: {
      type: 'text'
    },

    tags: {
      type: 'text',
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },

    duration: {
      type: 'long'
    },

    thumbnailPath: {
      type: 'keyword'
    },
    previewPath: {
      type: 'keyword'
    },
    embedPath: {
      type: 'keyword'
    },

    views: {
      type: 'long'
    },
    likes: {
      type: 'long'
    },
    dislikes: {
      type: 'long'
    },

    nsfw: {
      type: 'boolean'
    },

    host: {
      type: 'keyword'
    },

    account: {
      properties: buildChannelOrAccountMapping()
    },
    channel: {
      properties: buildChannelOrAccountMapping()
    }
  }
}