import { difference } from 'lodash'
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
import { logger } from '../helpers/logger'
import { buildUrl } from '../helpers/utils'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../initializers/constants'
import { VideosSearchQuery } from '../types/video-search.model'
import { DBVideo, DBVideoDetails, EnhancedVideo, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'

/**
 * Builds the videos index using the mapping returned by `buildVideosMapping()`.
 *
 * @returns whatever `buildIndex` returns for the configured videos index
 */
function initVideosIndex () {
  return buildIndex(CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS, buildVideosMapping())
}

/**
 * Indexes the given videos into the configured videos index.
 *
 * Each video is converted with `formatVideoForDB` before being handed to `indexDocuments`.
 *
 * @param videos - videos to index
 * @param replace - forwarded as-is to `indexDocuments` (defaults to `false`)
 */
async function indexVideos (videos: IndexableVideo[], replace = false) {
  return indexDocuments({
    objects: videos,
    formatter: v => formatVideoForDB(v),
    replace,
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  })
}

/**
 * Asks Elasticsearch to refresh the videos index.
 */
function refreshVideosIndex () {
  logger.info('Refreshing videos index.')

  return elasticSearch.indices.refresh({ index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS })
}

/**
 * Deletes every indexed video whose `host` is one of the given hosts.
 *
 * @param hosts - hosts whose videos must be removed
 * @returns the `delete_by_query` promise, or `undefined` when `hosts` is empty (no request is sent)
 */
function removeVideosFromHosts (hosts: string[]) {
  if (hosts.length === 0) return

  logger.info({ hosts }, 'Will remove videos from hosts.')

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      query: {
        bool: {
          filter: {
            terms: {
              host: hosts
            }
          }
        }
      }
    }
  })
}

/**
 * Deletes the videos of `host` that are indexed but are not part of `existingVideos`.
 *
 * The ids currently indexed for the host are fetched with `getVideoIdsOf`, and every id
 * missing from `existingVideos` is removed with a single `delete_by_query`.
 *
 * @param host - host whose videos are being reconciled
 * @param existingVideos - ids that must be kept in the index
 */
async function removeNotExistingVideos (host: string, existingVideos: Set<number>) {
  const idsFromDB = await getVideoIdsOf(host)

  const idsToRemove = difference(idsFromDB, Array.from(existingVideos))

  logger.info({ idsToRemove }, 'Will remove %d videos from %s.', idsToRemove.length, host)

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      query: {
        bool: {
          filter: [
            {
              terms: {
                id: idsToRemove
              }
            },
            {
              // Ids are only meaningful per host: never touch another host's documents
              term: {
                host
              }
            }
          ]
        }
      }
    }
  })
}

/**
 * Lists the ids of the videos indexed for `host`.
 *
 * Uses a `terms` aggregation on `id` (at most 500000 buckets) restricted to the host,
 * and returns the bucket keys.
 *
 * @param host - host to list the video ids of
 */
async function getVideoIdsOf (host: string) {
  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      // Only the aggregation result is read below, so no hit is requested
      size: 0,
      aggs: {
        ids: {
          terms: {
            size: 500000,
            field: 'id'
          }
        }
      },
      query: {
        bool: {
          filter: [
            {
              term: {
                host
              }
            }
          ]
        }
      }
    }
  })

  return res.body.aggregations.ids.buckets.map(b => b.key)
}

/**
 * Searches the videos index.
 *
 * Builds a `bool` query from the search parameters:
 *  - `search` becomes a fuzzy `multi_match` in `must`
 *  - `blockedAccounts` / `blockedHosts` become `must_not` clauses
 *  - every other criterion (dates, nsfw, category, licence, language, tags, duration) becomes a `filter` clause
 *
 * When language boosting is enabled in the configuration and `boostLanguages` is a non empty array,
 * the `bool` query is wrapped in a `script_score` query that multiplies the score of the videos
 * whose language is one of the boosted languages.
 *
 * @param search - search parameters
 * @returns the result of `extractQueryResult` applied to the Elasticsearch response
 */
async function queryVideos (search: VideosSearchQuery) {
  const bool: any = {}
  const filter: any[] = []
  const mustNot: any[] = []

  if (search.search) {
    Object.assign(bool, {
      must: [
        {
          multi_match: {
            query: search.search,
            fields: ELASTIC_SEARCH_QUERY.VIDEOS_MULTI_MATCH_FIELDS,
            fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS
          }
        }
      ]
    })
  }

  if (search.blockedAccounts) {
    mustNot.push({
      terms: {
        'account.handle': search.blockedAccounts
      }
    })
  }

  if (search.blockedHosts) {
    mustNot.push({
      terms: {
        host: search.blockedHosts
      }
    })
  }

  if (search.startDate) {
    filter.push({
      range: {
        publishedAt: {
          gte: search.startDate
        }
      }
    })
  }

  if (search.endDate) {
    filter.push({
      range: {
        publishedAt: {
          lte: search.endDate
        }
      }
    })
  }

  // The originallyPublishedAt bounds must come from their own parameters, not from startDate/endDate
  if (search.originallyPublishedStartDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          gte: search.originallyPublishedStartDate
        }
      }
    })
  }

  if (search.originallyPublishedEndDate) {
    filter.push({
      range: {
        originallyPublishedAt: {
          lte: search.originallyPublishedEndDate
        }
      }
    })
  }

  if (search.nsfw && search.nsfw !== 'both') {
    filter.push({
      term: {
        // Stringify first so the comparison works whether the value is a string or a boolean
        nsfw: (search.nsfw + '') === 'true'
      }
    })
  }

  if (search.categoryOneOf) {
    filter.push({
      terms: {
        'category.id': search.categoryOneOf
      }
    })
  }

  if (search.licenceOneOf) {
    filter.push({
      terms: {
        'licence.id': search.licenceOneOf
      }
    })
  }

  if (search.languageOneOf) {
    filter.push({
      terms: {
        'language.id': search.languageOneOf
      }
    })
  }

  if (search.tagsOneOf) {
    filter.push({
      terms: {
        tags: search.tagsOneOf
      }
    })
  }

  if (search.tagsAllOf) {
    // One term clause per tag: filter clauses are ANDed, so every tag has to match
    for (const t of search.tagsAllOf) {
      filter.push({
        term: {
          tags: t
        }
      })
    }
  }

  if (search.durationMin) {
    filter.push({
      range: {
        duration: {
          gte: search.durationMin
        }
      }
    })
  }

  if (search.durationMax) {
    filter.push({
      range: {
        duration: {
          lte: search.durationMax
        }
      }
    })
  }

  Object.assign(bool, { filter })

  if (mustNot.length !== 0) {
    Object.assign(bool, { must_not: mustNot })
  }

  const body = {
    from: search.start,
    size: search.count,
    sort: buildSort(search.sort)
  }

  // Boost the score of the videos whose language is one of the languages requested by the client
  if (
    CONFIG.VIDEOS_SEARCH.BOOST_LANGUAGES.ENABLED &&
    Array.isArray(search.boostLanguages) &&
    search.boostLanguages.length !== 0
  ) {
    // Script source sent to Elasticsearch: keeps _score untouched when the document has no language
    // or when its language is not boosted, otherwise multiplies it by params.boost
    const boostScript = ` if (doc['language.id'].size() == 0) { return _score; } String language = doc['language.id'].value; for (String docLang: params.boostLanguages) { if (docLang == language) return _score * params.boost; } return _score; `

    Object.assign(body, {
      query: {
        script_score: {
          query: { bool },
          script: {
            source: boostScript,
            params: {
              boostLanguages: search.boostLanguages,
              boost: ELASTIC_SEARCH_QUERY.BOOST_LANGUAGE_VALUE
            }
          }
        }
      }
    })
  } else {
    Object.assign(body, { query: { bool } })
  }

  logger.debug({ body }, 'Will query Elastic Search for videos.')

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body
  })

  return extractQueryResult(res)
}

export {
  indexVideos,
  removeNotExistingVideos,
  queryVideos,
  refreshVideosIndex,
  removeVideosFromHosts,
  initVideosIndex,
  formatVideoForAPI
}

// ############################################################################

/**
 * Converts a video into the document stored in the videos index.
 *
 * Compared to the input, the document gets an `indexedAt` date (set to the current date) and
 * a `handle` (`name@host`) on its account and channel. `tags` is only set when the input carries tags.
 *
 * @param v - video (with or without details) to convert
 */
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: {
      id: v.category.id,
      label: v.category.label
    },
    licence: {
      id: v.licence.id,
      label: v.licence.label
    },
    language: {
      id: v.language.id,
      label: v.language.label
    },
    privacy: {
      id: v.privacy.id,
      label: v.privacy.label
    },

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,
    host: v.host,
    url: v.url,

    // Only IndexableVideoDetails carries tags
    tags: (v as IndexableVideoDetails).tags ? (v as IndexableVideoDetails).tags : undefined,

    account: {
      id: v.account.id,
      name: v.account.name,
      displayName: v.account.displayName,
      url: v.account.url,
      host: v.account.host,

      handle: `${v.account.name}@${v.account.host}`,

      avatar: formatAvatarForDB(v.account)
    },

    channel: {
      id: v.channel.id,
      name: v.channel.name,
      displayName: v.channel.displayName,
      url: v.channel.url,
      host: v.channel.host,

      handle: `${v.channel.name}@${v.channel.host}`,

      avatar: formatAvatarForDB(v.channel)
    }
  }
}

/**
 * Converts an indexed video document into the object returned by the API.
 *
 * `createdAt`, `updatedAt` and `publishedAt` are turned into `Date` objects, and the thumbnail,
 * preview and embed URLs are built from the video host and the matching paths.
 *
 * @param v - indexed video document
 * @param fromHost - optional host; `isLocal` is `true` only when it is provided and equal to the video host
 */
function formatVideoForAPI (v: DBVideoDetails, fromHost?: string): EnhancedVideo {
  return {
    id: v.id,
    uuid: v.uuid,

    score: v.score,

    createdAt: new Date(v.createdAt),
    updatedAt: new Date(v.updatedAt),
    publishedAt: new Date(v.publishedAt),
    originallyPublishedAt: v.originallyPublishedAt,

    category: {
      id: v.category.id,
      label: v.category.label
    },
    licence: {
      id: v.licence.id,
      label: v.licence.label
    },
    language: {
      id: v.language.id,
      label: v.language.label
    },
    privacy: {
      id: v.privacy.id,
      label: v.privacy.label
    },

    name: v.name,
    description: v.description,
    duration: v.duration,

    tags: v.tags,

    thumbnailPath: v.thumbnailPath,
    thumbnailUrl: buildUrl(v.host, v.thumbnailPath),

    previewPath: v.previewPath,
    previewUrl: buildUrl(v.host, v.previewPath),

    embedPath: v.embedPath,
    embedUrl: buildUrl(v.host, v.embedPath),

    url: v.url,

    isLocal: fromHost && fromHost === v.host,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    nsfw: v.nsfw,

    account: {
      id: v.account.id,
      name: v.account.name,
      displayName: v.account.displayName,
      url: v.account.url,
      host: v.account.host,

      avatar: formatAvatarForAPI(v.account)
    },

    channel: {
      id: v.channel.id,
      name: v.channel.name,
      displayName: v.channel.displayName,
      url: v.channel.url,
      host: v.channel.host,

      avatar: formatAvatarForAPI(v.channel)
    }
  }
}

/**
 * Returns the field mapping shared by the `account` and `channel` objects of a video document.
 */
function buildChannelOrAccountMapping () {
  return {
    id: {
      type: 'long'
    },

    name: {
      type: 'text',
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },
    displayName: {
      type: 'text'
    },

    url: {
      type: 'keyword'
    },
    host: {
      type: 'keyword'
    },
    handle: {
      type: 'keyword'
    },

    avatar: {
      properties: buildAvatarMapping()
    }
  }
}

/**
 * Returns the field mapping of the videos index (the one passed to `buildIndex` by `initVideosIndex`).
 */
function buildVideosMapping () {
  return {
    id: {
      type: 'long'
    },

    uuid: {
      type: 'keyword'
    },

    createdAt: {
      type: 'date',
      format: 'date_optional_time'
    },
    updatedAt: {
      type: 'date',
      format: 'date_optional_time'
    },
    publishedAt: {
      type: 'date',
      format: 'date_optional_time'
    },
    originallyPublishedAt: {
      type: 'date',
      format: 'date_optional_time'
    },
    indexedAt: {
      type: 'date',
      format: 'date_optional_time'
    },

    category: {
      properties: {
        id: {
          type: 'keyword'
        },
        label: {
          type: 'text'
        }
      }
    },
    licence: {
      properties: {
        id: {
          type: 'keyword'
        },
        label: {
          type: 'text'
        }
      }
    },
    language: {
      properties: {
        id: {
          type: 'keyword'
        },
        label: {
          type: 'text'
        }
      }
    },
    privacy: {
      properties: {
        id: {
          type: 'keyword'
        },
        label: {
          type: 'text'
        }
      }
    },

    name: {
      type: 'text'
    },
    description: {
      type: 'text'
    },

    tags: {
      type: 'text',
      fields: {
        raw: {
          type: 'keyword'
        }
      }
    },

    duration: {
      type: 'long'
    },

    thumbnailPath: {
      type: 'keyword'
    },
    previewPath: {
      type: 'keyword'
    },
    embedPath: {
      type: 'keyword'
    },

    url: {
      type: 'keyword'
    },

    views: {
      type: 'long'
    },
    likes: {
      type: 'long'
    },
    dislikes: {
      type: 'long'
    },

    nsfw: {
      type: 'boolean'
    },

    host: {
      type: 'keyword'
    },

    account: {
      properties: buildChannelOrAccountMapping()
    },

    channel: {
      properties: buildChannelOrAccountMapping()
    }
  }
}