Handle playlist search in server

This commit is contained in:
Chocobozzz 2021-06-24 15:18:54 +02:00
parent 4c01e2e6ee
commit db36a2fb6a
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
41 changed files with 1363 additions and 991 deletions

@ -1 +1 @@
Subproject commit f676e0e32112821255b70018282d59207932d987
Subproject commit 6b4359476c462ea178c99b0a04349f553ddb8d9d

View File

@ -21,6 +21,7 @@ elastic-search:
indexes:
videos: 'peertube-index-videos'
channels: 'peertube-index-channels'
playlists: 'peertube-index-playlists'
log:
level: 'debug' # debug/info/warning/error
@ -87,6 +88,16 @@ channels-search:
account-display-name:
boost: 2
playlists-search:
# Add ability to change playlists search fields boost value
# See https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html for more information
# If boost == 0, the field will not be part of the search
search-fields:
name:
boost: 5
description:
boost: 1
api:
# Blacklist hosts that will not be returned by the search API
blacklist:

View File

@ -11,9 +11,7 @@ import * as morgan from 'morgan'
import { apiRouter } from './server/controllers/api'
import { logger } from './server/helpers/logger'
import { API_VERSION, CONFIG, getWebserverUrl } from './server/initializers/constants'
import { VideosIndexer } from './server/lib/schedulers/videos-indexer'
import { initVideosIndex } from './server/lib/elastic-search-videos'
import { initChannelsIndex } from './server/lib/elastic-search-channels'
import { IndexationScheduler } from './server/lib/schedulers/indexation-scheduler'
import { join } from 'path'
import { readFile } from 'fs-extra'
@ -140,17 +138,15 @@ app.use(function (err, req, res, next) {
app.listen(CONFIG.LISTEN.PORT, async () => {
logger.info('Server listening on port %d', CONFIG.LISTEN.PORT)
IndexationScheduler.Instance.enable()
try {
await Promise.all([
initVideosIndex(),
initChannelsIndex()
])
await IndexationScheduler.Instance.initIndexes()
} catch (err) {
logger.error('Cannot init videos index.', { err })
process.exit(-1)
}
VideosIndexer.Instance.enable()
VideosIndexer.Instance.execute()
IndexationScheduler.Instance.execute()
.catch(err => logger.error('Cannot run video indexer', { err }))
})

View File

@ -1,7 +1,7 @@
import * as express from 'express'
import { VideosIndexer } from '../../lib/schedulers/videos-indexer'
import { ServerConfig } from '../../../shared'
import { CONFIG } from '../../initializers/constants'
import { IndexationScheduler } from '../../lib/schedulers/indexation-scheduler'
const configRouter = express.Router()
@ -21,7 +21,7 @@ async function getConfig (req: express.Request, res: express.Response) {
searchInstanceNameImage: CONFIG.SEARCH_INSTANCE.NAME_IMAGE,
searchInstanceSearchImage: CONFIG.SEARCH_INSTANCE.SEARCH_IMAGE,
legalNoticesUrl: CONFIG.SEARCH_INSTANCE.LEGAL_NOTICES_URL,
indexedHostsCount: VideosIndexer.Instance.getIndexedHosts().length,
indexedHostsCount: IndexationScheduler.Instance.getIndexedHosts().length,
indexedInstancesUrl: CONFIG.INSTANCES_INDEX.PUBLIC_URL
} as ServerConfig)
}

View File

@ -2,6 +2,7 @@ import * as express from 'express'
import { badRequest } from '../../helpers/utils'
import { configRouter } from './config'
import { searchChannelsRouter } from './search-channels'
import { searchPlaylistsRouter } from './search-playlists'
import { searchVideosRouter } from './search-videos'
const apiRouter = express.Router()
@ -9,6 +10,7 @@ const apiRouter = express.Router()
apiRouter.use('/', configRouter)
apiRouter.use('/', searchVideosRouter)
apiRouter.use('/', searchChannelsRouter)
apiRouter.use('/', searchPlaylistsRouter)
apiRouter.use('/ping', pong)
apiRouter.use('/*', badRequest)

View File

@ -1,7 +1,6 @@
import * as express from 'express'
import { ChannelsSearchQuery } from 'server/types/channel-search.model'
import { CONFIG } from '../../initializers/constants'
import { formatChannelForAPI, queryChannels } from '../../lib/elastic-search-channels'
import { Searcher } from '../../lib/controllers/searcher'
import { formatChannelForAPI, queryChannels } from '../../lib/elastic-search/elastic-search-channels'
import { asyncMiddleware } from '../../middlewares/async'
import { setDefaultPagination } from '../../middlewares/pagination'
import { setDefaultSearchSort } from '../../middlewares/sort'
@ -9,6 +8,7 @@ import { methodsValidator } from '../../middlewares/validators/method'
import { paginationValidator } from '../../middlewares/validators/pagination'
import { commonFiltersValidators, videoChannelsSearchValidator } from '../../middlewares/validators/search'
import { channelsSearchSortValidator } from '../../middlewares/validators/sort'
import { ChannelsSearchQuery } from '../../types/search-query/channel-search.model'
const searchChannelsRouter = express.Router()
@ -32,18 +32,8 @@ export { searchChannelsRouter }
async function searchChannels (req: express.Request, res: express.Response) {
const query = Object.assign(req.query || {}, req.body || {}) as ChannelsSearchQuery
if (!Array.isArray(query.blockedHosts)) {
query.blockedHosts = []
}
const searcher = new Searcher(queryChannels, formatChannelForAPI)
const result = await searcher.getResult(query)
if (CONFIG.API.BLACKLIST.ENABLED && Array.isArray(CONFIG.API.BLACKLIST.HOSTS)) {
query.blockedHosts = query.blockedHosts.concat(CONFIG.API.BLACKLIST.HOSTS)
}
const resultList = await queryChannels(query)
return res.json({
total: resultList.total,
data: resultList.data.map(v => formatChannelForAPI(v, query.fromHost))
})
return res.json(result)
}

View File

@ -0,0 +1,39 @@
import * as express from 'express'
import { Searcher } from '../../lib/controllers/searcher'
import { formatPlaylistForAPI, queryPlaylists } from '../../lib/elastic-search/elastic-search-playlists'
import { asyncMiddleware } from '../../middlewares/async'
import { setDefaultPagination } from '../../middlewares/pagination'
import { setDefaultSearchSort } from '../../middlewares/sort'
import { methodsValidator } from '../../middlewares/validators/method'
import { paginationValidator } from '../../middlewares/validators/pagination'
import { commonFiltersValidators, videoPlaylistsSearchValidator } from '../../middlewares/validators/search'
import { playlistsSearchSortValidator } from '../../middlewares/validators/sort'
import { PlaylistsSearchQuery } from '../../types/search-query/playlist-search.model'
// Router carrying the playlists search endpoint; exported below and mounted by the API router
const searchPlaylistsRouter = express.Router()
// `.all` registers the chain for every HTTP verb on this path.
// NOTE(review): methodsValidator presumably rejects anything other than POST/GET — confirm in middlewares/validators/method.
// The middlewares run in the order listed; searchPlaylists is the final handler.
searchPlaylistsRouter.all('/search/video-playlists',
methodsValidator([ 'POST', 'GET' ]),
paginationValidator,
setDefaultPagination,
playlistsSearchSortValidator,
setDefaultSearchSort,
commonFiltersValidators,
videoPlaylistsSearchValidator,
asyncMiddleware(searchPlaylists)
)
// ---------------------------------------------------------------------------
export { searchPlaylistsRouter }
// ---------------------------------------------------------------------------
/**
 * Express handler of the playlists search route.
 *
 * The request body is merged into the query-string parameters (body values win
 * on conflicts), the merged object is handed to a `Searcher` wired with the
 * playlist query/format functions, and the outcome is sent back as JSON.
 */
async function searchPlaylists (req: express.Request, res: express.Response) {
  const queryParams = req.query || {}
  const bodyParams = req.body || {}

  // Object.assign writes into (and returns) its first argument
  const query = Object.assign(queryParams, bodyParams) as PlaylistsSearchQuery

  const playlistsSearcher = new Searcher(queryPlaylists, formatPlaylistForAPI)
  const payload = await playlistsSearcher.getResult(query)

  return res.json(payload)
}

View File

@ -1,7 +1,6 @@
import * as express from 'express'
import { VideosSearchQuery } from 'server/types/video-search.model'
import { CONFIG } from '../../initializers/constants'
import { formatVideoForAPI, queryVideos } from '../../lib/elastic-search-videos'
import { Searcher } from '../../lib/controllers/searcher'
import { formatVideoForAPI, queryVideos } from '../../lib/elastic-search/elastic-search-videos'
import { asyncMiddleware } from '../../middlewares/async'
import { setDefaultPagination } from '../../middlewares/pagination'
import { setDefaultSearchSort } from '../../middlewares/sort'
@ -9,6 +8,7 @@ import { methodsValidator } from '../../middlewares/validators/method'
import { paginationValidator } from '../../middlewares/validators/pagination'
import { commonFiltersValidators, commonVideosFiltersValidator, videosSearchValidator } from '../../middlewares/validators/search'
import { videosSearchSortValidator } from '../../middlewares/validators/sort'
import { VideosSearchQuery } from '../../types/search-query/video-search.model'
const searchVideosRouter = express.Router()
@ -33,18 +33,8 @@ export { searchVideosRouter }
async function searchVideos (req: express.Request, res: express.Response) {
const query = Object.assign(req.query || {}, req.body || {}) as VideosSearchQuery
if (!Array.isArray(query.blockedHosts)) {
query.blockedHosts = []
}
const searcher = new Searcher(queryVideos, formatVideoForAPI)
const result = await searcher.getResult(query)
if (CONFIG.API.BLACKLIST.ENABLED && Array.isArray(CONFIG.API.BLACKLIST.HOSTS)) {
query.blockedHosts = query.blockedHosts.concat(CONFIG.API.BLACKLIST.HOSTS)
}
const resultList = await queryVideos(query)
return res.json({
total: resultList.total,
data: resultList.data.map(v => formatVideoForAPI(v, query.fromHost))
})
return res.json(result)
}

View File

@ -1,9 +1,6 @@
import { readFileSync } from 'fs-extra'
import { flatMap } from 'lodash'
import { ApiResponse, Client } from '@elastic/elasticsearch'
import { Client } from '@elastic/elasticsearch'
import { CONFIG } from '../initializers/constants'
import { IndexableDoc } from '../types/elastic-search.model'
import { logger } from './logger'
const elasticOptions = {
node: CONFIG.ELASTIC_SEARCH.HTTP + '://' + CONFIG.ELASTIC_SEARCH.HOSTNAME + ':' + CONFIG.ELASTIC_SEARCH.PORT
@ -28,115 +25,6 @@ if (CONFIG.ELASTIC_SEARCH.AUTH.USERNAME) {
const elasticSearch = new Client(elasticOptions)
function buildSort (value: string) {
let sortField: string
let direction: 'asc' | 'desc'
if (value.substring(0, 1) === '-') {
direction = 'desc'
sortField = value.substring(1)
} else {
direction = 'asc'
sortField = value
}
const field = sortField === 'match'
? '_score'
: sortField
return [
{
[field]: { order: direction }
}
]
}
function buildIndex (name: string, mapping: object) {
logger.info('Initialize %s Elastic Search index.', name)
return elasticSearch.indices.create({
index: name,
body: {
settings: {
number_of_shards: 1,
number_of_replicas: 1
},
mappings: {
properties: mapping
}
}
}).catch(err => {
if (err.name === 'ResponseError' && err.meta?.body?.error.root_cause[0]?.type === 'resource_already_exists_exception') return
throw err
})
}
async function indexDocuments <T extends IndexableDoc> (options: {
objects: T[]
formatter: (o: T) => any
replace: boolean
index: string
}) {
const { objects, formatter, replace, index } = options
const elIdIndex: { [elId: string]: T } = {}
for (const object of objects) {
elIdIndex[object.elasticSearchId] = object
}
const method = replace ? 'index' : 'update'
const body = flatMap(objects, v => {
const doc = formatter(v)
const options = replace
? doc
: { doc, doc_as_upsert: true }
return [
{
[method]: {
_id: v.elasticSearchId,
_index: index
}
},
options
]
})
const result = await elasticSearch.bulk({
index,
body
})
const resultBody = result.body
if (resultBody.errors === true) {
const msg = 'Cannot insert data in elastic search.'
logger.error({ err: resultBody }, msg)
throw new Error(msg)
}
const created: T[] = result.body.items
.map(i => i[method])
.filter(i => i.result === 'created')
.map(i => elIdIndex[i._id])
return { created }
}
function extractQueryResult (result: ApiResponse<any, any>) {
const hits = result.body.hits
return { total: hits.total.value, data: hits.hits.map(h => Object.assign(h._source, { score: h._score })) }
}
export {
elasticSearch,
indexDocuments,
buildSort,
extractQueryResult,
buildIndex
elasticSearch
}

View File

@ -25,7 +25,8 @@ const CONFIG = {
PORT: config.get<number>('elastic-search.port'),
INDEXES: {
VIDEOS: config.get<string>('elastic-search.indexes.videos'),
CHANNELS: config.get<string>('elastic-search.indexes.channels')
CHANNELS: config.get<string>('elastic-search.indexes.channels'),
PLAYLISTS: config.get<string>('elastic-search.indexes.playlists')
}
},
LOG: {
@ -86,6 +87,18 @@ const CONFIG = {
}
}
},
PLAYLISTS_SEARCH: {
SEARCH_FIELDS: {
NAME: {
FIELD_NAME: 'name',
BOOST: config.get<number>('playlists-search.search-fields.name.boost')
},
DESCRIPTION: {
FIELD_NAME: 'description',
BOOST: config.get<number>('playlists-search.search-fields.description.boost')
}
}
},
INSTANCES_INDEX: {
URL: config.get<string>('instances-index.url'),
PUBLIC_URL: config.get<string>('instances-index.public_url'),
@ -104,7 +117,8 @@ const CONFIG = {
const SORTABLE_COLUMNS = {
VIDEOS_SEARCH: [ 'name', 'duration', 'createdAt', 'publishedAt', 'originallyPublishedAt', 'views', 'likes', 'match' ],
CHANNELS_SEARCH: [ 'match', 'displayName', 'createdAt' ]
CHANNELS_SEARCH: [ 'match', 'displayName', 'createdAt' ],
PLAYLISTS_SEARCH: [ 'match', 'displayName', 'createdAt' ]
}
const PAGINATION_COUNT_DEFAULT = 20
@ -113,9 +127,8 @@ const SCHEDULER_INTERVALS_MS = {
videosIndexer: 60000 * 60 * 24 // 24 hours
}
const INDEXER_COUNT = {
VIDEOS: 10
}
const INDEXER_COUNT = 10
const INDEXER_LIMIT = 500000
const INDEXER_CONCURRENCY = 3
@ -130,7 +143,8 @@ const ELASTIC_SEARCH_QUERY = {
FUZZINESS: 'AUTO:4,7',
BOOST_LANGUAGE_VALUE: 2,
VIDEOS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.VIDEOS_SEARCH.SEARCH_FIELDS),
CHANNELS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS)
CHANNELS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.CHANNELS_SEARCH.SEARCH_FIELDS),
PLAYLISTS_MULTI_MATCH_FIELDS: buildMultiMatchFields(CONFIG.PLAYLISTS_SEARCH.SEARCH_FIELDS)
}
function getWebserverUrl () {
@ -167,6 +181,7 @@ export {
SCHEDULER_INTERVALS_MS,
INDEXER_CONCURRENCY,
INDEXER_COUNT,
INDEXER_LIMIT,
REQUESTS,
ELASTIC_SEARCH_QUERY
}

View File

@ -0,0 +1,30 @@
import { ResultList } from '../../../PeerTube/shared/models'
import { CONFIG } from '../../initializers/constants'
import { CommonSearch } from '../../types/search-query/common-search.model'
/**
 * Search pipeline shared by the search controllers.
 *
 * - `queryFn` runs the actual search and resolves to a `ResultList` of raw documents
 * - `formatFn` converts one raw document into its API representation; it also
 *   receives the `fromHost` value of the query
 */
export class Searcher <T extends CommonSearch, R, F> {

  constructor (
    private readonly queryFn: (query: T) => Promise<ResultList<R>>,
    private readonly formatFn: (data: R, fromHost: string) => F
  ) {}

  /**
   * Runs the search described by `queryArg` and formats every hit.
   *
   * Works on a shallow copy of `queryArg`: `blockedHosts` is forced to an array
   * and, when the API blacklist is enabled in the configuration, the configured
   * hosts are appended to it before `queryFn` is called.
   */
  async getResult (queryArg: T): Promise<ResultList<F>> {
    const query = { ...queryArg }

    if (!Array.isArray(query.blockedHosts)) {
      query.blockedHosts = []
    }

    const blacklistEnabled = CONFIG.API.BLACKLIST.ENABLED && Array.isArray(CONFIG.API.BLACKLIST.HOSTS)
    if (blacklistEnabled) {
      query.blockedHosts = query.blockedHosts.concat(CONFIG.API.BLACKLIST.HOSTS)
    }

    const { total, data: rawResults } = await this.queryFn(query)
    const data = rawResults.map(raw => this.formatFn(raw, query.fromHost))

    return { total, data }
  }
}

View File

@ -1,329 +0,0 @@
import { difference } from 'lodash'
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
import { logger } from '../helpers/logger'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../initializers/constants'
import { ChannelsSearchQuery } from '../types/channel-search.model'
import { DBChannel, EnhancedVideoChannel, IndexableChannel } from '../types/channel.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
// Create the channels index (named by the configuration) with the channels mapping
function initChannelsIndex () {
  const indexName = CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS

  return buildIndex(indexName, buildChannelsMapping())
}
/**
 * Sends the given channels to the channels index through `indexDocuments`.
 *
 * @param channels channels to index
 * @param replace forwarded as-is to `indexDocuments` (defaults to false)
 */
async function indexChannels (channels: IndexableChannel[], replace = false) {
  const formatter = (channel: IndexableChannel) => formatChannelForDB(channel)

  return indexDocuments({
    objects: channels,
    formatter,
    replace,
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS
  })
}
// Log, then ask Elasticsearch to refresh the channels index
function refreshChannelsIndex () {
  logger.info('Refreshing channels index.')

  const target = { index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS }
  return elasticSearch.indices.refresh(target)
}
/**
 * Deletes from the channels index the documents of `host` whose id is indexed
 * but absent from `existingChannels`.
 *
 * @param host host whose channels are cleaned up
 * @param existingChannels ids that must be kept
 */
async function removeNotExistingChannels (host: string, existingChannels: Set<number>) {
  const indexedIds = await getChannelIdsOf(host)
  const idsToRemove = difference(indexedIds, Array.from(existingChannels))

  logger.info({ idsToRemove }, 'Will remove %d channels from %s.', idsToRemove.length, host)

  // Both conditions must hold: id is stale AND the document belongs to this host
  const filter = [
    { terms: { id: idsToRemove } },
    { term: { host } }
  ]

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * Deletes every channel document whose `host` is one of `hosts`.
 * Returns undefined without contacting Elasticsearch when `hosts` is empty.
 */
function removeChannelsFromHosts (hosts: string[]) {
  const nothingToRemove = hosts.length === 0
  if (nothingToRemove) return

  logger.info({ hosts }, 'Will remove channels from hosts.')

  const filter = { terms: { host: hosts } }

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * Searches the channels index.
 *
 * - `search.search` (when set) becomes a fuzzy multi_match on the configured channel fields
 * - `search.blockedAccounts` / `search.blockedHosts` (when set) become must_not terms filters
 * - `search.start`, `search.count` and `search.sort` drive pagination and ordering
 */
async function queryChannels (search: ChannelsSearchQuery) {
  const boolQuery: any = {}

  if (search.search) {
    boolQuery.must = [
      {
        multi_match: {
          query: search.search,
          fields: ELASTIC_SEARCH_QUERY.CHANNELS_MULTI_MATCH_FIELDS,
          fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS
        }
      }
    ]
  }

  const exclusions: any[] = []

  if (search.blockedAccounts) {
    exclusions.push({ terms: { 'ownerAccount.handle': search.blockedAccounts } })
  }

  if (search.blockedHosts) {
    exclusions.push({ terms: { host: search.blockedHosts } })
  }

  // Only attach must_not when there is something to exclude
  if (exclusions.length > 0) {
    boolQuery.must_not = exclusions
  }

  const body = {
    from: search.start,
    size: search.count,
    sort: buildSort(search.sort),
    query: { bool: boolQuery }
  }

  logger.debug({ body }, 'Will query Elastic Search for channels.')

  const response = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body
  })

  return extractQueryResult(response)
}
/**
 * Lists the `id` values of the channel documents indexed for `host`.
 *
 * The ids are collected with a terms aggregation on the `id` field; no search
 * hit is fetched (`size: 0` on the request itself).
 *
 * @param host host whose indexed channel ids are requested
 * @returns the bucket keys of the aggregation, i.e. the indexed ids
 */
async function getChannelIdsOf (host: string) {
  // Without an explicit size a terms aggregation only returns its default
  // number of top buckets, which silently truncates the id list.
  // Same limit as the one used for videos.
  const maxIds = 500000

  const res = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body: {
      size: 0,
      aggs: {
        ids: {
          terms: {
            size: maxIds,
            field: 'id'
          }
        }
      },
      query: {
        bool: {
          filter: [
            {
              term: {
                host
              }
            }
          ]
        }
      }
    }
  })

  return res.body.aggregations.ids.buckets.map(b => b.key)
}
export {
removeChannelsFromHosts,
initChannelsIndex,
indexChannels,
refreshChannelsIndex,
formatChannelForAPI,
queryChannels,
getChannelIdsOf,
removeNotExistingChannels
}
// ############################################################################
/**
 * Builds the document stored in the index for a channel.
 *
 * Copies the channel and owner account fields, stamps `indexedAt` with the
 * current date and derives both `handle` values as `name@host`.
 */
function formatChannelForDB (c: IndexableChannel): DBChannel {
  const account = c.ownerAccount
  const channelAvatar = formatAvatarForDB(c)

  const ownerAccount = {
    id: account.id,
    url: account.url,

    displayName: account.displayName,
    description: account.description,
    name: account.name,
    host: account.host,

    followingCount: account.followingCount,
    followersCount: account.followersCount,

    createdAt: account.createdAt,
    updatedAt: account.updatedAt,

    handle: `${account.name}@${account.host}`,

    avatar: formatAvatarForDB(account)
  }

  return {
    id: c.id,

    name: c.name,
    host: c.host,
    url: c.url,

    avatar: channelAvatar,

    displayName: c.displayName,

    indexedAt: new Date(),

    followingCount: c.followingCount,
    followersCount: c.followersCount,

    createdAt: c.createdAt,
    updatedAt: c.updatedAt,

    description: c.description,
    support: c.support,

    handle: `${c.name}@${c.host}`,

    ownerAccount
  }
}
/**
 * Converts an indexed channel document into the object returned by the API.
 *
 * @param c channel document read from the index (carries the search `score`)
 * @param fromHost host of the requester; `isLocal` is true when it equals the channel host
 */
function formatChannelForAPI (c: DBChannel, fromHost?: string): EnhancedVideoChannel {
  const account = c.ownerAccount
  const channelAvatar = formatAvatarForAPI(c)

  const ownerAccount = {
    id: account.id,
    url: account.url,

    displayName: account.displayName,
    description: account.description,
    name: account.name,
    host: account.host,

    followingCount: account.followingCount,
    followersCount: account.followersCount,

    createdAt: account.createdAt,
    updatedAt: account.updatedAt,

    avatar: formatAvatarForAPI(account)
  }

  return {
    id: c.id,

    score: c.score,

    url: c.url,
    name: c.name,
    host: c.host,

    followingCount: c.followingCount,
    followersCount: c.followersCount,

    createdAt: c.createdAt,
    updatedAt: c.updatedAt,

    avatar: channelAvatar,

    displayName: c.displayName,
    description: c.description,
    support: c.support,

    isLocal: fromHost === c.host,

    ownerAccount
  }
}
// Index mapping properties shared by channel documents and their owner account
function buildChannelOrAccountCommonMapping () {
  // Small factories so every field receives its own fresh definition object
  const long = () => ({ type: 'long' })
  const keyword = () => ({ type: 'keyword' })
  const text = () => ({ type: 'text' })
  const date = () => ({ type: 'date', format: 'date_optional_time' })

  return {
    id: long(),

    url: keyword(),

    // Analyzed text plus a `raw` keyword sub-field
    name: {
      type: 'text',
      fields: {
        raw: keyword()
      }
    },

    host: keyword(),

    handle: keyword(),

    displayName: text(),

    avatar: {
      properties: buildAvatarMapping()
    },

    followingCount: long(),
    followersCount: long(),

    createdAt: date(),
    updatedAt: date(),

    description: text()
  }
}
// Channels mapping: the common channel/account properties, plus `support` and the nested owner account
function buildChannelsMapping () {
  const mapping = buildChannelOrAccountCommonMapping()

  const channelOnlyProperties = {
    support: { type: 'keyword' },

    ownerAccount: {
      properties: buildChannelOrAccountCommonMapping()
    }
  }

  Object.assign(mapping, channelOnlyProperties)

  return mapping
}

View File

@ -0,0 +1,169 @@
import { elasticSearch } from '../../helpers/elastic-search'
import { logger } from '../../helpers/logger'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../../initializers/constants'
import { DBChannel, EnhancedVideoChannel, IndexableChannel } from '../../types/channel.model'
import { ChannelsSearchQuery } from '../../types/search-query/channel-search.model'
import { buildSort, extractQueryResult } from './elastic-search-queries'
import { buildChannelOrAccountCommonMapping } from './shared'
import { formatAvatarForAPI, formatAvatarForDB } from './shared/elastic-search-avatar'
/**
 * Searches the channels index.
 *
 * - `search.search` (when set) becomes a fuzzy multi_match on the configured channel fields
 * - `search.blockedAccounts` / `search.blockedHosts` (when set) become must_not terms filters
 * - `search.start`, `search.count` and `search.sort` drive pagination and ordering
 */
async function queryChannels (search: ChannelsSearchQuery) {
  const boolQuery: any = {}

  if (search.search) {
    boolQuery.must = [
      {
        multi_match: {
          query: search.search,
          fields: ELASTIC_SEARCH_QUERY.CHANNELS_MULTI_MATCH_FIELDS,
          fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS
        }
      }
    ]
  }

  const exclusions: any[] = []

  if (search.blockedAccounts) {
    exclusions.push({ terms: { 'ownerAccount.handle': search.blockedAccounts } })
  }

  if (search.blockedHosts) {
    exclusions.push({ terms: { host: search.blockedHosts } })
  }

  // Only attach must_not when there is something to exclude
  if (exclusions.length > 0) {
    boolQuery.must_not = exclusions
  }

  const body = {
    from: search.start,
    size: search.count,
    sort: buildSort(search.sort),
    query: { bool: boolQuery }
  }

  logger.debug({ body }, 'Will query Elastic Search for channels.')

  const response = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body
  })

  return extractQueryResult(response)
}
/**
 * Converts an indexed channel document into the object returned by the API.
 *
 * @param c channel document read from the index (carries the search `score`)
 * @param fromHost host of the requester; `isLocal` is true when it equals the channel host
 */
function formatChannelForAPI (c: DBChannel, fromHost?: string): EnhancedVideoChannel {
  const account = c.ownerAccount
  const channelAvatar = formatAvatarForAPI(c)

  const ownerAccount = {
    id: account.id,
    url: account.url,

    displayName: account.displayName,
    description: account.description,
    name: account.name,
    host: account.host,

    followingCount: account.followingCount,
    followersCount: account.followersCount,

    createdAt: account.createdAt,
    updatedAt: account.updatedAt,

    avatar: formatAvatarForAPI(account)
  }

  return {
    id: c.id,

    score: c.score,

    url: c.url,
    name: c.name,
    host: c.host,

    followingCount: c.followingCount,
    followersCount: c.followersCount,

    createdAt: c.createdAt,
    updatedAt: c.updatedAt,

    avatar: channelAvatar,

    displayName: c.displayName,
    description: c.description,
    support: c.support,

    isLocal: fromHost === c.host,

    ownerAccount
  }
}
/**
 * Builds the document stored in the index for a channel.
 *
 * Copies the channel and owner account fields, stamps `indexedAt` with the
 * current date and derives both `handle` values as `name@host`.
 */
function formatChannelForDB (c: IndexableChannel): DBChannel {
  const account = c.ownerAccount
  const channelAvatar = formatAvatarForDB(c)

  const ownerAccount = {
    id: account.id,
    url: account.url,

    displayName: account.displayName,
    description: account.description,
    name: account.name,
    host: account.host,

    followingCount: account.followingCount,
    followersCount: account.followersCount,

    createdAt: account.createdAt,
    updatedAt: account.updatedAt,

    handle: `${account.name}@${account.host}`,

    avatar: formatAvatarForDB(account)
  }

  return {
    id: c.id,

    name: c.name,
    host: c.host,
    url: c.url,

    avatar: channelAvatar,

    displayName: c.displayName,

    indexedAt: new Date(),

    followingCount: c.followingCount,
    followersCount: c.followersCount,

    createdAt: c.createdAt,
    updatedAt: c.updatedAt,

    description: c.description,
    support: c.support,

    handle: `${c.name}@${c.host}`,

    ownerAccount
  }
}
// Channels mapping: the common channel/account properties, plus `support` and the nested owner account
function buildChannelsMapping () {
  const mapping = buildChannelOrAccountCommonMapping()

  const channelOnlyProperties = {
    support: { type: 'keyword' },

    ownerAccount: {
      properties: buildChannelOrAccountCommonMapping()
    }
  }

  Object.assign(mapping, channelOnlyProperties)

  return mapping
}
export {
buildChannelsMapping,
formatChannelForDB,
formatChannelForAPI,
queryChannels
}

View File

@ -0,0 +1,92 @@
import { flatMap } from 'lodash'
import { elasticSearch } from '../../helpers/elastic-search'
import { logger } from '../../helpers/logger'
import { IndexableDoc } from '../../types/indexable-doc.model'
/**
 * Creates the Elasticsearch index `name` with the given properties mapping
 * (1 shard, 1 replica).
 *
 * A "resource already exists" response is swallowed, so calling this on an
 * index that was created earlier resolves to undefined. Any other failure is
 * rethrown unchanged.
 *
 * @param name index to create
 * @param mapping content of `mappings.properties`
 */
function buildIndex (name: string, mapping: object) {
  logger.info('Initialize %s Elastic Search index.', name)

  return elasticSearch.indices.create({
    index: name,
    body: {
      settings: {
        number_of_shards: 1,
        number_of_replicas: 1
      },
      mappings: {
        properties: mapping
      }
    }
  }).catch(err => {
    // Every hop is optional: a response error without the expected body shape
    // must surface as itself, not as a TypeError raised by this handler
    const rootCauseType = err?.meta?.body?.error?.root_cause?.[0]?.type

    if (err?.name === 'ResponseError' && rootCauseType === 'resource_already_exists_exception') return

    throw err
  })
}
/**
 * Bulk-indexes `objects` into `index`.
 *
 * @param options.objects documents to send; each one is identified by its `elasticSearchId`
 * @param options.formatter converts an object into the document body that is stored
 * @param options.replace true: use the bulk `index` action with the formatted document;
 *   false: use the bulk `update` action with `doc_as_upsert`
 * @param options.index target index name
 * @returns `created`: the input objects that Elasticsearch reported as newly created
 * @throws Error when the bulk response reports errors
 */
async function indexDocuments <T extends IndexableDoc> (options: {
  objects: T[]
  formatter: (o: T) => any
  replace: boolean
  index: string
}) {
  const { objects, formatter, replace, index } = options

  // Nothing to send: a bulk request without any action is rejected by Elasticsearch
  if (objects.length === 0) return { created: [] as T[] }

  // Lookup used to map the ids of the bulk response back to the input objects
  const elIdIndex: { [elId: string]: T } = {}

  for (const object of objects) {
    elIdIndex[object.elasticSearchId] = object
  }

  const method = replace ? 'index' : 'update'

  // The bulk body alternates an action line and its payload line
  const body = flatMap(objects, v => {
    const doc = formatter(v)

    const payload = replace
      ? doc
      : { doc, doc_as_upsert: true }

    return [
      {
        [method]: {
          _id: v.elasticSearchId,
          _index: index
        }
      },
      payload
    ]
  })

  const result = await elasticSearch.bulk({
    index,
    body
  })

  const resultBody = result.body

  if (resultBody.errors === true) {
    const msg = 'Cannot insert data in elastic search.'
    logger.error({ err: resultBody }, msg)
    throw new Error(msg)
  }

  const created: T[] = resultBody.items
    .map(i => i[method])
    .filter(i => i.result === 'created')
    .map(i => elIdIndex[i._id])

  return { created }
}
// Log, then ask Elasticsearch to refresh the index called `indexName`
function refreshIndex (indexName: string) {
  logger.info('Refreshing %s index.', indexName)

  const target = { index: indexName }
  return elasticSearch.indices.refresh(target)
}
export {
buildIndex,
indexDocuments,
refreshIndex
}

View File

@ -1,6 +1,6 @@
import { elasticSearch } from '../helpers/elastic-search'
import { CONFIG } from '../initializers/constants'
import { listIndexInstancesHost } from './instances-index'
import { elasticSearch } from '../../helpers/elastic-search'
import { CONFIG } from '../../initializers/constants'
import { listIndexInstancesHost } from '../requests/instances-index'
async function buildInstanceHosts () {
let indexHosts = await listIndexInstancesHost()

View File

@ -0,0 +1,214 @@
import { elasticSearch } from '../../helpers/elastic-search'
import { logger } from '../../helpers/logger'
import { buildUrl } from '../../helpers/utils'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../../initializers/constants'
import { DBPlaylist, EnhancedPlaylist, IndexablePlaylist } from '../../types/playlist.model'
import { PlaylistsSearchQuery } from '../../types/search-query/playlist-search.model'
import { buildSort, extractQueryResult } from './elastic-search-queries'
import { buildChannelOrAccountSummaryMapping, formatActorForDB, formatActorSummaryForAPI } from './shared/elastic-search-actor'
/**
 * Searches the playlists index.
 *
 * - `search.search` (when set) becomes a fuzzy multi_match on the configured playlist fields
 * - `search.blockedAccounts` / `search.blockedHosts` (when set) become must_not terms filters
 * - `search.start`, `search.count` and `search.sort` drive pagination and ordering
 */
async function queryPlaylists (search: PlaylistsSearchQuery) {
  const boolQuery: any = {}

  if (search.search) {
    boolQuery.must = [
      {
        multi_match: {
          query: search.search,
          fields: ELASTIC_SEARCH_QUERY.PLAYLISTS_MULTI_MATCH_FIELDS,
          fuzziness: ELASTIC_SEARCH_QUERY.FUZZINESS
        }
      }
    ]
  }

  const exclusions: any[] = []

  if (search.blockedAccounts) {
    exclusions.push({ terms: { 'ownerAccount.handle': search.blockedAccounts } })
  }

  if (search.blockedHosts) {
    exclusions.push({ terms: { host: search.blockedHosts } })
  }

  // Only attach must_not when there is something to exclude
  if (exclusions.length > 0) {
    boolQuery.must_not = exclusions
  }

  const body = {
    from: search.start,
    size: search.count,
    sort: buildSort(search.sort),
    query: { bool: boolQuery }
  }

  logger.debug({ body }, 'Will query Elastic Search for playlists.')

  const response = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.PLAYLISTS,
    body
  })

  return extractQueryResult(response)
}
/**
 * Converts an indexed playlist document into the object returned by the API.
 *
 * Thumbnail and embed URLs are built with `buildUrl` from the playlist host and
 * the stored paths; owner account and channel go through `formatActorSummaryForAPI`.
 *
 * @param p playlist document read from the index (carries the search `score`)
 * @param fromHost host of the requester; `isLocal` is true when it equals the playlist host
 */
function formatPlaylistForAPI (p: DBPlaylist, fromHost?: string): EnhancedPlaylist {
  const privacy = { id: p.privacy.id, label: p.privacy.label }
  const type = { id: p.type.id, label: p.type.label }

  const thumbnailUrl = buildUrl(p.host, p.thumbnailPath)
  const embedUrl = buildUrl(p.host, p.embedPath)

  return {
    id: p.id,
    uuid: p.uuid,

    score: p.score,

    isLocal: fromHost === p.host,

    url: p.url,

    displayName: p.displayName,
    description: p.description,

    privacy,

    videosLength: p.videosLength,

    type,

    thumbnailPath: p.thumbnailPath,
    thumbnailUrl,

    embedPath: p.embedPath,
    embedUrl,

    createdAt: p.createdAt,
    updatedAt: p.updatedAt,

    ownerAccount: formatActorSummaryForAPI(p.ownerAccount),
    videoChannel: formatActorSummaryForAPI(p.videoChannel)
  }
}
/**
 * Builds the document stored in the index for a playlist.
 *
 * Copies the playlist fields, stamps `indexedAt` with the current date and
 * converts owner account and channel with `formatActorForDB`.
 */
function formatPlaylistForDB (p: IndexablePlaylist): DBPlaylist {
  const type = { id: p.type.id, label: p.type.label }
  const privacy = { id: p.privacy.id, label: p.privacy.label }

  return {
    id: p.id,
    uuid: p.uuid,

    indexedAt: new Date(),
    createdAt: p.createdAt,
    updatedAt: p.updatedAt,

    host: p.host,
    url: p.url,

    displayName: p.displayName,
    description: p.description,

    thumbnailPath: p.thumbnailPath,
    embedPath: p.embedPath,

    type,
    privacy,

    videosLength: p.videosLength,

    ownerAccount: formatActorForDB(p.ownerAccount),
    videoChannel: formatActorForDB(p.videoChannel)
  }
}
// Index mapping properties of a playlist document
function buildPlaylistsMapping () {
  // Small factories so every field receives its own fresh definition object
  const long = () => ({ type: 'long' })
  const keyword = () => ({ type: 'keyword' })
  const text = () => ({ type: 'text' })
  const date = () => ({ type: 'date', format: 'date_optional_time' })

  return {
    id: long(),

    uuid: keyword(),

    createdAt: date(),
    updatedAt: date(),
    indexedAt: date(),

    privacy: {
      properties: {
        id: keyword(),
        label: text()
      }
    },

    displayName: text(),
    description: text(),

    thumbnailPath: keyword(),
    embedPath: keyword(),

    url: keyword(),
    host: keyword(),

    videosLength: long(),

    ownerAccount: {
      properties: buildChannelOrAccountSummaryMapping()
    },

    videoChannel: {
      properties: buildChannelOrAccountSummaryMapping()
    }
  }
}
export {
formatPlaylistForAPI,
buildPlaylistsMapping,
formatPlaylistForDB,
queryPlaylists
}

View File

@ -0,0 +1,123 @@
import { difference } from 'lodash'
import { ApiResponse } from '@elastic/elasticsearch'
import { elasticSearch } from '../../helpers/elastic-search'
import { logger } from '../../helpers/logger'
/**
 * Deletes from `indexName` the documents of `host` whose id is indexed but
 * absent from `existingIds`.
 *
 * @param indexName index to clean up
 * @param host host whose documents are considered
 * @param existingIds ids that must be kept
 */
async function removeNotExistingIdsFromHost (indexName: string, host: string, existingIds: Set<number>) {
  const indexedIds = await getIdsOf(indexName, host)
  const idsToRemove = difference(indexedIds, Array.from(existingIds))

  logger.info({ idsToRemove }, 'Will remove %d entries from %s of host %s.', idsToRemove.length, indexName, host)

  // Both conditions must hold: id is stale AND the document belongs to this host
  const filter = [
    { terms: { id: idsToRemove } },
    { term: { host } }
  ]

  return elasticSearch.delete_by_query({
    index: indexName,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * Deletes from `indexName` every document whose `host` is one of `hosts`.
 * Returns undefined without contacting Elasticsearch when `hosts` is empty.
 */
function removeFromHosts (indexName: string, hosts: string[]) {
  const nothingToRemove = hosts.length === 0
  if (nothingToRemove) return

  logger.info({ hosts }, 'Will remove entries of index %s from hosts.', indexName)

  const filter = { terms: { host: hosts } }

  return elasticSearch.delete_by_query({
    index: indexName,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * Lists the `id` values of the documents of `indexName` that belong to `host`.
 *
 * Uses a terms aggregation on `id` (up to 500000 buckets) and fetches no search
 * hit (`size: 0` on the request itself).
 *
 * @returns the bucket keys of the aggregation
 */
async function getIdsOf (indexName: string, host: string) {
  const aggs = {
    ids: {
      terms: {
        size: 500000,
        field: 'id'
      }
    }
  }

  const query = {
    bool: {
      filter: [
        { term: { host } }
      ]
    }
  }

  const response = await elasticSearch.search({
    index: indexName,
    body: { size: 0, aggs, query }
  })

  const buckets = response.body.aggregations.ids.buckets

  return buckets.map(bucket => bucket.key)
}
/**
 * Turns a search response into `{ total, data }`.
 *
 * `total` is `hits.total.value`; `data` holds the `_source` of every hit, each
 * one receiving the hit `_score` as a `score` property (the `_source` object
 * itself is extended, not copied).
 */
function extractQueryResult (result: ApiResponse<any, any>) {
  const searchHits = result.body.hits
  const total = searchHits.total.value

  const data = searchHits.hits.map(hit => {
    const withScore = Object.assign(hit._source, { score: hit._score })
    return withScore
  })

  return { total, data }
}
/**
 * Translates an API sort value into an Elasticsearch `sort` clause.
 *
 * A leading `-` means descending order, anything else ascending.
 * The pseudo column `match` is mapped to the relevance score (`_score`).
 *
 * @param value sort value, for example `-createdAt` or `match`
 */
function buildSort (value: string) {
  const descending = value.substring(0, 1) === '-'

  const direction: 'asc' | 'desc' = descending ? 'desc' : 'asc'
  const sortField = descending ? value.substring(1) : value

  const field = sortField === 'match' ? '_score' : sortField

  return [
    { [field]: { order: direction } }
  ]
}
export {
elasticSearch,
removeNotExistingIdsFromHost,
getIdsOf,
extractQueryResult,
removeFromHosts,
buildSort
}

View File

@ -1,112 +1,13 @@
import { difference } from 'lodash'
import { exists } from '../helpers/custom-validators/misc'
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
import { logger } from '../helpers/logger'
import { buildUrl } from '../helpers/utils'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../initializers/constants'
import { VideosSearchQuery } from '../types/video-search.model'
import { DBVideo, DBVideoDetails, EnhancedVideo, IndexableVideo, IndexableVideoDetails } from '../types/video.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
/**
 * Build the videos index (name taken from the configuration) with the videos mapping.
 */
function initVideosIndex () {
  const indexName = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  const mapping = buildVideosMapping()

  return buildIndex(indexName, mapping)
}
/**
 * Index `videos` in the videos index, each one formatted with formatVideoForDB.
 *
 * @param replace forwarded as is to indexDocuments (false by default)
 */
async function indexVideos (videos: IndexableVideo[], replace = false) {
  const index = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  const formatter = (video: IndexableVideo) => formatVideoForDB(video)

  return indexDocuments({ objects: videos, formatter, replace, index })
}
/**
 * Log, then ask Elasticsearch to refresh the videos index.
 */
function refreshVideosIndex () {
  logger.info('Refreshing videos index.')

  const index = CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS
  return elasticSearch.indices.refresh({ index })
}
/**
 * Delete from the videos index every document whose `host` field is one of `hosts`.
 * Returns undefined without querying Elasticsearch when `hosts` is empty.
 */
function removeVideosFromHosts (hosts: string[]) {
  if (hosts.length === 0) return

  logger.info({ hosts }, 'Will remove videos from hosts.')

  const query = {
    bool: {
      filter: {
        terms: { host: hosts }
      }
    }
  }

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: { query }
  })
}
/**
 * Delete from the videos index the videos of `host` whose id is not in `existingVideos`.
 *
 * The ids currently indexed for the host are fetched first, then the difference
 * with `existingVideos` is removed with a delete_by_query restricted to that host.
 */
async function removeNotExistingVideos (host: string, existingVideos: Set<number>) {
  const indexedIds = await getVideoIdsOf(host)
  const staleIds = difference(indexedIds, Array.from(existingVideos))

  logger.info({ idsToRemove: staleIds }, 'Will remove %d videos from %s.', staleIds.length, host)

  // Both conditions must match: id in the stale list AND document owned by this host
  const filter = [
    { terms: { id: staleIds } },
    { term: { host } }
  ]

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      query: {
        bool: { filter }
      }
    }
  })
}
/**
 * List the `id` values of the videos of `host` stored in the videos index.
 *
 * The ids come from the buckets of a terms aggregation on the `id` field
 * (aggregation size: 500000); only the aggregation part of the response is read.
 */
async function getVideoIdsOf (host: string) {
  // Only aggregate over the documents of this host
  const hostFilter = { term: { host } }

  const { body } = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      size: 0,
      aggs: {
        ids: {
          terms: { size: 500000, field: 'id' }
        }
      },
      query: {
        bool: { filter: [ hostFilter ] }
      }
    }
  })

  return body.aggregations.ids.buckets.map(bucket => bucket.key)
}
import { exists } from '../../helpers/custom-validators/misc'
import { elasticSearch } from '../../helpers/elastic-search'
import { logger } from '../../helpers/logger'
import { buildUrl } from '../../helpers/utils'
import { CONFIG, ELASTIC_SEARCH_QUERY } from '../../initializers/constants'
import { VideosSearchQuery } from '../../types/search-query/video-search.model'
import { DBVideo, DBVideoDetails, EnhancedVideo, IndexableVideo, IndexableVideoDetails } from '../../types/video.model'
import { buildSort, extractQueryResult } from './elastic-search-queries'
import { buildChannelOrAccountSummaryMapping, formatActorForDB, formatActorSummaryForAPI } from './shared/elastic-search-actor'
async function queryVideos (search: VideosSearchQuery) {
const bool: any = {}
@ -321,201 +222,6 @@ async function queryVideos (search: VideosSearchQuery) {
return extractQueryResult(res)
}
export {
indexVideos,
removeNotExistingVideos,
queryVideos,
refreshVideosIndex,
removeVideosFromHosts,
initVideosIndex,
formatVideoForAPI
}
// ############################################################################
/**
 * Build the document stored in the index from a video fetched on an instance.
 *
 * Only the listed attributes are copied. `indexedAt` is set to the current date,
 * `isLive` defaults to false, `tags` is undefined when the video has none,
 * and a `name@host` handle is added to the account and the channel.
 */
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  // Keep only the id/label pair of a constant (category, licence...)
  const idAndLabel = <C extends { id: unknown, label: unknown }> (constant: C) => ({
    id: constant.id,
    label: constant.label
  })

  // Same shape for the account and the channel
  const toDBActor = (actor: (typeof v)['account'] | (typeof v)['channel']) => ({
    id: actor.id,
    name: actor.name,
    displayName: actor.displayName,
    url: actor.url,
    host: actor.host,

    handle: `${actor.name}@${actor.host}`,

    avatar: formatAvatarForDB(actor)
  })

  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: idAndLabel(v.category),
    licence: idAndLabel(v.licence),
    language: idAndLabel(v.language),
    privacy: idAndLabel(v.privacy),

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    isLive: v.isLive || false,
    nsfw: v.nsfw,

    host: v.host,
    url: v.url,

    // Tags only exist on the detailed video model
    tags: (v as IndexableVideoDetails).tags || undefined,

    account: toDBActor(v.account),
    channel: toDBActor(v.channel)
  }
}
/**
 * Build the video object returned by the search API from an indexed video.
 *
 * Dates are converted to Date objects, thumbnail/preview/embed URLs are built with
 * buildUrl from the video host, and `isLocal` is the result of
 * `fromHost && fromHost === v.host`.
 */
function formatVideoForAPI (v: DBVideoDetails, fromHost?: string): EnhancedVideo {
  // Keep only the id/label pair of a constant (category, licence...)
  const idAndLabel = <C extends { id: unknown, label: unknown }> (constant: C) => ({
    id: constant.id,
    label: constant.label
  })

  const urlOnVideoHost = (path: string) => buildUrl(v.host, path)

  // Same shape for the account and the channel
  const toAPIActor = (actor: (typeof v)['account'] | (typeof v)['channel']) => ({
    id: actor.id,
    name: actor.name,
    displayName: actor.displayName,
    url: actor.url,
    host: actor.host,

    avatar: formatAvatarForAPI(actor)
  })

  return {
    id: v.id,
    uuid: v.uuid,

    score: v.score,

    createdAt: new Date(v.createdAt),
    updatedAt: new Date(v.updatedAt),
    publishedAt: new Date(v.publishedAt),
    originallyPublishedAt: v.originallyPublishedAt,

    category: idAndLabel(v.category),
    licence: idAndLabel(v.licence),
    language: idAndLabel(v.language),
    privacy: idAndLabel(v.privacy),

    name: v.name,
    description: v.description,
    duration: v.duration,

    tags: v.tags,

    thumbnailPath: v.thumbnailPath,
    thumbnailUrl: urlOnVideoHost(v.thumbnailPath),

    previewPath: v.previewPath,
    previewUrl: urlOnVideoHost(v.previewPath),

    embedPath: v.embedPath,
    embedUrl: urlOnVideoHost(v.embedPath),

    url: v.url,

    isLocal: fromHost && fromHost === v.host,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    isLive: v.isLive,
    nsfw: v.nsfw,

    account: toAPIActor(v.account),
    channel: toAPIActor(v.channel)
  }
}
/**
 * Index mapping of the actor (account or channel) fields embedded in a document.
 *
 * `name` is a text field with an additional `raw` keyword sub-field;
 * the avatar properties come from buildAvatarMapping.
 */
function buildChannelOrAccountMapping () {
  // A fresh object per field, so no mapping entry shares a reference with another one
  const keyword = () => ({ type: 'keyword' })

  return {
    id: { type: 'long' },

    name: {
      type: 'text',
      fields: {
        raw: keyword()
      }
    },

    displayName: { type: 'text' },

    url: keyword(),
    host: keyword(),
    handle: keyword(),

    avatar: {
      properties: buildAvatarMapping()
    }
  }
}
function buildVideosMapping () {
return {
id: {
@ -647,11 +353,130 @@ function buildVideosMapping () {
},
account: {
properties: buildChannelOrAccountMapping()
properties: buildChannelOrAccountSummaryMapping()
},
channel: {
properties: buildChannelOrAccountMapping()
properties: buildChannelOrAccountSummaryMapping()
}
}
}
/**
 * Build the document stored in the index from a video fetched on an instance.
 *
 * Only the listed attributes are copied. `indexedAt` is set to the current date,
 * `isLive` defaults to false, `tags` is undefined when the video has none,
 * and the account/channel are formatted with formatActorForDB.
 */
function formatVideoForDB (v: IndexableVideo | IndexableVideoDetails): DBVideo | DBVideoDetails {
  // Keep only the id/label pair of a constant (category, licence...)
  const idAndLabel = <C extends { id: unknown, label: unknown }> (constant: C) => ({
    id: constant.id,
    label: constant.label
  })

  return {
    id: v.id,
    uuid: v.uuid,

    indexedAt: new Date(),
    createdAt: v.createdAt,
    updatedAt: v.updatedAt,
    publishedAt: v.publishedAt,
    originallyPublishedAt: v.originallyPublishedAt,

    category: idAndLabel(v.category),
    licence: idAndLabel(v.licence),
    language: idAndLabel(v.language),
    privacy: idAndLabel(v.privacy),

    name: v.name,
    description: v.description,
    duration: v.duration,

    thumbnailPath: v.thumbnailPath,
    previewPath: v.previewPath,
    embedPath: v.embedPath,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    isLive: v.isLive || false,
    nsfw: v.nsfw,

    host: v.host,
    url: v.url,

    // Tags only exist on the detailed video model
    tags: (v as IndexableVideoDetails).tags || undefined,

    account: formatActorForDB(v.account),
    channel: formatActorForDB(v.channel)
  }
}
/**
 * Build the video object returned by the search API from an indexed video.
 *
 * Dates are converted to Date objects, thumbnail/preview/embed URLs are built with
 * buildUrl from the video host, `isLocal` is the result of
 * `fromHost && fromHost === v.host`, and the account/channel are formatted
 * with formatActorSummaryForAPI.
 */
function formatVideoForAPI (v: DBVideoDetails, fromHost?: string): EnhancedVideo {
  // Keep only the id/label pair of a constant (category, licence...)
  const idAndLabel = <C extends { id: unknown, label: unknown }> (constant: C) => ({
    id: constant.id,
    label: constant.label
  })

  const urlOnVideoHost = (path: string) => buildUrl(v.host, path)

  return {
    id: v.id,
    uuid: v.uuid,

    score: v.score,

    createdAt: new Date(v.createdAt),
    updatedAt: new Date(v.updatedAt),
    publishedAt: new Date(v.publishedAt),
    originallyPublishedAt: v.originallyPublishedAt,

    category: idAndLabel(v.category),
    licence: idAndLabel(v.licence),
    language: idAndLabel(v.language),
    privacy: idAndLabel(v.privacy),

    name: v.name,
    description: v.description,
    duration: v.duration,

    tags: v.tags,

    thumbnailPath: v.thumbnailPath,
    thumbnailUrl: urlOnVideoHost(v.thumbnailPath),

    previewPath: v.previewPath,
    previewUrl: urlOnVideoHost(v.previewPath),

    embedPath: v.embedPath,
    embedUrl: urlOnVideoHost(v.embedPath),

    url: v.url,

    isLocal: fromHost && fromHost === v.host,

    views: v.views,
    likes: v.likes,
    dislikes: v.dislikes,

    isLive: v.isLive,
    nsfw: v.nsfw,

    account: formatActorSummaryForAPI(v.account),
    channel: formatActorSummaryForAPI(v.channel)
  }
}
export {
queryVideos,
formatVideoForDB,
formatVideoForAPI,
buildVideosMapping
}

View File

@ -0,0 +1,95 @@
import { AccountSummary, VideoChannelSummary } from '../../../../PeerTube/shared/models'
import { AdditionalActorAttributes } from '../../../types/actor.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
/**
 * Index mapping of the summary fields of an actor (account or channel).
 *
 * `name` is a text field with an additional `raw` keyword sub-field;
 * the avatar properties come from buildAvatarMapping.
 */
function buildChannelOrAccountSummaryMapping () {
  // A fresh object per field, so no mapping entry shares a reference with another one
  const keyword = () => ({ type: 'keyword' })

  return {
    id: { type: 'long' },

    name: {
      type: 'text',
      fields: {
        raw: keyword()
      }
    },

    displayName: { type: 'text' },

    url: keyword(),
    host: keyword(),
    handle: keyword(),

    avatar: {
      properties: buildAvatarMapping()
    }
  }
}
/**
 * Index mapping of an actor: the summary mapping plus the follow counters,
 * the creation/update dates and the description.
 */
function buildChannelOrAccountCommonMapping () {
  const long = () => ({ type: 'long' })
  const date = () => ({ type: 'date', format: 'date_optional_time' })

  const summaryMapping = buildChannelOrAccountSummaryMapping()

  return {
    ...summaryMapping,

    followingCount: long(),
    followersCount: long(),

    createdAt: date(),
    updatedAt: date(),

    description: { type: 'text' }
  }
}
/**
 * Build the actor summary returned by the search API from an indexed actor.
 * The avatar is formatted with formatAvatarForAPI.
 */
function formatActorSummaryForAPI (actor: (AccountSummary | VideoChannelSummary) & AdditionalActorAttributes) {
  const { id, name, displayName, url, host } = actor

  return {
    id,
    name,
    displayName,
    url,
    host,

    avatar: formatAvatarForAPI(actor)
  }
}
/**
 * Build the actor object stored in the index.
 * A `name@host` handle is added and the avatar is formatted with formatAvatarForDB.
 */
function formatActorForDB (actor: AccountSummary | VideoChannelSummary) {
  const { id, name, displayName, url, host } = actor

  return {
    id,
    name,
    displayName,
    url,
    host,

    handle: `${name}@${host}`,

    avatar: formatAvatarForDB(actor)
  }
}
export {
buildChannelOrAccountCommonMapping,
buildChannelOrAccountSummaryMapping,
formatActorSummaryForAPI,
formatActorForDB
}

View File

@ -1,6 +1,6 @@
import { ActorImage } from '@shared/models'
import { buildUrl } from '../helpers/utils'
import { ActorImage } from '../../../../PeerTube/shared/models'
import { buildUrl } from '../../../helpers/utils'
function formatAvatarForAPI (obj: { avatar?: ActorImage & { url: string } }) {
if (!obj.avatar) return null

View File

@ -0,0 +1,2 @@
export * from './elastic-search-actor'
export * from './elastic-search-avatar'

View File

@ -0,0 +1,27 @@
import { logger } from '../../helpers/logger'
import { CONFIG } from '../../initializers/constants'
import { DBChannel, IndexableChannel } from '../../types/channel.model'
import { formatChannelForDB } from '../elastic-search/elastic-search-channels'
import { getChannel } from '../requests/peertube-instance'
import { AbstractIndexer } from './shared'
/**
 * Indexer of the channels index; documents are formatted with formatChannelForDB.
 */
export class ChannelIndexer extends AbstractIndexer <IndexableChannel, DBChannel> {

  constructor () {
    super(CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS, formatChannelForDB)

    // Refresh the channels index whenever the queue drain callback fires
    const onQueueDrained = async () => {
      logger.info('Refresh channels index.')

      await this.refreshIndex()
    }

    this.indexQueue.drain(onQueueDrained)
  }

  /**
   * Fetch the channel `name` from `host` and index it in replace mode.
   */
  async indexSpecificElement (host: string, name: string) {
    const fetchedChannel = await getChannel(host, name)

    logger.info('Indexing specific channel %s@%s.', name, host)

    const replace = true
    return this.indexElements([ fetchedChannel ], replace)
  }
}

View File

@ -0,0 +1,16 @@
import { CONFIG } from '../../initializers/constants'
import { DBPlaylist, IndexablePlaylist } from '../../types/playlist.model'
import { formatPlaylistForDB } from '../elastic-search/elastic-search-playlists'
import { AbstractIndexer } from './shared'
/**
 * Indexer of the playlists index; documents are formatted with formatPlaylistForDB.
 */
export class PlaylistIndexer extends AbstractIndexer <IndexablePlaylist, DBPlaylist> {

  constructor () {
    super(CONFIG.ELASTIC_SEARCH.INDEXES.PLAYLISTS, formatPlaylistForDB)
  }

  /**
   * Not supported for playlists: always rejects with a 'Not implemented' error.
   */
  async indexSpecificElement (host: string, uuid: string) {
    // The list endpoint already returns all playlist information, so a specific element
    // does not need to be indexed yet
    const notImplemented = new Error('Not implemented')

    throw notImplemented
  }
}

View File

@ -0,0 +1,63 @@
import { AsyncQueue, queue } from 'async'
import { inspect } from 'util'
import { logger } from '../../../helpers/logger'
import { INDEXER_QUEUE_CONCURRENCY } from '../../../initializers/constants'
import { buildIndex, indexDocuments, refreshIndex } from '../../../lib/elastic-search/elastic-search-index'
import { removeFromHosts, removeNotExistingIdsFromHost } from '../../../lib/elastic-search/elastic-search-queries'
import { buildVideosMapping } from '../../../lib/elastic-search/elastic-search-videos'
import { IndexableDoc } from '../../../types/indexable-doc.model'
// Task pushed in an indexer queue.
// The identifier can be a UUID, a handle or a URL for example
export type QueueParam = { host: string, identifier: string }
/**
 * Base class of the indexers: it owns the index name, the function formatting
 * an element into the stored document, and a queue of "index this specific element" tasks.
 *
 * T is the element type received from instances, DB the type of the stored document.
 */
export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
  // Tasks are processed by indexSpecificElement() with a concurrency of INDEXER_QUEUE_CONCURRENCY
  protected readonly indexQueue: AsyncQueue<QueueParam>

  /**
   * Fetch and index one specific element of `host`.
   * The second argument is the `identifier` of the queued task.
   */
  abstract indexSpecificElement (host: string, uuid: string): Promise<any>

  constructor (
    protected readonly indexName: string,
    protected readonly formatterFn: (o: T) => DB
  ) {
    this.indexQueue = queue<QueueParam, Error>((task, cb) => {
      this.indexSpecificElement(task.host, task.identifier)
        .then(() => cb())
        .catch(err => {
          logger.error(
            { err: inspect(err) },
            'Error in index specific element %s of %s in index %s.', task.identifier, task.host, this.indexName
          )

          // Best effort: the error is logged, then the task is reported as done without an error
          cb()
        })
    }, INDEXER_QUEUE_CONCURRENCY)
  }

  /**
   * Build the index of this indexer using the mapping returned by buildMapping().
   */
  initIndex () {
    return buildIndex(this.indexName, this.buildMapping())
  }

  /**
   * Mapping used by initIndex() to build the index.
   *
   * The default is the videos mapping, which is what initIndex() previously hard-coded
   * for every index. Indexers of other document types should override this method
   * to return their own mapping.
   */
  protected buildMapping (): Record<string, unknown> {
    return buildVideosMapping()
  }

  /**
   * Queue the indexation of a specific element of `host`.
   */
  scheduleIndexation (host: string, identifier: string) {
    this.indexQueue.push({ identifier, host })
  }

  refreshIndex () {
    return refreshIndex(this.indexName)
  }

  /**
   * Remove from the index the documents of `host` whose id is not in `existingIds`.
   */
  removeNotExisting (host: string, existingIds: Set<number>) {
    return removeNotExistingIdsFromHost(this.indexName, host, existingIds)
  }

  /**
   * Remove from the index the documents of the given hosts.
   */
  removeFromHosts (hosts: string[]) {
    return removeFromHosts(this.indexName, hosts)
  }

  /**
   * Index `elements`, each one formatted with the formatter given to the constructor.
   *
   * @param replace forwarded as is to indexDocuments (false by default)
   */
  indexElements (elements: T[], replace = false) {
    return indexDocuments({
      objects: elements,
      formatter: v => this.formatterFn(v),
      replace,
      index: this.indexName
    })
  }
}

View File

@ -0,0 +1 @@
export * from './abstract-indexer'

View File

@ -0,0 +1,21 @@
import { logger } from '../../helpers/logger'
import { CONFIG } from '../../initializers/constants'
import { DBVideo, IndexableVideo } from '../../types/video.model'
import { formatVideoForDB } from '../elastic-search/elastic-search-videos'
import { getVideo } from '../requests/peertube-instance'
import { AbstractIndexer } from './shared'
/**
 * Indexer of the videos index; documents are formatted with formatVideoForDB.
 */
export class VideoIndexer extends AbstractIndexer <IndexableVideo, DBVideo> {

  constructor () {
    super(CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS, formatVideoForDB)
  }

  /**
   * Fetch the video `uuid` from `host` and index it in replace mode.
   */
  async indexSpecificElement (host: string, uuid: string) {
    const fetchedVideo = await getVideo(host, uuid)

    logger.info('Indexing specific video %s of %s.', uuid, host)

    const replace = true
    return this.indexElements([ fetchedVideo ], replace)
  }
}

View File

@ -1,5 +1,5 @@
import { CONFIG } from '../initializers/constants'
import { doRequest } from '../helpers/requests'
import { CONFIG } from '../../initializers/constants'
import { doRequest } from '../../helpers/requests'
async function listIndexInstancesHost (): Promise<string[]> {
const uri = CONFIG.INSTANCES_INDEX.URL

View File

@ -1,9 +1,10 @@
import { ResultList, Video, VideoChannel, VideoDetails } from '@shared/models'
import { doRequestWithRetries } from '../helpers/requests'
import { INDEXER_COUNT, REQUESTS } from '../initializers/constants'
import { IndexableChannel } from '../types/channel.model'
import { IndexableDoc } from '../types/elastic-search.model'
import { IndexableVideo } from '../types/video.model'
import { IndexablePlaylist } from 'server/types/playlist.model'
import { ResultList, Video, VideoChannel, VideoDetails, VideoPlaylist } from '@shared/models'
import { doRequestWithRetries } from '../../helpers/requests'
import { INDEXER_COUNT, REQUESTS } from '../../initializers/constants'
import { IndexableChannel } from '../../types/channel.model'
import { IndexableDoc } from '../../types/indexable-doc.model'
import { IndexableVideo } from '../../types/video.model'
async function getVideo (host: string, uuid: string): Promise<IndexableVideo> {
const url = 'https://' + host + '/api/v1/videos/' + uuid
@ -37,7 +38,7 @@ async function getVideos (host: string, start: number): Promise<IndexableVideo[]
filter: 'local',
nsfw: 'both',
skipCount: true,
count: INDEXER_COUNT.VIDEOS
count: INDEXER_COUNT
},
json: true
}, REQUESTS.MAX_RETRIES, REQUESTS.WAIT)
@ -49,6 +50,26 @@ async function getVideos (host: string, start: number): Promise<IndexableVideo[]
return res.body.data.map(v => prepareVideoForDB(v, host))
}
/**
 * Fetch one page of the playlists of the channel `handle` from `host`,
 * starting at offset `start` and asking for INDEXER_COUNT results.
 *
 * Each playlist is passed through preparePlaylistForDB.
 * Throws when the response body has no `data` array.
 */
async function getPlaylistsOf (host: string, handle: string, start: number): Promise<IndexablePlaylist[]> {
  const uri = 'https://' + host + '/api/v1/video-channels/' + handle + '/video-playlists'

  const requestOptions = {
    uri,
    qs: {
      start,
      filter: 'local',
      count: INDEXER_COUNT
    },
    json: true
  }

  const { body } = await doRequestWithRetries<ResultList<VideoPlaylist>>(requestOptions, REQUESTS.MAX_RETRIES, REQUESTS.WAIT)

  const hasPlaylistList = body && Array.isArray(body.data)
  if (!hasPlaylistList) {
    throw new Error('Invalid playlist data from ' + uri)
  }

  return body.data.map(playlist => preparePlaylistForDB(playlist, host))
}
function prepareVideoForDB <T extends Video> (video: T, host: string): T & IndexableDoc {
return Object.assign(video, {
elasticSearchId: host + video.id,
@ -57,7 +78,7 @@ function prepareVideoForDB <T extends Video> (video: T, host: string): T & Index
})
}
function prepareChannelForDB <T extends VideoChannel> (channel: T, host: string): T & IndexableDoc {
function prepareChannelForDB (channel: VideoChannel, host: string): IndexableChannel {
return Object.assign(channel, {
elasticSearchId: host + channel.id,
host,
@ -65,9 +86,22 @@ function prepareChannelForDB <T extends VideoChannel> (channel: T, host: string)
})
}
/**
 * Add the indexation attributes to a playlist fetched from `host`.
 *
 * The playlist object itself is modified and returned: it receives an `elasticSearchId`
 * (host followed by the playlist id), the `host` and the playlist watch `url`.
 */
function preparePlaylistForDB (playlist: VideoPlaylist, host: string): IndexablePlaylist {
  const indexationAttributes = {
    elasticSearchId: host + playlist.id,
    host,
    url: 'https://' + host + '/videos/watch/playlist/' + playlist.uuid
  }

  return Object.assign(playlist, indexationAttributes)
}
export {
getVideo,
getChannel,
getVideos,
prepareChannelForDB
getPlaylistsOf,
prepareVideoForDB,
prepareChannelForDB,
preparePlaylistForDB
}

View File

@ -0,0 +1,166 @@
import * as Bluebird from 'bluebird'
import { IndexablePlaylist } from 'server/types/playlist.model'
import { inspect } from 'util'
import { logger } from '../../helpers/logger'
import { INDEXER_CONCURRENCY, INDEXER_COUNT, INDEXER_LIMIT, SCHEDULER_INTERVALS_MS } from '../../initializers/constants'
import { IndexableVideo } from '../../types/video.model'
import { buildInstanceHosts } from '../elastic-search/elastic-search-instances'
import { ChannelIndexer } from '../indexers/channel-indexer'
import { PlaylistIndexer } from '../indexers/playlist-indexer'
import { VideoIndexer } from '../indexers/video-indexer'
import { getPlaylistsOf, getVideos } from '../requests/peertube-instance'
import { AbstractScheduler } from './abstract-scheduler'
/**
 * Scheduler that indexes the videos, channels and playlists of the hosts
 * returned by buildInstanceHosts().
 *
 * Only one instance exists: the constructor is private and the instance is
 * created on first access of `IndexationScheduler.Instance`.
 */
export class IndexationScheduler extends AbstractScheduler {

  private static instance: IndexationScheduler

  protected schedulerIntervalMs = SCHEDULER_INTERVALS_MS.videosIndexer

  // Hosts returned by the last buildInstanceHosts() call
  private indexedHosts: string[] = []

  private readonly channelIndexer: ChannelIndexer
  private readonly videoIndexer: VideoIndexer
  private readonly playlistIndexer: PlaylistIndexer

  // All indexers, used for the operations applied to every index
  private readonly indexers: [ ChannelIndexer, VideoIndexer, PlaylistIndexer ]

  private constructor () {
    super()

    this.channelIndexer = new ChannelIndexer()
    this.videoIndexer = new VideoIndexer()
    this.playlistIndexer = new PlaylistIndexer()

    this.indexers = [
      this.channelIndexer,
      this.videoIndexer,
      this.playlistIndexer
    ]
  }

  /**
   * Call initIndex() on every indexer, in parallel.
   */
  async initIndexes () {
    return Promise.all(this.indexers.map(i => i.initIndex()))
  }

  getIndexedHosts () {
    return this.indexedHosts
  }

  protected async internalExecute () {
    return this.runIndexer()
  }

  /**
   * One full indexation run: remove the entries of the removed hosts, index every
   * host (at most INDEXER_CONCURRENCY hosts at the same time), then refresh every index.
   */
  private async runIndexer () {
    logger.info('Running indexer.')

    const { indexHosts, removedHosts } = await buildInstanceHosts()
    this.indexedHosts = indexHosts

    for (const o of this.indexers) {
      await o.removeFromHosts(removedHosts)
    }

    await Bluebird.map(indexHosts, async host => {
      try {
        await this.indexHost(host)
      } catch (err) {
        // A failing host is logged and does not stop the indexation of the other hosts
        console.error(inspect(err, { depth: 10 }))
        logger.warn({ err: inspect(err) }, 'Cannot index videos from %s.', host)
      }
    }, { concurrency: INDEXER_CONCURRENCY })

    for (const o of this.indexers) {
      await o.refreshIndex()
    }

    logger.info('Indexer ended.')
  }

  /**
   * Index the videos of `host` page by page, schedule the indexation of the channels
   * seen on these videos, remove the channels/videos of this host that were not seen,
   * then index the playlists of the seen channels.
   */
  private async indexHost (host: string) {
    // Names of the channels seen on the fetched videos
    const channelsToSync = new Set<string>()
    // Ids seen during this run: entries of this host with another id are removed at the end
    const existingChannelsId = new Set<number>()
    const existingVideosId = new Set<number>()

    let videos: IndexableVideo[] = []
    let start = 0

    logger.info('Adding video data from %s.', host)

    do {
      logger.debug('Getting video results from %s (from = %d).', host, start)

      videos = await getVideos(host, start)
      start += videos.length

      logger.debug('Got %d video results from %s (from = %d).', videos.length, host, start)

      if (videos.length !== 0) {
        const { created } = await this.videoIndexer.indexElements(videos)

        logger.debug('Indexed %d videos from %s.', videos.length, host)

        // Fetch the complete video for each created video (to get tags)
        for (const c of created) {
          this.videoIndexer.scheduleIndexation(host, c.uuid)
        }
      }

      for (const video of videos) {
        channelsToSync.add(video.channel.name)

        existingChannelsId.add(video.channel.id)
        existingVideosId.add(video.id)
      }
      // Continue while the last page was full and the INDEXER_LIMIT offset is not reached
    } while (videos.length === INDEXER_COUNT && start < INDEXER_LIMIT)

    logger.info('Added video data from %s.', host)

    for (const c of channelsToSync) {
      this.channelIndexer.scheduleIndexation(host, c)
    }

    await this.channelIndexer.removeNotExisting(host, existingChannelsId)
    await this.videoIndexer.removeNotExisting(host, existingVideosId)

    await this.indexPlaylists(host, Array.from(channelsToSync))
  }

  /**
   * Index the playlists of the given channels of `host` page by page, then remove
   * the playlists of this host that were not seen.
   */
  private async indexPlaylists (host: string, channelHandles: string[]) {
    // Ids seen during this run: playlists of this host with another id are removed at the end
    const existingPlaylistsId = new Set<number>()

    logger.info('Adding playlist data from %s.', host)

    for (const channelHandle of channelHandles) {
      let playlists: IndexablePlaylist[] = []
      let start = 0

      do {
        logger.debug('Getting playlist results from %s (from = %d, channelHandle = %s).', host, start, channelHandle)

        playlists = await getPlaylistsOf(host, channelHandle, start)
        start += playlists.length

        logger.debug('Got %d playlist results from %s (from = %d, channelHandle = %s).', playlists.length, host, start, channelHandle)

        if (playlists.length !== 0) {
          await this.playlistIndexer.indexElements(playlists)

          logger.debug('Indexed %d playlists from %s.', playlists.length, host)
        }

        for (const playlist of playlists) {
          existingPlaylistsId.add(playlist.id)
        }
        // Continue while the last page was full and the INDEXER_LIMIT offset is not reached
      } while (playlists.length === INDEXER_COUNT && start < INDEXER_LIMIT)
    }

    logger.info('Added playlist data from %s.', host)

    await this.playlistIndexer.removeNotExisting(host, existingPlaylistsId)
  }

  // Lazily created unique instance
  static get Instance () {
    return this.instance || (this.instance = new this())
  }
}

View File

@ -1,159 +0,0 @@
import { AsyncQueue, queue } from 'async'
import * as Bluebird from 'bluebird'
import { inspect } from 'util'
import { logger } from '../../helpers/logger'
import { INDEXER_CONCURRENCY, INDEXER_COUNT, INDEXER_QUEUE_CONCURRENCY, SCHEDULER_INTERVALS_MS } from '../../initializers/constants'
import { IndexableVideo } from '../../types/video.model'
import { indexChannels, refreshChannelsIndex, removeChannelsFromHosts, removeNotExistingChannels } from '../elastic-search-channels'
import { buildInstanceHosts } from '../elastic-search-instances'
import { indexVideos, refreshVideosIndex, removeNotExistingVideos, removeVideosFromHosts } from '../elastic-search-videos'
import { getChannel, getVideo, getVideos } from '../peertube-instance'
import { AbstractScheduler } from './abstract-scheduler'
// Task of the video queue: the video `uuid` to fetch from `host`
type GetVideoQueueParam = { host: string, uuid: string }
// Task of the channel queue: the channel `name` to fetch from `host`
type GetChannelQueueParam = { host: string, name: string }
/**
 * Scheduler that indexes the videos and channels of the hosts returned by
 * buildInstanceHosts().
 *
 * Only one instance exists: the constructor is private and the instance is
 * created on first access of `VideosIndexer.Instance`.
 */
export class VideosIndexer extends AbstractScheduler {

  private static instance: VideosIndexer

  protected schedulerIntervalMs = SCHEDULER_INTERVALS_MS.videosIndexer

  // Hosts returned by the last buildInstanceHosts() call
  private indexedHosts: string[] = []

  // Queues of "index this specific video/channel" tasks
  private readonly indexVideoQueue: AsyncQueue<GetVideoQueueParam>
  private readonly indexChannelQueue: AsyncQueue<GetChannelQueueParam>

  private constructor () {
    super()

    this.indexVideoQueue = queue<GetVideoQueueParam, Error>((task, cb) => {
      this.indexSpecificVideo(task.host, task.uuid)
        .then(() => cb())
        .catch(err => {
          logger.error({ err: inspect(err) }, 'Error in index specific video %s of %s.', task.uuid, task.host)
          // Best effort: the error is logged, then the task is reported as done without an error
          cb()
        })
    }, INDEXER_QUEUE_CONCURRENCY)

    this.indexChannelQueue = queue<GetChannelQueueParam, Error>((task, cb) => {
      this.indexSpecificChannel(task.host, task.name)
        .then(() => cb())
        .catch(err => {
          logger.error({ err: inspect(err) }, 'Error in index specific channel %s@%s.', task.name, task.host)
          // Best effort: the error is logged, then the task is reported as done without an error
          cb()
        })
    }, INDEXER_QUEUE_CONCURRENCY)

    // Refresh the channels index whenever the channel queue drain callback fires
    this.indexChannelQueue.drain(async () => {
      logger.info('Refresh channels index.')
      await refreshChannelsIndex()
    })
  }

  /**
   * Queue the indexation of the video `uuid` of `host`.
   */
  scheduleVideoIndexation (host: string, uuid: string) {
    this.indexVideoQueue.push({ uuid, host })
  }

  /**
   * Queue the indexation of the channel `name` of `host`.
   */
  scheduleChannelIndexation (host: string, name: string) {
    this.indexChannelQueue.push({ name, host })
  }

  getIndexedHosts () {
    return this.indexedHosts
  }

  protected async internalExecute () {
    return this.runVideosIndexer()
  }

  /**
   * One full indexation run: remove the videos/channels of the removed hosts, index
   * every host (at most INDEXER_CONCURRENCY hosts at the same time), then refresh both indexes.
   */
  private async runVideosIndexer () {
    logger.info('Running videos indexer.')

    const { indexHosts, removedHosts } = await buildInstanceHosts()
    this.indexedHosts = indexHosts

    await removeVideosFromHosts(removedHosts)
    await removeChannelsFromHosts(removedHosts)

    await Bluebird.map(indexHosts, async host => {
      try {
        await this.indexHost(host)
      } catch (err) {
        // A failing host is logged and does not stop the indexation of the other hosts
        console.error(inspect(err, { depth: 10 }))
        logger.warn({ err: inspect(err) }, 'Cannot index videos from %s.', host)
      }
    }, { concurrency: INDEXER_CONCURRENCY })

    await refreshChannelsIndex()
    await refreshVideosIndex()

    logger.info('Videos indexer ended.')
  }

  /**
   * Index the videos of `host` page by page, schedule the indexation of the channels
   * seen on these videos, then remove the channels/videos of this host that were not seen.
   */
  private async indexHost (host: string) {
    // Names of the channels seen on the fetched videos
    const channelsToSync = new Set<string>()
    // Ids seen during this run: entries of this host with another id are removed at the end
    const channelsId = new Set<number>()
    const videosId = new Set<number>()

    let videos: IndexableVideo[] = []
    let start = 0

    logger.info('Adding video data from %s.', host)

    do {
      logger.debug('Getting results from %s (from = %d).', host, start)

      videos = await getVideos(host, start)
      start += videos.length

      logger.debug('Got %d results from %s (from = %d).', videos.length, host, start)

      if (videos.length !== 0) {
        const { created } = await indexVideos(videos)

        logger.debug('Indexed %d videos from %s.', videos.length, host)

        // Fetch the complete video for each created video (to get tags)
        for (const c of created) {
          this.scheduleVideoIndexation(host, c.uuid)
        }
      }

      for (const video of videos) {
        channelsToSync.add(video.channel.name)

        channelsId.add(video.channel.id)
        videosId.add(video.id)
      }
      // Continue while the last page was full and the 500000 offset is not reached
    } while (videos.length === INDEXER_COUNT.VIDEOS && start < 500000)

    logger.info('Added video data from %s.', host)

    for (const c of channelsToSync) {
      this.scheduleChannelIndexation(host, c)
    }

    await removeNotExistingChannels(host, channelsId)
    await removeNotExistingVideos(host, videosId)
  }

  /**
   * Fetch the video `uuid` from `host` and index it in replace mode.
   */
  private async indexSpecificVideo (host: string, uuid: string) {
    const video = await getVideo(host, uuid)

    logger.info('Indexing specific video %s of %s.', uuid, host)

    await indexVideos([ video ], true)
  }

  /**
   * Fetch the channel `name` from `host` and index it in replace mode.
   */
  private async indexSpecificChannel (host: string, name: string) {
    const channel = await getChannel(host, name)

    logger.info('Indexing specific channel %s@%s.', name, host)

    await indexChannels([ channel ], true)
  }

  // Lazily created unique instance
  static get Instance () {
    return this.instance || (this.instance = new this())
  }
}

View File

@ -97,11 +97,24 @@ const videoChannelsSearchValidator = [
}
]
// Validators of the video playlists search: a non empty `search` parameter is required
const videoPlaylistsSearchValidator = [
  check('search')
    .not().isEmpty()
    .withMessage('Should have a valid search'),

  (req: express.Request, res: express.Response, next: express.NextFunction) => {
    logger.debug({ query: req.query, body: req.body }, 'Checking video playlists search query')

    // Stop here when validation errors were found, otherwise go to the next middleware
    const hasErrors = areValidationErrors(req, res)
    if (!hasErrors) return next()

    return
  }
]
// ---------------------------------------------------------------------------
export {
videoChannelsSearchValidator,
commonFiltersValidators,
commonVideosFiltersValidator,
videoPlaylistsSearchValidator,
videosSearchValidator
}

View File

@ -3,13 +3,16 @@ import { checkSort, createSortableColumns } from './utils'
// Sortable columns of each search type, built from the SORTABLE_COLUMNS constants
const SORTABLE_VIDEOS_SEARCH_COLUMNS = createSortableColumns(SORTABLE_COLUMNS.VIDEOS_SEARCH)
const SORTABLE_CHANNELS_SEARCH_COLUMNS = createSortableColumns(SORTABLE_COLUMNS.CHANNELS_SEARCH)
const SORTABLE_PLAYLISTS_SEARCH_COLUMNS = createSortableColumns(SORTABLE_COLUMNS.PLAYLISTS_SEARCH)

// One sort validator per search type, each built from the columns above
const videosSearchSortValidator = checkSort(SORTABLE_VIDEOS_SEARCH_COLUMNS)
const channelsSearchSortValidator = checkSort(SORTABLE_CHANNELS_SEARCH_COLUMNS)
const playlistsSearchSortValidator = checkSort(SORTABLE_PLAYLISTS_SEARCH_COLUMNS)
// ---------------------------------------------------------------------------
export {
videosSearchSortValidator,
channelsSearchSortValidator
channelsSearchSortValidator,
playlistsSearchSortValidator
}

View File

@ -0,0 +1,9 @@
import { ActorImage } from '../../PeerTube/shared/models'
// Attributes added to the PeerTube actor models (accounts and channels) in the indexed documents
export type AdditionalActorAttributes = {
  handle: string
  avatar: ActorImageExtended
  url: string
}

// PeerTube ActorImage with an additional `url` attribute
export type ActorImageExtended = ActorImage & { url: string }

View File

@ -1,5 +1,6 @@
import { IndexableDoc } from './elastic-search.model'
import { VideoChannel, VideoChannelSummary, ActorImage, Account } from '../../PeerTube/shared/models'
import { Account, VideoChannel, VideoChannelSummary } from '../../PeerTube/shared/models'
import { ActorImageExtended, AdditionalActorAttributes } from './actor.model'
import { IndexableDoc } from './indexable-doc.model'
export interface IndexableChannel extends VideoChannel, IndexableDoc {
url: string
@ -10,9 +11,9 @@ export interface DBChannel extends Omit<VideoChannel, 'isLocal'> {
handle: string
url: string
ownerAccount?: Account & { handle: string, avatar: ActorImage & { url: string } }
ownerAccount?: Account & AdditionalActorAttributes
avatar?: ActorImage & { url: string }
avatar?: ActorImageExtended
score?: number
}

View File

@ -0,0 +1,24 @@
import { AccountSummary, VideoChannelSummary, VideoPlaylist } from '../../PeerTube/shared/models'
import { AdditionalActorAttributes } from './actor.model'
import { IndexableDoc } from './indexable-doc.model'
// Playlist fetched from an instance, with the IndexableDoc attributes and its `url`
export interface IndexablePlaylist extends VideoPlaylist, IndexableDoc {
  url: string
}

// Playlist document as stored in the index (PeerTube VideoPlaylist without `isLocal`)
export interface DBPlaylist extends Omit<VideoPlaylist, 'isLocal'> {
  indexedAt: Date
  host: string

  // Added by the query
  score?: number

  ownerAccount: AccountSummary & AdditionalActorAttributes
  videoChannel: VideoChannelSummary & AdditionalActorAttributes
}

// Results from the search API
export interface EnhancedPlaylist extends VideoPlaylist {
  score: number
}

View File

@ -1,6 +1,6 @@
import {
VideoChannelsSearchQuery as PeerTubeChannelsSearchQuery
} from '../../PeerTube/shared/models/search/video-channels-search-query.model'
} from '../../../PeerTube/shared/models/search/video-channels-search-query.model'
import { CommonSearch } from './common-search.model'
// Channels search query: the PeerTube channels search query combined with the CommonSearch attributes
export type ChannelsSearchQuery = PeerTubeChannelsSearchQuery & CommonSearch

View File

@ -0,0 +1,4 @@
import { VideoPlaylistsSearchQuery as PeerTubePlaylistsSearchQuery } from '../../../PeerTube/shared/models'
import { CommonSearch } from './common-search.model'
// Playlists search query: the PeerTube playlists search query combined with the CommonSearch attributes
export type PlaylistsSearchQuery = PeerTubePlaylistsSearchQuery & CommonSearch

View File

@ -1,4 +1,4 @@
import { VideosSearchQuery as PeerTubeVideosSearchQuery } from '../../PeerTube/shared/models/search/videos-search-query.model'
import { VideosSearchQuery as PeerTubeVideosSearchQuery } from '../../../PeerTube/shared/models/search/videos-search-query.model'
import { CommonSearch } from './common-search.model'
// Videos search query: the PeerTube videos search query without `skipCount` and `filter`,
// combined with the CommonSearch attributes and a `boostLanguages` list
export type VideosSearchQuery = Omit<PeerTubeVideosSearchQuery, 'skipCount' | 'filter'> & CommonSearch & { boostLanguages: string[] }

View File

@ -1,11 +1,7 @@
import { Account, AccountSummary, ActorImage, Video, VideoChannel, VideoChannelSummary, VideoDetails } from '../../PeerTube/shared/models'
import { IndexableDoc } from './elastic-search.model'
type ActorExtended = {
handle: string
avatar: ActorImage & { url: string }
}
import { Account, AccountSummary, Video, VideoChannel, VideoChannelSummary, VideoDetails } from '../../PeerTube/shared/models'
import { AdditionalActorAttributes } from './actor.model'
import { IndexableDoc } from './indexable-doc.model'
export interface IndexableVideo extends Video, IndexableDoc {
}
@ -16,9 +12,10 @@ export interface IndexableVideoDetails extends VideoDetails, IndexableDoc {
export interface DBVideoDetails extends Omit<VideoDetails, 'isLocal'> {
indexedAt: Date
host: string
url: string
account: Account & ActorExtended
channel: VideoChannel & ActorExtended
account: Account & AdditionalActorAttributes
channel: VideoChannel & AdditionalActorAttributes
score?: number
}
@ -28,8 +25,8 @@ export interface DBVideo extends Omit<Video, 'isLocal'> {
host: string
url: string
account: AccountSummary & ActorExtended
channel: VideoChannelSummary & ActorExtended
account: AccountSummary & AdditionalActorAttributes
channel: VideoChannelSummary & AdditionalActorAttributes
}
// Results from the search API