Also remove channels from DB

This commit is contained in:
Chocobozzz 2020-09-03 14:25:57 +02:00
parent 779c303721
commit fd69bf6be0
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
4 changed files with 78 additions and 41 deletions

View File

@ -1,11 +1,11 @@
import { CONFIG } from '../initializers/constants'
import { VideoChannel } from '@shared/models'
import { difference } from 'lodash'
import { buildIndex, buildSort, elasticSearch, extractQueryResult, indexDocuments } from '../helpers/elastic-search'
import { logger } from '../helpers/logger'
import { DBChannel, IndexableChannel, EnhancedVideoChannel } from '../types/channel.model'
import { CONFIG } from '../initializers/constants'
import { ChannelsSearchQuery } from '../types/channel-search.model'
import { DBChannel, EnhancedVideoChannel, IndexableChannel } from '../types/channel.model'
import { buildAvatarMapping, formatAvatarForAPI, formatAvatarForDB } from './elastic-search-avatar'
import { difference } from 'lodash'
function initChannelsIndex () {
return buildIndex(CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS, buildChannelsMapping())
@ -54,6 +54,27 @@ async function removeNotExistingChannels (host: string, existingChannels: Set<nu
})
}
/**
 * Deletes from the channels index every document whose `host` field is one
 * of the given hosts.
 *
 * @param hosts Hosts whose channels must be dropped.
 * @returns `undefined` (and no request is sent) when `hosts` is empty,
 * otherwise the value returned by `elasticSearch.delete_by_query`.
 */
function removeChannelsFromHosts (hosts: string[]) {
  const nothingToRemove = hosts.length === 0
  if (nothingToRemove) return

  logger.info({ hosts }, 'Will remove channels from hosts.')

  // Match any document whose host is in the list
  const hostsFilter = { terms: { host: hosts } }
  const query = { bool: { filter: hostsFilter } }

  return elasticSearch.delete_by_query({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS,
    body: { query }
  })
}
async function queryChannels (search: ChannelsSearchQuery) {
const bool: any = {}
const mustNot: any[] = []
@ -139,6 +160,7 @@ async function getChannelIdsOf (host: string) {
}
export {
removeChannelsFromHosts,
initChannelsIndex,
indexChannels,
refreshChannelsIndex,

View File

@ -1,42 +1,61 @@
import { elasticSearch } from '../helpers/elastic-search'
import { CONFIG } from '../initializers/constants'
import { getRemovedHosts, listIndexInstancesHost } from './instances-index'
/**
 * Lists the distinct `host` values found in the videos index.
 *
 * Uses a `terms` aggregation on the `host` field (at most 5000 buckets) with
 * `size: 0`, so no documents are requested in the response.
 *
 * @returns The key of each aggregation bucket, in the order returned.
 */
async function listIndexInstances () {
  const hostsAggregation = {
    terms: {
      size: 5000,
      field: 'host'
    }
  }

  const { body } = await elasticSearch.search({
    index: CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    body: {
      size: 0,
      aggs: { hosts: hostsAggregation }
    }
  })

  const hostBuckets = body.aggregations.hosts.buckets

  return hostBuckets.map(bucket => bucket.key)
}
import { listIndexInstancesHost } from './instances-index'
/**
 * Computes which hosts should be indexed and which ones should be purged.
 *
 * - `indexHosts`: hosts returned by `listIndexInstancesHost()`, restricted to
 *   `CONFIG.INSTANCES_INDEX.WHITELIST.HOSTS` when the whitelist is enabled
 *   (a non-array whitelist is treated as empty, so nothing is kept).
 * - `removedHosts`: hosts currently stored (as reported by `listDBInstances()`)
 *   that are not part of `indexHosts` anymore. Hosts filtered out by the
 *   whitelist therefore end up in this list.
 *
 * @returns An object holding both lists.
 */
async function buildInstanceHosts () {
  let indexHosts = await listIndexInstancesHost()

  if (CONFIG.INSTANCES_INDEX.WHITELIST.ENABLED) {
    const whitelistHosts = Array.isArray(CONFIG.INSTANCES_INDEX.WHITELIST.HOSTS)
      ? CONFIG.INSTANCES_INDEX.WHITELIST.HOSTS
      : []

    indexHosts = indexHosts.filter(h => whitelistHosts.includes(h))
  }

  // Stored hosts come from listDBInstances() so that hosts which only have
  // channels left are detected as removed too
  const dbHosts = await listDBInstances()
  const removedHosts = getRemovedHosts(dbHosts, indexHosts)

  return { indexHosts, removedHosts }
}
export {
listIndexInstances,
buildInstanceHosts
}
// ##################################################
/**
 * Lists the distinct `host` values stored in the videos and channels indexes.
 *
 * Each index is queried with a `terms` aggregation on `host` (at most 5000
 * buckets per index, `size: 0` so no documents are requested). Both queries
 * are independent, so they are sent concurrently; results are merged in index
 * order (videos first, then channels) to keep the output deterministic.
 *
 * @returns The de-duplicated host list.
 */
async function listDBInstances () {
  const indexes = [
    CONFIG.ELASTIC_SEARCH.INDEXES.VIDEOS,
    CONFIG.ELASTIC_SEARCH.INDEXES.CHANNELS
  ]

  const responses = await Promise.all(
    indexes.map(index => elasticSearch.search({
      index,
      body: {
        size: 0,
        aggs: {
          hosts: {
            terms: {
              size: 5000,
              field: 'host'
            }
          }
        }
      }
    }))
  )

  // A host may appear in both indexes: the Set keeps a single occurrence
  const setResult = new Set<string>()

  for (const res of responses) {
    for (const b of res.body.aggregations.hosts.buckets) {
      setResult.add(b.key)
    }
  }

  return Array.from(setResult)
}
/**
 * Returns the hosts that are stored but not listed by the index anymore.
 *
 * @param dbHosts Hosts currently stored.
 * @param indexHosts Hosts that should be kept.
 * @returns Entries of `dbHosts` absent from `indexHosts`, in `dbHosts` order
 * (duplicates in `dbHosts` are preserved).
 */
function getRemovedHosts (dbHosts: string[], indexHosts: string[]) {
  // Set lookup keeps this linear instead of |dbHosts| x |indexHosts|
  const keptHosts = new Set(indexHosts)

  return dbHosts.filter(dbHost => !keptHosts.has(dbHost))
}

View File

@ -14,11 +14,6 @@ async function listIndexInstancesHost (): Promise<string[]> {
return body.data.map(o => o.host as string)
}
/**
 * Returns the hosts that are stored but not listed by the index anymore.
 *
 * @param dbHosts Hosts currently stored.
 * @param indexHosts Hosts that should be kept.
 * @returns Entries of `dbHosts` absent from `indexHosts`, in `dbHosts` order
 * (duplicates in `dbHosts` are preserved).
 */
function getRemovedHosts (dbHosts: string[], indexHosts: string[]) {
  // Set lookup keeps this linear instead of |dbHosts| x |indexHosts|
  const keptHosts = new Set(indexHosts)

  return dbHosts.filter(dbHost => !keptHosts.has(dbHost))
}
export {
getRemovedHosts,
listIndexInstancesHost
}

View File

@ -1,14 +1,14 @@
import { AbstractScheduler } from './abstract-scheduler'
import { INDEXER_CONCURRENCY, INDEXER_COUNT, INDEXER_QUEUE_CONCURRENCY, SCHEDULER_INTERVALS_MS } from '../../initializers/constants'
import { logger } from '../../helpers/logger'
import { indexVideos, refreshVideosIndex, removeNotExistingVideos, removeVideosFromHosts } from '../elastic-search-videos'
import { IndexableVideo } from '../../types/video.model'
import { inspect } from 'util'
import { AsyncQueue, queue } from 'async'
import { buildInstanceHosts } from '../elastic-search-instances'
import { getChannel, getVideo, getVideos } from '../peertube-instance'
import { indexChannels, refreshChannelsIndex, removeNotExistingChannels } from '../elastic-search-channels'
import * as Bluebird from 'bluebird'
import { inspect } from 'util'
import { logger } from '../../helpers/logger'
import { INDEXER_CONCURRENCY, INDEXER_COUNT, INDEXER_QUEUE_CONCURRENCY, SCHEDULER_INTERVALS_MS } from '../../initializers/constants'
import { IndexableVideo } from '../../types/video.model'
import { indexChannels, refreshChannelsIndex, removeChannelsFromHosts, removeNotExistingChannels } from '../elastic-search-channels'
import { buildInstanceHosts } from '../elastic-search-instances'
import { indexVideos, refreshVideosIndex, removeNotExistingVideos, removeVideosFromHosts } from '../elastic-search-videos'
import { getChannel, getVideo, getVideos } from '../peertube-instance'
import { AbstractScheduler } from './abstract-scheduler'
// Identifies one video by the `host` it lives on and its `uuid`.
// NOTE(review): presumably the task payload of a video fetch queue (see the `async` queue import) - confirm against VideosIndexer.
type GetVideoQueueParam = { host: string, uuid: string }
// Identifies one channel by the `host` it lives on and its `name`.
// NOTE(review): presumably the task payload of a channel fetch queue - confirm against VideosIndexer.
type GetChannelQueueParam = { host: string, name: string }
@ -74,6 +74,7 @@ export class VideosIndexer extends AbstractScheduler {
this.indexedHosts = indexHosts
await removeVideosFromHosts(removedHosts)
await removeChannelsFromHosts(removedHosts)
await Bluebird.map(indexHosts, async host => {
try {