Try to have faster indexation
This commit is contained in:
parent
c9f901d6a3
commit
fde7007788
|
@ -73,6 +73,8 @@ const INDEXER_LIMIT = 500000
|
|||
const INDEXER_HOST_CONCURRENCY = 3
|
||||
const INDEXER_QUEUE_CONCURRENCY = 3
|
||||
|
||||
const INDEXER_BULK_INDEXATION_MS = 5000
|
||||
|
||||
const REQUESTS = {
|
||||
MAX_RETRIES: 10,
|
||||
WAIT: 10000 // 10 seconds
|
||||
|
@ -103,5 +105,6 @@ export {
|
|||
INDEXER_HOST_CONCURRENCY,
|
||||
INDEXER_COUNT,
|
||||
INDEXER_LIMIT,
|
||||
INDEXER_BULK_INDEXATION_MS,
|
||||
REQUESTS
|
||||
}
|
||||
|
|
|
@ -17,10 +17,12 @@ export class ChannelIndexer extends AbstractIndexer <IndexableChannel, DBChannel
|
|||
}
|
||||
|
||||
async indexSpecificElement (host: string, name: string) {
|
||||
await this.waitForBulkIndexation()
|
||||
|
||||
const channel = await getChannel(host, name)
|
||||
|
||||
logger.info('Indexing specific channel %s@%s.', name, host)
|
||||
|
||||
return this.indexElements([ channel ])
|
||||
this.addElementsToBulkIndex([ channel ])
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
import { QueueObject, queue } from 'async'
|
||||
import { inspect } from 'util'
|
||||
import { logger } from '../../../helpers/logger'
|
||||
import { INDEXER_QUEUE_CONCURRENCY } from '../../../initializers/constants'
|
||||
import { INDEXER_BULK_INDEXATION_MS, INDEXER_QUEUE_CONCURRENCY } from '../../../initializers/constants'
|
||||
import { IndexableDoc } from '../../../types/indexable-doc.model'
|
||||
import { client } from '../../../helpers/meilisearch'
|
||||
import { buildInValuesArray } from '../../meilisearch/meilisearch-queries'
|
||||
import { EnqueuedTask } from 'meilisearch'
|
||||
|
||||
// identifier could be an uuid, an handle or a url for example
|
||||
export type QueueParam = { host: string, identifier: string }
|
||||
|
@ -19,7 +20,11 @@ export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
|
|||
|
||||
protected readonly rankingRules: string[]
|
||||
|
||||
abstract indexSpecificElement (host: string, uuid: string): Promise<{ taskUid: number }>
|
||||
private elementsToBulkIndex: T[] = []
|
||||
private bulkIndexationTimer: any
|
||||
private bulkProcessEnqueuedTask: EnqueuedTask
|
||||
|
||||
abstract indexSpecificElement (host: string, uuid: string): Promise<void>
|
||||
|
||||
constructor (
|
||||
protected readonly indexName: string,
|
||||
|
@ -27,8 +32,7 @@ export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
|
|||
) {
|
||||
this.indexQueue = queue<QueueParam, Error>(async (task, cb) => {
|
||||
try {
|
||||
const { taskUid } = await this.indexSpecificElement(task.host, task.identifier)
|
||||
await client.index(this.indexName).waitForTask(taskUid, { timeOutMs: 1000 * 60 * 5 }) // 5 minutes timeout
|
||||
await this.indexSpecificElement(task.host, task.identifier)
|
||||
|
||||
return cb()
|
||||
} catch (err) {
|
||||
|
@ -62,10 +66,7 @@ export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
|
|||
}
|
||||
}
|
||||
|
||||
scheduleIndexation (host: string, identifier: string) {
|
||||
this.indexQueue.push({ identifier, host })
|
||||
.catch(err => logger.error({ err: inspect(err) }, 'Cannot schedule indexation of %s for %s', identifier, host))
|
||||
}
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
removeNotExisting (host: string, existingPrimaryKeys: Set<string>) {
|
||||
return client.index(this.indexName).deleteDocuments({
|
||||
|
@ -79,7 +80,43 @@ export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
|
|||
})
|
||||
}
|
||||
|
||||
async indexElements (elements: T[]) {
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
scheduleParallelIndexation (host: string, identifier: string) {
|
||||
this.indexQueue.push({ identifier, host })
|
||||
.catch(err => logger.error({ err: inspect(err) }, 'Cannot schedule indexation of %s for %s', identifier, host))
|
||||
}
|
||||
|
||||
async waitForBulkIndexation () {
|
||||
if (!this.bulkProcessEnqueuedTask) return
|
||||
|
||||
await this.waitForTask(this.bulkProcessEnqueuedTask.taskUid, 1000)
|
||||
|
||||
this.bulkProcessEnqueuedTask = undefined
|
||||
}
|
||||
|
||||
addElementsToBulkIndex (elements: T[]) {
|
||||
this.elementsToBulkIndex = this.elementsToBulkIndex.concat(elements)
|
||||
|
||||
this.scheduleBulkIndexationProcess()
|
||||
}
|
||||
|
||||
private scheduleBulkIndexationProcess () {
|
||||
if (this.bulkIndexationTimer) return
|
||||
|
||||
this.bulkIndexationTimer = setTimeout(async () => {
|
||||
try {
|
||||
const elements = this.elementsToBulkIndex
|
||||
this.elementsToBulkIndex = []
|
||||
|
||||
this.bulkProcessEnqueuedTask = await this.indexElements(elements)
|
||||
} catch (err) {
|
||||
logger.error({ err }, 'Cannot schedule bulk indexation')
|
||||
}
|
||||
}, INDEXER_BULK_INDEXATION_MS)
|
||||
}
|
||||
|
||||
private async indexElements (elements: T[]) {
|
||||
const documents = elements.map(e => this.formatterFn(e))
|
||||
|
||||
const result = await client.index(this.indexName).updateDocuments(documents, { primaryKey: this.primaryKey.toString() })
|
||||
|
@ -87,4 +124,10 @@ export abstract class AbstractIndexer <T extends IndexableDoc, DB> {
|
|||
|
||||
return result
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
private waitForTask (taskId: number, intervalMs?: number) {
|
||||
return client.index(this.indexName).waitForTask(taskId, { timeOutMs: 1000 * 60 * 5, intervalMs })
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,10 +52,12 @@ export class VideoIndexer extends AbstractIndexer <IndexableVideo, DBVideo> {
|
|||
}
|
||||
|
||||
async indexSpecificElement (host: string, uuid: string) {
|
||||
await this.waitForBulkIndexation()
|
||||
|
||||
const video = await getVideo(host, uuid)
|
||||
|
||||
logger.info('Indexing specific video %s of %s.', uuid, host)
|
||||
|
||||
return this.indexElements([ video ])
|
||||
this.addElementsToBulkIndex([ video ])
|
||||
}
|
||||
}
|
||||
|
|
|
@ -88,6 +88,8 @@ export class IndexationScheduler extends AbstractScheduler {
|
|||
logger.info('Adding video data from %s.', host)
|
||||
|
||||
do {
|
||||
await this.videoIndexer.waitForBulkIndexation()
|
||||
|
||||
logger.debug('Getting video results from %s (from = %d).', host, start)
|
||||
|
||||
videos = await getVideos(host, start)
|
||||
|
@ -98,16 +100,15 @@ export class IndexationScheduler extends AbstractScheduler {
|
|||
if (videos.length !== 0) {
|
||||
const videosFromDB = await getVideosUpdatedAt(videos.map(v => v.uuid))
|
||||
|
||||
await this.videoIndexer.indexElements(videos)
|
||||
|
||||
logger.debug('Indexed %d videos from %s.', videos.length, host)
|
||||
logger.debug('Indexing %d videos from %s.', videos.length, host)
|
||||
this.videoIndexer.addElementsToBulkIndex(videos)
|
||||
|
||||
// Fetch complete video foreach created video (to get tags) if needed
|
||||
for (const video of videos) {
|
||||
const videoDB = videosFromDB.find(v => v.uuid === video.uuid)
|
||||
|
||||
if (!videoDB || videoDB.updatedAt !== new Date(video.updatedAt).getTime()) {
|
||||
this.videoIndexer.scheduleIndexation(host, video.uuid)
|
||||
this.videoIndexer.scheduleParallelIndexation(host, video.uuid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -123,7 +124,7 @@ export class IndexationScheduler extends AbstractScheduler {
|
|||
logger.info('Added video data from %s.', host)
|
||||
|
||||
for (const c of channelsToSync) {
|
||||
this.channelIndexer.scheduleIndexation(host, c)
|
||||
this.channelIndexer.scheduleParallelIndexation(host, c)
|
||||
}
|
||||
|
||||
logger.info('Removing non-existing channels and videos from ' + host)
|
||||
|
@ -143,6 +144,8 @@ export class IndexationScheduler extends AbstractScheduler {
|
|||
let start = 0
|
||||
|
||||
do {
|
||||
await this.playlistIndexer.waitForBulkIndexation()
|
||||
|
||||
logger.debug('Getting playlist results from %s (from = %d, channelHandle = %s).', host, start, channelHandle)
|
||||
|
||||
playlists = await getPlaylistsOf(host, channelHandle, start)
|
||||
|
@ -151,7 +154,7 @@ export class IndexationScheduler extends AbstractScheduler {
|
|||
start += playlists.length
|
||||
|
||||
if (playlists.length !== 0) {
|
||||
await this.playlistIndexer.indexElements(playlists)
|
||||
this.playlistIndexer.addElementsToBulkIndex(playlists)
|
||||
|
||||
logger.debug('Indexed %d playlists from %s.', playlists.length, host)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue