mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add YT script loader for data bank
This commit is contained in:
@@ -48,6 +48,92 @@ router.post('/search', jsonParser, async (request, response) => {
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Get the transcript of a YouTube video.
 *
 * Request body (JSON):
 *   - `id`   (required) YouTube video id.
 *   - `lang` (optional) language code; when given, only a caption track whose
 *            `languageCode` equals it is used, otherwise the first track is used.
 *
 * Responses:
 *   - 400 when `id` is missing.
 *   - 500 when the page/transcript cannot be fetched or parsed (the reason is logged).
 *   - Otherwise the transcript lines, HTML-entity-decoded and joined with single spaces.
 *
 * @copyright https://github.com/Kakulukian/youtube-transcript (MIT License)
 */
router.post('/transcript', jsonParser, async (request, response) => {
    try {
        const he = require('he');
        const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
        const { id, lang } = request.body;

        if (!id) {
            console.log('Id is required for /transcript');
            return response.sendStatus(400);
        }

        // Same headers are used for both the watch page and the transcript request.
        const headers = {
            ...(lang && { 'Accept-Language': lang }),
            'User-Agent': visitHeaders['User-Agent'],
        };

        // Build the URL via searchParams so a client-supplied id is always encoded
        // and cannot smuggle additional query parameters into the request.
        const videoPageURL = new URL('https://www.youtube.com/watch');
        videoPageURL.searchParams.set('v', id);

        const videoPageResponse = await fetch(videoPageURL.href, { headers });
        const videoPageBody = await videoPageResponse.text();
        const splittedHTML = videoPageBody.split('"captions":');

        if (splittedHTML.length <= 1) {
            if (videoPageBody.includes('class="g-recaptcha"')) {
                throw new Error('Too many requests');
            }
            if (!videoPageBody.includes('"playabilityStatus":')) {
                throw new Error('Video is not available');
            }
            throw new Error('Transcript not available');
        }

        // The captions JSON sits between '"captions":' and ',"videoDetails' in the page source.
        const captions = (() => {
            try {
                // Strip every newline (a string pattern would only remove the first one).
                return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace(/\n/g, ''));
            } catch (e) {
                // Unparseable captions are treated the same as missing captions.
                return undefined;
            }
        })()?.['playerCaptionsTracklistRenderer'];

        if (!captions) {
            throw new Error('Transcript disabled');
        }

        const captionTracks = captions.captionTracks;

        // Also covers an empty track list, which would otherwise fail on captionTracks[0].
        if (!Array.isArray(captionTracks) || captionTracks.length === 0) {
            throw new Error('Transcript not available');
        }

        const track = lang
            ? captionTracks.find(candidate => candidate.languageCode === lang)
            : captionTracks[0];

        if (!track) {
            throw new Error('Transcript not available in this language');
        }

        const transcriptResponse = await fetch(track.baseUrl, { headers });

        if (!transcriptResponse.ok) {
            throw new Error('Transcript request failed');
        }

        const transcriptBody = await transcriptResponse.text();

        // Capture group 3 is the text content of each <text> element.
        // The text is double-encoded
        const transcriptText = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)]
            .map((match) => he.decode(he.decode(match[3])))
            .join(' ');

        return response.send(transcriptText);
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
|
||||
|
||||
router.post('/visit', jsonParser, async (request, response) => {
|
||||
try {
|
||||
const url = request.body.url;
|
||||
|
Reference in New Issue
Block a user