mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2024-12-12 09:26:33 +01:00
Allow returning page if transcript extraction failed
This commit is contained in:
parent
6cb82fc21e
commit
777b2518bd
@ -22,59 +22,15 @@ const visitHeaders = {
|
||||
'Sec-Fetch-User': '?1',
|
||||
};
|
||||
|
||||
/**
 * POST /serpapi — forwards a search query to the SerpApi JSON endpoint.
 *
 * Reads `query` from the JSON request body and the SerpApi key via
 * `readSecret(request.user.directories, SECRET_KEYS.SERPAPI)`.
 *
 * Responses:
 *  - 400 when no SerpApi key is available;
 *  - 500 with the upstream response text when SerpApi answers with a non-OK status;
 *  - 500 when any step throws;
 *  - otherwise the parsed SerpApi JSON payload.
 */
router.post('/serpapi', jsonParser, async (request, response) => {
    try {
        const key = readSecret(request.user.directories, SECRET_KEYS.SERPAPI);

        if (!key) {
            console.log('No SerpApi key found');
            return response.sendStatus(400);
        }

        const { query } = request.body;
        // Both values are percent-encoded: an unencoded key containing
        // characters such as '&', '#', '+' or '%' would corrupt the query string.
        const result = await fetch(`https://serpapi.com/search.json?q=${encodeURIComponent(query)}&api_key=${encodeURIComponent(key)}`);

        console.log('SerpApi query', query);

        if (!result.ok) {
            // Relay the upstream error body so the caller can see why SerpApi refused.
            const text = await result.text();
            console.log('SerpApi request failed', result.statusText, text);
            return response.status(500).send(text);
        }

        const data = await result.json();
        return response.json(data);
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
|
||||
|
||||
/**
|
||||
* Get the transcript of a YouTube video
|
||||
* @copyright https://github.com/Kakulukian/youtube-transcript (MIT License)
|
||||
* Extract the transcript of a YouTube video
|
||||
* @param {string} videoPageBody HTML of the video page
|
||||
* @param {string} lang Language code
|
||||
* @returns {Promise<string>} Transcript text
|
||||
*/
|
||||
router.post('/transcript', jsonParser, async (request, response) => {
|
||||
try {
|
||||
async function extractTranscript(videoPageBody, lang) {
|
||||
const he = require('he');
|
||||
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
|
||||
const id = request.body.id;
|
||||
const lang = request.body.lang;
|
||||
const json = request.body.json;
|
||||
|
||||
if (!id) {
|
||||
console.log('Id is required for /transcript');
|
||||
return response.sendStatus(400);
|
||||
}
|
||||
|
||||
const videoPageResponse = await fetch(`https://www.youtube.com/watch?v=${id}`, {
|
||||
headers: {
|
||||
...(lang && { 'Accept-Language': lang }),
|
||||
'User-Agent': visitHeaders['User-Agent'],
|
||||
},
|
||||
});
|
||||
|
||||
const videoPageBody = await videoPageResponse.text();
|
||||
const splittedHTML = videoPageBody.split('"captions":');
|
||||
|
||||
if (splittedHTML.length <= 1) {
|
||||
@ -129,10 +85,72 @@ router.post('/transcript', jsonParser, async (request, response) => {
|
||||
}));
|
||||
// The text is double-encoded
|
||||
const transcriptText = transcript.map((line) => he.decode(he.decode(line.text))).join(' ');
|
||||
return transcriptText;
|
||||
}
|
||||
|
||||
/**
 * POST /serpapi — forwards a search query to the SerpApi JSON endpoint.
 *
 * Reads `query` from the JSON request body and the SerpApi key via
 * `readSecret(request.user.directories, SECRET_KEYS.SERPAPI)`.
 *
 * Responses:
 *  - 400 when no SerpApi key is available;
 *  - 500 with the upstream response text when SerpApi answers with a non-OK status;
 *  - 500 when any step throws;
 *  - otherwise the parsed SerpApi JSON payload.
 */
router.post('/serpapi', jsonParser, async (request, response) => {
    try {
        const key = readSecret(request.user.directories, SECRET_KEYS.SERPAPI);

        if (!key) {
            console.log('No SerpApi key found');
            return response.sendStatus(400);
        }

        const { query } = request.body;
        // Both values are percent-encoded: an unencoded key containing
        // characters such as '&', '#', '+' or '%' would corrupt the query string.
        const result = await fetch(`https://serpapi.com/search.json?q=${encodeURIComponent(query)}&api_key=${encodeURIComponent(key)}`);

        console.log('SerpApi query', query);

        if (!result.ok) {
            // Relay the upstream error body so the caller can see why SerpApi refused.
            const text = await result.text();
            console.log('SerpApi request failed', result.statusText, text);
            return response.status(500).send(text);
        }

        const data = await result.json();
        return response.json(data);
    } catch (error) {
        console.log(error);
        return response.sendStatus(500);
    }
});
|
||||
|
||||
/**
|
||||
* Get the transcript of a YouTube video
|
||||
* @copyright https://github.com/Kakulukian/youtube-transcript (MIT License)
|
||||
*/
|
||||
router.post('/transcript', jsonParser, async (request, response) => {
|
||||
try {
|
||||
const id = request.body.id;
|
||||
const lang = request.body.lang;
|
||||
const json = request.body.json;
|
||||
|
||||
if (!id) {
|
||||
console.log('Id is required for /transcript');
|
||||
return response.sendStatus(400);
|
||||
}
|
||||
|
||||
const videoPageResponse = await fetch(`https://www.youtube.com/watch?v=${id}`, {
|
||||
headers: {
|
||||
...(lang && { 'Accept-Language': lang }),
|
||||
'User-Agent': visitHeaders['User-Agent'],
|
||||
},
|
||||
});
|
||||
|
||||
const videoPageBody = await videoPageResponse.text();
|
||||
|
||||
try {
|
||||
const transcriptText = await extractTranscript(videoPageBody, lang);
|
||||
return json
|
||||
? response.json({ transcript: transcriptText, html: videoPageBody })
|
||||
: response.send(transcriptText);
|
||||
} catch (error) {
|
||||
if (json) {
|
||||
return response.json({ html: videoPageBody, transcript: '' });
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
return response.sendStatus(500);
|
||||
|
Loading…
Reference in New Issue
Block a user