diff --git a/public/script.js b/public/script.js
index d141d4f60..be298e0a3 100644
--- a/public/script.js
+++ b/public/script.js
@@ -1240,7 +1240,7 @@ async function getStatusTextgen() {
     const wantsContextDerivation = power_user.context_derived;
     const supportsChatTemplate = response.headers.get('x-supports-chat-template') === 'true';
     if (supportsChatTemplate && (wantsInstructDerivation || wantsContextDerivation)) {
-        const response = await fetch('/api/backends/text-completions/chat_template', {
+        const response = await fetch('/api/backends/text-completions/props', {
             method: 'POST',
             headers: getRequestHeaders(),
             body: JSON.stringify({
diff --git a/public/scripts/chat-templates.js b/public/scripts/chat-templates.js
index 00b2f9771..bcc136803 100644
--- a/public/scripts/chat-templates.js
+++ b/public/scripts/chat-templates.js
@@ -1,63 +1,66 @@
-// the hash can be obtained from command line e.g. via: MODEL=path_to_model; python -c "import json, hashlib, sys; print(hashlib.sha256(json.load(open('"$MODEL"/tokenizer_config.json'))['chat_template'].strip().encode()).hexdigest())"
+// the hash can be obtained from command line e.g. via: MODEL=path_to_model; python -c "import json, hashlib, sys; print(hashlib.sha256(json.load(open('"$MODEL"/tokenizer_config.json'))['chat_template'].encode()).hexdigest())"
 // note that chat templates must be trimmed to match the llama.cpp metadata value
 const derivations = {
     // Meta
-    '93c0e9aa3629bbd77e68dbc0f5621f6e6b23aa8d74b932595cdb8d64684526d7': {
+    'e10ca381b1ccc5cf9db52e371f3b6651576caee0a630b452e2816b2d404d4b65':
         // Meta-Llama-3.1-8B-Instruct
         // Meta-Llama-3.1-70B-Instruct
-        context: 'Llama 3 Instruct',
-        instruct: 'Llama 3 Instruct',
-    },
-    'd82792f95932f1c9cef5c4bd992f171225e3bf8c7b609b4557c9e1ec96be819f': {
+        'Llama 3 Instruct'
+    ,
+    '5816fce10444e03c2e9ee1ef8a4a1ea61ae7e69e438613f3b17b69d0426223a4':
         // Llama-3.2-1B-Instruct
         // Llama-3.2-3B-Instruct
-        context: 'Llama 3 Instruct',
-        instruct: 'Llama 3 Instruct',
-    },
+        'Llama 3 Instruct'
+    ,
+    '73e87b1667d87ab7d7b579107f01151b29ce7f3ccdd1018fdc397e78be76219d':
+        // Nemotron 70B
+        'Llama 3 Instruct'
+    ,
     // Mistral
     // Mistral Reference: https://github.com/mistralai/mistral-common
-    'cafb64e0e9e5fd2503054b3479593fae39cbdfd52338ce8af9bb4664a8eb05bd': {
+    'e16746b40344d6c5b5265988e0328a0bf7277be86f1c335156eae07e29c82826':
         // Mistral-Small-Instruct-2409
         // Mistral-Large-Instruct-2407
-        context: 'Mistral V2 & V3',
-        instruct: 'Mistral V2 & V3',
-    },
-    '3c4ad5fa60dd8c7ccdf82fa4225864c903e107728fcaf859fa6052cb80c92ee9': {
+        'Mistral V2 & V3'
+    ,
+    '3c4ad5fa60dd8c7ccdf82fa4225864c903e107728fcaf859fa6052cb80c92ee9':
         // Mistral-Large-Instruct-2411
-        context: 'Mistral V7', // https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
-        instruct: 'Mistral V7',
-    },
-    'e7deee034838db2bfc7487788a3013d8a307ab69f72f3c54a85f06fd76007d4e': {
+        'Mistral V7' // https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
+    ,
+    'e4676cb56dffea7782fd3e2b577cfaf1e123537e6ef49b3ec7caa6c095c62272':
         // Mistral-Nemo-Instruct-2407
-        context: 'Mistral V3-Tekken',
-        instruct: 'Mistral V3-Tekken',
-    },
-    '26a59556925c987317ce5291811ba3b7f32ec4c647c400c6cc7e3a9993007ba7': {
+        'Mistral V3-Tekken'
+    ,
+    '26a59556925c987317ce5291811ba3b7f32ec4c647c400c6cc7e3a9993007ba7':
         // Mistral-7B-Instruct-v0.3
-        context: 'Mistral V2 & V3',
-        instruct: 'Mistral V2 & V3',
-    },
+        'Mistral V2 & V3'
+    ,
     // Gemma
-    'ecd6ae513fe103f0eb62e8ab5bfa8d0fe45c1074fa398b089c93a7e70c15cfd6': {
+    'ecd6ae513fe103f0eb62e8ab5bfa8d0fe45c1074fa398b089c93a7e70c15cfd6':
         // gemma-2-9b-it
         // gemma-2-27b-it
-        context: 'Gemma 2',
-        instruct: 'Gemma 2',
-    },
+        'Gemma 2'
+    ,
     // Cohere
-    '3b54f5c219ae1caa5c0bb2cdc7c001863ca6807cf888e4240e8739fa7eb9e02e': {
+    '3b54f5c219ae1caa5c0bb2cdc7c001863ca6807cf888e4240e8739fa7eb9e02e':
         // command-r-08-2024
-        context: 'Command R',
-        instruct: 'Command R',
-    },
+        'Command R'
+    ,
 };
 
 export async function deriveTemplatesFromChatTemplate(chat_template, hash) {
     if (hash in derivations) {
-        return derivations[hash];
+        const derivation = derivations[hash];
+        if (typeof derivation === 'string') {
+            return {
+                'context': derivation,
+                'instruct': derivation,
+            };
+        }
+        return derivation;
     }
 
     console.log(`Unknown chat template hash: ${hash} for [${chat_template}]`);
     return null;
 }
diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js
index d8596e913..add810762 100644
--- a/src/endpoints/backends/text-completions.js
+++ b/src/endpoints/backends/text-completions.js
@@ -231,7 +231,7 @@ router.post('/status', jsonParser, async function (request, response) {
     }
 });
 
-router.post('/chat_template', jsonParser, async function (request, response) {
+router.post('/props', jsonParser, async function (request, response) {
     if (!request.body.api_server) return response.sendStatus(400);
 
     try {
@@ -253,13 +253,12 @@ router.post('/chat_template', jsonParser, async function (request, response) {
         /** @type {any} */
         const props = await propsReply.json();
 
-        // TEMPORARY: llama.cpp's /props endpoint includes a \u0000 at the end of the chat template, resulting in mismatching hashes
+        // TEMPORARY: llama.cpp's /props endpoint has a bug which replaces the last newline with a \0
         if (apiType === TEXTGEN_TYPES.LLAMACPP && props['chat_template'].endsWith('\u0000')) {
-            props['chat_template'] = props['chat_template'].slice(0, -1);
+            props['chat_template'] = props['chat_template'].slice(0, -1) + '\n';
         }
-        props['chat_template'] = props['chat_template'].trim();
         props['chat_template_hash'] = createHash('sha256').update(props['chat_template']).digest('hex');
-        console.log(`We have chat template stuff: ${JSON.stringify(props)}`);
+        console.log(`We have props: ${JSON.stringify(props)}`);
         return response.send(props);
     } catch (error) {
         console.error(error);
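
Reviewer note (not part of the patch): a minimal Node sketch of how the updated hashes above can be reproduced locally, mirroring both the Python one-liner in the header comment and the createHash call in text-completions.js. The tokenizer_config.json path is an assumption; note the template is hashed untrimmed, consistent with the removal of .strip()/.trim() in this diff.

import { createHash } from 'node:crypto';
import { readFileSync } from 'node:fs';

// Hash the raw chat template exactly as the /props endpoint now does:
// no trimming, so templates with trailing whitespace hash differently
// than before (hence the new keys in the derivations table).
const config = JSON.parse(readFileSync('tokenizer_config.json', 'utf8'));
const hash = createHash('sha256').update(config.chat_template).digest('hex');
console.log(hash); // compare against the keys of `derivations`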