diff --git a/public/script.js b/public/script.js
index f655d6fe1..f7c88cc6a 100644
--- a/public/script.js
+++ b/public/script.js
@@ -267,6 +267,7 @@ import { applyBrowserFixes } from './scripts/browser-fixes.js';
 import { initServerHistory } from './scripts/server-history.js';
 import { initSettingsSearch } from './scripts/setting-search.js';
 import { initBulkEdit } from './scripts/bulk-edit.js';
+import { deriveTemplatesFromChatTemplate } from './scripts/chat-templates.js';
 
 //exporting functions and vars for mods
 export {
@@ -1235,6 +1236,33 @@ async function getStatusTextgen() {
         const supportsTokenization = response.headers.get('x-supports-tokenization') === 'true';
         supportsTokenization ? sessionStorage.setItem(TOKENIZER_SUPPORTED_KEY, 'true') : sessionStorage.removeItem(TOKENIZER_SUPPORTED_KEY);
 
+        const supportsChatTemplate = response.headers.get('x-supports-chat-template') === 'true';
+
+        if (supportsChatTemplate) {
+            // Distinct names on purpose: do not shadow the outer `response`/`data` used below.
+            const templateResponse = await fetch('/api/backends/text-completions/chat_template', {
+                method: 'POST',
+                headers: getRequestHeaders(),
+                body: JSON.stringify({
+                    api_server: endpoint,
+                    api_type: textgen_settings.type,
+                }),
+            });
+
+            const templateData = await templateResponse.json();
+            // Guard: a backend may answer without a chat_template field; split() would throw on undefined.
+            if (templateResponse.ok && templateData && templateData.chat_template) {
+                const chat_template = templateData.chat_template;
+                console.log(`We have chat template ${chat_template.split('\n')[0]}...`);
+                const templates = await deriveTemplatesFromChatTemplate(chat_template);
+                if (templates) {
+                    const { context, instruct } = templates;
+                    selectContextPreset(context, { isAuto: true });
+                    selectInstructPreset(instruct, { isAuto: true });
+                }
+            }
+        }
+
         // We didn't get a 200 status code, but the endpoint has an explanation. Which means it DID connect, but I digress.
         if (online_status === 'no_connection' && data.response) {
             toastr.error(data.response, t`API Error`, { timeOut: 5000, preventDuplicates: true });
         }
diff --git a/public/scripts/chat-templates.js b/public/scripts/chat-templates.js
new file mode 100644
index 000000000..f70f9fc18
--- /dev/null
+++ b/public/scripts/chat-templates.js
@@ -0,0 +1,76 @@
+// https://developer.mozilla.org/en-US/docs/Web/API/SubtleCrypto/digest
+async function digestMessage(message) {
+    const msgUint8 = new TextEncoder().encode(message); // encode as (utf-8) Uint8Array
+    const hashBuffer = await window.crypto.subtle.digest('SHA-256', msgUint8); // hash the message
+    const hashArray = Array.from(new Uint8Array(hashBuffer)); // convert buffer to byte array
+    const hashHex = hashArray
+        .map((b) => b.toString(16).padStart(2, '0'))
+        .join(''); // convert bytes to hex string
+    return hashHex;
+}
+
+// the hash can be obtained from command line e.g. via: MODEL=path_to_model; python -c "import json, hashlib, sys; print(hashlib.sha256(json.load(open('"$MODEL"/tokenizer_config.json'))['chat_template'].strip().encode()).hexdigest())"
+// note that chat templates must be trimmed to match the llama.cpp metadata value
+const derivations = {
+    // Meta
+    '93c0e9aa3629bbd77e68dbc0f5621f6e6b23aa8d74b932595cdb8d64684526d7': {
+        // Meta-Llama-3.1-8B-Instruct
+        // Meta-Llama-3.1-70B-Instruct
+        context: 'Llama 3 Instruct',
+        instruct: 'Llama 3 Instruct',
+    },
+    'd82792f95932f1c9cef5c4bd992f171225e3bf8c7b609b4557c9e1ec96be819f': {
+        // Llama-3.2-1B-Instruct
+        // Llama-3.2-3B-Instruct
+        context: 'Llama 3 Instruct',
+        instruct: 'Llama 3 Instruct',
+    },
+
+    // Mistral
+    // Mistral Reference: https://github.com/mistralai/mistral-common
+    'cafb64e0e9e5fd2503054b3479593fae39cbdfd52338ce8af9bb4664a8eb05bd': {
+        // Mistral-Small-Instruct-2409
+        // Mistral-Large-Instruct-2407
+        context: 'Mistral V2 & V3',
+        instruct: 'Mistral V2 & V3',
+    },
+    '3c4ad5fa60dd8c7ccdf82fa4225864c903e107728fcaf859fa6052cb80c92ee9': {
+        // Mistral-Large-Instruct-2411
+        context: 'Mistral V7', // https://huggingface.co/mistralai/Mistral-Large-Instruct-2411
+        instruct: 'Mistral V7',
+    },
+    'e7deee034838db2bfc7487788a3013d8a307ab69f72f3c54a85f06fd76007d4e': {
+        // Mistral-Nemo-Instruct-2407
+        context: 'Mistral V3-Tekken',
+        instruct: 'Mistral V3-Tekken',
+    },
+    '26a59556925c987317ce5291811ba3b7f32ec4c647c400c6cc7e3a9993007ba7': {
+        // Mistral-7B-Instruct-v0.3
+        context: 'Mistral V2 & V3',
+        instruct: 'Mistral V2 & V3',
+    },
+
+    // Gemma
+    'ecd6ae513fe103f0eb62e8ab5bfa8d0fe45c1074fa398b089c93a7e70c15cfd6': {
+        // gemma-2-9b-it
+        // gemma-2-27b-it
+        context: 'Gemma 2',
+        instruct: 'Gemma 2',
+    },
+
+    // Cohere
+    '3b54f5c219ae1caa5c0bb2cdc7c001863ca6807cf888e4240e8739fa7eb9e02e': {
+        // command-r-08-2024
+        context: 'Command R',
+        instruct: 'Command R',
+    },
+};
+
+export async function deriveTemplatesFromChatTemplate(chat_template) {
+    const hash = await digestMessage(chat_template);
+    if (hash in derivations) {
+        return derivations[hash];
+    }
+    console.log(`Unknown chat template hash: ${hash}`);
+    return null;
+}
diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js
index 9a3dac2ce..1c5bdb75a 100644
--- a/src/endpoints/backends/text-completions.js
+++ b/src/endpoints/backends/text-completions.js
@@ -218,6 +218,18 @@ router.post('/status', jsonParser, async function (request, response) {
             } catch (error) {
                 console.error(`Failed to get TabbyAPI model info: ${error}`);
             }
+        } else if (apiType === TEXTGEN_TYPES.KOBOLDCPP) {
+            try {
+                const chatTemplateUrl = baseUrl + '/api/extra/chat_template';
+                const chatTemplateReply = await fetch(chatTemplateUrl);
+                if (chatTemplateReply.ok) {
+                    response.setHeader('x-supports-chat-template', 'true');
+                } else {
+                    console.log(`Chat template probe returned status ${chatTemplateReply.status}`);
+                }
+            } catch (error) {
+                console.error(`Failed to fetch chat template info: ${error}`);
+            }
         }
 
         return response.send({ result, data: data.data });
@@ -227,6 +239,34 @@ router.post('/status', jsonParser, async function (request, response) {
     }
 });
 
+router.post('/chat_template', jsonParser, async function (request, response) {
+    if (!request.body.api_server) return response.sendStatus(400);
+
+    try {
+        const baseUrl = trimV1(request.body.api_server);
+        const args = {
+            headers: { 'Content-Type': 'application/json' },
+        };
+
+        setAdditionalHeaders(request, args, baseUrl);
+
+        const chatTemplateUrl = baseUrl + '/api/extra/chat_template';
+        const chatTemplateReply = await fetch(chatTemplateUrl, args);
+
+        if (!chatTemplateReply.ok) {
+            console.log('Chat template endpoint is offline.');
+            return response.sendStatus(400);
+        }
+
+        /** @type {any} */
+        const chatTemplate = await chatTemplateReply.json();
+        return response.send(chatTemplate);
+    } catch (error) {
+        console.error(error);
+        return response.sendStatus(500);
+    }
+});
+
 router.post('/generate', jsonParser, async function (request, response) {
     if (!request.body) return response.sendStatus(400);
 