Closes #3233
Cohee
2024-12-29 20:38:13 +02:00
parent 662f0e9c73
commit 4c7d160d41
10 changed files with 165 additions and 5 deletions

View File

@ -682,7 +682,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt">
<div class="range-block" data-source="openai,claude,windowai,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek">
<div class="range-block-title" data-i18n="Temperature">
Temperature
</div>
@ -695,7 +695,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq,mistralai,nanogpt">
<div class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq,mistralai,nanogpt,deepseek">
<div class="range-block-title" data-i18n="Frequency Penalty">
Frequency Penalty
</div>
@ -708,7 +708,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq,mistralai,nanogpt">
<div class="range-block" data-source="openai,openrouter,custom,cohere,perplexity,groq,mistralai,nanogpt,deepseek">
<div class="range-block-title" data-i18n="Presence Penalty">
Presence Penalty
</div>
@ -734,7 +734,7 @@
</div>
</div>
</div>
<div class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt">
<div class="range-block" data-source="openai,claude,openrouter,ai21,scale,makersuite,mistralai,custom,cohere,perplexity,groq,01ai,nanogpt,deepseek">
<div class="range-block-title" data-i18n="Top P">
Top P
</div>
@ -2679,6 +2679,7 @@
<option value="blockentropy">Block Entropy</option>
<option value="claude">Claude</option>
<option value="cohere">Cohere</option>
<option value="deepseek">DeepSeek</option>
<option value="groq">Groq</option>
<option value="makersuite">Google AI Studio</option>
<option value="mistralai">MistralAI</option>
@ -3192,6 +3193,23 @@
</select>
</div>
</div>
<div id="deepseek_form" data-source="deepseek">
<h4 data-i18n="DeepSeek API Key">DeepSeek API Key</h4>
<div class="flex-container">
<input id="api_key_deepseek" name="api_key_deepseek" class="text_pole flex1" value="" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_nanogpt"></div>
</div>
<div data-for="api_key_deepseek" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
For privacy reasons, your API key will be hidden after you reload the page.
</div>
<div>
<h4 data-i18n="DeepSeek Model">DeepSeek Model</h4>
<select id="model_deepseek_select">
<option value="deepseek-chat">deepseek-chat</option>
<option value="deepseek-coder">deepseek-coder</option>
</select>
</div>
</div>
<div id="perplexity_form" data-source="perplexity">
<h4 data-i18n="Perplexity API Key">Perplexity API Key</h4>
<div class="flex-container">
@ -3762,6 +3780,7 @@
<option value="17">Mistral Nemo</option>
<option value="8">Yi</option>
<option value="11">Claude 1/2</option>
<option value="18">DeepSeek V3</option>
<option value="6">API (WebUI / koboldcpp)</option>
</select>
</div>

View File

@ -390,6 +390,7 @@ function RA_autoconnect(PrevApi) {
|| (secret_state[SECRET_KEYS.ZEROONEAI] && oai_settings.chat_completion_source == chat_completion_sources.ZEROONEAI)
|| (secret_state[SECRET_KEYS.BLOCKENTROPY] && oai_settings.chat_completion_source == chat_completion_sources.BLOCKENTROPY)
|| (secret_state[SECRET_KEYS.NANOGPT] && oai_settings.chat_completion_source == chat_completion_sources.NANOGPT)
|| (secret_state[SECRET_KEYS.DEEPSEEK] && oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK)
|| (isValidUrl(oai_settings.custom_url) && oai_settings.chat_completion_source == chat_completion_sources.CUSTOM)
) {
$('#api_button_openai').trigger('click');

View File

@ -183,6 +183,7 @@ export const chat_completion_sources = {
ZEROONEAI: '01ai',
BLOCKENTROPY: 'blockentropy',
NANOGPT: 'nanogpt',
DEEPSEEK: 'deepseek',
};
const character_names_behavior = {
@ -261,6 +262,7 @@ const default_settings = {
nanogpt_model: 'gpt-4o-mini',
zerooneai_model: 'yi-large',
blockentropy_model: 'be-70b-base-llama3.1',
deepseek_model: 'deepseek-chat',
custom_model: '',
custom_url: '',
custom_include_body: '',
@ -339,6 +341,7 @@ const oai_settings = {
nanogpt_model: 'gpt-4o-mini',
zerooneai_model: 'yi-large',
blockentropy_model: 'be-70b-base-llama3.1',
deepseek_model: 'deepseek-chat',
custom_model: '',
custom_url: '',
custom_include_body: '',
@ -1523,6 +1526,8 @@ function getChatCompletionModel() {
return oai_settings.blockentropy_model;
case chat_completion_sources.NANOGPT:
return oai_settings.nanogpt_model;
case chat_completion_sources.DEEPSEEK:
return oai_settings.deepseek_model;
default:
throw new Error(`Unknown chat completion source: ${oai_settings.chat_completion_source}`);
}
@ -1698,6 +1703,24 @@ function saveModelList(data) {
$('#model_nanogpt_select').val(oai_settings.nanogpt_model).trigger('change');
}
if (oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK) {
$('#model_deepseek_select').empty();
model_list.forEach((model) => {
$('#model_deepseek_select').append(
$('<option>', {
value: model.id,
text: model.id,
}));
});
const selectedModel = model_list.find(model => model.id === oai_settings.deepseek_model);
if (model_list.length > 0 && (!selectedModel || !oai_settings.deepseek_model)) {
oai_settings.deepseek_model = model_list[0].id;
}
$('#model_deepseek_select').val(oai_settings.deepseek_model).trigger('change');
}
}
function appendOpenRouterOptions(model_list, groupModels = false, sort = false) {
@ -1838,6 +1861,7 @@ async function sendOpenAIRequest(type, messages, signal) {
const isGroq = oai_settings.chat_completion_source == chat_completion_sources.GROQ;
const is01AI = oai_settings.chat_completion_source == chat_completion_sources.ZEROONEAI;
const isNano = oai_settings.chat_completion_source == chat_completion_sources.NANOGPT;
const isDeepSeek = oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK;
const isTextCompletion = isOAI && textCompletionModels.includes(oai_settings.openai_model);
const isQuiet = type === 'quiet';
const isImpersonate = type === 'impersonate';
@ -1902,7 +1926,7 @@ async function sendOpenAIRequest(type, messages, signal) {
}
// Add logprobs request (OpenAI, custom, and DeepSeek; OpenAI caps it at 5 on their side)
if (useLogprobs && (isOAI || isCustom)) {
if (useLogprobs && (isOAI || isCustom || isDeepSeek)) {
generate_data['logprobs'] = 5;
}
@ -2000,6 +2024,11 @@ async function sendOpenAIRequest(type, messages, signal) {
delete generate_data.stop;
}
// https://api-docs.deepseek.com/api/create-chat-completion
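// Per the docs above, top_p must be > 0, so a zeroed slider falls back to the smallest positive value.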
if (isDeepSeek) {
generate_data.top_p = generate_data.top_p || Number.EPSILON;
}
if ((isOAI || isOpenRouter || isMistral || isCustom || isCohere || isNano) && oai_settings.seed >= 0) {
generate_data['seed'] = oai_settings.seed;
}
@ -2122,6 +2151,7 @@ function parseChatCompletionLogprobs(data) {
switch (oai_settings.chat_completion_source) {
case chat_completion_sources.OPENAI:
case chat_completion_sources.DEEPSEEK:
case chat_completion_sources.CUSTOM:
if (!data.choices?.length) {
return null;
@ -3041,6 +3071,7 @@ function loadOpenAISettings(data, settings) {
oai_settings.perplexity_model = settings.perplexity_model ?? default_settings.perplexity_model;
oai_settings.groq_model = settings.groq_model ?? default_settings.groq_model;
oai_settings.nanogpt_model = settings.nanogpt_model ?? default_settings.nanogpt_model;
oai_settings.deepseek_model = settings.deepseek_model ?? default_settings.deepseek_model;
oai_settings.blockentropy_model = settings.blockentropy_model ?? default_settings.blockentropy_model;
oai_settings.zerooneai_model = settings.zerooneai_model ?? default_settings.zerooneai_model;
oai_settings.custom_model = settings.custom_model ?? default_settings.custom_model;
@ -3122,6 +3153,8 @@ function loadOpenAISettings(data, settings) {
$(`#model_groq_select option[value="${oai_settings.groq_model}"]`).attr('selected', true);
$('#model_nanogpt_select').val(oai_settings.nanogpt_model);
$(`#model_nanogpt_select option[value="${oai_settings.nanogpt_model}"]`).attr('selected', true);
$('#model_deepseek_select').val(oai_settings.deepseek_model);
$(`#model_deepseek_select option[value="${oai_settings.deepseek_model}"]`).prop('selected', true);
$('#model_01ai_select').val(oai_settings.zerooneai_model);
$('#model_blockentropy_select').val(oai_settings.blockentropy_model);
$('#custom_model_id').val(oai_settings.custom_model);
@ -3861,6 +3894,7 @@ function onSettingsPresetChange() {
perplexity_model: ['#model_perplexity_select', 'perplexity_model', false],
groq_model: ['#model_groq_select', 'groq_model', false],
nanogpt_model: ['#model_nanogpt_select', 'nanogpt_model', false],
deepseek_model: ['#model_deepseek_select', 'deepseek_model', false],
zerooneai_model: ['#model_01ai_select', 'zerooneai_model', false],
blockentropy_model: ['#model_blockentropy_select', 'blockentropy_model', false],
custom_model: ['#custom_model_id', 'custom_model', false],
@ -4120,6 +4154,16 @@ async function onModelChange() {
oai_settings.nanogpt_model = value;
}
if ($(this).is('#model_deepseek_select')) {
if (!value) {
console.debug('Null DeepSeek model selected. Ignoring.');
return;
}
console.log('DeepSeek model changed to', value);
oai_settings.deepseek_model = value;
}
if (value && $(this).is('#model_01ai_select')) {
console.log('01.AI model changed to', value);
oai_settings.zerooneai_model = value;
@ -4436,6 +4480,22 @@ async function onModelChange() {
$('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input');
}
if (oai_settings.chat_completion_source === chat_completion_sources.DEEPSEEK) {
if (oai_settings.max_context_unlocked) {
$('#openai_max_context').attr('max', unlocked_max);
} else if (oai_settings.deepseek_model == 'deepseek-chat') {
$('#openai_max_context').attr('max', max_64k);
} else if (oai_settings.deepseek_model == 'deepseek-coder') {
$('#openai_max_context').attr('max', max_16k);
} else {
$('#openai_max_context').attr('max', max_64k);
}
oai_settings.openai_max_context = Math.min(Number($('#openai_max_context').attr('max')), oai_settings.openai_max_context);
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
$('#temp_openai').attr('max', oai_max_temp).val(oai_settings.temp_openai).trigger('input');
}
if (oai_settings.chat_completion_source === chat_completion_sources.COHERE) {
oai_settings.pres_pen_openai = Math.min(Math.max(0, oai_settings.pres_pen_openai), 1);
$('#pres_pen_openai').attr('max', 1).attr('min', 0).val(oai_settings.pres_pen_openai).trigger('input');
@ -4653,6 +4713,19 @@ async function onConnectButtonClick(e) {
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK) {
const api_key_deepseek = String($('#api_key_deepseek').val()).trim();
if (api_key_deepseek.length) {
await writeSecret(SECRET_KEYS.DEEPSEEK, api_key_deepseek);
}
if (!secret_state[SECRET_KEYS.DEEPSEEK]) {
console.log('No secret key saved for DeepSeek');
return;
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.ZEROONEAI) {
const api_key_01ai = String($('#api_key_01ai').val()).trim();
@ -4734,6 +4807,9 @@ function toggleChatCompletionForms() {
else if (oai_settings.chat_completion_source == chat_completion_sources.BLOCKENTROPY) {
$('#model_blockentropy_select').trigger('change');
}
else if (oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK) {
$('#model_deepseek_select').trigger('change');
}
$('[data-source]').each(function () {
const validSources = $(this).data('source').split(',');
$(this).toggle(validSources.includes(oai_settings.chat_completion_source));
@ -5447,6 +5523,7 @@ export function initOpenAI() {
$('#model_perplexity_select').on('change', onModelChange);
$('#model_groq_select').on('change', onModelChange);
$('#model_nanogpt_select').on('change', onModelChange);
$('#model_deepseek_select').on('change', onModelChange);
$('#model_01ai_select').on('change', onModelChange);
$('#model_blockentropy_select').on('change', onModelChange);
$('#model_custom_select').on('change', onModelChange);

View File

@ -39,6 +39,7 @@ export const SECRET_KEYS = {
TAVILY: 'api_key_tavily',
BFL: 'api_key_bfl',
GENERIC: 'api_key_generic',
DEEPSEEK: 'api_key_deepseek',
};
const INPUT_MAP = {
@ -73,6 +74,7 @@ const INPUT_MAP = {
[SECRET_KEYS.BLOCKENTROPY]: '#api_key_blockentropy',
[SECRET_KEYS.NANOGPT]: '#api_key_nanogpt',
[SECRET_KEYS.GENERIC]: '#api_key_generic',
[SECRET_KEYS.DEEPSEEK]: '#api_key_deepseek',
};
async function clearSecret() {

View File

@ -3815,6 +3815,7 @@ function getModelOptions(quiet) {
{ id: 'model_nanogpt_select', api: 'openai', type: chat_completion_sources.NANOGPT },
{ id: 'model_01ai_select', api: 'openai', type: chat_completion_sources.ZEROONEAI },
{ id: 'model_blockentropy_select', api: 'openai', type: chat_completion_sources.BLOCKENTROPY },
{ id: 'model_deepseek_select', api: 'openai', type: chat_completion_sources.DEEPSEEK },
{ id: 'model_novel_select', api: 'novel', type: null },
{ id: 'horde_model', api: 'koboldhorde', type: null },
];

View File

@ -31,6 +31,7 @@ export const tokenizers = {
QWEN2: 15,
COMMAND_R: 16,
NEMO: 17,
DEEPSEEK: 18,
BEST_MATCH: 99,
MANUAL_SELECTION: 411,
};
@ -46,6 +47,7 @@ export const ENCODE_TOKENIZERS = [
tokenizers.QWEN2,
tokenizers.COMMAND_R,
tokenizers.NEMO,
tokenizers.DEEPSEEK,
// uncomment when NovelAI releases Kayra and Clio weights, lol
//tokenizers.NERD,
//tokenizers.NERD2,
@ -133,6 +135,11 @@ const TOKENIZER_URLS = {
decode: '/api/tokenizers/nemo/decode',
count: '/api/tokenizers/nemo/encode',
},
[tokenizers.DEEPSEEK]: {
encode: '/api/tokenizers/deepseek/encode',
decode: '/api/tokenizers/deepseek/decode',
count: '/api/tokenizers/deepseek/encode',
},
[tokenizers.API_TEXTGENERATIONWEBUI]: {
encode: '/api/tokenizers/remote/textgenerationwebui/encode',
count: '/api/tokenizers/remote/textgenerationwebui/encode',
@ -551,12 +558,17 @@ export function getTokenizerModel() {
const qwen2Tokenizer = 'qwen2';
const commandRTokenizer = 'command-r';
const nemoTokenizer = 'nemo';
const deepseekTokenizer = 'deepseek';
// Assuming no one would use it for different models.. right?
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
return gpt4Tokenizer;
}
if (oai_settings.chat_completion_source == chat_completion_sources.DEEPSEEK) {
return deepseekTokenizer;
}
// Select correct tokenizer for WindowAI proxies
if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI && oai_settings.windowai_model) {
if (oai_settings.windowai_model.includes('gpt-4')) {
@ -619,6 +631,9 @@ export function getTokenizerModel() {
else if (oai_settings.openrouter_model.includes('jamba')) {
return jambaTokenizer;
}
else if (oai_settings.openrouter_model.includes('deepseek')) {
return deepseekTokenizer;
}
}
if (oai_settings.chat_completion_source == chat_completion_sources.COHERE) {

View File

@ -175,6 +175,7 @@ export const CHAT_COMPLETION_SOURCES = {
ZEROONEAI: '01ai',
BLOCKENTROPY: 'blockentropy',
NANOGPT: 'nanogpt',
DEEPSEEK: 'deepseek',
};
/**

View File

@ -51,6 +51,7 @@ const API_01AI = 'https://api.01.ai/v1';
const API_BLOCKENTROPY = 'https://api.blockentropy.ai/v1';
const API_AI21 = 'https://api.ai21.com/studio/v1';
const API_NANOGPT = 'https://nano-gpt.com/api/v1';
const API_DEEPSEEK = 'https://api.deepseek.com/beta';
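// Note: the /beta base path enables chat prefix completion (the assistant 'prefix' flag set in postProcessPrompt below).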
/**
* Applies a post-processing step to the generated messages.
@ -66,6 +67,8 @@ function postProcessPrompt(messages, type, names) {
return mergeMessages(messages, names, false);
case 'strict':
return mergeMessages(messages, names, true);
case 'deepseek': {
    // DeepSeek's beta API continues a trailing assistant message when it carries
    // `prefix: true` (chat prefix completion), so flag it after merging.
    const processed = mergeMessages(messages, names, true);
    const last = processed[processed.length - 1];
    if (last?.role === 'assistant') last.prefix = true;
    return processed;
}
default:
return messages;
}
@ -668,6 +671,10 @@ router.post('/status', jsonParser, async function (request, response_getstatus_o
api_url = API_NANOGPT;
api_key_openai = readSecret(request.user.directories, SECRET_KEYS.NANOGPT);
headers = {};
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.DEEPSEEK) {
api_url = API_DEEPSEEK;
api_key_openai = readSecret(request.user.directories, SECRET_KEYS.DEEPSEEK);
headers = {};
} else {
console.log('This chat completion source is not supported yet.');
return response_getstatus_openai.status(400).send({ error: true });
@ -939,6 +946,18 @@ router.post('/generate', jsonParser, function (request, response) {
apiKey = readSecret(request.user.directories, SECRET_KEYS.BLOCKENTROPY);
headers = {};
bodyParams = {};
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.DEEPSEEK) {
apiUrl = API_DEEPSEEK;
apiKey = readSecret(request.user.directories, SECRET_KEYS.DEEPSEEK);
headers = {};
bodyParams = {};
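// DeepSeek uses the newer OpenAI logprobs shape: a boolean 'logprobs' plus an integer 'top_logprobs'.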
if (request.body.logprobs > 0) {
bodyParams['top_logprobs'] = request.body.logprobs;
bodyParams['logprobs'] = true;
}
request.body.messages = postProcessPrompt(request.body.messages, 'deepseek', getPromptNames(request));
} else {
console.log('This chat completion source is not supported yet.');
return response.status(400).send({ error: true });

View File

@ -51,6 +51,7 @@ export const SECRET_KEYS = {
NANOGPT: 'api_key_nanogpt',
BFL: 'api_key_bfl',
GENERIC: 'api_key_generic',
DEEPSEEK: 'api_key_deepseek',
};
// These are the keys that are safe to expose, even if allowKeysExposure is false

View File

@ -226,6 +226,7 @@ const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
const commandTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/command-r.json', 'src/tokenizers/llama3.json');
const qwen2Tokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/qwen2.json', 'src/tokenizers/llama3.json');
const nemoTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/nemo.json', 'src/tokenizers/llama3.json');
const deepseekTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/deepseek.json', 'src/tokenizers/llama3.json');
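// Second argument mirrors the other entries: a local fallback model in case the remote file can't be fetched.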
export const sentencepieceTokenizers = [
'llama',
@ -407,6 +408,10 @@ export function getTokenizerModel(requestModel) {
return 'yi';
}
if (requestModel.includes('deepseek')) {
return 'deepseek';
}
if (requestModel.includes('gemma') || requestModel.includes('gemini')) {
return 'gemma';
}
@ -655,6 +660,7 @@ router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llam
router.post('/qwen2/encode', jsonParser, createWebTokenizerEncodingHandler(qwen2Tokenizer));
router.post('/command-r/encode', jsonParser, createWebTokenizerEncodingHandler(commandTokenizer));
router.post('/nemo/encode', jsonParser, createWebTokenizerEncodingHandler(nemoTokenizer));
router.post('/deepseek/encode', jsonParser, createWebTokenizerEncodingHandler(deepseekTokenizer));
router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
@ -668,6 +674,7 @@ router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llam
router.post('/qwen2/decode', jsonParser, createWebTokenizerDecodingHandler(qwen2Tokenizer));
router.post('/command-r/decode', jsonParser, createWebTokenizerDecodingHandler(commandTokenizer));
router.post('/nemo/decode', jsonParser, createWebTokenizerDecodingHandler(nemoTokenizer));
router.post('/deepseek/decode', jsonParser, createWebTokenizerDecodingHandler(deepseekTokenizer));
router.post('/openai/encode', jsonParser, async function (req, res) {
try {
@ -723,6 +730,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
return handler(req, res);
}
if (queryModel.includes('deepseek')) {
const handler = createWebTokenizerEncodingHandler(deepseekTokenizer);
return handler(req, res);
}
const model = getTokenizerModel(queryModel);
const handler = createTiktokenEncodingHandler(model);
return handler(req, res);
@ -786,6 +798,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
return handler(req, res);
}
if (queryModel.includes('deepseek')) {
const handler = createWebTokenizerDecodingHandler(deepseekTokenizer);
return handler(req, res);
}
const model = getTokenizerModel(queryModel);
const handler = createTiktokenDecodingHandler(model);
return handler(req, res);
@ -863,6 +880,13 @@ router.post('/openai/count', jsonParser, async function (req, res) {
return res.send({ 'token_count': num_tokens });
}
if (model === 'deepseek') {
const instance = await deepseekTokenizer.get();
if (!instance) throw new Error('Failed to load the DeepSeek tokenizer');
num_tokens = countWebTokenizerTokens(instance, req.body);
return res.send({ 'token_count': num_tokens });
}
const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
const tokensPadding = 3;