mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-23 15:37:50 +01:00
#371 Add llama.cpp inference server support
This commit is contained in:
parent
6e8104873e
commit
edd737e8bd
39
public/img/llamacpp.svg
Normal file
39
public/img/llamacpp.svg
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<svg
|
||||||
|
version="1.0"
|
||||||
|
width="350.95343pt"
|
||||||
|
height="433.92468pt"
|
||||||
|
viewBox="0 0 350.95343 433.92468"
|
||||||
|
preserveAspectRatio="xMidYMid"
|
||||||
|
id="svg3"
|
||||||
|
sodipodi:docname="llamacpp.svg"
|
||||||
|
inkscape:version="1.3 (0e150ed, 2023-07-21)"
|
||||||
|
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg">
|
||||||
|
<defs
|
||||||
|
id="defs3" />
|
||||||
|
<sodipodi:namedview
|
||||||
|
id="namedview3"
|
||||||
|
pagecolor="#ffffff"
|
||||||
|
bordercolor="#000000"
|
||||||
|
borderopacity="0.25"
|
||||||
|
inkscape:showpageshadow="2"
|
||||||
|
inkscape:pageopacity="0.0"
|
||||||
|
inkscape:pagecheckerboard="true"
|
||||||
|
inkscape:deskcolor="#d1d1d1"
|
||||||
|
inkscape:document-units="pt"
|
||||||
|
inkscape:zoom="0.61795062"
|
||||||
|
inkscape:cx="360.87026"
|
||||||
|
inkscape:cy="319.60482"
|
||||||
|
inkscape:window-width="1280"
|
||||||
|
inkscape:window-height="688"
|
||||||
|
inkscape:window-x="0"
|
||||||
|
inkscape:window-y="25"
|
||||||
|
inkscape:window-maximized="1"
|
||||||
|
inkscape:current-layer="svg3" />
|
||||||
|
<path
|
||||||
|
id="path15"
|
||||||
|
d="M 115.66411,7.7769089e-4 C 108.03646,-0.04647231 97.66356,2.0614588 89.22605,5.7471588 51.629188,22.170371 29.279858,72.255744 26.302778,146.75404 l -1.08171,27.05939 10.19027,-9.11895 c 16.68028,-14.92501 43.7359,-30.80127 65.967952,-38.71307 3.63963,-1.29525 7.39727,-3.00875 8.34819,-3.80665 1.00889,-0.84654 -0.84203,-6.76797 -4.44134,-14.21878 -5.75466,-11.912432 -6.12062,-13.824142 -5.45304,-28.480056 0.68369,-15.00947 1.27807,-16.84384 13.33674,-41.2326 C 128.87131,6.4869918 129.50802,4.3066778 123.92323,1.4548327 122.03009,0.48812169 119.13122,0.02222669 115.66411,7.7769089e-4 Z M 204.3319,24.868452 c -7.90831,-0.07627 -17.36177,1.199451 -23.54292,3.870384 -18.58511,8.030767 -38.06958,36.609918 -47.25132,69.305902 -2.22908,7.937702 -4.5161,15.970742 -5.08401,17.852392 -0.86974,2.88178 -0.32873,3.22525 3.43601,2.17653 2.45813,-0.68477 18.29522,-1.73488 35.1935,-2.33437 16.89826,-0.59952 30.72354,-1.40131 30.72354,-1.78192 0,-0.38061 -1.78758,-5.74168 -3.97051,-11.9117 -6.54342,-18.495036 -4.8829,-25.966506 11.1988,-50.400166 7.46265,-11.33831 13.56896,-21.480943 13.56896,-22.542378 0,-2.73047 -6.36368,-4.158497 -14.27205,-4.234674 z M 168.50212,145.23018 c -45.12449,0.0128 -76.75805,10.98462 -110.460932,38.31236 -22.62195,18.34285 -45.99259,54.10069 -54.3650997,83.1786 -4.94441,17.17201 -4.88874,65.42308 0.0924,79.37804 16.4963297,46.21663 57.3528097,79.08349 107.4639617,86.44794 32.21284,4.73407 74.8601,-2.95259 109.24245,-19.68893 l 7.20925,-3.50917 -4.64502,-17.64293 c -2.55479,-9.70397 -5.46337,-20.62804 -6.46485,-24.27571 l -1.82292,-6.63282 -14.30391,6.30496 c -22.86829,10.08133 -41.37356,13.8047 -63.89044,12.8558 -13.70887,-0.57772 -22.19455,-1.94878 -30.04268,-4.85697 -14.96555,-5.54563 -31.436082,-20.30658 -37.827792,-33.90468 -16.63575,-35.39192 -7.26602,-83.4333 21.984032,-112.712 34.5434,-34.57726 78.91103,-41.04325 127.6377,-18.6022 9.71534,4.47445 18.40283,7.701 19.30836,7.16708 1.84426,-1.08761 26.365,-41.92583 26.365,-43.91001 0,-1.77105 -17.98211,-11.91179 -29.15193,-16.43783 -20.81281,-8.43331 -38.421,-11.4793 -66.32745,-11.47153 z m -4.7277,92.6254 v 17.13902 17.13905 h -17.96261 -17.96264 v 15.33281 15.33588 h 17.96264 17.96261 v 17.13903 17.13599 h 16.06964 16.07283 v -17.13599 -17.13903 h 17.01451 17.0178 V 287.46646 272.13365 H 212.9314 195.91689 V 254.9946 237.85558 h -16.07283 z m 121.00426,0 v 17.13902 17.13905 h -17.95945 -17.96254 v 15.33281 15.33588 h 17.96254 17.95945 v 17.13903 17.13599 h 15.12793 15.12482 v -17.13599 -17.13903 h 17.96254 17.95945 V 287.46646 272.13365 H 332.99397 315.03143 V 254.9946 237.85558 h -15.12482 z" />
|
||||||
|
</svg>
|
After Width: | Height: | Size: 3.7 KiB |
@ -1738,6 +1738,7 @@
|
|||||||
<option value="aphrodite">Aphrodite</option>
|
<option value="aphrodite">Aphrodite</option>
|
||||||
<option value="tabby">TabbyAPI</option>
|
<option value="tabby">TabbyAPI</option>
|
||||||
<option value="koboldcpp">KoboldCpp</option>
|
<option value="koboldcpp">KoboldCpp</option>
|
||||||
|
<option value="llamacpp">llama.cpp</option>
|
||||||
<option value="togetherai">TogetherAI</option>
|
<option value="togetherai">TogetherAI</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
@ -1817,6 +1818,18 @@
|
|||||||
<input id="aphrodite_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="aphrodite">
|
<input id="aphrodite_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="aphrodite">
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div data-tg-type="llamacpp">
|
||||||
|
<div class="flex-container flexFlowColumn">
|
||||||
|
<a href="https://github.com/ggerganov/llama.cpp" target="_blank">
|
||||||
|
ggerganov/llama.cpp (inference server)
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
|
<div class="flex1">
|
||||||
|
<h4 data-i18n="API url">API URL</h4>
|
||||||
|
<small data-i18n="Example: http://127.0.0.1:8080">Example: http://127.0.0.1:8080</small>
|
||||||
|
<input id="llamacpp_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="llamacpp">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div data-tg-type="tabby">
|
<div data-tg-type="tabby">
|
||||||
<div class="flex-container flexFlowColumn">
|
<div class="flex-container flexFlowColumn">
|
||||||
<a href="https://github.com/theroyallab/tabbyAPI" target="_blank">
|
<a href="https://github.com/theroyallab/tabbyAPI" target="_blank">
|
||||||
@ -1854,7 +1867,7 @@
|
|||||||
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,aphrodite,tabby,koboldcpp">Connect</div>
|
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,aphrodite,tabby,koboldcpp">Connect</div>
|
||||||
<div class="api_loading menu_button" data-i18n="Cancel">Cancel</div>
|
<div class="api_loading menu_button" data-i18n="Cancel">Cancel</div>
|
||||||
</div>
|
</div>
|
||||||
<label class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
|
<label data-tg-type="ooba,aphrodite" class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
|
||||||
<input type="checkbox" id="legacy_api_textgenerationwebui" />
|
<input type="checkbox" id="legacy_api_textgenerationwebui" />
|
||||||
<span data-i18n="Legacy API (pre-OAI, no streaming)">Legacy API (pre-OAI, no streaming)</span>
|
<span data-i18n="Legacy API (pre-OAI, no streaming)">Legacy API (pre-OAI, no streaming)</span>
|
||||||
</label>
|
</label>
|
||||||
|
@ -22,7 +22,7 @@ import {
|
|||||||
getTextGenServer,
|
getTextGenServer,
|
||||||
} from './scripts/textgen-settings.js';
|
} from './scripts/textgen-settings.js';
|
||||||
|
|
||||||
const { MANCER, TOGETHERAI } = textgen_types;
|
const { MANCER, TOGETHERAI, OOBA, APHRODITE } = textgen_types;
|
||||||
|
|
||||||
import {
|
import {
|
||||||
world_info,
|
world_info,
|
||||||
@ -945,10 +945,7 @@ async function getStatusTextgen() {
|
|||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
api_server: endpoint,
|
api_server: endpoint,
|
||||||
api_type: textgen_settings.type,
|
api_type: textgen_settings.type,
|
||||||
legacy_api:
|
legacy_api: textgen_settings.legacy_api && (textgen_settings.type === OOBA || textgen_settings.type === APHRODITE),
|
||||||
textgen_settings.legacy_api &&
|
|
||||||
textgen_settings.type !== MANCER &&
|
|
||||||
textgen_settings.type !== TOGETHERAI,
|
|
||||||
}),
|
}),
|
||||||
signal: abortStatusCheck.signal,
|
signal: abortStatusCheck.signal,
|
||||||
});
|
});
|
||||||
@ -2960,9 +2957,8 @@ async function Generate(type, { automatic_trigger, force_name2, quiet_prompt, qu
|
|||||||
if (main_api === 'textgenerationwebui' &&
|
if (main_api === 'textgenerationwebui' &&
|
||||||
textgen_settings.streaming &&
|
textgen_settings.streaming &&
|
||||||
textgen_settings.legacy_api &&
|
textgen_settings.legacy_api &&
|
||||||
textgen_settings.type !== MANCER &&
|
(textgen_settings.type === OOBA || textgen_settings.type === APHRODITE)) {
|
||||||
textgen_settings.type !== TOGETHERAI) {
|
toastr.error('Streaming is not supported for the Legacy API. Update Ooba and use new API to enable streaming.', undefined, { timeOut: 10000, preventDuplicates: true });
|
||||||
toastr.error('Streaming is not supported for the Legacy API. Update Ooba and use --extensions openai to enable streaming.', undefined, { timeOut: 10000, preventDuplicates: true });
|
|
||||||
unblockGeneration();
|
unblockGeneration();
|
||||||
return Promise.resolve();
|
return Promise.resolve();
|
||||||
}
|
}
|
||||||
@ -4460,6 +4456,11 @@ function extractTitleFromData(data) {
|
|||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts the message from the response data.
|
||||||
|
* @param {object} data Response data
|
||||||
|
* @returns {string} Extracted message
|
||||||
|
*/
|
||||||
function extractMessageFromData(data) {
|
function extractMessageFromData(data) {
|
||||||
switch (main_api) {
|
switch (main_api) {
|
||||||
case 'kobold':
|
case 'kobold':
|
||||||
@ -4467,7 +4468,7 @@ function extractMessageFromData(data) {
|
|||||||
case 'koboldhorde':
|
case 'koboldhorde':
|
||||||
return data.text;
|
return data.text;
|
||||||
case 'textgenerationwebui':
|
case 'textgenerationwebui':
|
||||||
return data.choices[0].text;
|
return data.choices?.[0]?.text ?? data.content;
|
||||||
case 'novel':
|
case 'novel':
|
||||||
return data.output;
|
return data.output;
|
||||||
case 'openai':
|
case 'openai':
|
||||||
@ -5738,6 +5739,7 @@ async function getSettings() {
|
|||||||
$('#textgenerationwebui_api_url_text').val(api_server_textgenerationwebui);
|
$('#textgenerationwebui_api_url_text').val(api_server_textgenerationwebui);
|
||||||
$('#aphrodite_api_url_text').val(api_server_textgenerationwebui);
|
$('#aphrodite_api_url_text').val(api_server_textgenerationwebui);
|
||||||
$('#tabby_api_url_text').val(api_server_textgenerationwebui);
|
$('#tabby_api_url_text').val(api_server_textgenerationwebui);
|
||||||
|
$('#llamacpp_api_url_text').val(api_server_textgenerationwebui);
|
||||||
$('#koboldcpp_api_url_text').val(api_server_textgenerationwebui);
|
$('#koboldcpp_api_url_text').val(api_server_textgenerationwebui);
|
||||||
|
|
||||||
selected_button = settings.selected_button;
|
selected_button = settings.selected_button;
|
||||||
@ -7502,6 +7504,11 @@ async function connectAPISlash(_, text) {
|
|||||||
button: '#api_button_textgenerationwebui',
|
button: '#api_button_textgenerationwebui',
|
||||||
type: textgen_types.TABBY,
|
type: textgen_types.TABBY,
|
||||||
},
|
},
|
||||||
|
'llamacpp': {
|
||||||
|
selected: 'textgenerationwebui',
|
||||||
|
button: '#api_button_textgenerationwebui',
|
||||||
|
type: textgen_types.LLAMACPP,
|
||||||
|
},
|
||||||
'mancer': {
|
'mancer': {
|
||||||
selected: 'textgenerationwebui',
|
selected: 'textgenerationwebui',
|
||||||
button: '#api_button_textgenerationwebui',
|
button: '#api_button_textgenerationwebui',
|
||||||
@ -7848,7 +7855,7 @@ jQuery(async function () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
registerSlashCommand('dupe', DupeChar, [], '– duplicates the currently selected character', true, true);
|
registerSlashCommand('dupe', DupeChar, [], '– duplicates the currently selected character', true, true);
|
||||||
registerSlashCommand('api', connectAPISlash, [], '<span class="monospace">(kobold, horde, novel, ooba, tabby, mancer, aphrodite, kcpp, oai, claude, windowai, openrouter, scale, ai21, makersuite, mistralai, togetherai)</span> – connect to an API', true, true);
|
registerSlashCommand('api', connectAPISlash, [], '<span class="monospace">(kobold, horde, novel, ooba, tabby, mancer, aphrodite, kcpp, oai, claude, windowai, openrouter, scale, ai21, makersuite, mistralai, togetherai, llamacpp)</span> – connect to an API', true, true);
|
||||||
registerSlashCommand('impersonate', doImpersonate, ['imp'], '– calls an impersonation response', true, true);
|
registerSlashCommand('impersonate', doImpersonate, ['imp'], '– calls an impersonation response', true, true);
|
||||||
registerSlashCommand('delchat', doDeleteChat, [], '– deletes the current chat', true, true);
|
registerSlashCommand('delchat', doDeleteChat, [], '– deletes the current chat', true, true);
|
||||||
registerSlashCommand('closechat', doCloseChat, [], '– closes the current chat', true, true);
|
registerSlashCommand('closechat', doCloseChat, [], '– closes the current chat', true, true);
|
||||||
|
@ -36,6 +36,7 @@ import {
|
|||||||
import { registerSlashCommand } from './slash-commands.js';
|
import { registerSlashCommand } from './slash-commands.js';
|
||||||
import { tags } from './tags.js';
|
import { tags } from './tags.js';
|
||||||
import { tokenizers } from './tokenizers.js';
|
import { tokenizers } from './tokenizers.js';
|
||||||
|
import { BIAS_CACHE } from './logit-bias.js';
|
||||||
|
|
||||||
import { countOccurrences, debounce, delay, isOdd, resetScrollHeight, shuffle, sortMoments, stringToRange, timestampToMoment } from './utils.js';
|
import { countOccurrences, debounce, delay, isOdd, resetScrollHeight, shuffle, sortMoments, stringToRange, timestampToMoment } from './utils.js';
|
||||||
|
|
||||||
@ -2932,6 +2933,7 @@ $(document).ready(() => {
|
|||||||
$('#tokenizer').on('change', function () {
|
$('#tokenizer').on('change', function () {
|
||||||
const value = $(this).find(':selected').val();
|
const value = $(this).find(':selected').val();
|
||||||
power_user.tokenizer = Number(value);
|
power_user.tokenizer = Number(value);
|
||||||
|
BIAS_CACHE.clear();
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
|
|
||||||
// Trigger character editor re-tokenize
|
// Trigger character editor re-tokenize
|
||||||
|
@ -33,9 +33,10 @@ export const textgen_types = {
|
|||||||
TABBY: 'tabby',
|
TABBY: 'tabby',
|
||||||
KOBOLDCPP: 'koboldcpp',
|
KOBOLDCPP: 'koboldcpp',
|
||||||
TOGETHERAI: 'togetherai',
|
TOGETHERAI: 'togetherai',
|
||||||
|
LLAMACPP: 'llamacpp',
|
||||||
};
|
};
|
||||||
|
|
||||||
const { MANCER, APHRODITE, TOGETHERAI } = textgen_types;
|
const { MANCER, APHRODITE, TOGETHERAI, OOBA } = textgen_types;
|
||||||
const BIAS_KEY = '#textgenerationwebui_api-settings';
|
const BIAS_KEY = '#textgenerationwebui_api-settings';
|
||||||
|
|
||||||
// Maybe let it be configurable in the future?
|
// Maybe let it be configurable in the future?
|
||||||
@ -166,6 +167,7 @@ async function selectPreset(name) {
|
|||||||
setSettingByName(name, value, true);
|
setSettingByName(name, value, true);
|
||||||
}
|
}
|
||||||
setGenerationParamsFromPreset(preset);
|
setGenerationParamsFromPreset(preset);
|
||||||
|
BIAS_CACHE.delete(BIAS_KEY);
|
||||||
displayLogitBias(preset.logit_bias, BIAS_KEY);
|
displayLogitBias(preset.logit_bias, BIAS_KEY);
|
||||||
saveSettingsDebounced();
|
saveSettingsDebounced();
|
||||||
}
|
}
|
||||||
@ -311,6 +313,7 @@ function loadTextGenSettings(data, loadedSettings) {
|
|||||||
|
|
||||||
$('#textgen_type').val(settings.type);
|
$('#textgen_type').val(settings.type);
|
||||||
showTypeSpecificControls(settings.type);
|
showTypeSpecificControls(settings.type);
|
||||||
|
BIAS_CACHE.delete(BIAS_KEY);
|
||||||
displayLogitBias(settings.logit_bias, BIAS_KEY);
|
displayLogitBias(settings.logit_bias, BIAS_KEY);
|
||||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||||
if (settings.type === textgen_types.APHRODITE) {
|
if (settings.type === textgen_types.APHRODITE) {
|
||||||
@ -343,6 +346,8 @@ export function getTextGenUrlSourceId() {
|
|||||||
return '#tabby_api_url_text';
|
return '#tabby_api_url_text';
|
||||||
case textgen_types.KOBOLDCPP:
|
case textgen_types.KOBOLDCPP:
|
||||||
return '#koboldcpp_api_url_text';
|
return '#koboldcpp_api_url_text';
|
||||||
|
case textgen_types.LLAMACPP:
|
||||||
|
return '#llamacpp_api_url_text';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,6 +420,7 @@ jQuery(function () {
|
|||||||
|
|
||||||
showTypeSpecificControls(type);
|
showTypeSpecificControls(type);
|
||||||
setOnlineStatus('no_connection');
|
setOnlineStatus('no_connection');
|
||||||
|
BIAS_CACHE.delete(BIAS_KEY);
|
||||||
|
|
||||||
$('#main_api').trigger('change');
|
$('#main_api').trigger('change');
|
||||||
$('#api_button_textgenerationwebui').trigger('click');
|
$('#api_button_textgenerationwebui').trigger('click');
|
||||||
@ -463,12 +469,15 @@ jQuery(function () {
|
|||||||
|
|
||||||
function showTypeSpecificControls(type) {
|
function showTypeSpecificControls(type) {
|
||||||
$('[data-tg-type]').each(function () {
|
$('[data-tg-type]').each(function () {
|
||||||
const tgType = $(this).attr('data-tg-type');
|
const tgTypes = $(this).attr('data-tg-type').split(',');
|
||||||
if (tgType == type) {
|
for (const tgType of tgTypes) {
|
||||||
|
if (tgType === type || tgType == 'all') {
|
||||||
$(this).show();
|
$(this).show();
|
||||||
|
return;
|
||||||
} else {
|
} else {
|
||||||
$(this).hide();
|
$(this).hide();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -550,11 +559,11 @@ async function generateTextGenWithStreaming(generate_data, signal) {
|
|||||||
|
|
||||||
let data = JSON.parse(value.data);
|
let data = JSON.parse(value.data);
|
||||||
|
|
||||||
if (data?.choices[0]?.index > 0) {
|
if (data?.choices?.[0]?.index > 0) {
|
||||||
const swipeIndex = data.choices[0].index - 1;
|
const swipeIndex = data.choices[0].index - 1;
|
||||||
swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
|
swipes[swipeIndex] = (swipes[swipeIndex] || '') + data.choices[0].text;
|
||||||
} else {
|
} else {
|
||||||
text += data?.choices[0]?.text || '';
|
text += data?.choices?.[0]?.text || data?.content || '';
|
||||||
}
|
}
|
||||||
|
|
||||||
yield { text, swipes };
|
yield { text, swipes };
|
||||||
@ -585,6 +594,11 @@ function tryParseStreamingError(response, decoded) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a string of comma-separated integers to an array of integers.
|
||||||
|
* @param {string} string Input string
|
||||||
|
* @returns {number[]} Array of integers
|
||||||
|
*/
|
||||||
function toIntArray(string) {
|
function toIntArray(string) {
|
||||||
if (!string) {
|
if (!string) {
|
||||||
return [];
|
return [];
|
||||||
@ -623,7 +637,7 @@ export function getTextGenServer() {
|
|||||||
|
|
||||||
export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, isContinue, cfgValues, type) {
|
export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, isContinue, cfgValues, type) {
|
||||||
const canMultiSwipe = !isContinue && !isImpersonate && type !== 'quiet';
|
const canMultiSwipe = !isContinue && !isImpersonate && type !== 'quiet';
|
||||||
let APIflags = {
|
let params = {
|
||||||
'prompt': finalPrompt,
|
'prompt': finalPrompt,
|
||||||
'model': getModel(),
|
'model': getModel(),
|
||||||
'max_new_tokens': maxTokens,
|
'max_new_tokens': maxTokens,
|
||||||
@ -659,12 +673,10 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||||||
getCustomTokenBans(),
|
getCustomTokenBans(),
|
||||||
'api_type': settings.type,
|
'api_type': settings.type,
|
||||||
'api_server': getTextGenServer(),
|
'api_server': getTextGenServer(),
|
||||||
'legacy_api': settings.legacy_api && settings.type !== MANCER && settings.type !== TOGETHERAI,
|
'legacy_api': settings.legacy_api && (settings.type === OOBA || settings.type === APHRODITE),
|
||||||
'sampler_order': settings.type === textgen_types.KOBOLDCPP ?
|
'sampler_order': settings.type === textgen_types.KOBOLDCPP ? settings.sampler_order : undefined,
|
||||||
settings.sampler_order :
|
|
||||||
undefined,
|
|
||||||
};
|
};
|
||||||
let aphroditeExclusionFlags = {
|
const nonAphroditeParams = {
|
||||||
'repetition_penalty_range': settings.rep_pen_range,
|
'repetition_penalty_range': settings.rep_pen_range,
|
||||||
'encoder_repetition_penalty': settings.encoder_rep_pen,
|
'encoder_repetition_penalty': settings.encoder_rep_pen,
|
||||||
'no_repeat_ngram_size': settings.no_repeat_ngram_size,
|
'no_repeat_ngram_size': settings.no_repeat_ngram_size,
|
||||||
@ -676,7 +688,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||||||
'negative_prompt': cfgValues?.negativePrompt ?? substituteParams(settings.negative_prompt) ?? '',
|
'negative_prompt': cfgValues?.negativePrompt ?? substituteParams(settings.negative_prompt) ?? '',
|
||||||
'grammar_string': settings.grammar_string,
|
'grammar_string': settings.grammar_string,
|
||||||
};
|
};
|
||||||
let aphroditeFlags = {
|
const aphroditeParams = {
|
||||||
'n': canMultiSwipe ? settings.n : 1,
|
'n': canMultiSwipe ? settings.n : 1,
|
||||||
'best_of': canMultiSwipe ? settings.n : 1,
|
'best_of': canMultiSwipe ? settings.n : 1,
|
||||||
'ignore_eos': settings.ignore_eos_token_aphrodite,
|
'ignore_eos': settings.ignore_eos_token_aphrodite,
|
||||||
@ -686,17 +698,37 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||||||
//'prompt_logprobs': settings.prompt_log_probs_aphrodite,
|
//'prompt_logprobs': settings.prompt_log_probs_aphrodite,
|
||||||
};
|
};
|
||||||
if (settings.type === textgen_types.APHRODITE) {
|
if (settings.type === textgen_types.APHRODITE) {
|
||||||
APIflags = Object.assign(APIflags, aphroditeFlags);
|
params = Object.assign(params, aphroditeParams);
|
||||||
} else {
|
} else {
|
||||||
APIflags = Object.assign(APIflags, aphroditeExclusionFlags);
|
params = Object.assign(params, nonAphroditeParams);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
|
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
|
||||||
const logitBias = BIAS_CACHE.get(BIAS_KEY) || calculateLogitBias();
|
const logitBias = BIAS_CACHE.get(BIAS_KEY) || calculateLogitBias();
|
||||||
BIAS_CACHE.set(BIAS_KEY, logitBias);
|
BIAS_CACHE.set(BIAS_KEY, logitBias);
|
||||||
APIflags.logit_bias = logitBias;
|
params.logit_bias = logitBias;
|
||||||
}
|
}
|
||||||
|
|
||||||
return APIflags;
|
if (settings.type === textgen_types.LLAMACPP) {
|
||||||
|
// Convert bias and token bans to array of arrays
|
||||||
|
const logitBiasArray = (params.logit_bias && typeof params.logit_bias === 'object' && Object.keys(params.logit_bias).length > 0)
|
||||||
|
? Object.entries(params.logit_bias).map(([key, value]) => [Number(key), value])
|
||||||
|
: [];
|
||||||
|
const tokenBans = toIntArray(getCustomTokenBans());
|
||||||
|
logitBiasArray.push(...tokenBans.map(x => [Number(x), false]));
|
||||||
|
const llamaCppParams = {
|
||||||
|
'repeat_penalty': settings.rep_pen,
|
||||||
|
'tfs_z': settings.tfs,
|
||||||
|
'repeat_last_n': settings.rep_pen_range,
|
||||||
|
'n_predict': settings.maxTokens,
|
||||||
|
'mirostat': settings.mirostat_mode,
|
||||||
|
'ignore_eos': settings.ban_eos_token,
|
||||||
|
'grammar': settings.grammar_string,
|
||||||
|
'logit_bias': logitBiasArray,
|
||||||
|
};
|
||||||
|
params = Object.assign(params, llamaCppParams);
|
||||||
|
}
|
||||||
|
|
||||||
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,7 +6,7 @@ import { getStringHash } from './utils.js';
|
|||||||
import { kai_flags } from './kai-settings.js';
|
import { kai_flags } from './kai-settings.js';
|
||||||
import { textgen_types, textgenerationwebui_settings as textgen_settings } from './textgen-settings.js';
|
import { textgen_types, textgenerationwebui_settings as textgen_settings } from './textgen-settings.js';
|
||||||
|
|
||||||
const { OOBA, TABBY, KOBOLDCPP, MANCER, TOGETHERAI } = textgen_types;
|
const { OOBA, TABBY, KOBOLDCPP, APHRODITE, LLAMACPP } = textgen_types;
|
||||||
|
|
||||||
export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
|
||||||
const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';
|
const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';
|
||||||
@ -190,7 +190,7 @@ export function getTokenizerBestMatch(forApi) {
|
|||||||
// - Tokenizer haven't reported an error previously
|
// - Tokenizer haven't reported an error previously
|
||||||
const hasTokenizerError = sessionStorage.getItem(TOKENIZER_WARNING_KEY);
|
const hasTokenizerError = sessionStorage.getItem(TOKENIZER_WARNING_KEY);
|
||||||
const isConnected = online_status !== 'no_connection';
|
const isConnected = online_status !== 'no_connection';
|
||||||
const isTokenizerSupported = textgen_settings.type === OOBA || textgen_settings.type === TABBY || textgen_settings.type === KOBOLDCPP;
|
const isTokenizerSupported = [OOBA, TABBY, KOBOLDCPP, LLAMACPP].includes(textgen_settings.type);
|
||||||
|
|
||||||
if (!hasTokenizerError && isConnected) {
|
if (!hasTokenizerError && isConnected) {
|
||||||
if (forApi === 'kobold' && kai_flags.can_use_tokenization) {
|
if (forApi === 'kobold' && kai_flags.can_use_tokenization) {
|
||||||
@ -538,10 +538,7 @@ function getTextgenAPITokenizationParams(str) {
|
|||||||
text: str,
|
text: str,
|
||||||
api_type: textgen_settings.type,
|
api_type: textgen_settings.type,
|
||||||
url: api_server_textgenerationwebui,
|
url: api_server_textgenerationwebui,
|
||||||
legacy_api:
|
legacy_api: textgen_settings.legacy_api && (textgen_settings.type === OOBA || textgen_settings.type === APHRODITE),
|
||||||
textgen_settings.legacy_api &&
|
|
||||||
textgen_settings.type !== MANCER &&
|
|
||||||
textgen_settings.type !== TOGETHERAI,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -172,6 +172,7 @@ const TEXTGEN_TYPES = {
|
|||||||
TABBY: 'tabby',
|
TABBY: 'tabby',
|
||||||
KOBOLDCPP: 'koboldcpp',
|
KOBOLDCPP: 'koboldcpp',
|
||||||
TOGETHERAI: 'togetherai',
|
TOGETHERAI: 'togetherai',
|
||||||
|
LLAMACPP: 'llamacpp',
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://docs.together.ai/reference/completions
|
// https://docs.together.ai/reference/completions
|
||||||
|
@ -39,6 +39,7 @@ router.post('/status', jsonParser, async function (request, response) {
|
|||||||
case TEXTGEN_TYPES.OOBA:
|
case TEXTGEN_TYPES.OOBA:
|
||||||
case TEXTGEN_TYPES.APHRODITE:
|
case TEXTGEN_TYPES.APHRODITE:
|
||||||
case TEXTGEN_TYPES.KOBOLDCPP:
|
case TEXTGEN_TYPES.KOBOLDCPP:
|
||||||
|
case TEXTGEN_TYPES.LLAMACPP:
|
||||||
url += '/v1/models';
|
url += '/v1/models';
|
||||||
break;
|
break;
|
||||||
case TEXTGEN_TYPES.MANCER:
|
case TEXTGEN_TYPES.MANCER:
|
||||||
@ -160,6 +161,9 @@ router.post('/generate', jsonParser, async function (request, response_generate)
|
|||||||
case TEXTGEN_TYPES.MANCER:
|
case TEXTGEN_TYPES.MANCER:
|
||||||
url += '/oai/v1/completions';
|
url += '/oai/v1/completions';
|
||||||
break;
|
break;
|
||||||
|
case TEXTGEN_TYPES.LLAMACPP:
|
||||||
|
url += '/completion';
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -622,6 +622,10 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
|
|||||||
url += '/api/extra/tokencount';
|
url += '/api/extra/tokencount';
|
||||||
args.body = JSON.stringify({ 'prompt': text });
|
args.body = JSON.stringify({ 'prompt': text });
|
||||||
break;
|
break;
|
||||||
|
case TEXTGEN_TYPES.LLAMACPP:
|
||||||
|
url += '/tokenize';
|
||||||
|
args.body = JSON.stringify({ 'content': text });
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
url += '/v1/internal/encode';
|
url += '/v1/internal/encode';
|
||||||
args.body = JSON.stringify({ 'text': text });
|
args.body = JSON.stringify({ 'text': text });
|
||||||
@ -637,7 +641,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
|
|||||||
}
|
}
|
||||||
|
|
||||||
const data = await result.json();
|
const data = await result.json();
|
||||||
const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value);
|
const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value ?? data?.tokens?.length);
|
||||||
const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
|
const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
|
||||||
|
|
||||||
return response.send({ count, ids });
|
return response.send({ count, ids });
|
||||||
|
Loading…
x
Reference in New Issue
Block a user