Merge Tabby API Loader Ext into Core WIP

This commit is contained in:
RossAscends
2025-05-23 08:53:52 +09:00
parent 6dc59b9fd3
commit 6f51a13590
6 changed files with 837 additions and 34 deletions

View File

@@ -2678,35 +2678,67 @@
<small data-i18n="Example: http://127.0.0.1:5000">Example: http://127.0.0.1:5000</small>
<input id="tabby_api_url_text" class="text_pole wide100p" value="" autocomplete="off" data-server-history="tabby">
</div>
<div class="flex1">
<h4>
<span data-i18n="Tabby Model">Tabby Model</span>
<div class="inline-drawer">
<div class="gap10h5v inline-drawer-header inline-drawer-toggle widthFitContent standoutHeader">
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down"></div>
<strong>Tabby Model Management</strong>
</div>
<div class="inline-drawer-content">
<div class="">
<small>
<b>To switch models:</b>
<ol class="marginTop5">
<li>
<code>inline_model_loading: True</code> must be set in Tabby's config.yml
</li>
<li>
Connect with an admin API key
</li>
</ol>
</small>
</div>
<h4 class="alignItemsCenter flex-container">
<span data-i18n="Tabby Model">Inline Model Swap</span>
</h4>
<small>The model selected here will be loaded and used in the next generation request.</small>
<select id="tabby_model">
<option value="" data-i18n="-- Connect to the API --">
-- Connect to the API --
</option>
</select>
<div class="marginTopBot5">
<i class="fa-solid fa-flask"></i>
<span>Experimental feature. Use at your own risk.</span>
<div class="alignItemsStart flex-container flexFlowColumn">
<h4 data-i18n="Tabby API key" class="marginTop10">
Model Select
<i class="fa-solid fa-flask margin5"></i>
<span class="redOverlayGlow" data-i18n="Experimental feature. Use at your own risk.">Experimental feature. Use at your own risk.</span>
</h4>
<div class="flex-container gap5px">
<div id="tabby_load_model_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Load model" data-i18n="[title]Load model" tabindex="0">
<i class="fa-solid fa-play"></i>
</div>
<div id="tabby_unload_model_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Unload model" data-i18n="[title]Unload model" tabindex="0">
<i class="fa-solid fa-xmark"></i>
</div>
<div id="tabby_reload_model_list_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Refresh model list" data-i18n="[title]Refresh model list" tabindex="0">
<i class="fa-solid fa-arrows-rotate"></i>
</div>
<div id="tabby_parameter_editor_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Open parameter editor" data-i18n="[title]Open parameter editor" tabindex="0">
<i class="fa-solid fa-gear"></i>
</div>
<div id="tabby_download_model" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Download model from HuggingFace" data-i18n="[title]Download model from HuggingFace" tabindex="0">
<i class="fa-solid fa-cloud-arrow-down"></i>
</div>
</div>
<div class="">
<input id="tabby_load_model_list" name="tabby_load_model_list" class="text_pole ui-autocomplete-input" placeholder="Model name here" maxlength="100" size="35" value="" autocomplete="off">
<input id="tabby_load_draft_model_list" name="tabby_load_draft_model_list" class="text_pole ui-autocomplete-input" placeholder="Draft model name here" maxlength="100" size="35" value="" autocomplete="off">
<div id="loading_progress_container" class="progress_container">
<div id="loading_progressbar"></div>
</div>
</div>
<div class="marginTopBot5">
<small>
<i class="fa-solid fa-lightbulb"></i>
&nbsp;
<code>inline_model_loading: True</code>
<span data-i18n="must be set in Tabby's config.yml to switch models.">
must be set in Tabby's config.yml to switch models.
</span>
<b data-i18n="Use an admin API key.">
Use an admin API key.
</b>
</small>
</div>
<div id="tabby_download_model" class="menu_button menu_button_icon">
<i class="fa-solid fa-download"></i>
<span data-i18n="Download">Download</span>
</div>
</div>
</div>

View File

@@ -0,0 +1,528 @@
import { eventSource, event_types, callPopup, getRequestHeaders, online_status, saveSettingsDebounced, settings } from '../script.js';
import { textgen_types, textgenerationwebui_settings, getTextGenServer } from '../scripts/textgen-settings.js';
//import { SECRET_KEYS, readSecretState, findSecret, secret_state } from '../scripts/secrets.js';
import { SmoothEventSourceStream } from '../scripts/sse-stream.js';
// Base path of the HTML templates fetched by this module.
// NOTE(review): the identifier is misspelled ("tempaltes"); it is kept because other code in this file refers to it by this name.
const tempaltesFolderPath = 'scripts/templates/';

// Fallback values for the Tabby model loading parameters.
// NOTE(review): these keys are snake_case, while the load/editor code in this file reads camelCase keys
// (maxSeqLen, cacheSize, gpuSplitAuto, ...). Confirm which naming is intended before relying on these defaults.
const defaultSettings = {
    max_seq_len: 4096,
    cache_size: 'Max Seq Len',
    max_batch_size: 'Auto',
    fasttensors: false,
    rope_scale: 'Auto',
    rope_alpha: 'Auto',
    gpu_split_auto: true,
    gpu_split_value: null,
    cache_mode: 'FP16',
    draft_rope_alpha: 'Auto',
    draft_rope_scale: 'Auto',
    urlOverride: null,
    useProxy: false,
};

// Working parameters object. Starts as a shallow COPY of the defaults so that writing to it
// can never modify `defaultSettings` itself.
let tabbyModelLoadParams = { ...defaultSettings };

// Cached models list
let models = [];
let draftModels = [];

// Maps the cache mode names to the numeric <option> values used by the parameter editor.
// Frozen because it is a read-only lookup table.
const cache_mode = Object.freeze({
    FP16: 0,
    Q4: 1,
    Q6: 2,
    Q8: 3,
});
/**
 * Looks up the first own enumerable key of an object whose value is strictly equal to the given value.
 * @param {object} object Object to search
 * @param {*} value Value to look for (compared with ===)
 * @returns {string|undefined} The matching key, or undefined when no property holds the value
 */
function getKeyByValue(object, value) {
    for (const candidate of Object.keys(object)) {
        if (object[candidate] === value) {
            return candidate;
        }
    }
    return undefined;
}
/**
 * Checks whether the user is connected to a TabbyAPI backend.
 *
 * Both conditions must hold: the API connection is up AND the selected text generation
 * type is Tabby. (The previous `||` reported success while disconnected as long as Tabby
 * was selected, and while connected to any other backend type.)
 *
 * @param {boolean} [logError=true] Show an error toast when the check fails
 * @returns {boolean} True when a TabbyAPI instance is connected
 */
function verifyTabby(logError = true) {
    const isConnected = online_status !== 'no_connection';
    const isTabbySelected = textgenerationwebui_settings.type === textgen_types.TABBY;
    const result = isConnected && isTabbySelected;
    if (!result && logError) {
        toastr.error('TabbyLoader: Please connect to a TabbyAPI instance to use this extension');
    }
    return result;
}
/**
 * Reads the Tabby admin key from the browser's localStorage entry 'Tabby_Admin'.
 * Failures are logged to the console rather than thrown.
 * @returns {Promise<string|null>} The stored key, or a falsy value when none is available
 */
async function getTabbyAuth() {
    let adminKey = null;
    try {
        // TODO: This needs to be removed and integrated with the actual key retrieval process
        adminKey = localStorage.getItem('Tabby_Admin');
        if (!adminKey) {
            console.error('Tabby Admin key not found in localStorage. Trying to fetch from secret state.');
            // Secret-state lookup is not wired in yet: adminKey = await findSecret('api_key_tabby');
            console.warn(adminKey);
            console.error('Tabby Admin key not found. Please make sure allowKeysExposure is true in config.conf and an API key is set for TabbyAPI.');
        }
    } catch (err) {
        console.error(`TabbyLoader: ${err}`);
        console.error('Admin key error: Please make sure allowKeysExposure is true in config.conf and an API key is set for TabbyAPI.');
    }
    return adminKey;
}
/**
 * Fetches the model list from the text-completions status endpoint, caches it in `models`
 * and (re)binds the autocomplete of the model name input to it.
 *
 * @returns {Promise<string[]>} The model ids on success; an empty array when Tabby is not
 * connected, the request fails, or the response has an unexpected shape
 */
export async function fetchTabbyModels() {
    if (!verifyTabby(false)) {
        console.error('TabbyLoader: Could not connect to TabbyAPI');
        return [];
    }

    try {
        const response = await fetch('/api/backends/text-completions/status', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
            }),
        });

        if (!response.ok) {
            console.error(`Model list request failed with a status code of ${response.status}:\n${response.statusText}`);
            return [];
        }

        const payload = await response.json();
        if (!Array.isArray(payload?.data)) {
            console.error('TabbyLoader: Unexpected model list response:', payload);
            return [];
        }

        models = payload.data.map((entry) => entry.id);

        $('#tabby_load_model_list')
            .autocomplete({
                // Read `models` lazily so the suggestions always reflect the latest cached list
                source: (_, respond) => respond(models),
                minLength: 0,
            })
            // Namespaced so that every refresh replaces the handler instead of stacking another one
            .off('focus.tabbyModelList')
            .on('focus.tabbyModelList', function () {
                $(this).autocomplete('search', String($(this).val()));
            });

        return models;
    } catch (error) {
        console.error(error);
        return [];
    }
}
/**
 * Builds the payload for Tabby's model load request from the saved loading parameters.
 * Shows an error and returns null when manual GPU split is selected without a usable split array.
 * @param {string} modelName Name of the main model
 * @param {string} draftModelName Name of the draft model, or '' for none
 * @returns {object|null} Request payload, or null when the parameters are invalid
 */
function buildTabbyLoadBody(modelName, draftModelName) {
    const params = textgenerationwebui_settings?.tabbyModelLoadParams ?? {};
    const body = {
        name: modelName,
        max_seq_len: Number(params.maxSeqLen) || 0,
        cache_size: Number(params.cacheSize) || null,
        max_batch_size: Number(params.maxBatchSize) || null,
        rope_scale: Number(params.ropeScale) || null,
        rope_alpha: Number(params.ropeAlpha) || null,
        // Same default as the parameter editor: auto split unless explicitly disabled
        gpu_split_auto: params.gpuSplitAuto ?? true,
        cache_mode: params.cacheMode,
        fasttensors: params.fasttensors,
    };

    if (draftModelName) {
        // The editor saves these as top-level draft_ropeScale / draft_ropeAlpha
        body.draft = {
            draft_model_name: draftModelName,
            draft_rope_scale: Number(params.draft_ropeScale) || null,
            draft_rope_alpha: Number(params.draft_ropeAlpha) || null,
        };
    }

    if (!body.gpu_split_auto) {
        const gpuSplit = params.gpuSplit;
        if (!Array.isArray(gpuSplit) || gpuSplit.length === 0) {
            console.error(`TabbyLoader: GPU split ${gpuSplit} is invalid. Set to auto or adjust your parameters!`);
            toastr.error('TabbyLoader: Invalid GPU split. Set GPU split to auto or adjust your parameters');
            return null;
        }
        body.gpu_split = gpuSplit;
    }

    return body;
}

/** Sets the loading progress bar back to zero and hides its fill element. */
function resetTabbyProgressBar() {
    $('#loading_progressbar').progressbar('value', 0);
    $('#loading_progressbar .ui-progressbar-value').css({ display: 'none', width: '0%' });
}

/** Makes sure the progress bar widget exists, has a single fill element and starts empty. */
function prepareTabbyProgressBar() {
    const progressBar = $('#loading_progressbar');
    if (progressBar.hasClass('ui-progressbar')) {
        progressBar.progressbar('value', 0);
    } else {
        progressBar.progressbar({ value: 0, max: 100 });
    }

    const fill = $('#loading_progressbar .ui-progressbar-value');
    if (fill.length > 1) {
        console.warn('Multiple .ui-progressbar-value elements detected:', fill.length);
        fill.slice(1).remove(); // Keep only the first
    }
    fill.css({ display: 'none', width: '0%' });
}

/**
 * Reads load progress packets until the stream ends and mirrors them in the progress bar.
 * @param {{read: function(): Promise<{value: any, done: boolean}>}} reader Reader of the parsed event stream
 * @param {object} progressContainer jQuery object of the progress bar container
 * @param {number} totalModels Number of models expected to load (2 with a draft model, otherwise 1)
 * @returns {Promise<void>} Resolves once the stream is exhausted
 * @throws {Error} When a packet carries an error
 */
async function streamTabbyLoadProgress(reader, progressContainer, totalModels) {
    let loadedModels = 0;

    while (true) {
        const { value, done } = await reader.read();
        if (done) {
            // Always stop on end-of-stream, even if not every model reported 'finished'
            return;
        }

        let packet = null;
        if (value) {
            try {
                packet = JSON.parse(value.data);
            } catch (error) {
                console.error('Failed to parse stream packet:', error, value);
            }
        }

        if (packet) {
            if (packet.error) {
                throw new Error(packet.error.message);
            }

            // parseInt yields NaN (never null/undefined) on bad input, so fall back with ||
            const loadedModules = Number.parseInt(packet.module, 10) || 0;
            const totalModules = Number.parseInt(packet.modules, 10) || 0;
            const percent = totalModules > 0 ? Math.round((loadedModules / totalModules) * 100) : 0;

            // With a draft model, the first model reported by the stream is labelled as the draft
            const modelLabel = loadedModels === 0 && totalModels === 2 ? 'Draft Model' : 'Main Model';
            $('#loading_progress_container').attr('data-model', modelLabel);

            if (packet.status === 'finished') {
                resetTabbyProgressBar();
                toastr.info(`TabbyLoader: ${modelLabel} loaded`);
                loadedModels++;
                if (loadedModels >= totalModels) {
                    progressContainer.css('display', 'none');
                }
            } else {
                $('#loading_progressbar').progressbar('value', percent);
                $('#loading_progressbar .ui-progressbar-value').css({
                    display: 'block',
                    width: `${percent}%`,
                });
            }
        }

        // Give the browser a chance to paint the new progress before the next read
        await new Promise((resolve) => requestAnimationFrame(resolve));
    }
}

/**
 * Click handler of the "Load model" button.
 *
 * Validates the chosen model names against the cached lists, posts the load request and
 * follows the streamed progress until loading completes or fails. Errors are reported
 * through toasts and the console; nothing is thrown to the caller.
 *
 * NOTE(review): `draftModels` is never filled in the visible code, so any non-empty
 * draft model name is currently rejected by the validation below.
 *
 * @returns {Promise<void>}
 */
export async function onTabbyLoadModelClick() {
    if (!verifyTabby()) {
        return;
    }

    const modelValue = String($('#tabby_load_model_list').val() ?? '');
    const draftModelValue = String($('#tabby_load_draft_model_list').val() ?? '');

    if (!modelValue || !models.includes(modelValue)) {
        toastr.error('TabbyLoader: Please make sure the model name is spelled correctly before loading!');
        return;
    }

    if (draftModelValue !== '' && !draftModels.includes(draftModelValue)) {
        toastr.error('TabbyLoader: Please make sure the draft model name is spelled correctly before loading!');
        return;
    }

    const body = buildTabbyLoadBody(modelValue, draftModelValue);
    if (!body) {
        return;
    }

    const progressContainer = $('#loading_progress_container');

    try {
        const response = await fetch('/api/backends/text-completions/tabby/load', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
                // The server route expects the Tabby payload as a JSON string
                toTabby: JSON.stringify(body),
            }),
        });

        if (!response.ok) {
            // The error body is not guaranteed to be JSON
            const responseJson = await response.json().catch(() => null);
            console.error('TabbyLoader: Could not load the model because:', responseJson?.detail ?? response.statusText);
            toastr.error('TabbyLoader: Could not load the model. Please check the JavaScript or TabbyAPI console for details.');
            return;
        }

        if (!response.body) {
            console.error('No response body received');
            toastr.error('TabbyLoader: No stream received from server.');
            return;
        }

        prepareTabbyProgressBar();

        // Show container only during streaming
        progressContainer.css({
            display: 'block',
            visibility: 'visible',
            position: 'relative',
            zIndex: 1000,
        });

        const reader = response.body.pipeThrough(new SmoothEventSourceStream()).getReader();
        await streamTabbyLoadProgress(reader, progressContainer, draftModelValue ? 2 : 1);
    } catch (error) {
        console.error('TabbyLoader: Could not load the model because:', error);
        toastr.error('Could not load the model. Please check the TabbyAPI console for details.');
    } finally {
        // Runs only after the whole stream was consumed (or failed), never mid-load
        progressContainer.css('display', 'none');
        resetTabbyProgressBar();
    }
}
/**
 * Click handler of the "Unload model" button: asks the server to unload the current Tabby model.
 *
 * Network failures and non-JSON error bodies are reported through a toast and the console
 * instead of surfacing as unhandled promise rejections.
 *
 * @returns {Promise<undefined|Array>} undefined on success; an empty array on failure
 * (return shape kept from the previous implementation)
 */
export async function onTabbyUnloadModelClick() {
    try {
        const response = await fetch('/api/backends/text-completions/tabby/unload', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
            }),
        });

        if (response.ok) {
            toastr.info('Tabby model was unloaded.');
            return;
        }

        // The error body is not guaranteed to be JSON
        const responseJson = await response.json().catch(() => null);
        console.error('TabbyLoader: Could not unload the model because:\n', responseJson?.detail ?? response.statusText);
    } catch (error) {
        console.error('TabbyLoader: Could not unload the model because:\n', error);
    }

    toastr.error('TabbyLoader: Could not unload the model. Please check the browser or TabbyAPI console for details.');
    return [];
}
/**
 * Click handler of the "Open parameter editor" button.
 *
 * Loads the parameter template, fills it from the saved loading parameters, shows it in a
 * confirm popup and, when the user saves, writes the edited values back to
 * `textgenerationwebui_settings.tabbyModelLoadParams`.
 *
 * @returns {Promise<void>}
 */
export async function onTabbyParameterEditorClick() {
    const storedParams = textgenerationwebui_settings?.tabbyModelLoadParams ?? {};

    // Strip trailing slashes from the base path so the URL never contains '//'
    const templateUrl = `${tempaltesFolderPath.replace(/\/+$/, '')}/tabbyModelParameters.html`;
    const parameterHtml = $(await $.get(templateUrl));
    const field = (selector) => parameterHtml.find(selector);

    field('input[name="max_seq_len"]').val(storedParams.maxSeqLen ?? 4096);
    field('input[name="cache_size"]').val(storedParams.cacheSize ?? 'Max Seq Len');
    field('input[name="max_batch_size"]').val(storedParams.maxBatchSize ?? 'Auto');
    field('input[name="fasttensors"]').prop('checked', storedParams.fasttensors ?? false);
    field('select[name="cache_mode_select"]').val(cache_mode[storedParams.cacheMode ?? 'FP16']);

    // Rope and Draft rope
    field('input[name="rope_scale"]').val(storedParams.ropeScale ?? 'Auto');
    field('input[name="rope_alpha"]').val(storedParams.ropeAlpha ?? 'Auto');
    field('input[name="draft_rope_scale"]').val(storedParams.draft_ropeScale ?? 'Auto');
    field('input[name="draft_rope_alpha"]').val(storedParams.draft_ropeAlpha ?? 'Auto');

    // MARK: GPU split options
    const gpuSplitAuto = storedParams.gpuSplitAuto ?? true;
    const storedGpuSplit = storedParams.gpuSplit;
    const gpuSplitTextbox = field('input[name="gpu_split_value"]')
        .val(JSON.stringify(storedGpuSplit?.length > 0 ? storedGpuSplit : undefined))
        .prop('disabled', gpuSplitAuto);
    field('input[name="gpu_split_auto"]')
        .prop('checked', gpuSplitAuto)
        .on('click', function () {
            // The manual split textbox is only editable while auto split is off
            gpuSplitTextbox.prop('disabled', $(this).prop('checked'));
        });

    const popupResult = await callPopup(parameterHtml, 'confirm', undefined, { okButton: 'Save' });
    if (!popupResult) {
        return;
    }

    // Non-numeric input such as 'Auto' becomes null
    const numberOrNull = (name) => Number(field(`input[name="${name}"]`).val()) || null;

    // Handle GPU split setting: anything that is not a JSON array is stored as an empty list
    let gpuSplit = [];
    const gpuSplitVal = String(field('input[name="gpu_split_value"]').val() ?? '').trim();
    if (gpuSplitVal) {
        try {
            const parsedSplit = JSON.parse(gpuSplitVal);
            if (Array.isArray(parsedSplit)) {
                gpuSplit = parsedSplit;
            } else {
                console.error(`Provided GPU split value (${parsedSplit}) is not an array.`);
            }
        } catch (error) {
            console.error(error);
        }
    }

    textgenerationwebui_settings.tabbyModelLoadParams = {
        // Keep values this dialog does not edit (e.g. urlOverride, useProxy)
        ...storedParams,
        maxSeqLen: Number(field('input[name="max_seq_len"]').val()) || 4096,
        // Read from the cache_size input; there is no input named "cache_mode" in the template
        cacheSize: numberOrNull('cache_size'),
        maxBatchSize: numberOrNull('max_batch_size'),
        ropeScale: numberOrNull('rope_scale'),
        ropeAlpha: numberOrNull('rope_alpha'),
        draft_ropeScale: numberOrNull('draft_rope_scale'),
        draft_ropeAlpha: numberOrNull('draft_rope_alpha'),
        gpuSplitAuto: field('input[name="gpu_split_auto"]').prop('checked'),
        fasttensors: field('input[name="fasttensors"]').prop('checked'),
        cacheMode: getKeyByValue(
            cache_mode,
            Number(field('select[name="cache_mode_select"]').find(':selected').val()) || 0,
        ),
        gpuSplit,
    };
    saveSettingsDebounced();
}
/* function migrateSettings() {
let performSave = false;
const modelParamsInSettings = settings?.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams;
if (modelParamsInSettings && 'eightBitCache' in modelParamsInSettings) {
const newParams = {
cacheMode: settings.textgenerationwebui_settings?.tabbyModelLoadParams?.eightBitCache ? 'FP8' : 'FP16',
};
delete settings.textgenerationwebui_settings?.tabbyModelLoadParams.modelParams.eightBitCache;
Object.assign(settings.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams, newParams);
performSave = true;
}
if (performSave) {
saveSettingsDebounced();
}
} */
/**
 * Makes sure `textgenerationwebui_settings.tabbyModelLoadParams` exists and is populated,
 * points the module-level `tabbyModelLoadParams` at it and schedules a settings save.
 *
 * The defaults are always COPIED into the settings. Storing the `defaultSettings` object
 * itself would let every later write to the settings silently change the defaults too.
 *
 * @returns {Promise<void>}
 */
export async function loadTabbySettings() {
    const stored = textgenerationwebui_settings.tabbyModelLoadParams;

    if (!stored) {
        console.warn('saw no tabby model loading object in text_gen settings');
        textgenerationwebui_settings.tabbyModelLoadParams = { ...defaultSettings };
    } else if (Object.keys(stored).length === 0) {
        console.warn('tabby model loading settings were empty in text_gen settings, using default instead.');
        Object.assign(stored, defaultSettings);
    }

    tabbyModelLoadParams = textgenerationwebui_settings.tabbyModelLoadParams;
    saveSettingsDebounced();

    // TODO: still to be wired up here: settings migration, the #tabby_url_override and
    // #tabby_use_proxy inputs, and the admin key placeholder of #tabby_admin_key.
}
// Runs once the DOM is ready: binds the Tabby loader inputs, prepares the (hidden)
// progress bar and defers loading of the settings until the app reports it is ready.
jQuery(async () => {
    /* TODO: draft model autocomplete, currently disabled:
    $('#tabby_load_draft_model_list')
        .autocomplete({
            source: (_, response) => {
                return response(draftModels);
            },
            minLength: 0,
        })
        .focus(function () {
            $(this)
                .autocomplete(
                    'search',
                    String($(this).val()),
                );
        }); */

    // Stores the typed URL override in the settings
    function handleUrlOverrideInput() {
        const typedUrl = $(this).val();
        if (typedUrl === undefined) {
            return;
        }
        textgenerationwebui_settings.tabbyModelLoadParams.urlOverride = typedUrl;
        saveSettingsDebounced();
    }

    // Stores the state of the "use proxy" checkbox in the settings
    function handleUseProxyInput() {
        const isChecked = $(this).prop('checked');
        textgenerationwebui_settings.tabbyModelLoadParams.useProxy = Boolean(isChecked);
        saveSettingsDebounced();
    }

    // Load settings when starting things up (if you have any)
    async function handleAppReady() {
        await loadTabbySettings();
    }

    $('#tabby_url_override').on('input', handleUrlOverrideInput);
    $('#tabby_use_proxy').on('input', handleUseProxyInput);

    // The bar starts empty and stays hidden until a model load is in progress
    const initialProgress = { value: 0 };
    $('#loading_progressbar').progressbar(initialProgress);
    $('#loading_progress_container').hide();

    eventSource.on(event_types.APP_READY, async () => {
        await handleAppReady();
    });
});

View File

@@ -0,0 +1,96 @@
<!--
    Parameter editor shown in a popup by the Tabby model loader.
    The script looks the controls up by their "name" attribute; the "id" attributes exist
    only so that each <label for="..."> is associated with its own control.
-->
<div id="tabby_loader_popup">
    <div>
        <h3><strong data-i18n="Set Parameters">Set Parameters</strong>
            <a href="https://github.com/theroyallab/tabbyAPI" class="notes-link" target="_blank">
                <span class="note-link-span">?</span>
            </a>
        </h3>
        <small class="flex-container extensions_info justifyCenter">
            Set Parameters for Loading a Model
        </small>
        <hr />
        Main Model
        <div class="flex-container">
            <div class="flex1">
                <label for="tabby_param_max_seq_len">
                    <small data-i18n="Max Seq Len">Max Seq Len</small>
                </label>
                <input id="tabby_param_max_seq_len" name="max_seq_len" class="text_pole" type="text" placeholder="ex: 4096" />
            </div>
            <div class="flex1">
                <label for="tabby_param_cache_size">
                    <small data-i18n="Cache Size">Cache Size</small>
                </label>
                <input id="tabby_param_cache_size" name="cache_size" class="text_pole" type="text" placeholder="Max Seq Len" />
            </div>
            <div class="flex1">
                <label for="tabby_param_max_batch_size">
                    <small data-i18n="Max Batch Size">Max Batch Size</small>
                </label>
                <input id="tabby_param_max_batch_size" name="max_batch_size" class="text_pole" type="text" placeholder="ex: 512" />
            </div>
        </div>
        <div class="flex-container">
            <div class="flex1">
                <label for="tabby_param_rope_scale">
                    <small data-i18n="Rope Scale">Rope Scale</small>
                </label>
                <input id="tabby_param_rope_scale" name="rope_scale" class="text_pole" type="text" placeholder="ex: 1.0" />
            </div>
            <div class="flex1">
                <label for="tabby_param_rope_alpha">
                    <small data-i18n="Rope Alpha">Rope Alpha</small>
                </label>
                <input id="tabby_param_rope_alpha" name="rope_alpha" class="text_pole" type="text" placeholder="ex: 1.0" />
            </div>
        </div>
        Draft Model
        <div class="flex-container">
            <div class="flex1">
                <label for="tabby_param_draft_rope_scale">
                    <small data-i18n="Rope Scale">Rope Scale</small>
                </label>
                <input id="tabby_param_draft_rope_scale" name="draft_rope_scale" class="text_pole" type="text" placeholder="ex: 1.0" />
            </div>
            <div class="flex1">
                <label for="tabby_param_draft_rope_alpha">
                    <small data-i18n="Rope Alpha">Rope Alpha</small>
                </label>
                <input id="tabby_param_draft_rope_alpha" name="draft_rope_alpha" class="text_pole" type="text" placeholder="ex: 1.0" />
            </div>
        </div>
        <!-- Container for GPU Split and other options -->
        Loader Options
        <div class="flex-container padded-loader-setting-category">
            <div class="flex-container flexFlowColumn flexNoGap alignitemsstart">
                <small class="justifyCenter">GPU Split</small>
                <label class="checkbox flex-container">
                    <input type="checkbox" name="gpu_split_auto" />
                    <span data-i18n="Auto Split">Auto Split</span>
                </label>
                <!-- Expects a JSON array; the script disables this box while Auto Split is checked -->
                <input name="gpu_split_value" class="text_pole" type="text" placeholder="ex. [20.6, 24]" />
            </div>
            <div class="flex-container flexFlowColumn flexNoGap alignitemsstart padded-loader-setting-block">
                <small>Other Options</small>
                <label class="checkbox flex-container">
                    <input type="checkbox" name="fasttensors" />
                    <span data-i18n="FastTensors">FastTensors</span>
                </label>
            </div>
            <div class="flex-container flexFlowColumn flexNoGap alignitemsstart">
                <small class="justifyCenter">Cache Mode</small>
                <!-- Option values are the numeric cache mode indices used by the script -->
                <select name="cache_mode_select" class="margin0">
                    <option value="0">FP16</option>
                    <option value="1">Q4</option>
                    <option value="2">Q6</option>
                    <option value="3">Q8</option>
                </select>
            </div>
        </div>
    </div>
</div>

View File

@@ -8,6 +8,7 @@ import { POPUP_TYPE, callGenericPopup } from './popup.js';
import { t } from './i18n.js';
import { accountStorage } from './util/AccountStorage.js';
import { localizePagination, PAGINATION_TEMPLATE } from './utils.js';
import { onTabbyLoadModelClick, onTabbyParameterEditorClick, onTabbyUnloadModelClick, fetchTabbyModels } from './tabbyModelLoader.js';
let mancerModels = [];
let togetherModels = [];
@@ -945,6 +946,11 @@ export function initTextGenModels() {
$('#tabby_model').on('change', onTabbyModelSelect);
$('#featherless_model').on('change', () => onFeatherlessModelSelect(String($('#featherless_model').val())));
$('#tabby_load_model_button').on('click', async () => await onTabbyLoadModelClick());
$('#tabby_unload_model_button').on('click', async () => await onTabbyUnloadModelClick());
$('#tabby_parameter_editor_button').on('click', async () => await onTabbyParameterEditorClick());
$('#tabby_reload_model_list_button').on('click', async () => await fetchTabbyModels());
const providersSelect = $('.openrouter_providers');
for (const provider of OPENROUTER_PROVIDERS) {
providersSelect.append($('<option>', {

View File

@@ -6149,3 +6149,17 @@ body:not(.movingUI) .drawer-content.maximized {
border-color: var(--error-color, #e87f7f);
background-color: rgba(241, 163, 163, 0.2);
}
/* Vertical breathing room around the model loading progress bar container */
.progress_container {
padding-top: 10px;
padding-bottom: 10px;
}
/* Keeps the loading progress bar track slim */
#loading_progressbar.ui-widget-content {
height: 10px;
}
/* Direct .ui-widget-header child of the bar (presumably the jQuery UI fill element; confirm):
   orange, with the border forcibly removed */
#loading_progressbar>.ui-widget-header {
background: orange;
border: none !important;
}

View File

@@ -627,6 +627,133 @@ tabby.post('/download', async function (request, response) {
}
});
/**
 * POST /unload: asks the Tabby server named in `request.body.api_server` to unload its model.
 * Responds 403 when the configured key lacks admin permission, 500 on any other failure
 * and `{ ok: true }` on success.
 */
tabby.post('/unload', async function (request, response) {
    try {
        // Drop a single trailing slash so the endpoint paths can be appended directly
        const serverUrl = String(request.body.api_server).replace(/\/$/, '');
        const unloadArgs = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(request.body),
            timeout: 0,
        };

        setAdditionalHeaders(request, unloadArgs, serverUrl);

        // Check key permissions
        const permissionCheck = await fetch(`${serverUrl}/v1/auth/permission`, {
            headers: unloadArgs.headers,
        });

        if (!permissionCheck.ok) {
            console.error('API Permission error:', permissionCheck.status, permissionCheck.statusText);
            return response.status(500).send({ error: true });
        }

        /** @type {any} */
        const permissionInfo = await permissionCheck.json();
        const isAdmin = permissionInfo['permission'] === 'admin';
        if (!isAdmin) {
            return response.status(403).send({ error: true });
        }

        const unloadResult = await fetch(`${serverUrl}/v1/model/unload`, unloadArgs);
        if (unloadResult.ok) {
            return response.send({ ok: true });
        }

        console.error('Tabby unload error:', unloadResult.status, unloadResult.statusText);
        return response.status(500).send({ error: true });
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
/**
 * POST /load: asks the Tabby server named in `request.body.api_server` to load a model
 * and relays the streamed load progress to the client as Server-Sent Events.
 *
 * `request.body.toTabby` must be a JSON string holding the payload for Tabby's load endpoint.
 * Responds 400 for a malformed payload, 403 when the configured key lacks admin permission
 * and 500 on any other failure that happens before streaming starts.
 */
tabby.post('/load', async function (request, response) {
    try {
        const baseUrl = String(request.body.api_server).replace(/\/$/, '');

        // Only the inner payload is forwarded to Tabby; api_server / api_type stay on this side
        let loadParams;
        try {
            loadParams = JSON.parse(request.body.toTabby);
        } catch (error) {
            console.error('Tabby load error: "toTabby" is not valid JSON.', error);
            return response.status(400).send({ error: true });
        }
        if (typeof loadParams !== 'object' || loadParams === null) {
            console.error('Tabby load error: "toTabby" must contain a JSON object.');
            return response.status(400).send({ error: true });
        }

        const args = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(loadParams),
            timeout: 0,
        };

        setAdditionalHeaders(request, args, baseUrl);

        // Log the payload only: the headers may carry the API key and must not reach the logs
        console.debug('Tabby load request:', loadParams);

        // Check key permissions
        const permissionResponse = await fetch(`${baseUrl}/v1/auth/permission`, {
            headers: args.headers,
        });

        if (permissionResponse.ok) {
            /** @type {any} */
            const permissionJson = await permissionResponse.json();

            if (permissionJson['permission'] !== 'admin') {
                return response.status(403).send({ error: true });
            }
        } else {
            console.error('API Permission error:', permissionResponse.status, permissionResponse.statusText);
            return response.status(500).send({ error: true });
        }

        const fetchResponse = await fetch(`${baseUrl}/v1/model/load`, args);

        if (!fetchResponse.ok) {
            console.error('Tabby load error:', fetchResponse.status, fetchResponse.statusText);
            return response.status(500).send({ error: true });
        }

        if (!fetchResponse.body) {
            console.error('No response body received from LLM server');
            return response.status(500).send({ error: true });
        }

        // Set headers for Server-Sent Events
        response.set({
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
        });

        // Relay every chunk as soon as it arrives
        fetchResponse.body.on('data', (chunk) => {
            response.write(chunk);
            // flush() is only present when a middleware added it to the response
            if (typeof response.flush === 'function') {
                response.flush();
            }
        });

        // Handle stream errors
        fetchResponse.body.on('error', (err) => {
            console.error('Stream error:', err);
            if (response.headersSent) {
                // Part of the stream already went out; the status can no longer change, so just close
                response.end();
            } else {
                response.status(500).type('application/json').send({ error: true });
            }
        });

        fetchResponse.body.on('end', () => {
            response.end();
        });
    } catch (error) {
        console.error(error);
        // The response may already be in flight if the failure happened while streaming
        if (response.headersSent) {
            return response.end();
        }
        return response.sendStatus(500);
    }
});
router.use('/ollama', ollama);
router.use('/llamacpp', llamacpp);
router.use('/tabby', tabby);