import {
    eventSource,
    event_types,
    callPopup,
    getRequestHeaders,
    online_status,
    saveSettingsDebounced,
    settings,
} from '../script.js';
import { textgen_types, textgenerationwebui_settings, getTextGenServer } from '../scripts/textgen-settings.js';
import { SmoothEventSourceStream } from '../scripts/sse-stream.js';

// Path to the extension's HTML templates
const templatesFolderPath = 'scripts/templates/';

const defaultSettings = {
    max_seq_len: 4096,
    cache_size: 'Max Seq Len',
    max_batch_size: 'Auto',
    fasttensors: false,
    rope_scale: 'Auto',
    rope_alpha: 'Auto',
    gpu_split_auto: true,
    gpu_split_value: null,
    cache_mode: 'FP16',
    draft_rope_alpha: 'Auto',
    draft_rope_scale: 'Auto',
    urlOverride: null,
    useProxy: false,
};

let tabbyModelLoadParams = defaultSettings;

// Cached model lists
let models = [];
let draftModels = [];

const cache_mode = {
    FP16: 0,
    Q4: 1,
    Q6: 2,
    Q8: 3,
};

// Reverse lookup into a plain object, e.g. getKeyByValue(cache_mode, 2) === 'Q6'
function getKeyByValue(object, value) {
    return Object.keys(object).find(key => object[key] === value);
}

// Check if the user is connected to a TabbyAPI instance
function verifyTabby(logError = true) {
    const result = online_status !== 'no_connection' && textgenerationwebui_settings.type === textgen_types.TABBY;
    if (!result && logError) {
        toastr.error('TabbyLoader: Please connect to a TabbyAPI instance to use this extension');
    }
    return result;
}

// Fetch the model list for autocomplete population
export async function fetchTabbyModels() {
    console.debug('fetchTabbyModels loaded');
    if (!verifyTabby(false)) {
        console.error('TabbyLoader: Could not connect to TabbyAPI');
        return;
    }

    try {
        const url = '/api/backends/text-completions/status';
        const response = await fetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
            }),
        });

        if (!response.ok) {
            console.error(`Model list request failed with a status code of ${response.status}:\n${response.statusText}`);
            return [];
        }

        const modelsFromResponse = await response.json();
        models = modelsFromResponse.data.map((e) => e.id);
        console.debug(models);

        $('#tabby_load_model_list')
            .autocomplete({
                source: (_, response) => {
                    return response(models);
                },
                minLength: 0,
            })
            .focus(function () {
                $(this)
                    .autocomplete(
                        'search',
                        String($(this).val()),
                    );
            });
    } catch (error) {
        console.error(error);
        return [];
    }
}
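// Illustrative only: fetchTabbyModels maps over `data` and keeps each `id`,
// so the status endpoint is assumed to return an OpenAI-style model list.
// The model names below are hypothetical:
//   { "data": [ { "id": "Llama-3-8B-exl2" }, { "id": "Mistral-7B-exl2" } ] }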
// This function is called when the load button is clicked
export async function onTabbyLoadModelClick() {
    if (!verifyTabby()) {
        return;
    }

    const modelValue = $('#tabby_load_model_list').val();
    const draftModelValue = $('#tabby_load_draft_model_list').val();

    if (!modelValue || !models.includes(modelValue)) {
        console.debug(models);
        console.debug(modelValue);
        toastr.error('TabbyLoader: Please make sure the model name is spelled correctly before loading!');
        return;
    }

    // NOTE: draftModels is never populated while the draft model autocomplete
    // is commented out below, so any non-empty draft model value fails this check.
    if (draftModelValue !== '' && !draftModels.includes(draftModelValue)) {
        toastr.error('TabbyLoader: Please make sure the draft model name is spelled correctly before loading!');
        return;
    }

    const body = {
        name: modelValue,
        max_seq_len: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.maxSeqLen) || 0,
        cache_size: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.cacheSize) || null,
        max_batch_size: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.maxBatchSize) || null,
        rope_scale: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.ropeScale) || null,
        rope_alpha: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.ropeAlpha) || null,
        gpu_split_auto: textgenerationwebui_settings?.tabbyModelLoadParams?.gpuSplitAuto,
        cache_mode: textgenerationwebui_settings?.tabbyModelLoadParams?.cacheMode,
        fasttensors: textgenerationwebui_settings?.tabbyModelLoadParams?.fasttensors,
    };

    if (draftModelValue) {
        // Draft rope settings are stored flat on tabbyModelLoadParams by the parameter editor
        body.draft = {
            draft_model_name: draftModelValue,
            draft_rope_scale: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.draft_ropeScale) || null,
            draft_rope_alpha: Number(textgenerationwebui_settings?.tabbyModelLoadParams?.draft_ropeAlpha) || null,
        };
    }

    if (!body.gpu_split_auto) {
        const gpuSplit = textgenerationwebui_settings?.tabbyModelLoadParams?.gpuSplit;

        if (Array.isArray(gpuSplit) && gpuSplit.length > 0) {
            body['gpu_split'] = gpuSplit;
        } else {
            console.error(`TabbyLoader: GPU split ${gpuSplit} is invalid. Set to auto or adjust your parameters!`);
            toastr.error('TabbyLoader: Invalid GPU split. Set GPU split to auto or adjust your parameters');
            return;
        }
    }

    try {
        const url = '/api/backends/text-completions/tabby/load';
        const response = await fetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
                toTabby: JSON.stringify(body),
            }),
        });

        // Initialize the progress bar only if not already initialized
        if (!$('#loading_progressbar').hasClass('ui-progressbar')) {
            $('#loading_progressbar').progressbar({
                value: 0,
                max: 100,
            });
        } else {
            // Reset if already initialized
            $('#loading_progressbar').progressbar('value', 0);
            console.debug('Progressbar already initialized, resetting value');
        }

        // Ensure a single .ui-progressbar-value element and a clean initial state
        const progressValue = $('#loading_progressbar .ui-progressbar-value');
        if (progressValue.length > 1) {
            console.warn('Multiple .ui-progressbar-value elements detected:', progressValue.length);
            progressValue.slice(1).remove(); // Keep only the first
        }
        progressValue.css({
            display: 'none',
            width: '0%',
        });

        async function readStream(reader, progressContainer, soFar, times) {
            const { value, done } = await reader.read();

            // Treat a closed stream as terminal even if fewer models than expected
            // finished, so the loop cannot spin forever on a done reader
            if (done) {
                progressContainer.css('display', 'none');
                $('#loading_progressbar').progressbar('value', 0);
                progressValue.css({ display: 'none', width: '0%' });
                return;
            }

            if (!value) {
                console.warn('Empty stream value received');
                requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
                return;
            }

            let packet;
            try {
                packet = JSON.parse(value.data);
            } catch (error) {
                console.error('Failed to parse stream packet:', error, value);
                requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
                return;
            }

            if (packet.error) {
                progressContainer.css('display', 'none');
                $('#loading_progressbar').progressbar('value', 0);
                progressValue.css({ display: 'none', width: '0%' });
                throw new Error(packet.error.message);
            }

            // parseInt yields NaN (not null) on failure, so fall back with || instead of ??
            const numerator = parseInt(packet.module) || 0;
            const denominator = parseInt(packet.modules) || 0;
            const percent = denominator ? (numerator / denominator) * 100 : 0;

            // Indicate whether the draft or the main model is loading
            const modelLabel = soFar === 0 && times === 2 ? 'Draft Model' : 'Main Model';
            $('#loading_progress_container').attr('data-model', modelLabel);

            if (packet.status === 'finished') {
                if (soFar === times - 1) {
                    progressContainer.css('display', 'none');
                    toastr.info(`TabbyLoader: ${modelLabel} loaded`);
                    $('#loading_progressbar').progressbar('value', 0);
                    progressValue.css({ display: 'none', width: '0%' });
                } else {
                    $('#loading_progressbar').progressbar('value', 0);
                    progressValue.css({ display: 'none', width: '0%' });
                    toastr.info('TabbyLoader: Draft Model loaded');
                }
                soFar++;
            } else {
                const roundedPercent = Math.round(percent);
                $('#loading_progressbar').progressbar('value', roundedPercent);
                progressValue.css({
                    display: 'block',
                    width: `${roundedPercent}%`,
                });
            }

            requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
        }

        if (response.ok) {
            if (!response.body) {
                console.error('No response body received');
                toastr.error('TabbyLoader: No stream received from server.');
                return;
            }

            const eventStream = new SmoothEventSourceStream();
            const reader = response.body.pipeThrough(eventStream).getReader();
            const progressContainer = $('#loading_progress_container');

            // Show the container only while streaming
            progressContainer.css({
                display: 'block',
                visibility: 'visible',
                position: 'relative',
                zIndex: 1000,
            });

            let soFar = 0;
            let times = draftModelValue ? 2 : 1;

            // Note: readStream resolves after the first read; later packets are
            // handled on animation frames, so the finally block below may reset
            // the bar while loading is still in progress
            await readStream(reader, progressContainer, soFar, times);
        } else {
            const responseJson = await response.json();
            console.error('TabbyLoader: Could not load the model because:', responseJson?.detail ?? response.statusText);
            toastr.error('TabbyLoader: Could not load the model. Please check the JavaScript or TabbyAPI console for details.');
        }
    } catch (error) {
        console.error('TabbyLoader: Could not load the model because:', error);
        toastr.error('Could not load the model. Please check the TabbyAPI console for details.');
    } finally {
        $('#loading_progressbar').progressbar('value', 0);
        $('#loading_progressbar .ui-progressbar-value').css({ display: 'none', width: '0%' });
    }
}
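// Illustrative only: the shape of a load progress packet as readStream expects
// it, inferred from the fields read above (module, modules, status, error).
// Actual TabbyAPI payloads may carry additional fields:
//   { "module": 12, "modules": 43, "status": "processing" }
//   { "module": 43, "modules": 43, "status": "finished" }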
export async function onTabbyUnloadModelClick() {
    const url = '/api/backends/text-completions/tabby/unload';
    const response = await fetch(url, {
        method: 'POST',
        headers: getRequestHeaders(),
        body: JSON.stringify({
            api_server: getTextGenServer('tabby'),
            api_type: 'tabby',
        }),
    });

    if (response.ok) {
        toastr.info('Tabby model was unloaded.');
    } else {
        const responseJson = await response.json();
        console.error('TabbyLoader: Could not unload the model because:\n', responseJson?.detail ?? response.statusText);
        toastr.error('TabbyLoader: Could not unload the model. Please check the browser or TabbyAPI console for details.');
    }
}
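// Illustrative only: the exported click handlers are meant to be wired to UI
// controls by the host page; the selectors below are hypothetical.
//   $('#tabby_load_model_button').on('click', onTabbyLoadModelClick);
//   $('#tabby_unload_model_button').on('click', onTabbyUnloadModelClick);
//   $('#tabby_model_params_button').on('click', onTabbyParameterEditorClick);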
export async function onTabbyParameterEditorClick() {
    console.debug('onParameterEditorClick');
    const parameterHtml = $(await $.get(`${templatesFolderPath}/tabbyModelParameters.html`));
    parameterHtml
        .find('input[name="max_seq_len"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.maxSeqLen ?? 4096);
    parameterHtml
        .find('input[name="cache_size"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.cacheSize ?? 'Max Seq Len');
    parameterHtml
        .find('input[name="max_batch_size"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.maxBatchSize ?? 'Auto');
    parameterHtml
        .find('input[name="fasttensors"]')
        .prop('checked', textgenerationwebui_settings?.tabbyModelLoadParams?.fasttensors ?? false);
    parameterHtml
        .find('select[name="cache_mode_select"]')
        .val(cache_mode[textgenerationwebui_settings?.tabbyModelLoadParams?.cacheMode ?? 'FP16']);

    // Rope and draft rope
    parameterHtml
        .find('input[name="rope_scale"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.ropeScale ?? 'Auto');
    parameterHtml
        .find('input[name="rope_alpha"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.ropeAlpha ?? 'Auto');
    parameterHtml
        .find('input[name="draft_rope_scale"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.draft_ropeScale ?? 'Auto');
    parameterHtml
        .find('input[name="draft_rope_alpha"]')
        .val(textgenerationwebui_settings?.tabbyModelLoadParams?.draft_ropeAlpha ?? 'Auto');

    // MARK: GPU split options
    const gpuSplitAuto = textgenerationwebui_settings?.tabbyModelLoadParams?.gpuSplitAuto ?? true;
    const gpuSplitValue = textgenerationwebui_settings?.tabbyModelLoadParams?.gpuSplit;

    const gpuSplitTextbox = parameterHtml
        .find('input[name="gpu_split_value"]')
        .val(JSON.stringify(gpuSplitValue?.length > 0 ? gpuSplitValue : undefined))
        .prop('disabled', gpuSplitAuto);

    parameterHtml
        .find('input[name="gpu_split_auto"]')
        .prop('checked', gpuSplitAuto)
        .on('click', function () {
            gpuSplitTextbox.prop('disabled', $(this).prop('checked'));
        });

    const popupResult = await callPopup(parameterHtml, 'confirm', undefined, { okButton: 'Save' });
    if (popupResult) {
        const newParams = {
            maxSeqLen: Number(parameterHtml.find('input[name="max_seq_len"]').val()) || 4096,
            cacheSize: Number(parameterHtml.find('input[name="cache_size"]').val()) || null,
            maxBatchSize: Number(parameterHtml.find('input[name="max_batch_size"]').val()) || null,
            ropeScale: Number(parameterHtml.find('input[name="rope_scale"]').val()) || null,
            ropeAlpha: Number(parameterHtml.find('input[name="rope_alpha"]').val()) || null,
            draft_ropeScale: Number(parameterHtml.find('input[name="draft_rope_scale"]').val()) || null,
            draft_ropeAlpha: Number(parameterHtml.find('input[name="draft_rope_alpha"]').val()) || null,
            gpuSplitAuto: parameterHtml.find('input[name="gpu_split_auto"]').prop('checked'),
            fasttensors: parameterHtml.find('input[name="fasttensors"]').prop('checked'),
            cacheMode: getKeyByValue(
                cache_mode,
                Number(
                    parameterHtml.find('select[name="cache_mode_select"]').find(':selected').val(),
                ) || 0,
            ),
        };

        // Handle the GPU split setting
        const gpuSplitVal = String(parameterHtml.find('input[name="gpu_split_value"]').val());
        try {
            if (gpuSplitVal) {
                const gpuSplitArray = JSON.parse(gpuSplitVal) ?? [];
                if (Array.isArray(gpuSplitArray)) {
                    newParams['gpuSplit'] = gpuSplitArray;
                } else {
                    console.error(`Provided GPU split value (${gpuSplitArray}) is not an array.`);
                    newParams['gpuSplit'] = [];
                }
            }
        } catch (error) {
            console.error(error);
            newParams['gpuSplit'] = [];
        }

        textgenerationwebui_settings.tabbyModelLoadParams = newParams;
        saveSettingsDebounced();
    }
}
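// Illustrative only: the GPU split textbox is round-tripped through
// JSON.stringify/JSON.parse above, so it expects a JSON array of per-GPU
// memory allocations. The numbers here are hypothetical:
//   [20.6, 24]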
/*
function migrateSettings() {
    let performSave = false;
    const modelParamsInSettings = settings?.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams;

    if (modelParamsInSettings && 'eightBitCache' in modelParamsInSettings) {
        const newParams = {
            cacheMode: settings.textgenerationwebui_settings?.tabbyModelLoadParams?.eightBitCache ? 'FP8' : 'FP16',
        };

        delete settings.textgenerationwebui_settings?.tabbyModelLoadParams.modelParams.eightBitCache;
        Object.assign(settings.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams, newParams);
        performSave = true;
    }

    if (performSave) {
        saveSettingsDebounced();
    }
}
*/

export async function loadTabbySettings() {
    // Create the settings if they don't exist
    if (!textgenerationwebui_settings.tabbyModelLoadParams) {
        console.warn('TabbyLoader: no Tabby model loading object found in textgen settings, using defaults');
        textgenerationwebui_settings.tabbyModelLoadParams = defaultSettings;
    }

    tabbyModelLoadParams = textgenerationwebui_settings?.tabbyModelLoadParams || {};
    if (Object.keys(tabbyModelLoadParams).length === 0) {
        console.warn('TabbyLoader: Tabby model loading settings were empty in textgen settings, using defaults instead.');
        Object.assign(tabbyModelLoadParams, defaultSettings);
    }
    saveSettingsDebounced();

    //migrateSettings();
    //$('#tabby_url_override').val(settings.textgenerationwebui_settings?.tabbyModelLoadParams?.urlOverride ?? '');
    //$('#tabby_use_proxy').prop('checked', settings.textgenerationwebui_settings?.tabbyModelLoadParams?.useProxy ?? false);

    // Updating settings in the UI
    //const placeholder = await getTabbyAuth() ? '✔️ Key found' : '❌ Missing key';
    //$('#tabby_admin_key').attr('placeholder', placeholder);
}

// This function is called when the extension is loaded
jQuery(async () => {
    /*
    $('#tabby_load_draft_model_list')
        .autocomplete({
            source: (_, response) => {
                return response(draftModels);
            },
            minLength: 0,
        })
        .focus(function () {
            $(this)
                .autocomplete(
                    'search',
                    String($(this).val()),
                );
        });
    */

    $('#tabby_url_override').on('input', function () {
        const value = $(this).val();
        if (value !== undefined) {
            textgenerationwebui_settings.tabbyModelLoadParams.urlOverride = value;
            saveSettingsDebounced();
        }
    });

    $('#tabby_use_proxy').on('input', function () {
        textgenerationwebui_settings.tabbyModelLoadParams.useProxy = !!$(this).prop('checked');
        saveSettingsDebounced();
    });

    $('#loading_progressbar').progressbar({
        value: 0,
    });

    $('#loading_progress_container').hide();

    // Load settings when starting things up (if you have any)
    eventSource.on(event_types.APP_READY, async () => {
        await loadTabbySettings();
    });
});