mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Merge Tabby API Loader Ext into Core WIP
This commit is contained in:
@@ -2678,35 +2678,67 @@
|
||||
<small data-i18n="Example: http://127.0.0.1:5000">Example: http://127.0.0.1:5000</small>
|
||||
<input id="tabby_api_url_text" class="text_pole wide100p" value="" autocomplete="off" data-server-history="tabby">
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<h4>
|
||||
<span data-i18n="Tabby Model">Tabby Model</span>
|
||||
<div class="inline-drawer">
|
||||
<div class="gap10h5v inline-drawer-header inline-drawer-toggle widthFitContent standoutHeader">
|
||||
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down"></div>
|
||||
<strong>Tabby Model Management</strong>
|
||||
</div>
|
||||
<div class="inline-drawer-content">
|
||||
|
||||
<div class="">
|
||||
<small>
|
||||
<b>To switch models:</b>
|
||||
<ol class="marginTop5">
|
||||
<li>
|
||||
<code>inline_model_loading: True</code> must be set in Tabby's config.yml
|
||||
</li>
|
||||
<li>
|
||||
Connect with an admin API key
|
||||
</li>
|
||||
</ol>
|
||||
</small>
|
||||
</div>
|
||||
<h4 class="alignItemsCenter flex-container">
|
||||
<span data-i18n="Tabby Model">Inline Model Swap</span>
|
||||
</h4>
|
||||
<small>The model selected here will be loaded and used in the next generation request.</small>
|
||||
<select id="tabby_model">
|
||||
<option value="" data-i18n="-- Connect to the API --">
|
||||
-- Connect to the API --
|
||||
</option>
|
||||
</select>
|
||||
<div class="marginTopBot5">
|
||||
<i class="fa-solid fa-flask"></i>
|
||||
<span>Experimental feature. Use at your own risk.</span>
|
||||
|
||||
<div class="alignItemsStart flex-container flexFlowColumn">
|
||||
<h4 data-i18n="Tabby API key" class="marginTop10">
|
||||
Model Select
|
||||
<i class="fa-solid fa-flask margin5"></i>
|
||||
<span class="redOverlayGlow" data-i18n="Experimental feature. Use at your own risk.">Experimental feature. Use at your own risk.</span>
|
||||
</h4>
|
||||
<div class="flex-container gap5px">
|
||||
<div id="tabby_load_model_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Load model" data-i18n="[title]Load model" tabindex="0">
|
||||
<i class="fa-solid fa-play"></i>
|
||||
</div>
|
||||
<div id="tabby_unload_model_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Unload model" data-i18n="[title]Unload model" tabindex="0">
|
||||
<i class="fa-solid fa-xmark"></i>
|
||||
</div>
|
||||
<div id="tabby_reload_model_list_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Refresh model list" data-i18n="[title]Refresh model list" tabindex="0">
|
||||
<i class="fa-solid fa-arrows-rotate"></i>
|
||||
</div>
|
||||
<div id="tabby_parameter_editor_button" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Open parameter editor" data-i18n="[title]Open parameter editor" tabindex="0">
|
||||
<i class="fa-solid fa-gear"></i>
|
||||
</div>
|
||||
<div id="tabby_download_model" class="fa-lg menu_button menu_button_icon heightFitContent interactable" title="Download model from HuggingFace" data-i18n="[title]Download model from HuggingFace" tabindex="0">
|
||||
<i class="fa-solid fa-cloud-arrow-down"></i>
|
||||
</div>
|
||||
</div>
|
||||
<div class="">
|
||||
<input id="tabby_load_model_list" name="tabby_load_model_list" class="text_pole ui-autocomplete-input" placeholder="Model name here" maxlength="100" size="35" value="" autocomplete="off">
|
||||
<input id="tabby_load_draft_model_list" name="tabby_load_draft_model_list" class="text_pole ui-autocomplete-input" placeholder="Draft model name here" maxlength="100" size="35" value="" autocomplete="off">
|
||||
<div id="loading_progress_container" class="progress_container">
|
||||
<div id="loading_progressbar"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="marginTopBot5">
|
||||
<small>
|
||||
<i class="fa-solid fa-lightbulb"></i>
|
||||
|
||||
<code>inline_model_loading: True</code>
|
||||
<span data-i18n="must be set in Tabby's config.yml to switch models.">
|
||||
must be set in Tabby's config.yml to switch models.
|
||||
</span>
|
||||
<b data-i18n="Use an admin API key.">
|
||||
Use an admin API key.
|
||||
</b>
|
||||
</small>
|
||||
</div>
|
||||
<div id="tabby_download_model_text" class="menu_button menu_button_icon">
|
||||
<i class="fa-solid fa-download"></i>
|
||||
<span data-i18n="Download">Download</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -3672,7 +3704,7 @@
|
||||
<input id="instruct_bind_to_context" type="checkbox" style="display:none;" />
|
||||
<small><i class="fa-solid fa-link menu_button margin0"></i></small>
|
||||
</label>
|
||||
<label id="instruct_enabled_label"for="instruct_enabled" class="checkbox_label flex1" title="Enable Instruct Mode" data-i18n="[title]instruct_enabled">
|
||||
<label id="instruct_enabled_label" for="instruct_enabled" class="checkbox_label flex1" title="Enable Instruct Mode" data-i18n="[title]instruct_enabled">
|
||||
<input id="instruct_enabled" type="checkbox" style="display:none;" />
|
||||
<small><i class="fa-solid fa-power-off menu_button togglable margin0"></i></small>
|
||||
</label>
|
||||
@@ -5120,7 +5152,7 @@
|
||||
<div class="flex-container wide100p alignitemscenter spaceBetween flexNoGap">
|
||||
<div class="flex-container alignItemsBaseline wide100p">
|
||||
<div class="flex1 flex-container alignItemsBaseline">
|
||||
<h3 class="margin0" >
|
||||
<h3 class="margin0">
|
||||
<span data-i18n="Persona Management">Persona Management</span>
|
||||
<a href="https://docs.sillytavern.app/usage/core-concepts/personas/" target="_blank">
|
||||
<span class="fa-solid fa-circle-question note-link-span"></span>
|
||||
|
528
public/scripts/tabbyModelLoader.js
Normal file
528
public/scripts/tabbyModelLoader.js
Normal file
@@ -0,0 +1,528 @@
|
||||
import { eventSource, event_types, callPopup, getRequestHeaders, online_status, saveSettingsDebounced, settings } from '../script.js';
|
||||
import { textgen_types, textgenerationwebui_settings, getTextGenServer } from '../scripts/textgen-settings.js';
|
||||
//import { SECRET_KEYS, readSecretState, findSecret, secret_state } from '../scripts/secrets.js';
|
||||
import { SmoothEventSourceStream } from '../scripts/sse-stream.js';
|
||||
|
||||
// --- Module configuration -------------------------------------------------

// Folder holding the HTML templates used by the parameter editor popup.
// NOTE(review): identifier is misspelled ("tempaltes") but is referenced
// elsewhere in this module, so the name is kept as-is.
const tempaltesFolderPath = 'scripts/templates/';

// Default model-load parameters, applied when no saved settings exist.
const defaultSettings = {
    max_seq_len: 4096,
    cache_size: 'Max Seq Len',
    max_batch_size: 'Auto',
    fasttensors: false,
    rope_scale: 'Auto',
    rope_alpha: 'Auto',
    gpu_split_auto: true,
    gpu_split_value: null,
    cache_mode: 'FP16',
    draft_rope_alpha: 'Auto',
    draft_rope_scale: 'Auto',
    urlOverride: null,
    useProxy: false,
};

// Active load parameters; replaced by persisted settings on startup.
let tabbyModelLoadParams = defaultSettings;

// Cached model lists that feed the autocomplete inputs.
let models = [];
let draftModels = [];

// KV-cache quantization modes mapped to the <select> option values
// in tabbyModelParameters.html.
const cache_mode = {
    FP16: 0,
    Q4: 1,
    Q6: 2,
    Q8: 3,
};
|
||||
|
||||
/**
 * Reverse lookup: return the first key in `object` whose value strictly
 * equals `value`.
 * @param {object} object - Object to search.
 * @param {*} value - Value to match with strict equality.
 * @returns {string|undefined} The matching key, or undefined when absent.
 */
function getKeyByValue(object, value) {
    for (const [key, entry] of Object.entries(object)) {
        if (entry === value) {
            return key;
        }
    }
    return undefined;
}
|
||||
|
||||
// Check if user is connected to TabbyAPI
|
||||
/**
 * Check whether the user is currently connected to a TabbyAPI instance.
 *
 * A connection only counts when there is an active connection AND the
 * selected text-generation backend type is Tabby. (BUGFIX: the original
 * used `||`, which passed whenever ANY backend was connected, or whenever
 * Tabby was merely selected while disconnected.)
 *
 * @param {boolean} [logError=true] - Show an error toast when not connected.
 * @returns {boolean} True when connected to TabbyAPI.
 */
function verifyTabby(logError = true) {
    const result = online_status !== 'no_connection' && textgenerationwebui_settings.type === textgen_types.TABBY;
    if (!result && logError) {
        toastr.error('TabbyLoader: Please connect to a TabbyAPI instance to use this extension');
    }
    return result;
}
|
||||
|
||||
/**
 * Retrieve the Tabby admin API key.
 *
 * Currently reads the key from localStorage ('Tabby_Admin'); the
 * commented-out secret-store path is intended to replace this (see TODO).
 * (Cleanup: the original wrapped the whole body in `if (!authToken)`
 * immediately after `authToken = null`, which was always true — removed.)
 *
 * @returns {Promise<string|null>} The admin key, or null when unavailable.
 */
async function getTabbyAuth() {
    let authToken = null;

    try {
        // TODO: remove localStorage usage and integrate with the actual key retrieval process
        authToken = localStorage.getItem('Tabby_Admin');
        if (!authToken) {
            console.error('Tabby Admin key not found in localStorage. Trying to fetch from secret state.');
            // authToken = await findSecret('api_key_tabby');
            console.warn(authToken);
        }
        if (!authToken) {
            console.error('Tabby Admin key not found. Please make sure allowKeysExposure is true in config.conf and an API key is set for TabbyAPI.');
        }
    } catch (error) {
        console.error(`TabbyLoader: ${error}`);
        console.error('Admin key error: Please make sure allowKeysExposure is true in config.conf and an API key is set for TabbyAPI.');
    }

    return authToken;
}
|
||||
|
||||
// Fetch the model list for autocomplete population
|
||||
/**
 * Fetch the model list from TabbyAPI (via the server's status proxy) and
 * wire it into the #tabby_load_model_list autocomplete input.
 *
 * @returns {Promise<string[]|undefined>} Model ids on success, an empty
 *     array on request failure, or undefined when not connected to Tabby.
 */
export async function fetchTabbyModels() {
    console.warn('fetchTabbyModels loaded');
    if (!verifyTabby(false)) {
        console.error('TabbyLoader: Could not connect to TabbyAPI');
        return;
    }

    try {
        const url = '/api/backends/text-completions/status';
        const response = await fetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
            }),
        });

        console.warn(response);
        if (!response.ok) {
            // BUGFIX: message typo — was "Mode list request ... statuscode"
            console.error(`Model list request failed with a status code of ${response.status}:\n${response.statusText}`);
            return [];
        }

        // The status endpoint returns { data: [{ id, ... }, ...] }
        const modelsFromResponse = await response.json();
        models = modelsFromResponse.data.map((e) => e.id);
        console.warn(models);

        $('#tabby_load_model_list')
            .autocomplete({
                source: (_, response) => {
                    return response(models);
                },
                minLength: 0,
            })
            .focus(function () {
                // Re-open the suggestion list with the current text on focus
                $(this)
                    .autocomplete(
                        'search',
                        String($(this).val()),
                    );
            });

        return models;
    } catch (error) {
        console.error(error);
        return [];
    }
}
|
||||
|
||||
// This function is called when the button is clicked
|
||||
/**
 * Load the model (and optional draft model) selected in the UI on the
 * connected TabbyAPI instance, streaming load progress into the
 * #loading_progressbar widget.
 *
 * Triggered by the #tabby_load_model_button click handler.
 * @returns {Promise<void>}
 */
export async function onTabbyLoadModelClick() {
    if (!verifyTabby()) {
        return;
    }

    const modelValue = $('#tabby_load_model_list').val();
    const draftModelValue = $('#tabby_load_draft_model_list').val();

    if (!modelValue || !models.includes(modelValue)) {
        console.warn(models);
        console.warn(modelValue);
        toastr.error('TabbyLoader: Please make sure the model name is spelled correctly before loading!');
        return;
    }

    if (draftModelValue !== '' && !draftModels.includes(draftModelValue)) {
        toastr.error('TabbyLoader: Please make sure the draft model name is spelled correctly before loading!');
        return;
    }

    const loadParams = textgenerationwebui_settings?.tabbyModelLoadParams;

    // Request payload for Tabby's /v1/model/load endpoint.
    const body = {
        name: modelValue,
        max_seq_len: Number(loadParams?.maxSeqLen) || 0,
        cache_size: Number(loadParams?.cacheSize) || null,
        max_batch_size: Number(loadParams?.maxBatchSize) || null,
        rope_scale: Number(loadParams?.ropeScale) || null,
        rope_alpha: Number(loadParams?.ropeAlpha) || null,
        // BUGFIX: default to auto GPU split when no parameters were saved yet
        // (previously `undefined`, which wrongly triggered the manual-split path).
        gpu_split_auto: loadParams?.gpuSplitAuto ?? true,
        cache_mode: loadParams?.cacheMode,
        fasttensors: loadParams?.fasttensors,
    };

    if (draftModelValue) {
        body.draft = {
            draft_model_name: draftModelValue,
            // BUGFIX: the original swapped scale/alpha and read them from a
            // non-existent `draft` sub-object via a non-optional `.draft.`
            // chain (the parameter editor saves them as top-level
            // draft_ropeScale / draft_ropeAlpha), which threw a TypeError.
            draft_rope_scale: Number(loadParams?.draft_ropeScale ?? loadParams?.draft?.draft_ropeScale) || null,
            draft_rope_alpha: Number(loadParams?.draft_ropeAlpha ?? loadParams?.draft?.draft_ropeAlpha) || null,
        };
    }

    if (!body.gpu_split_auto) {
        const gpuSplit = loadParams?.gpuSplit;

        if (Array.isArray(gpuSplit) && gpuSplit.length > 0) {
            body['gpu_split'] = gpuSplit;
        } else {
            console.error(`TabbyLoader: GPU split ${gpuSplit} is invalid. Set to auto or adjust your parameters!`);
            toastr.error('TabbyLoader: Invalid GPU split. Set GPU split to auto or adjust your parameters');
            return;
        }
    }

    try {
        const url = '/api/backends/text-completions/tabby/load';
        const response = await fetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
                toTabby: JSON.stringify(body),
            }),
        });

        // Initialize progress bar only if not already initialized
        if (!$('#loading_progressbar').hasClass('ui-progressbar')) {
            $('#loading_progressbar').progressbar({
                value: 0,
                max: 100,
            });
        } else {
            $('#loading_progressbar').progressbar('value', 0); // Reset if already initialized
            console.warn('Progressbar already initialized, resetting value');
        }

        // Ensure single .ui-progressbar-value and initial hidden state
        const progressValue = $('#loading_progressbar .ui-progressbar-value');
        if (progressValue.length > 1) {
            console.warn('Multiple .ui-progressbar-value elements detected:', progressValue.length);
            progressValue.slice(1).remove(); // Keep only the first
        }
        progressValue.css({
            display: 'none',
            width: '0%',
        });

        /**
         * Drain the SSE progress stream, updating the progress bar until
         * `times` models (main + optional draft) have finished loading.
         * @param {ReadableStreamDefaultReader} reader - SSE stream reader.
         * @param {JQuery} progressContainer - Container shown while streaming.
         * @param {number} soFar - Models finished so far.
         * @param {number} times - Total models expected (1 or 2).
         */
        async function readStream(reader, progressContainer, soFar, times) {
            const { value, done } = await reader.read();
            console.warn('Stream read:', { value, done, timestamp: new Date().toISOString() });
            if (done && soFar === times) {
                progressContainer.css('display', 'none');
                $('#loading_progressbar').progressbar('value', 0);
                progressValue.css({ display: 'none', width: '0%' });
                return;
            }

            if (!value) {
                console.warn('Empty stream value received');
                requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
                return;
            }

            let packet;
            try {
                packet = JSON.parse(value.data);
                console.log('Parsed packet:', packet);
                console.log('Packet status:', packet.status);
            } catch (error) {
                console.error('Failed to parse stream packet:', error, value);
                requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
                return;
            }

            if (packet.error) {
                progressContainer.css('display', 'none');
                $('#loading_progressbar').progressbar('value', 0);
                progressValue.css({ display: 'none', width: '0%' });
                throw new Error(packet.error.message);
            }

            // BUGFIX: parseInt returns NaN (never null) on bad input, so the
            // original `parseInt(x) ?? 0` fallback never applied — use `|| 0`
            // and pass an explicit radix.
            const numerator = Number.parseInt(packet.module, 10) || 0;
            const denominator = Number.parseInt(packet.modules, 10) || 0;
            const percent = denominator ? (numerator / denominator) * 100 : 0;

            // Indicate draft or main model
            const modelLabel = soFar === 0 && times === 2 ? 'Draft Model' : 'Main Model';
            $('#loading_progress_container').attr('data-model', modelLabel);

            if (packet.status === 'finished') {
                if (soFar === times - 1) {
                    progressContainer.css('display', 'none');
                    toastr.info(`TabbyLoader: ${modelLabel} loaded`);
                    $('#loading_progressbar').progressbar('value', 0);
                    progressValue.css({ display: 'none', width: '0%' });
                } else {
                    $('#loading_progressbar').progressbar('value', 0);
                    progressValue.css({ display: 'none', width: '0%' });
                    toastr.info('TabbyLoader: Draft Model loaded');
                }
                soFar++;
            } else {
                const roundedPercent = Math.round(percent);
                $('#loading_progressbar').progressbar('value', roundedPercent);
                progressValue.css({
                    display: 'block',
                    width: `${roundedPercent}%`,
                });
                console.log(`Progress set to: ${roundedPercent}% for ${modelLabel} at`, new Date().toISOString());
            }

            requestAnimationFrame(() => readStream(reader, progressContainer, soFar, times));
        }

        if (response.ok) {
            console.warn('saw ok response..hope for stream..');
            if (!response.body) {
                console.error('No response body received');
                toastr.error('TabbyLoader: No stream received from server.');
                return;
            }

            const eventStream = new SmoothEventSourceStream();
            const reader = response.body.pipeThrough(eventStream).getReader();
            const progressContainer = $('#loading_progress_container');
            // Show container only during streaming
            progressContainer.css({
                display: 'block',
                visibility: 'visible',
                position: 'relative',
                zIndex: 1000,
            });
            const soFar = 0;
            const times = draftModelValue ? 2 : 1;
            await readStream(reader, progressContainer, soFar, times);
        } else {
            const responseJson = await response.json();
            console.error('TabbyLoader: Could not load the model because:', responseJson?.detail ?? response.statusText);
            toastr.error('TabbyLoader: Could not load the model. Please check the JavaScript or TabbyAPI console for details.');
        }
    } catch (error) {
        console.error('TabbyLoader: Could not load the model because:', error);
        toastr.error('Could not load the model. Please check the TabbyAPI console for details.');
    } finally {
        // Always leave the progress bar reset and hidden.
        $('#loading_progressbar').progressbar('value', 0);
        $('#loading_progressbar .ui-progressbar-value').css({ display: 'none', width: '0%' });
    }
}
|
||||
|
||||
/**
 * Ask TabbyAPI (via the server proxy) to unload the currently loaded model.
 * Triggered by the #tabby_unload_model_button click handler.
 *
 * @returns {Promise<void|Array>} Resolves when done; returns an empty array
 *     on failure (kept for backward compatibility with the original).
 */
export async function onTabbyUnloadModelClick() {
    try {
        const url = '/api/backends/text-completions/tabby/unload';
        const response = await fetch(url, {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                api_server: getTextGenServer('tabby'),
                api_type: 'tabby',
            }),
        });

        console.warn(response);
        if (response.ok) {
            toastr.info('Tabby model was unloaded.');
        } else {
            const responseJson = await response.json();
            console.error('TabbyLoader: Could not unload the model because:\n', responseJson?.detail ?? response.statusText);
            toastr.error('TabbyLoader: Could not unload the model. Please check the browser or TabbyAPI console for details.');
            return [];
        }
    } catch (error) {
        // BUGFIX: fetch/JSON failures previously escaped this async function
        // as unhandled promise rejections.
        console.error('TabbyLoader: Could not unload the model because:', error);
        toastr.error('TabbyLoader: Could not unload the model. Please check the browser or TabbyAPI console for details.');
        return [];
    }
}
|
||||
|
||||
/**
 * Open the model-load parameter editor popup, populate it from the saved
 * settings, and persist any confirmed changes back into
 * textgenerationwebui_settings.tabbyModelLoadParams.
 *
 * Triggered by the #tabby_parameter_editor_button click handler.
 * @returns {Promise<void>}
 */
export async function onTabbyParameterEditorClick() {
    console.warn('onParameterEditorClick');
    const loadParams = textgenerationwebui_settings?.tabbyModelLoadParams;
    // BUGFIX: tempaltesFolderPath already ends with '/' — the extra
    // separator produced a double slash in the template URL.
    const parameterHtml = $(await $.get(`${tempaltesFolderPath}tabbyModelParameters.html`));
    parameterHtml
        .find('input[name="max_seq_len"]')
        .val(loadParams?.maxSeqLen ?? 4096);
    parameterHtml
        .find('input[name="cache_size"]')
        .val(loadParams?.cacheSize ?? 'Max Seq Len');
    parameterHtml
        .find('input[name="max_batch_size"]')
        .val(loadParams?.maxBatchSize ?? 'Auto');
    parameterHtml
        .find('input[name="fasttensors"]')
        .prop('checked', loadParams?.fasttensors ?? false);
    parameterHtml
        .find('select[name="cache_mode_select"]')
        .val(cache_mode[loadParams?.cacheMode ?? 'FP16']);

    // Rope and Draft rope
    parameterHtml
        .find('input[name="rope_scale"]')
        .val(loadParams?.ropeScale ?? 'Auto');
    parameterHtml
        .find('input[name="rope_alpha"]')
        .val(loadParams?.ropeAlpha ?? 'Auto');
    parameterHtml
        .find('input[name="draft_rope_scale"]')
        .val(loadParams?.draft_ropeScale ?? 'Auto');
    parameterHtml
        .find('input[name="draft_rope_alpha"]')
        .val(loadParams?.draft_ropeAlpha ?? 'Auto');

    // MARK: GPU split options
    const gpuSplitAuto = loadParams?.gpuSplitAuto ?? true;

    const gpuSplitValue = loadParams?.gpuSplit;
    const gpuSplitTextbox = parameterHtml
        .find('input[name="gpu_split_value"]')
        .val(JSON.stringify(gpuSplitValue?.length > 0 ? gpuSplitValue : undefined))
        .prop('disabled', gpuSplitAuto);

    parameterHtml
        .find('input[name="gpu_split_auto"]')
        .prop('checked', gpuSplitAuto)
        .on('click', function () {
            // The manual split textbox is only editable when auto-split is off.
            gpuSplitTextbox.prop('disabled', $(this).prop('checked'));
        });

    const popupResult = await callPopup(parameterHtml, 'confirm', undefined, { okButton: 'Save' });
    if (popupResult) {
        const newParams = {
            maxSeqLen: Number(parameterHtml.find('input[name="max_seq_len"]').val()) || 4096,
            // BUGFIX: cacheSize previously read the non-existent
            // input[name="cache_mode"], so the saved value was always null.
            cacheSize: Number(parameterHtml.find('input[name="cache_size"]').val()) || null,
            maxBatchSize: Number(parameterHtml.find('input[name="max_batch_size"]').val()) || null,
            ropeScale: Number(parameterHtml.find('input[name="rope_scale"]').val()) || null,
            ropeAlpha: Number(parameterHtml.find('input[name="rope_alpha"]').val()) || null,
            draft_ropeScale: Number(parameterHtml.find('input[name="draft_rope_scale"]').val()) || null,
            draft_ropeAlpha: Number(parameterHtml.find('input[name="draft_rope_alpha"]').val()) || null,
            gpuSplitAuto: parameterHtml.find('input[name="gpu_split_auto"]').prop('checked'),
            fasttensors: parameterHtml.find('input[name="fasttensors"]').prop('checked'),
            cacheMode: getKeyByValue(
                cache_mode,
                Number(
                    parameterHtml.find('select[name="cache_mode_select"]').find(':selected').val(),
                ) || 0,
            ),
        };

        // Handle GPU split setting
        const gpuSplitVal = String(parameterHtml.find('input[name="gpu_split_value"]').val());
        try {
            if (gpuSplitVal) {
                const gpuSplitArray = JSON.parse(gpuSplitVal) ?? [];
                if (Array.isArray(gpuSplitArray)) {
                    newParams['gpuSplit'] = gpuSplitArray;
                } else {
                    console.error(`Provided GPU split value (${gpuSplitArray}) is not an array.`);
                    newParams['gpuSplit'] = [];
                }
            }
        } catch (error) {
            console.error(error);
            newParams['gpuSplit'] = [];
        }
        textgenerationwebui_settings.tabbyModelLoadParams = newParams;

        saveSettingsDebounced();
    }
}
|
||||
|
||||
/* function migrateSettings() {
|
||||
let performSave = false;
|
||||
|
||||
const modelParamsInSettings = settings?.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams;
|
||||
|
||||
if (modelParamsInSettings && 'eightBitCache' in modelParamsInSettings) {
|
||||
const newParams = {
|
||||
cacheMode: settings.textgenerationwebui_settings?.tabbyModelLoadParams?.eightBitCache ? 'FP8' : 'FP16',
|
||||
};
|
||||
|
||||
delete settings.textgenerationwebui_settings?.tabbyModelLoadParams.modelParams.eightBitCache;
|
||||
Object.assign(settings.textgenerationwebui_settings?.tabbyModelLoadParams?.modelParams, newParams);
|
||||
|
||||
performSave = true;
|
||||
}
|
||||
|
||||
if (performSave) {
|
||||
saveSettingsDebounced();
|
||||
}
|
||||
} */
|
||||
|
||||
/**
 * Initialize the Tabby model-loader settings: create them from the defaults
 * when missing or empty, then persist them.
 * Registered on the APP_READY event.
 * @returns {Promise<void>}
 */
export async function loadTabbySettings() {
    if (!textgenerationwebui_settings.tabbyModelLoadParams) {
        console.warn('saw no tabby model loading object in text_gen settings');
        // BUGFIX: copy the defaults instead of assigning the shared object
        // reference, so later edits to the settings cannot mutate
        // defaultSettings itself.
        textgenerationwebui_settings.tabbyModelLoadParams = { ...defaultSettings };
    }
    // Create the settings if they don't exist
    tabbyModelLoadParams = textgenerationwebui_settings?.tabbyModelLoadParams || {};

    if (Object.keys(tabbyModelLoadParams).length === 0) {
        console.warn('tabby model loading settings were empty in text_gen settings, using default instead.');
        Object.assign(tabbyModelLoadParams, defaultSettings);
    }

    saveSettingsDebounced();
    //migrateSettings();

    //$('#tabby_url_override').val(settings.textgenerationwebui_settings?.tabbyModelLoadParams?.urlOverride ?? '');
    //$('#tabby_use_proxy').prop('checked', settings.textgenerationwebui_settings?.tabbyModelLoadParams?.useProxy ?? false);

    // Updating settings in the UI
    //const placeholder = await getTabbyAuth() ? '✔️ Key found' : '❌ Missing key';
    //$('#tabby_admin_key').attr('placeholder', placeholder);
}
|
||||
|
||||
|
||||
|
||||
// This function is called when the extension is loaded
|
||||
// Extension entry point: wire up UI handlers once the DOM is ready.
jQuery(async () => {
    // NOTE(review): the draft-model autocomplete wiring is currently
    // disabled in this module; restore it from history if needed.

    // Persist the URL override as the user types.
    $('#tabby_url_override').on('input', function () {
        const value = $(this).val();
        if (value !== undefined) {
            textgenerationwebui_settings.tabbyModelLoadParams.urlOverride = value;
            saveSettingsDebounced();
        }
    });

    // Persist the proxy toggle.
    $('#tabby_use_proxy').on('input', function () {
        textgenerationwebui_settings.tabbyModelLoadParams.useProxy = !!$(this).prop('checked');
        saveSettingsDebounced();
    });

    // Pre-create the progress bar so later 'value' calls are valid,
    // and keep its container hidden until a load starts.
    $('#loading_progressbar').progressbar({
        value: 0,
    });
    $('#loading_progress_container').hide();

    // Load persisted settings once the app signals readiness.
    eventSource.on(event_types.APP_READY, async () => {
        await loadTabbySettings();
    });
});
|
96
public/scripts/templates/tabbyModelParameters.html
Normal file
96
public/scripts/templates/tabbyModelParameters.html
Normal file
@@ -0,0 +1,96 @@
|
||||
<div id="tabby_loader_popup">
|
||||
<div>
|
||||
<h3><strong data-i18n="">Set Parameters</strong>
|
||||
<a href="https://github.com/theroyallab/tabbyAPI" class="notes-link" target="_blank">
|
||||
<span class="note-link-span">?</span>
|
||||
</a>
|
||||
</h3>
|
||||
|
||||
<small class="flex-container extensions_info justifyCenter">
|
||||
Set Parameters for Loading a Model
|
||||
</small>
|
||||
<hr />
|
||||
|
||||
Main Model
|
||||
<div class="flex-container">
|
||||
<div class="flex1">
|
||||
<label for="max_seq_len">
|
||||
<small data-i18n="Max Seq Len">Max Seq Len</small>
|
||||
</label>
|
||||
<input name="max_seq_len" class="text_pole" type="text" placeholder="ex: 4096" />
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<label for="cache_size">
|
||||
<small data-i18n="Cache Size">Cache Size</small>
|
||||
</label>
|
||||
<input name="cache_size" class="text_pole" type="text" placeholder="Max Seq Len" />
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<label for="max_batch_size">
|
||||
<small data-i18n="Max Batch Size">Max Batch Size</small>
|
||||
</label>
|
||||
<input name="max_batch_size" class="text_pole" type="text" placeholder="ex: 512" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex-container">
|
||||
<div class="flex1">
|
||||
<label for="rope_scale">
|
||||
<small data-i18n="Rope Scale">Rope Scale</small>
|
||||
</label>
|
||||
<input name="rope_scale" class="text_pole" type="text" placeholder="ex: 1.0" />
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<label for="rope_alpha">
|
||||
<small data-i18n="Rope Alpha">Rope Alpha</small>
|
||||
</label>
|
||||
<input name="rope_alpha" class="text_pole" type="text" placeholder="ex: 1.0" />
|
||||
</div>
|
||||
</div>
|
||||
Draft Model
|
||||
<div class="flex-container">
|
||||
<div class="flex1">
|
||||
<label for="draft_rope_scale">
|
||||
<small data-i18n="Rope Scale">Rope Scale</small>
|
||||
</label>
|
||||
<input name="draft_rope_scale" class="text_pole" type="text" placeholder="ex: 1.0" />
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<label for="draft_rope_alpha">
|
||||
<small data-i18n="Rope Alpha">Rope Alpha</small>
|
||||
</label>
|
||||
<input name="draft_rope_alpha" class="text_pole" type="text" placeholder="ex: 1.0" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Container for GPU Split and other options -->
|
||||
Loader Options
|
||||
<div class="flex-container padded-loader-setting-category">
|
||||
<div class="flex-container flexFlowColumn flexNoGap alignitemsstart">
|
||||
<small class="justifyCenter">GPU Split</small>
|
||||
<label class="checkbox flex-container">
|
||||
<input type="checkbox" name="gpu_split_auto" />
|
||||
<span data-i18n="Auto Split">Auto Split</span>
|
||||
</label>
|
||||
<input name="gpu_split_value" class="text_pole" type="text" placeholder="ex. [20.6, 24]" />
|
||||
</div>
|
||||
|
||||
<div class="flex-container flexFlowColumn flexNoGap alignitemsstart padded-loader-setting-block">
|
||||
<small>Other Options</small>
|
||||
<label class="checkbox flex-container">
|
||||
<input type="checkbox" name="fasttensors" />
|
||||
<span data-i18n="FastTensors">FastTensors</span>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="flex-container flexFlowColumn flexNoGap alignitemsstart">
|
||||
<small class="justifyCenter">Cache Mode</small>
|
||||
<select name="cache_mode_select" class="margin0">
|
||||
<option value="0">FP16</option>
|
||||
<option value="1">Q4</option>
|
||||
<option value="2">Q6</option>
|
||||
<option value="3">Q8</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
@@ -8,6 +8,7 @@ import { POPUP_TYPE, callGenericPopup } from './popup.js';
|
||||
import { t } from './i18n.js';
|
||||
import { accountStorage } from './util/AccountStorage.js';
|
||||
import { localizePagination, PAGINATION_TEMPLATE } from './utils.js';
|
||||
import { onTabbyLoadModelClick, onTabbyParameterEditorClick, onTabbyUnloadModelClick, fetchTabbyModels } from './tabbyModelLoader.js';
|
||||
|
||||
let mancerModels = [];
|
||||
let togetherModels = [];
|
||||
@@ -945,6 +946,11 @@ export function initTextGenModels() {
|
||||
$('#tabby_model').on('change', onTabbyModelSelect);
|
||||
$('#featherless_model').on('change', () => onFeatherlessModelSelect(String($('#featherless_model').val())));
|
||||
|
||||
$('#tabby_load_model_button').on('click', async () => await onTabbyLoadModelClick());
|
||||
$('#tabby_unload_model_button').on('click', async () => await onTabbyUnloadModelClick());
|
||||
$('#tabby_parameter_editor_button').on('click', async () => await onTabbyParameterEditorClick());
|
||||
$('#tabby_reload_model_list_button').on('click', async () => await fetchTabbyModels());
|
||||
|
||||
const providersSelect = $('.openrouter_providers');
|
||||
for (const provider of OPENROUTER_PROVIDERS) {
|
||||
providersSelect.append($('<option>', {
|
||||
|
@@ -6149,3 +6149,17 @@ body:not(.movingUI) .drawer-content.maximized {
|
||||
border-color: var(--error-color, #e87f7f);
|
||||
background-color: rgba(241, 163, 163, 0.2);
|
||||
}
|
||||
|
||||
.progress_container {
|
||||
padding-top: 10px;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
|
||||
#loading_progressbar.ui-widget-content {
|
||||
height: 10px;
|
||||
}
|
||||
|
||||
#loading_progressbar>.ui-widget-header {
|
||||
background: orange;
|
||||
border: none !important;
|
||||
}
|
@@ -627,6 +627,133 @@ tabby.post('/download', async function (request, response) {
|
||||
}
|
||||
});
|
||||
|
||||
// Proxy endpoint: forward a model-unload request to the TabbyAPI backend,
// but only when the supplied API key has admin permission.
tabby.post('/unload', async function (request, response) {
    try {
        // Normalize the backend URL (strip a trailing slash).
        const baseUrl = String(request.body.api_server).replace(/\/$/, '');

        const args = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(request.body),
            timeout: 0,
        };

        setAdditionalHeaders(request, args, baseUrl);

        // Verify the key grants admin rights before attempting the unload.
        const permissionResponse = await fetch(`${baseUrl}/v1/auth/permission`, {
            headers: args.headers,
        });

        if (!permissionResponse.ok) {
            console.error('API Permission error:', permissionResponse.status, permissionResponse.statusText);
            return response.status(500).send({ error: true });
        }

        /** @type {any} */
        const permissionJson = await permissionResponse.json();
        if (permissionJson['permission'] !== 'admin') {
            return response.status(403).send({ error: true });
        }

        const fetchResponse = await fetch(`${baseUrl}/v1/model/unload`, args);

        if (!fetchResponse.ok) {
            console.error('Tabby unload error:', fetchResponse.status, fetchResponse.statusText);
            return response.status(500).send({ error: true });
        }

        return response.send({ ok: true });
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
|
||||
|
||||
tabby.post('/load', async function (request, response) {
    try {
        // Normalize the target server URL (strip a single trailing slash).
        const baseUrl = String(request.body.api_server).replace(/\/$/, '');

        // The client wraps the actual Tabby load payload as a JSON string in
        // `toTabby`; the remaining body fields (api_server, api_type) are
        // ST-internal routing data and must not be forwarded to the server.
        /** @type {any} */
        const toTabby = JSON.parse(request.body.toTabby);

        const args = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(toTabby),
            timeout: 0, // model loading can take minutes; never time out
        };

        // Attach the stored API key / extra headers for this server.
        // NOTE: do not log `args` after this point — it contains the admin key.
        setAdditionalHeaders(request, args, baseUrl);

        // Loading a model requires an admin-scoped key; verify first.
        const permissionResponse = await fetch(`${baseUrl}/v1/auth/permission`, {
            headers: args.headers,
        });

        if (permissionResponse.ok) {
            /** @type {any} */
            const permissionJson = await permissionResponse.json();

            if (permissionJson['permission'] !== 'admin') {
                return response.status(403).send({ error: true });
            }
        } else {
            console.error('API Permission error:', permissionResponse.status, permissionResponse.statusText);
            return response.status(500).send({ error: true });
        }

        const fetchResponse = await fetch(`${baseUrl}/v1/model/load`, args);

        if (!fetchResponse.ok) {
            console.error('Tabby load error:', fetchResponse.status, fetchResponse.statusText);
            return response.status(500).send({ error: true });
        }

        if (!fetchResponse.body) {
            console.error('No response body received from LLM server');
            return response.status(500).send({ error: true });
        }

        // Tabby reports load progress as Server-Sent Events; relay the stream
        // to the client verbatim.
        response.set({
            'Content-Type': 'text/event-stream',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
        });

        fetchResponse.body.on('data', (chunk) => {
            response.write(chunk);
            // flush() only exists when the compression middleware wraps the
            // response; guard so plain deployments don't crash mid-stream.
            if (typeof response.flush === 'function') {
                response.flush();
            }
        });

        fetchResponse.body.on('error', (err) => {
            // Headers are already sent at this point, so a status code can no
            // longer be set — just terminate the SSE stream.
            console.error('Stream error:', err);
            response.end();
        });

        fetchResponse.body.on('end', () => {
            response.end();
        });
    } catch (error) {
        console.error(error);
        return response.sendStatus(500);
    }
});
|
||||
|
||||
|
||||
|
||||
// Mount the per-backend sub-routers under the text-completions API root.
router.use('/ollama', ollama);
router.use('/llamacpp', llamacpp);
router.use('/tabby', tabby);
|
||||
|
Reference in New Issue
Block a user