From 9eba076ae4724e86f4c0e34d2645c80fecf39e0c Mon Sep 17 00:00:00 2001
From: Deciare <1689220+deciare@users.noreply.github.com>
Date: Fri, 23 Feb 2024 23:01:04 -0500
Subject: [PATCH] Sampler order for llama.cpp server backend
---
default/settings.json | 8 +++++
public/index.html | 21 +++++++++++++
public/scripts/textgen-settings.js | 48 ++++++++++++++++++++++++++++++
3 files changed, 77 insertions(+)
diff --git a/default/settings.json b/default/settings.json
index 4383798ff..c351ab04e 100644
--- a/default/settings.json
+++ b/default/settings.json
@@ -61,6 +61,14 @@
"min_p",
"mirostat"
],
+ "samplers": [
+ "top_k",
+ "tfs_z",
+ "typical_p",
+ "top_p",
+ "min_p",
+ "temperature"
+ ],
"mirostat_mode": 0,
"mirostat_tau": 5,
"mirostat_eta": 0.1,
diff --git a/public/index.html b/public/index.html
index 642344aa0..1a7f787f7 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1550,6 +1550,27 @@
Load default order
+
+
+
+ Samplers Order
+
+
+
+ llama.cpp only. Determines the order of samplers. If Mirostat mode is not 0, sampler order is ignored.
+
+
+
Temperature
+
Top K
+
Top P
+
Typical P
+
Tail Free Sampling
+
Min P
+
+
+
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index b0cae37c7..ddc8fdc9d 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -34,6 +34,14 @@ export const textgen_types = {
};
const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP } = textgen_types;
+const LLAMACPP_DEFAULT_ORDER = [ // Default llama.cpp sampler order: initial `settings.samplers`, fallback for non-array values, and reset target.
+ 'top_k',
+ 'tfs_z',
+ 'typical_p',
+ 'top_p',
+ 'min_p',
+ 'temperature',
+];
const OOBA_DEFAULT_ORDER = [
'temperature',
'dynamic_temperature',
@@ -111,6 +119,7 @@ const settings = {
grammar_string: '',
banned_tokens: '',
sampler_priority: OOBA_DEFAULT_ORDER,
+ samplers: LLAMACPP_DEFAULT_ORDER,
//n_aphrodite: 1,
//best_of_aphrodite: 1,
ignore_eos_token_aphrodite: false,
@@ -186,6 +195,7 @@ const setting_names = [
//'prompt_log_probs_aphrodite'
'sampler_order',
'sampler_priority',
+ 'samplers',
'n',
'logit_bias',
'custom_model',
@@ -449,6 +459,16 @@ function sortKoboldItemsByOrder(orderArray) {
}
}
+function sortLlamacppItemsByOrder(orderArray) {
+ console.debug('Preset samplers order: ', orderArray);
+ const $container = $('#llamacpp_samplers_sortable');
+
+ orderArray.forEach((name) => {
+ const $item = $container.find(`[data-name="${name}"]`).detach();
+ $container.append($item);
+ });
+}
+
function sortOobaItemsByOrder(orderArray) {
console.debug('Preset samplers order: ', orderArray);
const $container = $('#sampler_priority_container');
@@ -479,6 +499,26 @@ jQuery(function () {
saveSettingsDebounced();
});
+ $('#llamacpp_samplers_sortable').sortable({
+ delay: getSortableDelay(),
+ stop: function () {
+ const order = [];
+ $('#llamacpp_samplers_sortable').children().each(function () {
+ order.push($(this).data('name'));
+ });
+ settings.samplers = order;
+ console.log('Samplers reordered:', settings.samplers);
+ saveSettingsDebounced();
+ },
+ });
+
+ $('#llamacpp_samplers_default_order').on('click', function () {
+ sortLlamacppItemsByOrder(LLAMACPP_DEFAULT_ORDER);
+ settings.samplers = LLAMACPP_DEFAULT_ORDER;
+ console.log('Default samplers order loaded:', settings.samplers);
+ saveSettingsDebounced();
+ });
+
$('#sampler_priority_container').sortable({
delay: getSortableDelay(),
stop: function () {
@@ -674,6 +714,13 @@ function setSettingByName(setting, value, trigger) {
return;
}
+ if ('samplers' === setting) {
+ value = Array.isArray(value) ? value : LLAMACPP_DEFAULT_ORDER;
+ sortLlamacppItemsByOrder(value);
+ settings.samplers = value;
+ return;
+ }
+
if ('logit_bias' === setting) {
settings.logit_bias = Array.isArray(value) ? value : [];
return;
@@ -882,6 +929,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'dynatemp_exponent': settings.dynatemp ? settings.dynatemp_exponent : 1,
'smoothing_factor': settings.smoothing_factor,
'sampler_priority': settings.type === OOBA ? settings.sampler_priority : undefined,
+ 'samplers': settings.type === LLAMACPP ? settings.samplers : undefined,
'stopping_strings': getStoppingStrings(isImpersonate, isContinue),
'stop': getStoppingStrings(isImpersonate, isContinue),
'truncation_length': max_context,