Update llama.cpp textgen settings

* Add min_keep, a llama.cpp-exclusive setting for constraining the effect of truncation samplers
* Enable nsigma for llama.cpp, add the llama.cpp alias top_n_sigma, and add nsigma to the llama.cpp sampler order block
* Allow a negative value of nsigma, since a negative value means 'disabled' in llama.cpp (while 0 is deterministic)
* Remove tfs and top_a as these are not supported by llama.cpp (tfs was removed, and top_a was never supported)
* Correct the identification string for typical_p in the llama.cpp sampler order block
* Add penalties to the llama.cpp sampler order block
This commit is contained in:
DocShotgun
2025-05-06 00:32:29 -07:00
parent 6625e4036e
commit bf66a39579
2 changed files with 25 additions and 10 deletions

View File

@@ -1284,7 +1284,7 @@
<input class="neo-range-slider" type="range" id="min_p_textgenerationwebui" name="volume" min="0" max="1" step="0.001"> <input class="neo-range-slider" type="range" id="min_p_textgenerationwebui" name="volume" min="0" max="1" step="0.001">
<input class="neo-range-input" type="number" min="0" max="1" step="0.001" data-for="min_p_textgenerationwebui" id="min_p_counter_textgenerationwebui"> <input class="neo-range-input" type="number" min="0" max="1" step="0.001" data-for="min_p_textgenerationwebui" id="min_p_counter_textgenerationwebui">
</div> </div>
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0"> <div data-tg-type-mode="except" data-tg-type="generic,llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small> <small>
<span data-i18n="Top A">Top A</span> <span data-i18n="Top A">Top A</span>
<div class="fa-solid fa-circle-info opacity50p" title="Top A sets a threshold for token selection based on the square of the highest token probability.&#13;E.g if the Top-A value is 0.2 and the top token's probability is 50%, tokens with probabilities below 5% (0.2 * 0.5^2) are excluded.&#13;Set to 0 to disable." data-i18n="[title]Top_A_desc"></div> <div class="fa-solid fa-circle-info opacity50p" title="Top A sets a threshold for token selection based on the square of the highest token probability.&#13;E.g if the Top-A value is 0.2 and the top token's probability is 50%, tokens with probabilities below 5% (0.2 * 0.5^2) are excluded.&#13;Set to 0 to disable." data-i18n="[title]Top_A_desc"></div>
@@ -1292,7 +1292,7 @@
<input class="neo-range-slider" type="range" id="top_a_textgenerationwebui" name="volume" min="0" max="1" step="0.01"> <input class="neo-range-slider" type="range" id="top_a_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
<input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="top_a_textgenerationwebui" id="top_a_counter_textgenerationwebui"> <input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="top_a_textgenerationwebui" id="top_a_counter_textgenerationwebui">
</div> </div>
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0"> <div data-tg-type-mode="except" data-tg-type="generic,llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small> <small>
<span data-i18n="TFS">TFS</span> <span data-i18n="TFS">TFS</span>
<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Tail_Free_Sampling_desc" title="Tail-Free Sampling (TFS) searches for a tail of low-probability tokens in the distribution,&#13;by analyzing the rate of change in token probabilities using derivatives. It retains tokens up to a threshold (e.g., 0.3) based on the normalized second derivative.&#13;The closer to 0, the more discarded tokens. Set to 1.0 to disable."></div> <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Tail_Free_Sampling_desc" title="Tail-Free Sampling (TFS) searches for a tail of low-probability tokens in the distribution,&#13;by analyzing the rate of change in token probabilities using derivatives. It retains tokens up to a threshold (e.g., 0.3) based on the normalized second derivative.&#13;The closer to 0, the more discarded tokens. Set to 1.0 to disable."></div>
@@ -1308,13 +1308,21 @@
<input class="neo-range-slider" type="range" id="epsilon_cutoff_textgenerationwebui" name="volume" min="0" max="9" step="0.01"> <input class="neo-range-slider" type="range" id="epsilon_cutoff_textgenerationwebui" name="volume" min="0" max="9" step="0.01">
<input class="neo-range-input" type="number" min="0" max="9" step="0.01" data-for="epsilon_cutoff_textgenerationwebui" id="epsilon_cutoff_counter_textgenerationwebui"> <input class="neo-range-input" type="number" min="0" max="9" step="0.01" data-for="epsilon_cutoff_textgenerationwebui" id="epsilon_cutoff_counter_textgenerationwebui">
</div> </div>
<div data-tg-type="aphrodite,koboldcpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0"> <div data-tg-type="aphrodite,koboldcpp,llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small> <small>
<span data-i18n="Top nsigma">Top nsigma</span> <span data-i18n="Top nsigma">Top nsigma</span>
<div class="fa-solid fa-circle-info opacity50p" title="A sampling method that filters logits based on their statistical properties. It keeps tokens within n standard deviations of the maximum logit value, providing a simpler alternative to top-p/top-k sampling while maintaining sampling stability across different temperatures."></div> <div class="fa-solid fa-circle-info opacity50p" title="A sampling method that filters logits based on their statistical properties. It keeps tokens within n standard deviations of the maximum logit value, providing a simpler alternative to top-p/top-k sampling while maintaining sampling stability across different temperatures."></div>
</small> </small>
<input class="neo-range-slider" type="range" id="nsigma_textgenerationwebui" name="volume" min="0" max="5" step="0.01"> <input class="neo-range-slider" type="range" id="nsigma_textgenerationwebui" name="volume" min="-0.01" max="5" step="0.01">
<input class="neo-range-input" type="number" min="0" max="5" step="0.01" data-for="nsigma_textgenerationwebui" id="nsigma_counter_textgenerationwebui"> <input class="neo-range-input" type="number" min="-0.01" max="5" step="0.01" data-for="nsigma_textgenerationwebui" id="nsigma_counter_textgenerationwebui">
</div>
<div data-tg-type="llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="Min Keep">Min Keep</span>
<div class="fa-solid fa-circle-info opacity50p" title="A sampling modifier that ensures that truncation samplers such as top-p, min-p, typical-p, and xtc return at least this many tokens. Set to 0 to disable."></div>
</small>
<input class="neo-range-slider" type="range" id="min_keep_textgenerationwebui" name="volume" min="0" max="50" step="1">
<input class="neo-range-input" type="number" min="0" max="50" step="1" data-for="min_keep_textgenerationwebui" id="min_keep_counter_textgenerationwebui">
</div> </div>
<div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0"> <div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small> <small>
@@ -1769,11 +1777,12 @@
<div data-name="temperature" draggable="true"><span>Temperature</span><small></small></div> <div data-name="temperature" draggable="true"><span>Temperature</span><small></small></div>
<div data-name="top_k" draggable="true"><span>Top K</span><small></small></div> <div data-name="top_k" draggable="true"><span>Top K</span><small></small></div>
<div data-name="top_p" draggable="true"><span>Top P</span><small></small></div> <div data-name="top_p" draggable="true"><span>Top P</span><small></small></div>
<div data-name="typical_p" draggable="true"><span>Typical P</span><small></small></div> <div data-name="typ_p" draggable="true"><span>Typical P</span><small></small></div>
<div data-name="tfs_z" draggable="true"><span>Tail Free Sampling</span><small></small></div>
<div data-name="min_p" draggable="true"><span>Min P</span><small></small></div> <div data-name="min_p" draggable="true"><span>Min P</span><small></small></div>
<div data-name="xtc" draggable="true"><span>Exclude Top Choices</span><small></small></div> <div data-name="xtc" draggable="true"><span>Exclude Top Choices</span><small></small></div>
<div data-name="dry" draggable="true"><span>DRY</span><small></small></div> <div data-name="dry" draggable="true"><span>DRY</span><small></small></div>
<div data-name="penalties" draggable="true"><span>Rep/Freq/Pres Penalties</span><small></small></div>
<div data-name="top_n_sigma" draggable="true"><span>Top N-Sigma</span><small></small></div>
</div> </div>
<div id="llamacpp_samplers_default_order" class="menu_button menu_button_icon"> <div id="llamacpp_samplers_default_order" class="menu_button menu_button_icon">
<span data-i18n="Load default order">Load default order</span> <span data-i18n="Load default order">Load default order</span>

View File

@@ -56,10 +56,11 @@ const {
} = textgen_types; } = textgen_types;
const LLAMACPP_DEFAULT_ORDER = [ const LLAMACPP_DEFAULT_ORDER = [
'penalties',
'dry', 'dry',
'top_n_sigma',
'top_k', 'top_k',
'tfs_z', 'typ_p',
'typical_p',
'top_p', 'top_p',
'min_p', 'min_p',
'xtc', 'xtc',
@@ -212,6 +213,7 @@ const settings = {
xtc_threshold: 0.1, xtc_threshold: 0.1,
xtc_probability: 0, xtc_probability: 0,
nsigma: 0.0, nsigma: 0.0,
min_keep: 0,
featherless_model: '', featherless_model: '',
generic_model: '', generic_model: '',
}; };
@@ -294,6 +296,7 @@ export const setting_names = [
'xtc_threshold', 'xtc_threshold',
'xtc_probability', 'xtc_probability',
'nsigma', 'nsigma',
'min_keep',
'generic_model', 'generic_model',
]; ];
@@ -803,7 +806,8 @@ jQuery(function () {
'dry_penalty_last_n_textgenerationwebui': 0, 'dry_penalty_last_n_textgenerationwebui': 0,
'xtc_threshold_textgenerationwebui': 0.1, 'xtc_threshold_textgenerationwebui': 0.1,
'xtc_probability_textgenerationwebui': 0, 'xtc_probability_textgenerationwebui': 0,
'nsigma_textgenerationwebui': 0, 'nsigma_textgenerationwebui': [LLAMACPP].includes(settings.type) ? -0.01 : 0,
'min_keep_textgenerationwebui': 0,
}; };
for (const [id, value] of Object.entries(inputs)) { for (const [id, value] of Object.entries(inputs)) {
@@ -1332,6 +1336,8 @@ export async function getTextGenGenerationData(finalPrompt, maxTokens, isImperso
'xtc_threshold': settings.xtc_threshold, 'xtc_threshold': settings.xtc_threshold,
'xtc_probability': settings.xtc_probability, 'xtc_probability': settings.xtc_probability,
'nsigma': settings.nsigma, 'nsigma': settings.nsigma,
'top_n_sigma': settings.nsigma,
'min_keep': settings.min_keep,
}; };
const nonAphroditeParams = { const nonAphroditeParams = {
'rep_pen': settings.rep_pen, 'rep_pen': settings.rep_pen,