Request token probabilities from llama.cpp backend
The llama.cpp server reports token probabilities as plain values in the range 0 to 1 rather than as logarithms.
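For context, here is a minimal sketch of the payload this patch consumes. The field names (completion_probabilities, probs, tok_str, prob) are taken from the diff below; the chunk shape and the concrete values are illustrative assumptions, not actual server output.

    // Illustrative llama.cpp /completion streaming chunk with token
    // probabilities enabled (values are made up):
    const exampleChunk = {
        content: ' the',
        completion_probabilities: [
            {
                probs: [
                    { tok_str: ' the', prob: 0.62 }, // plain probability, not a logprob
                    { tok_str: ' a',   prob: 0.21 },
                    { tok_str: ' an',  prob: 0.05 },
                ],
            },
        ],
    };

    // Unlike OpenAI-style logprobs, these are probabilities in [0, 1];
    // Math.log(0.62) ≈ -0.478 would be the equivalent log-probability.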
@@ -694,7 +694,7 @@ async function generateTextGenWithStreaming(generate_data, signal) {
             } else {
                 const newText = data?.choices?.[0]?.text || data?.content || '';
                 text += newText;
-                logprobs = parseTextgenLogprobs(newText, data.choices?.[0]?.logprobs);
+                logprobs = parseTextgenLogprobs(newText, data.choices?.[0]?.logprobs || data?.completion_probabilities);
             }

             yield { text, swipes, logprobs };
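The new || fallback lets one code path serve both response shapes. A rough sketch with assumed mock chunks, for illustration only:

    // OpenAI-compatible backends put logprobs on the first choice:
    const oaiStyleChunk = { choices: [{ text: ' the', logprobs: { /* backend-specific */ } }] };

    // llama.cpp's native endpoint has no choices array, so
    // data.choices?.[0]?.logprobs evaluates to undefined and the
    // expression falls back to completion_probabilities:
    const llamaCppChunk = { content: ' the', completion_probabilities: [{ probs: [{ tok_str: ' the', prob: 0.62 }] }] };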
@@ -727,6 +727,14 @@ function parseTextgenLogprobs(token, logprobs) {
             const candidates = Object.entries(topLogprobs[0]);
             return { token, topLogprobs: candidates };
         }
+        case LLAMACPP: {
+            /** @type {Record<string, number>[]} */
+            if (!logprobs?.length) {
+                return null;
+            }
+            const candidates = logprobs[0].probs.map(x => [ x.tok_str, x.prob ]);
+            return { token, topLogprobs: candidates };
+        }
         default:
             return null;
     }
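To see what the new LLAMACPP branch produces, consider a sample input. The shape is taken from the diff; the numbers are made up.

    const sample = [
        { probs: [{ tok_str: ' the', prob: 0.62 }, { tok_str: ' a', prob: 0.21 }] },
    ];

    // parseTextgenLogprobs(' the', sample) would return:
    // { token: ' the', topLogprobs: [[' the', 0.62], [' a', 0.21]] }
    // The candidate values remain raw probabilities (0..1), matching the
    // commit message: llama.cpp reports probabilities, not logarithms.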
@@ -867,6 +875,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'n_predict': maxTokens,
         'mirostat': settings.mirostat_mode,
         'ignore_eos': settings.ban_eos_token,
+        'n_probs': power_user.request_token_probabilities ? 10 : undefined,
     };
     const aphroditeParams = {
         'n': canMultiSwipe ? settings.n : 1,
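With the power_user.request_token_probabilities toggle on, the llama.cpp request body would carry n_probs. A sketch, with unrelated fields omitted:

    const body = {
        'prompt': finalPrompt,
        'n_predict': maxTokens,
        'n_probs': 10, // ask the server for the top 10 candidates per token
    };

When the toggle is off, the property's value is undefined, and JSON.stringify drops undefined-valued properties, so the parameter is simply absent from the request.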