Request token probabilities from llama.cpp backend

The llama.cpp server reports token probabilities as plain probabilities
in the range 0 to 1, rather than as log-probabilities.
This commit is contained in:
Deciare
2024-02-23 14:01:46 -05:00
parent 2d152d2705
commit 344b9eedbc
2 changed files with 22 additions and 5 deletions

View File

@@ -694,7 +694,7 @@ async function generateTextGenWithStreaming(generate_data, signal) {
} else {
const newText = data?.choices?.[0]?.text || data?.content || '';
text += newText;
-logprobs = parseTextgenLogprobs(newText, data.choices?.[0]?.logprobs);
+logprobs = parseTextgenLogprobs(newText, data.choices?.[0]?.logprobs || data?.completion_probabilities);
}
yield { text, swipes, logprobs };
@@ -727,6 +727,14 @@ function parseTextgenLogprobs(token, logprobs) {
const candidates = Object.entries(topLogprobs[0]);
return { token, topLogprobs: candidates };
}
+case LLAMACPP: {
+    /** @type {Record<string, number>[]} */
+    if (!logprobs?.length) {
+        return null;
+    }
+    const candidates = logprobs[0].probs.map(x => [ x.tok_str, x.prob ]);
+    return { token, topLogprobs: candidates };
+}
default:
return null;
}
@@ -867,6 +875,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'n_predict': maxTokens,
'mirostat': settings.mirostat_mode,
'ignore_eos': settings.ban_eos_token,
+'n_probs': power_user.request_token_probabilities ? 10 : undefined,
};
const aphroditeParams = {
'n': canMultiSwipe ? settings.n : 1,