Merge pull request #3475 from SillyTavern/ollama-batch

Ollama: Add num_batch config value
This commit is contained in:
Cohee 2025-02-14 12:34:13 +02:00 committed by GitHub
commit c47d997a2d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 9 additions and 0 deletions

View File

@ -183,6 +183,10 @@ ollama:
# * 0: Unload the model immediately after the request
# * N (any positive number): Keep the model loaded for N seconds after the request.
keepAlive: -1
# Controls the "num_batch" (batch size) parameter sent with each generation request.
# * -1: Use the default value of the model (any non-positive value is treated the same way).
# * N (any positive number): Use the specified value. Must be a power of 2, e.g. 128, 256, 512, etc.
batchSize: -1
# -- ANTHROPIC CLAUDE API CONFIGURATION --
claude:
# Enables caching of the system prompt (if supported).

View File

@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [
export const OLLAMA_KEYS = [
'num_predict',
'num_ctx',
'num_batch',
'stop',
'temperature',
'repeat_penalty',

View File

@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) {
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
const keepAlive = getConfigValue('ollama.keepAlive', -1);
const numBatch = getConfigValue('ollama.batchSize', -1);
if (numBatch > 0) {
request.body['num_batch'] = numBatch;
}
args.body = JSON.stringify({
model: request.body.model,
prompt: request.body.prompt,