From dd7391caafe13a3b2ddc55ab501450ee6d0d6911 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 13 Feb 2025 20:17:33 +0200 Subject: [PATCH] Ollama: Add num_batch config value --- default/config.yaml | 4 ++++ src/constants.js | 1 + src/endpoints/backends/text-completions.js | 4 ++++ 3 files changed, 9 insertions(+) diff --git a/default/config.yaml b/default/config.yaml index 94673ca11..5c885422c 100644 --- a/default/config.yaml +++ b/default/config.yaml @@ -183,6 +183,10 @@ ollama: # * 0: Unload the model immediately after the request # * N (any positive number): Keep the model loaded for N seconds after the request. keepAlive: -1 + # Controls the "num_batch" (batch size) parameter of the generation request + # * -1 (or any non-positive value): Use the default value of the model + # * N (positive number): Use the specified value. Must be a power of 2, e.g. 128, 256, 512. + batchSize: -1 # -- ANTHROPIC CLAUDE API CONFIGURATION -- claude: # Enables caching of the system prompt (if supported). diff --git a/src/constants.js b/src/constants.js index 30f6f2da0..66697fed4 100644 --- a/src/constants.js +++ b/src/constants.js @@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [ export const OLLAMA_KEYS = [ 'num_predict', 'num_ctx', + 'num_batch', 'stop', 'temperature', 'repeat_penalty', diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js index e58474ed4..c5da6280d 100644 --- a/src/endpoints/backends/text-completions.js +++ b/src/endpoints/backends/text-completions.js @@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) { if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) { const keepAlive = getConfigValue('ollama.keepAlive', -1); + const numBatch = getConfigValue('ollama.batchSize', -1); + if (numBatch > 0) { + request.body['num_batch'] = numBatch; + } args.body = JSON.stringify({ model: request.body.model, prompt: request.body.prompt,