From dd7391caafe13a3b2ddc55ab501450ee6d0d6911 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Thu, 13 Feb 2025 20:17:33 +0200
Subject: [PATCH] Ollama: Add num_batch config value

---
 default/config.yaml                        | 4 ++++
 src/constants.js                           | 1 +
 src/endpoints/backends/text-completions.js | 4 ++++
 3 files changed, 9 insertions(+)

diff --git a/default/config.yaml b/default/config.yaml
index 94673ca11..5c885422c 100644
--- a/default/config.yaml
+++ b/default/config.yaml
@@ -183,6 +183,10 @@ ollama:
   # * 0: Unload the model immediately after the request
   # * N (any positive number): Keep the model loaded for N seconds after the request.
   keepAlive: -1
+  # Controls the "num_batch" (batch size) parameter of the generation request.
+  # * -1 (or any non-positive value): Use the default value of the model.
+  # * N (positive number): Use the specified value. Must be a power of 2, e.g. 128, 256, 512.
+  batchSize: -1
 # -- ANTHROPIC CLAUDE API CONFIGURATION --
 claude:
   # Enables caching of the system prompt (if supported).
diff --git a/src/constants.js b/src/constants.js
index 30f6f2da0..66697fed4 100644
--- a/src/constants.js
+++ b/src/constants.js
@@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [
 export const OLLAMA_KEYS = [
     'num_predict',
     'num_ctx',
+    'num_batch',
     'stop',
     'temperature',
     'repeat_penalty',
diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js
index e58474ed4..c5da6280d 100644
--- a/src/endpoints/backends/text-completions.js
+++ b/src/endpoints/backends/text-completions.js
@@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) {
 
         if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
             const keepAlive = getConfigValue('ollama.keepAlive', -1);
+            const numBatch = getConfigValue('ollama.batchSize', -1);
+            if (numBatch > 0) {
+                request.body['num_batch'] = numBatch;
+            }
             args.body = JSON.stringify({
                 model: request.body.model,
                 prompt: request.body.prompt,