Merge pull request #3475 from SillyTavern/ollama-batch

Ollama: Add num_batch config value
2025-02-18 21:20:39 +01:00 · 2025-02-14 12:34:13 +02:00 · 2025-02-14 12:34:13 +02:00 · c47d997a2d
commit c47d997a2d
parent 09aaa9181c dd7391caaf
3 changed files with 9 additions and 0 deletions
--- a/default/config.yaml
+++ b/default/config.yaml
@@ -183,6 +183,10 @@ ollama:
  # * 0: Unload the model immediately after the request
  # * N (any positive number): Keep the model loaded for N seconds after the request.
  keepAlive: -1
+  # Controls the "num_batch" (batch size) parameter sent with Ollama generation requests.
+  # * -1: Use the model's default value (any non-positive value has the same effect).
+  # * N (positive number): Use the specified value. Must be a power of 2, e.g. 128, 256, or 512.
+  batchSize: -1
 # -- ANTHROPIC CLAUDE API CONFIGURATION --
 claude:
  # Enables caching of the system prompt (if supported).
--- a/src/constants.js
+++ b/src/constants.js
@@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [
 export const OLLAMA_KEYS = [
    'num_predict',
    'num_ctx',
+    'num_batch',
    'stop',
    'temperature',
    'repeat_penalty',
--- a/src/endpoints/backends/text-completions.js
+++ b/src/endpoints/backends/text-completions.js
@@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) {

        if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
            const keepAlive = getConfigValue('ollama.keepAlive', -1);
+            const numBatch = getConfigValue('ollama.batchSize', -1);
+            if (numBatch > 0) {
+                request.body['num_batch'] = numBatch;
+            }
            args.body = JSON.stringify({
                model: request.body.model,
                prompt: request.body.prompt,