From f305ba7ce7f7986e742f679491297a628c699472 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 11 Aug 2024 17:32:31 +0300 Subject: [PATCH] Configurable ollama keep_alive Closes #1859 --- default/config.yaml | 7 +++++++ src/endpoints/backends/text-completions.js | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/default/config.yaml b/default/config.yaml index 0a6ee7df8..354c295ff 100644 --- a/default/config.yaml +++ b/default/config.yaml @@ -98,6 +98,13 @@ mistral: # Enables prefilling of the reply with the last assistant message in the prompt # CAUTION: The prefix is echoed into the completion. You may want to use regex to trim it out. enablePrefix: false +# -- OLLAMA API CONFIGURATION -- +ollama: + # Controls how long the model will stay loaded into memory following the request + # * -1: Keep the model loaded indefinitely + # * 0: Unload the model immediately after the request + # * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s) + keepAlive: -1 # -- SERVER PLUGIN CONFIGURATION -- enableServerPlugins: false # User session timeout *in seconds* (defaults to 24 hours). diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js index 98ccb50f4..ba113f2fd 100644 --- a/src/endpoints/backends/text-completions.js +++ b/src/endpoints/backends/text-completions.js @@ -5,7 +5,7 @@ const Readable = require('stream').Readable; const { jsonParser } = require('../../express-common'); const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS, FEATHERLESS_KEYS } = require('../../constants'); -const { forwardFetchResponse, trimV1 } = require('../../util'); +const { forwardFetchResponse, trimV1, getConfigValue } = require('../../util'); const { setAdditionalHeaders } = require('../../additional-headers'); const router = express.Router(); @@ -325,11 +325,12 @@ router.post('/generate', jsonParser, async function (request, response) { } if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) { + const keepAlive = getConfigValue('ollama.keepAlive', -1); args.body = JSON.stringify({ model: request.body.model, prompt: request.body.prompt, stream: request.body.stream ?? false, - keep_alive: -1, + keep_alive: keepAlive, raw: true, options: _.pickBy(request.body, (_, key) => OLLAMA_KEYS.includes(key)), });