Configurable ollama keep_alive

Closes #1859
This commit is contained in:
Cohee 2024-08-11 17:32:31 +03:00
parent 8df27254fd
commit f305ba7ce7
2 changed files with 10 additions and 2 deletions

View File

@ -98,6 +98,13 @@ mistral:
# Enables prefilling of the reply with the last assistant message in the prompt
# CAUTION: The prefix is echoed into the completion. You may want to use regex to trim it out.
enablePrefix: false
# -- OLLAMA API CONFIGURATION --
ollama:
# Controls how long the model stays loaded in memory after a request completes
# * -1: Keep the model loaded indefinitely (any negative value has the same effect)
# * 0: Unload the model immediately after the request
# * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s)
# A plain positive number is interpreted as seconds (e.g. 300 is equivalent to 5m)
keepAlive: -1
# -- SERVER PLUGIN CONFIGURATION --
enableServerPlugins: false
# User session timeout *in seconds* (defaults to 24 hours).

View File

@ -5,7 +5,7 @@ const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS, FEATHERLESS_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1 } = require('../../util');
const { forwardFetchResponse, trimV1, getConfigValue } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
const router = express.Router();
@ -325,11 +325,12 @@ router.post('/generate', jsonParser, async function (request, response) {
}
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
const keepAlive = getConfigValue('ollama.keepAlive', -1);
args.body = JSON.stringify({
model: request.body.model,
prompt: request.body.prompt,
stream: request.body.stream ?? false,
keep_alive: -1,
keep_alive: keepAlive,
raw: true,
options: _.pickBy(request.body, (_, key) => OLLAMA_KEYS.includes(key)),
});