parent
8df27254fd
commit
f305ba7ce7
|
@ -98,6 +98,13 @@ mistral:
|
|||
# Enables prefilling of the reply with the last assistant message in the prompt
|
||||
# CAUTION: The prefix is echoed into the completion. You may want to use regex to trim it out.
|
||||
enablePrefix: false
|
||||
# -- OLLAMA API CONFIGURATION --
|
||||
ollama:
|
||||
# Controls how long the model stays loaded in memory after a request completes
|
||||
# * -1: Keep the model loaded indefinitely
|
||||
# * 0: Unload the model immediately after the request
|
||||
# * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s); a plain number is interpreted as seconds
|
||||
keepAlive: -1
|
||||
# -- SERVER PLUGIN CONFIGURATION --
|
||||
enableServerPlugins: false
|
||||
# User session timeout *in seconds* (defaults to 24 hours).
|
||||
|
|
|
@ -5,7 +5,7 @@ const Readable = require('stream').Readable;
|
|||
|
||||
const { jsonParser } = require('../../express-common');
|
||||
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS, FEATHERLESS_KEYS } = require('../../constants');
|
||||
const { forwardFetchResponse, trimV1 } = require('../../util');
|
||||
const { forwardFetchResponse, trimV1, getConfigValue } = require('../../util');
|
||||
const { setAdditionalHeaders } = require('../../additional-headers');
|
||||
|
||||
const router = express.Router();
|
||||
|
@ -325,11 +325,12 @@ router.post('/generate', jsonParser, async function (request, response) {
|
|||
}
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
||||
const keepAlive = getConfigValue('ollama.keepAlive', -1);
|
||||
args.body = JSON.stringify({
|
||||
model: request.body.model,
|
||||
prompt: request.body.prompt,
|
||||
stream: request.body.stream ?? false,
|
||||
keep_alive: -1,
|
||||
keep_alive: keepAlive,
|
||||
raw: true,
|
||||
options: _.pickBy(request.body, (_, key) => OLLAMA_KEYS.includes(key)),
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue