mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-18 21:20:39 +01:00
Merge pull request #3475 from SillyTavern/ollama-batch
Ollama: Add num_batch config value
This commit is contained in:
commit
c47d997a2d
@ -183,6 +183,10 @@ ollama:
|
||||
# * 0: Unload the model immediately after the request
|
||||
# * N (any positive number): Keep the model loaded for N seconds after the request.
|
||||
keepAlive: -1
|
||||
# Controls the "num_batch" (batch size) parameter of the generation request
|
||||
# * -1 (or any non-positive number): Use the default value of the model
|
||||
# * N (positive number): Use the specified value. Must be a power of 2, e.g. 128, 256 or 512.
|
||||
batchSize: -1
|
||||
# -- ANTHROPIC CLAUDE API CONFIGURATION --
|
||||
claude:
|
||||
# Enables caching of the system prompt (if supported).
|
||||
|
@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [
|
||||
export const OLLAMA_KEYS = [
|
||||
'num_predict',
|
||||
'num_ctx',
|
||||
'num_batch',
|
||||
'stop',
|
||||
'temperature',
|
||||
'repeat_penalty',
|
||||
|
@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) {
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
||||
const keepAlive = getConfigValue('ollama.keepAlive', -1);
|
||||
const numBatch = getConfigValue('ollama.batchSize', -1);
|
||||
if (numBatch > 0) {
|
||||
request.body['num_batch'] = numBatch;
|
||||
}
|
||||
args.body = JSON.stringify({
|
||||
model: request.body.model,
|
||||
prompt: request.body.prompt,
|
||||
|
Loading…
x
Reference in New Issue
Block a user