mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Ollama: Add num_batch config value
This commit is contained in:
@ -183,6 +183,10 @@ ollama:
|
|||||||
# * 0: Unload the model immediately after the request
|
# * 0: Unload the model immediately after the request
|
||||||
# * N (any positive number): Keep the model loaded for N seconds after the request.
|
# * N (any positive number): Keep the model loaded for N seconds after the request.
|
||||||
keepAlive: -1
|
keepAlive: -1
|
||||||
|
# Controls the "num_batch" (batch size) parameter of the generation request
|
||||||
|
# * -1: Use the default value of the model
|
||||||
|
# * N (any positive number): Use the specified value. Must be a power of 2 (e.g. 128, 256, 512).
|
||||||
|
batchSize: -1
|
||||||
# -- ANTHROPIC CLAUDE API CONFIGURATION --
|
# -- ANTHROPIC CLAUDE API CONFIGURATION --
|
||||||
claude:
|
claude:
|
||||||
# Enables caching of the system prompt (if supported).
|
# Enables caching of the system prompt (if supported).
|
||||||
|
@ -304,6 +304,7 @@ export const TOGETHERAI_KEYS = [
|
|||||||
export const OLLAMA_KEYS = [
|
export const OLLAMA_KEYS = [
|
||||||
'num_predict',
|
'num_predict',
|
||||||
'num_ctx',
|
'num_ctx',
|
||||||
|
'num_batch',
|
||||||
'stop',
|
'stop',
|
||||||
'temperature',
|
'temperature',
|
||||||
'repeat_penalty',
|
'repeat_penalty',
|
||||||
|
@ -373,6 +373,10 @@ router.post('/generate', jsonParser, async function (request, response) {
|
|||||||
|
|
||||||
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
||||||
const keepAlive = getConfigValue('ollama.keepAlive', -1);
|
const keepAlive = getConfigValue('ollama.keepAlive', -1);
|
||||||
|
const numBatch = getConfigValue('ollama.batchSize', -1);
|
||||||
|
if (numBatch > 0) {
|
||||||
|
request.body['num_batch'] = numBatch;
|
||||||
|
}
|
||||||
args.body = JSON.stringify({
|
args.body = JSON.stringify({
|
||||||
model: request.body.model,
|
model: request.body.model,
|
||||||
prompt: request.body.prompt,
|
prompt: request.body.prompt,
|
||||||
|
Reference in New Issue
Block a user