mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Compare commits
6 Commits
OptimizedW
...
staging
Author | SHA1 | Date | |
---|---|---|---|
|
d55676d6d5 | ||
|
1c499df9da | ||
|
f12c523fcd | ||
|
ba74e4f126 | ||
|
8eb56355df | ||
|
1211493404 |
42
package-lock.json
generated
42
package-lock.json
generated
@@ -74,7 +74,7 @@
|
||||
"mime-types": "^2.1.35",
|
||||
"moment": "^2.30.1",
|
||||
"morphdom": "^2.7.4",
|
||||
"multer": "^2.0.0",
|
||||
"multer": "^2.0.1",
|
||||
"node-fetch": "^3.3.2",
|
||||
"node-persist": "^4.0.4",
|
||||
"open": "^8.4.2",
|
||||
@@ -3376,20 +3376,34 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/concat-stream": {
|
||||
"version": "1.6.2",
|
||||
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-1.6.2.tgz",
|
||||
"integrity": "sha512-27HBghJxjiZtIk3Ycvn/4kbJk/1uZuJFfuPEns6LaEvpvG1f0hTea8lilrouyo9mVc2GWdcEZ8OLoGmSADlrCw==",
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz",
|
||||
"integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==",
|
||||
"engines": [
|
||||
"node >= 0.8"
|
||||
"node >= 6.0"
|
||||
],
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"buffer-from": "^1.0.0",
|
||||
"inherits": "^2.0.3",
|
||||
"readable-stream": "^2.2.2",
|
||||
"readable-stream": "^3.0.2",
|
||||
"typedarray": "^0.0.6"
|
||||
}
|
||||
},
|
||||
"node_modules/concat-stream/node_modules/readable-stream": {
|
||||
"version": "3.6.2",
|
||||
"resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz",
|
||||
"integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"inherits": "^2.0.3",
|
||||
"string_decoder": "^1.1.1",
|
||||
"util-deprecate": "^1.0.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 6"
|
||||
}
|
||||
},
|
||||
"node_modules/content-disposition": {
|
||||
"version": "0.5.4",
|
||||
"resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz",
|
||||
@@ -6076,18 +6090,18 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/multer": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/multer/-/multer-2.0.0.tgz",
|
||||
"integrity": "sha512-bS8rPZurbAuHGAnApbM9d4h1wSoYqrOqkE+6a64KLMK9yWU7gJXBDDVklKQ3TPi9DRb85cRs6yXaC0+cjxRtRg==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/multer/-/multer-2.0.1.tgz",
|
||||
"integrity": "sha512-Ug8bXeTIUlxurg8xLTEskKShvcKDZALo1THEX5E41pYCD2sCVub5/kIRIGqWNoqV6szyLyQKV6mD4QUrWE5GCQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"append-field": "^1.0.0",
|
||||
"busboy": "^1.0.0",
|
||||
"concat-stream": "^1.5.2",
|
||||
"mkdirp": "^0.5.4",
|
||||
"busboy": "^1.6.0",
|
||||
"concat-stream": "^2.0.0",
|
||||
"mkdirp": "^0.5.6",
|
||||
"object-assign": "^4.1.1",
|
||||
"type-is": "^1.6.4",
|
||||
"xtend": "^4.0.0"
|
||||
"type-is": "^1.6.18",
|
||||
"xtend": "^4.0.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">= 10.16.0"
|
||||
|
@@ -64,7 +64,7 @@
|
||||
"mime-types": "^2.1.35",
|
||||
"moment": "^2.30.1",
|
||||
"morphdom": "^2.7.4",
|
||||
"multer": "^2.0.0",
|
||||
"multer": "^2.0.1",
|
||||
"node-fetch": "^3.3.2",
|
||||
"node-persist": "^4.0.4",
|
||||
"open": "^8.4.2",
|
||||
|
@@ -2098,8 +2098,8 @@
|
||||
<div class="toggle-description justifyLeft marginBot5" data-source="claude" data-i18n="Allocates a portion of the response length for thinking (min: 1024 tokens, low: 10%, medium: 25%, high: 50%, max: 95%), but minimum 1024 tokens. Auto does not request thinking.">
|
||||
Allocates a portion of the response length for thinking (min: 1024 tokens, low: 10%, medium: 25%, high: 50%, max: 95%), but minimum 1024 tokens. Auto does not request thinking.
|
||||
</div>
|
||||
<div class="toggle-description justifyLeft marginBot5" data-source="makersuite,vertexai" data-i18n="Allocates a portion of the response length for thinking (min: 0 tokens, low: 10%, medium: 25%, high: 50%, max: 24576 tokens). Auto lets the model decide.">
|
||||
Allocates a portion of the response length for thinking (min: 0 tokens, low: 10%, medium: 25%, high: 50%, max: 24576 tokens). Auto lets the model decide.
|
||||
<div class="toggle-description justifyLeft marginBot5" data-source="makersuite,vertexai" data-i18n="Allocates a portion of the response length for thinking (Flash 2.5/Pro 2.5) (min: 0/128 tokens, low: 10%, medium: 25%, high: 50%, max: 24576/32768 tokens). Auto lets the model decide.">
|
||||
Allocates a portion of the response length for thinking (Flash 2.5/Pro 2.5) (min: 0/128 tokens, low: 10%, medium: 25%, high: 50%, max: 24576/32768 tokens). Auto lets the model decide.
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -3190,6 +3190,7 @@
|
||||
<h4 data-i18n="Google Model">Google Model</h4>
|
||||
<select id="model_google_select">
|
||||
<optgroup label="Gemini 2.5">
|
||||
<option value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
|
||||
<option value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
|
||||
<option value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
|
||||
<option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
|
||||
@@ -3336,6 +3337,7 @@
|
||||
|
||||
<!-- Full Version Models -->
|
||||
<optgroup id="vertexai_full_gemini_25" label="Gemini 2.5" data-mode="full">
|
||||
<option value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
|
||||
<option value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
|
||||
<option value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
|
||||
<option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
|
||||
|
@@ -85,6 +85,7 @@
|
||||
<option data-type="anthropic" value="claude-3-opus-20240229">claude-3-opus-20240229</option>
|
||||
<option data-type="anthropic" value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
|
||||
<option data-type="anthropic" value="claude-3-haiku-20240307">claude-3-haiku-20240307</option>
|
||||
<option data-type="google" value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
|
||||
<option data-type="google" value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
|
||||
<option data-type="google" value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
|
||||
<option data-type="google" value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
|
||||
@@ -116,6 +117,7 @@
|
||||
<option data-type="google" value="gemini-1.5-flash-8b-exp-0827">gemini-1.5-flash-8b-exp-0827</option>
|
||||
<option data-type="google" value="learnlm-2.0-flash-experimental">learnlm-2.0-flash-experimental</option>
|
||||
<option data-type="google" value="learnlm-1.5-pro-experimental">learnlm-1.5-pro-experimental</option>
|
||||
<option data-type="vertexai" value="gemini-2.5-pro-preview-06-05">gemini-2.5-pro-preview-06-05</option>
|
||||
<option data-type="vertexai" value="gemini-2.5-pro-preview-05-06">gemini-2.5-pro-preview-05-06</option>
|
||||
<option data-type="vertexai" value="gemini-2.5-pro-preview-03-25">gemini-2.5-pro-preview-03-25</option>
|
||||
<option data-type="vertexai" value="gemini-2.5-flash-preview-05-20">gemini-2.5-flash-preview-05-20</option>
|
||||
|
@@ -414,7 +414,6 @@ async function sendMakerSuiteRequest(request, response) {
|
||||
];
|
||||
|
||||
const isThinkingConfigModel = m => /^gemini-2.5-(flash|pro)/.test(m);
|
||||
const isThinkingBudgetModel = m => /^gemini-2.5-flash/.test(m);
|
||||
|
||||
const noSearchModels = [
|
||||
'gemini-2.0-flash-lite',
|
||||
@@ -470,11 +469,9 @@ async function sendMakerSuiteRequest(request, response) {
|
||||
if (isThinkingConfigModel(model)) {
|
||||
const thinkingConfig = { includeThoughts: includeReasoning };
|
||||
|
||||
if (isThinkingBudgetModel(model)) {
|
||||
const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort);
|
||||
if (Number.isInteger(thinkingBudget)) {
|
||||
thinkingConfig.thinkingBudget = thinkingBudget;
|
||||
}
|
||||
const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort, model);
|
||||
if (Number.isInteger(thinkingBudget)) {
|
||||
thinkingConfig.thinkingBudget = thinkingBudget;
|
||||
}
|
||||
|
||||
generationConfig.thinkingConfig = thinkingConfig;
|
||||
|
@@ -992,32 +992,73 @@ export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream)
|
||||
* Calculate the Google budget tokens for a given reasoning effort.
|
||||
* @param {number} maxTokens Maximum tokens
|
||||
* @param {string} reasoningEffort Reasoning effort
|
||||
* @param {string} model Model name
|
||||
* @returns {number?} Budget tokens
|
||||
*/
|
||||
export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort) {
|
||||
let budgetTokens = 0;
|
||||
export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort, model) {
|
||||
function getFlashBudget() {
|
||||
let budgetTokens = 0;
|
||||
|
||||
switch (reasoningEffort) {
|
||||
case REASONING_EFFORT.auto:
|
||||
return null;
|
||||
case REASONING_EFFORT.min:
|
||||
budgetTokens = 0;
|
||||
break;
|
||||
case REASONING_EFFORT.low:
|
||||
budgetTokens = Math.floor(maxTokens * 0.1);
|
||||
break;
|
||||
case REASONING_EFFORT.medium:
|
||||
budgetTokens = Math.floor(maxTokens * 0.25);
|
||||
break;
|
||||
case REASONING_EFFORT.high:
|
||||
budgetTokens = Math.floor(maxTokens * 0.5);
|
||||
break;
|
||||
case REASONING_EFFORT.max:
|
||||
budgetTokens = maxTokens;
|
||||
break;
|
||||
switch (reasoningEffort) {
|
||||
case REASONING_EFFORT.auto:
|
||||
return null;
|
||||
case REASONING_EFFORT.min:
|
||||
budgetTokens = 0;
|
||||
break;
|
||||
case REASONING_EFFORT.low:
|
||||
budgetTokens = Math.floor(maxTokens * 0.1);
|
||||
break;
|
||||
case REASONING_EFFORT.medium:
|
||||
budgetTokens = Math.floor(maxTokens * 0.25);
|
||||
break;
|
||||
case REASONING_EFFORT.high:
|
||||
budgetTokens = Math.floor(maxTokens * 0.5);
|
||||
break;
|
||||
case REASONING_EFFORT.max:
|
||||
budgetTokens = maxTokens;
|
||||
break;
|
||||
}
|
||||
|
||||
budgetTokens = Math.min(budgetTokens, 24576);
|
||||
|
||||
return budgetTokens;
|
||||
}
|
||||
|
||||
budgetTokens = Math.min(budgetTokens, 24576);
|
||||
function getProBudget() {
|
||||
let budgetTokens = 0;
|
||||
|
||||
return budgetTokens;
|
||||
switch (reasoningEffort) {
|
||||
case REASONING_EFFORT.auto:
|
||||
return null;
|
||||
case REASONING_EFFORT.min:
|
||||
budgetTokens = 128;
|
||||
break;
|
||||
case REASONING_EFFORT.low:
|
||||
budgetTokens = Math.floor(maxTokens * 0.1);
|
||||
break;
|
||||
case REASONING_EFFORT.medium:
|
||||
budgetTokens = Math.floor(maxTokens * 0.25);
|
||||
break;
|
||||
case REASONING_EFFORT.high:
|
||||
budgetTokens = Math.floor(maxTokens * 0.5);
|
||||
break;
|
||||
case REASONING_EFFORT.max:
|
||||
budgetTokens = maxTokens;
|
||||
break;
|
||||
}
|
||||
|
||||
budgetTokens = Math.max(Math.min(budgetTokens, 32768), 128);
|
||||
|
||||
return budgetTokens;
|
||||
}
|
||||
|
||||
if (model.includes('flash')) {
|
||||
return getFlashBudget();
|
||||
}
|
||||
|
||||
if (model.includes('pro')) {
|
||||
return getProBudget();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
Reference in New Issue
Block a user