Merge branch 'staging' into feature/char-search-fav-list-styling

commit cda8b51fd3
Kristian Schlikow, 2024-05-05 02:59:02 +02:00 (committed by GitHub)
GPG Key ID: B5690EEEBB952194
31 changed files with 797 additions and 177 deletions


@ -44,6 +44,7 @@ module.exports = {
toastr: 'readonly',
Readability: 'readonly',
isProbablyReaderable: 'readonly',
ePub: 'readonly',
},
},
],

.github/readme.md

@ -326,7 +326,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.**
* TAI Base by Humi: Unknown license
* TAI Base by Humi: MIT
* Cohee's modifications and derived code: AGPL v3
* RossAscends' additions: AGPL v3
* Portions of CncAnon's TavernAITurbo mod: Unknown license

Binary image file not shown (338 KiB before and after).


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -196,3 +196,21 @@
.WIEntryHeaderTitleMobile {
display: none;
}
#world_info+span.select2-container .select2-selection__choice__remove,
#world_info+span.select2-container .select2-selection__choice__display {
cursor: pointer;
transition: background-color 0.3s;
color: var(--SmartThemeBodyColor);
background-color: var(--black50a);
}
#world_info+span.select2-container .select2-selection__choice__display {
/* Fix weird alignment on the left side */
margin-left: 1px;
}
#world_info+span.select2-container .select2-selection__choice__remove:hover,
#world_info+span.select2-container .select2-selection__choice__display:hover {
background-color: var(--white30a);
}

public/global.d.ts (new file)

@ -0,0 +1,34 @@
// Global namespace modules
declare var DOMPurify;
declare var droll;
declare var Fuse;
declare var Handlebars;
declare var hljs;
declare var localforage;
declare var moment;
declare var pdfjsLib;
declare var Popper;
declare var showdown;
declare var showdownKatex;
declare var SVGInject;
declare var toastr;
declare var Readability;
declare var isProbablyReaderable;
declare var ePub;
declare var ai;
// Jquery plugins
interface JQuery {
pagination(method: 'getCurrentPageNum'): number;
pagination(method: string, options?: any): JQuery;
pagination(options?: any): JQuery;
transition(options?: any): JQuery;
select2(options?: any): JQuery;
sortable(options?: any): JQuery;
autocomplete(options?: any): JQuery;
autocomplete(method: string, options?: any): JQuery;
slider(options?: any): JQuery;
slider(method: string, func: string, options?: any): JQuery;
cropper(options?: any): JQuery;
izoomify(options?: any): JQuery;
}
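The JQuery interface above only adds typings; as a quick illustration of how the pagination overloads resolve (the element id is made up):

// First overload returns a number; the option-bag overloads return the JQuery chain.
const page = $('#char_list').pagination('getCurrentPageNum'); // number
$('#char_list').pagination({ pageSize: 25 });                 // JQuery, chainable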


@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
version="1.0"
width="70pt"
height="70pt"
viewBox="0 0 70 70"
preserveAspectRatio="xMidYMid"
id="svg15"
sodipodi:docname="infermatic.svg"
inkscape:version="1.3 (0e150ed, 2023-07-21)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs15" />
<sodipodi:namedview
id="namedview15"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="pt"
inkscape:zoom="0.75112613"
inkscape:cx="306.2069"
inkscape:cy="50.590705"
inkscape:window-width="1312"
inkscape:window-height="449"
inkscape:window-x="0"
inkscape:window-y="38"
inkscape:window-maximized="0"
inkscape:current-layer="svg15" />
<path
id="path15"
d="m 1030,375 v -75 h 75 74 l 6,33 c 4,18 5,51 3,72 l -3,40 -77,3 -78,3 z m 547,619 c -4,-4 -7,-41 -7,-81 v -74 l 78,3 77,3 v 75 75 l -70,3 c -39,1 -74,0 -78,-4 z m -547,-74 v -79 l 133,-3 132,-3 3,-267 2,-268 h 215 215 v 75 75 h -135 -135 l -2,273 -3,272 -212,3 -213,2 z"
transform="matrix(0.1,0,0,-0.1,-103,99.999998)" />
</svg>



@ -9,6 +9,13 @@
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="darkreader-lock">
<meta name="robots" content="noindex, nofollow" />
<style>
/* Put critical CSS here. The rest should go in stylesheets. */
body {
background-color: rgb(36, 36, 37);
}
</style>
<link rel="preload" as="style" href="style.css">
<link rel="manifest" crossorigin="use-credentials" href="manifest.json">
<link href="webfonts/NotoSans/stylesheet.css" rel="stylesheet">
<link href="css/fontawesome.min.css" rel="stylesheet">
@ -1125,7 +1132,7 @@
<div class="fa-solid fa-circle-info opacity50p" title="Set all samplers to their neutral/disabled state." data-i18n="[title]Set all samplers to their neutral/disabled state."></div>
</small>
</div>
<div data-newbie-hidden data-tg-type="mancer, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<div data-newbie-hidden data-tg-type="mancer, vllm, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<small data-i18n="Multiple swipes per generation">Multiple swipes per generation</small>
<input type="number" id="n_textgenerationwebui" class="text_pole textAlignCenter" min="1" value="1" step="1" />
</div>
@ -1207,12 +1214,12 @@
<input class="neo-range-slider" type="range" id="rep_pen_textgenerationwebui" name="volume" min="1" max="3" step="0.01">
<input class="neo-range-input" type="number" min="1" max="3" step="0.01" data-for="rep_pen_textgenerationwebui" id="rep_pen_counter_textgenerationwebui">
</div>
<div data-forAphro="False" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba, koboldcpp, tabby, ollama, llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="rep.pen range">Rep Pen Range</small>
<input class="neo-range-slider" type="range" id="rep_pen_range_textgenerationwebui" name="volume" min="-1" max="8192" step="1">
<input class="neo-range-input" type="number" min="-1" max="8192" step="1" data-for="rep_pen_range_textgenerationwebui" id="rep_pen_range_counter_textgenerationwebui">
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="Encoder Rep. Pen.">Encoder Penalty</small>
<input class="neo-range-slider" type="range" id="encoder_rep_pen_textgenerationwebui" name="volume" min="0.8" max="1.5" step="0.01" />
<input class="neo-range-input" type="number" min="0.8" max="1.5" step="0.01" data-for="encoder_rep_pen_textgenerationwebui" id="encoder_rep_pen_counter_textgenerationwebui">
@ -1227,7 +1234,7 @@
<input class="neo-range-slider" type="range" id="presence_pen_textgenerationwebui" name="volume" min="-2" max="2" step="0.01" />
<input class="neo-range-input" type="number" min="-2" max="2" step="0.01" data-for="presence_pen_textgenerationwebui" id="presence_pen_counter_textgenerationwebui">
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="No Repeat Ngram Size">No Repeat Ngram Size</small>
<input class="neo-range-slider" type="range" id="no_repeat_ngram_size_textgenerationwebui" name="volume" min="0" max="20" step="1">
<input class="neo-range-input" type="number" min="0" max="20" step="1" data-for="no_repeat_ngram_size_textgenerationwebui" id="no_repeat_ngram_size_counter_textgenerationwebui">
@ -1247,41 +1254,19 @@
<label data-i18n="Smooth Sampling">Smooth Sampling</label>
<div class=" fa-solid fa-circle-info opacity50p " data-i18n="[title]Smooth Sampling" title="Allows you to use quadratic/cubic transformations to adjust the distribution. Lower Smoothing Factor values will be more creative, usually between 0.2-0.3 is the sweetspot (assuming the curve = 1). Higher Smoothing Curve values will make the curve steeper, which will punish low probability choices more aggressively. 1.0 curve is equivalent to only using Smoothing Factor."></div>
</h4>
<div class="flex-container flexFlowRow gap10px flexShrink">
<div data-tg-type="mancer, ooba, koboldcpp, aphrodite, tabby" class="flex-container flexFlowRow gap10px flexShrink">
<div data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<small data-i18n="Smoothing Factor">Smoothing Factor</small>
<input class="neo-range-slider" type="range" id="smoothing_factor_textgenerationwebui" name="volume" min="0" max="10" step="0.01" />
<input class="neo-range-input" type="number" min="0" max="10" step="0.01" data-for="smoothing_factor_textgenerationwebui" id="smoothing_factor_counter_textgenerationwebui">
</div>
<div data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<div data-tg-type="mancer, ooba, koboldcpp, aphrodite" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<small data-i18n="Smoothing Curve">Smoothing Curve</small>
<input class="neo-range-slider" type="range" id="smoothing_curve_textgenerationwebui" name="volume" min="1" max="10" step="0.01" />
<input class="neo-range-input" type="number" min="1" max="10" step="0.01" data-for="smoothing_curve_textgenerationwebui" id="smoothing_curve_counter_textgenerationwebui">
</div>
</div>
</div>
<!--
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0" data-i18n="Responses">
<small>Responses</small>
<input class="neo-range-slider" type="range" id="n_aphrodite_textgenerationwebui" name="volume" min="1" max="5" step="1">
<input class="neo-range-input" type="number" min="1" max="5" step="1" data-for="n_aphrodite" id="n_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Keep # Best Responses">
<small>Keep # Best Responses</small>
<input class="neo-range-slider" type="range" id="best_of_aphrodite_textgenerationwebui" name="volume" min="1" max="5" step="1">
<input class="neo-range-input" type="number" min="1" max="5" step="1" data-for="best_of_aphrodite" id="best_of_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Logit Probabilities">
<small>Logit Probabilities</small>
<input class="neo-range-slider" type="range" id="log_probs_aphrodite_textgenerationwebui" name="volume" min="0" max="5" step="1">
<input class="neo-range-input" type="number" min="0" max="5" step="1" data-for="log_probs_aphrodite" id="log_probs_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Prompt Logit Probabilities">
<small>Prompt Logit Probabilities</small>
<input class="neo-range-slider" type="range" id="prompt_log_probs_aphrodite_textgenerationwebui" name="volume" min="0" max="5" step="1">
<input class="neo-range-input" type="number" min="0" max="5" step="1" data-for="prompt_log_probs_aphrodite" id="prompt_log_probs_aphrodite_counter_textgenerationwebui">
</div>
-->
<div data-newbie-hidden data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp, aphrodite" name="dynaTempBlock" class="wide100p">
<h4 class="wide100p textAlignCenter" data-i18n="DynaTemp">
<div class="flex-container alignitemscenter" style="justify-content: center;">
@ -1366,7 +1351,7 @@
</div>
</div>
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden name="contrastiveSearchBlock" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden name="contrastiveSearchBlock" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<h4 class="textAlignCenter" data-i18n="Contrastive search">Contrastive Search
<div class=" fa-solid fa-circle-info opacity50p " title="A sampler that encourages diversity while maintaining coherence, by exploiting the isotropicity of the representation space of most LLMs. For details, see the paper A Contrastive Framework for Neural Text Generation by Su et al. (2022)."></div>
</h4>
@ -1381,26 +1366,26 @@
</div>
<div data-newbie-hidden name="checkboxes" class="flex-container flexBasis48p justifyCenter flexGrow flexShrink ">
<div class="flex-container flexFlowColumn marginTop5">
<label data-forAphro="False" data-tg-type="ooba" class="checkbox_label flexGrow flexShrink" for="do_sample_textgenerationwebui">
<label data-tg-type="ooba" class="checkbox_label flexGrow flexShrink" for="do_sample_textgenerationwebui">
<input type="checkbox" id="do_sample_textgenerationwebui" />
<small data-i18n="Do Sample">Do Sample</small>
</label>
<label data-forAphro="False" data-tg-type="ooba, tabby" class="checkbox_label flexGrow flexShrink" for="add_bos_token_textgenerationwebui">
<label data-tg-type="ooba, tabby" class="checkbox_label flexGrow flexShrink" for="add_bos_token_textgenerationwebui">
<input type="checkbox" id="add_bos_token_textgenerationwebui" />
<label>
<small data-i18n="Add BOS Token">Add BOS Token</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Add the bos_token to the beginning of prompts. Disabling this can make the replies more creative" title="Add the bos_token to the beginning of prompts. Disabling this can make the replies more creative."></div>
</label>
</label>
<label data-forAphro="False" class="checkbox_label flexGrow flexShrink" for="ban_eos_token_textgenerationwebui">
<label data-tg-type="ooba, llamacpp, tabby" class="checkbox_label flexGrow flexShrink" for="ban_eos_token_textgenerationwebui">
<input type="checkbox" id="ban_eos_token_textgenerationwebui" />
<label>
<small data-i18n="Ban EOS Token">Ban EOS Token</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Ban the eos_token. This forces the model to never end the generation prematurely" title="Ban the eos_token. This forces the model to never end the generation prematurely."></div>
</label>
</label>
<label data-tg-type="aphrodite" class="checkbox_label" for="ignore_eos_token_aphrodite_textgenerationwebui">
<input type="checkbox" id="ignore_eos_token_aphrodite_textgenerationwebui" />
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="ignore_eos_token_textgenerationwebui">
<input type="checkbox" id="ignore_eos_token_textgenerationwebui" />
<small data-i18n="Ignore EOS Token">Ignore EOS Token
<div class="fa-solid fa-circle-info opacity50p " data-i18n="Ignore the EOS Token even if it generates." title="Ignore the EOS Token even if it generates."></div>
</small>
@ -1409,7 +1394,7 @@
<input type="checkbox" id="skip_special_tokens_textgenerationwebui" />
<small data-i18n="Skip Special Tokens">Skip Special Tokens</small>
</label>
<label data-forAphro="False" data-tg-type="ooba, aphrodite, tabby" class="checkbox_label flexGrow flexShrink" for="temperature_last_textgenerationwebui">
<label data-tg-type="ooba, aphrodite, tabby" class="checkbox_label flexGrow flexShrink" for="temperature_last_textgenerationwebui">
<input type="checkbox" id="temperature_last_textgenerationwebui" />
<label>
<small data-i18n="Temperature Last">Temperature Last</small>
@ -1417,13 +1402,13 @@
</label>
</label>
<label data-tg-type="aphrodite" class="checkbox_label" for="spaces_between_special_tokens_aphrodite_textgenerationwebui">
<input type="checkbox" id="spaces_between_special_tokens_aphrodite_textgenerationwebui" />
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="spaces_between_special_tokens_textgenerationwebui">
<input type="checkbox" id="spaces_between_special_tokens_textgenerationwebui" />
<small data-i18n="Spaces Between Special Tokens">Spaces Between Special Tokens</small>
</label>
</div>
</div>
<div data-forAphro="False" data-newbie-hidden class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<div data-tg-type="mancer, ooba, koboldcpp, aphrodite, llamaccp, ollama" data-newbie-hidden class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<small data-i18n="Seed" class="textAlignCenter">Seed</small>
<input type="number" id="seed_textgenerationwebui" class="text_pole textAlignCenter" min="-1" value="-1" maxlength="100" />
</div>
@ -1452,7 +1437,7 @@
<div class="logit_bias_list"></div>
</div>
</div>
<div data-newbie-hidden data-forAphro="False" data-tg-type="ooba, tabby" class="wide100p">
<div data-newbie-hidden data-tg-type="ooba, tabby" class="wide100p">
<hr class="width100p">
<h4 data-i18n="CFG" class="textAlignCenter">CFG
<div class="margin5 fa-solid fa-circle-info opacity50p " data-i18n="[title]Classifier Free Guidance. More helpful tip coming soon" title="Classifier Free Guidance. More helpful tip coming soon."></div>
@ -1489,10 +1474,11 @@
<hr class="wide100p">
<h4 class="wide100p textAlignCenter">
<label>
<span data-i18n="GBNF Grammar">GBNF Grammar</span>
<span data-i18n="Grammar String">Grammar String</span>
<div class="margin5 fa-solid fa-circle-info opacity50p " data-i18n="[title]GNBF or ENBF, depends on the backend in use. If you're using this you should know which." title="GNBF or ENBF, depends on the backend in use. If you're using this you should know which."></div>
<a href="https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md" target="_blank">
<small>
<div class="fa-solid fa-circle-question note-link-span"></div>
<div class="fa-solid fa-up-right-from-square note-link-span"></div>
</small>
</a>
</label>
@ -1948,7 +1934,7 @@
<div>
<h4 data-i18n="API Type">API Type</h4>
<select id="textgen_type">
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, vLLM, LM Studio, etc.]</option>
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, LM Studio, etc.]</option>
<option value="aphrodite">Aphrodite</option>
<option value="dreamgen">DreamGen</option>
<option value="infermaticai">InfermaticAI</option>
@ -1959,6 +1945,7 @@
<option value="openrouter">OpenRouter</option>
<option value="tabby">TabbyAPI</option>
<option value="togetherai">TogetherAI</option>
<option value="vllm">vLLM</option>
</select>
</div>
<div data-tg-type="togetherai" class="flex-container flexFlowColumn">
@ -2098,6 +2085,36 @@
</div>
<input id="custom_model_textgenerationwebui" class="text_pole wide100p" maxlength="500" placeholder="Custom model (optional)" data-i18n="[placeholder]Custom model (optional)" type="text">
</div>
<div data-tg-type="vllm">
<div class="flex-container flexFlowColumn">
<a href="https://github.com/vllm-project/vllm" target="_blank" data-i18n="vllm-project/vllm">
vllm-project/vllm (OpenAI API wrapper mode)
</a>
</div>
<h4 data-i18n="vLLM API key">vLLM API key</h4>
<div class="flex-container">
<input id="api_key_vllm" name="api_key_vllm" class="text_pole flex1 wide100p" maxlength="500" size="35" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_vllm">
</div>
</div>
<div data-for="api_key_vllm" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
For privacy reasons, your API key will be hidden after you reload the page.
</div>
<div class="flex1">
<h4 data-i18n="API url">API URL</h4>
<small data-i18n="Example: 127.0.0.1:8000">Example: http://127.0.0.1:8000</small>
<input id="vllm_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="vllm">
</div>
<div>
<h4 data-i18n="vLLM Model">vLLM Model</h4>
<select id="vllm_model">
<option data-i18n="-- Connect to the API --">
-- Connect to the API --
</option>
</select>
</div>
</div>
<div data-tg-type="aphrodite">
<div class="flex-container flexFlowColumn">
<a href="https://github.com/PygmalionAI/aphrodite-engine" target="_blank" data-i18n="PygmalionAI/aphrodite-engine">
@ -2218,7 +2235,7 @@
</div>
</div>
<div class="flex-container">
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,aphrodite,tabby,koboldcpp">Connect</div>
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,vllm,aphrodite,tabby,koboldcpp,ollama,llamacpp">Connect</div>
<div data-tg-type="openrouter" class="menu_button menu_button_icon openrouter_authorize" title="Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai" data-i18n="Authorize;[title]Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai">Authorize</div>
<div class="api_loading menu_button" data-i18n="Cancel">Cancel</div>
</div>
@ -2637,11 +2654,13 @@
<optgroup label="Latest">
<option value="open-mistral-7b">open-mistral-7b</option>
<option value="open-mixtral-8x7b">open-mixtral-8x7b</option>
<option value="open-mixtral-8x22b">open-mixtral-8x22b</option>
<option value="mistral-small-latest">mistral-small-latest</option>
<option value="mistral-medium-latest">mistral-medium-latest</option>
<option value="mistral-large-latest">mistral-large-latest</option>
</optgroup>
<optgroup label="Sub-versions">
<option value="open-mixtral-8x22b-2404">open-mixtral-8x22b-2404</option>
<option value="mistral-tiny-2312">mistral-tiny-2312</option>
<option value="mistral-small-2312">mistral-small-2312</option>
<option value="mistral-small-2402">mistral-small-2402</option>
@ -3133,11 +3152,13 @@
<option value="0">None / Estimated</option>
<option value="1">GPT-2</option>
<!-- Option #2 was a legacy GPT-2/3 tokenizer -->
<option value="3">LLaMA</option>
<option value="3">Llama 1/2</option>
<option value="12">Llama 3</option>
<option value="4">NerdStash (NovelAI Clio)</option>
<option value="5">NerdStash v2 (NovelAI Kayra)</option>
<option value="7">Mistral</option>
<option value="8">Yi</option>
<option value="11">Claude 1/2</option>
<option value="6">API (WebUI / koboldcpp)</option>
</select>
</div>
@ -3377,6 +3398,12 @@
Match whole words
</small>
</label>
<label title="Only the entries with the most number of key matches will be selected for Inclusion Group filtering" data-i18n="[title]Only the entries with the most number of key matches will be selected for Inclusion Group filtering" class="checkbox_label flex1">
<input id="world_info_use_group_scoring" type="checkbox" />
<small data-i18n="Use Group Scoring" class="whitespacenowrap flex1">
Use Group Scoring
</small>
</label>
<label title="Alert if your world info is greater than the allocated budget." data-i18n="[title]Alert if your world info is greater than the allocated budget." class="checkbox_label flex1">
<input id="world_info_overflow_alert" type="checkbox" />
<small data-i18n="Alert On Overflow" class="whitespacenowrap flex1">
@ -5096,6 +5123,14 @@
<option value="false" data-i18n="No">No</option>
</select>
</div>
<div class="world_entry_form_control flex1">
<small class="textAlignCenter" data-i18n="Use Group Scoring">Use Group Scoring</small>
<select name="useGroupScoring" class="text_pole widthNatural margin0">
<option value="null" data-i18n="Use global setting">Use global setting</option>
<option value="true" data-i18n="Yes">Yes</option>
<option value="false" data-i18n="No">No</option>
</select>
</div>
<div class="world_entry_form_control flex1" title="Can be used to automatically activate Quick Replies" data-i18n="[title]Can be used to automatically activate Quick Replies">
<small class="textAlignCenter" data-i18n="Automation ID">Automation ID</small>
<input class="text_pole margin0" name="automationId" type="text" placeholder="( None )" data-i18n="[placeholder]( None )">
@ -5166,15 +5201,14 @@
<div class="flex-container justifySpaceBetween">
<small for="group" data-i18n="Inclusion Group">
Inclusion Group
<a href="https://docs.sillytavern.app/usage/core-concepts/worldinfo/#inclusion-group" class="notes-link" target="_blank"
title="Inclusion Groups ensure only one entry from a group is activated at a time, if multiple are triggered.&#13;&#13;Documentation: World Info - Inclusion Group" data-i18n="[title]Inclusion Groups ensure only one entry from a group is activated at a time, if multiple are triggered.&#13;&#13;Documentation: World Info - Inclusion Group">
<a href="https://docs.sillytavern.app/usage/core-concepts/worldinfo/#inclusion-group" class="notes-link" target="_blank" title="Inclusion Groups ensure only one entry from a group is activated at a time, if multiple are triggered.&#13;&#13;Documentation: World Info - Inclusion Group" data-i18n="[title]Inclusion Groups ensure only one entry from a group is activated at a time, if multiple are triggered.&#13;&#13;Documentation: World Info - Inclusion Group">
<span class="fa-solid fa-circle-question note-link-span"></span>
</a>
</small>
<label class="checkbox_label flexNoGap margin-r5" for="groupOverride">
<input type="checkbox" name="groupOverride" />
<span>
<small data-i18n="Prioritize Inclusion" title="Prioritize this entry: When checked, this entry is prioritized out of all selections.&#13;If multiple are prioritized, the one with the highest 'Order' is chosen.&#13;" data-i18n="[title]Prioritize this entry: When checked, this entry is prioritized out of all selections.&#13;If multiple are prioritized, the one with the highest 'Order' is chosen.">
<small data-i18n="Prioritize Inclusion" title="Prioritize this entry: When checked, this entry is prioritized out of all selections.&#13;If multiple are prioritized, the one with the highest 'Order' is chosen.&#13;" data-i18n="[title]Prioritize this entry: When checked, this entry is prioritized out of all selections.&#13;If multiple are prioritized, the one with the highest 'Order' is chosen.">
Prioritize Inclusion
<div class="fa-solid fa-circle-info opacity50p"></div>
</small>


@ -9,6 +9,13 @@
<meta name="apple-mobile-web-app-capable" content="yes">
<meta name="darkreader-lock">
<meta name="robots" content="noindex, nofollow" />
<style>
/* Put critical CSS here. The rest should go in stylesheets. */
body {
background-color: rgb(36, 36, 37);
}
</style>
<link rel="preload" as="style" href="style.css">
<link rel="apple-touch-icon" sizes="57x57" href="img/apple-icon-57x57.png" />
<link rel="apple-touch-icon" sizes="72x72" href="img/apple-icon-72x72.png" />
<link rel="apple-touch-icon" sizes="114x114" href="img/apple-icon-114x114.png" />
@ -20,8 +27,8 @@
<link rel="manifest" crossorigin="use-credentials" href="manifest.json">
<link href="webfonts/NotoSans/stylesheet.css" rel="stylesheet">
<!-- fontawesome webfonts-->
<link href="css/fontawesome.css" rel="stylesheet">
<link href="css/solid.css" rel="stylesheet">
<link href="css/fontawesome.min.css" rel="stylesheet">
<link href="css/solid.min.css" rel="stylesheet">
<link href="css/user.css" rel="stylesheet">
<script src="lib/jquery-3.5.1.min.js"></script>
<script src="scripts/login.js"></script>


@ -22,7 +22,7 @@ import {
parseTabbyLogprobs,
} from './scripts/textgen-settings.js';
const { MANCER, TOGETHERAI, OOBA, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
const { MANCER, TOGETHERAI, OOBA, VLLM, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
import {
world_info,
@ -218,7 +218,7 @@ import {
import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js';
import { hideLoader, showLoader } from './scripts/loader.js';
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId } from './scripts/chats.js';
import { initPresetManager } from './scripts/preset-manager.js';
import { evaluateMacros } from './scripts/macros.js';
@ -1071,6 +1071,9 @@ async function getStatusTextgen() {
} else if (textgen_settings.type === OPENROUTER) {
loadOpenRouterModels(data?.data);
online_status = textgen_settings.openrouter_model;
} else if (textgen_settings.type === VLLM) {
loadVllmModels(data?.data);
online_status = textgen_settings.vllm_model;
} else if (textgen_settings.type === APHRODITE) {
loadAphroditeModels(data?.data);
online_status = textgen_settings.aphrodite_model;
@ -1909,7 +1912,7 @@ function getMessageFromTemplate({
timestamp,
tokenCount,
extra,
} = {}) {
}) {
const mes = messageTemplate.clone();
mes.attr({
'mesid': mesId,
@ -4832,6 +4835,7 @@ function parseAndSaveLogprobs(data, continueFrom) {
case textgen_types.LLAMACPP: {
logprobs = data?.completion_probabilities?.map(x => parseTextgenLogprobs(x.content, [x])) || null;
} break;
case textgen_types.VLLM:
case textgen_types.APHRODITE:
case textgen_types.MANCER:
case textgen_types.TABBY: {
@ -4888,7 +4892,7 @@ function extractMultiSwipes(data, type) {
return swipes;
}
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, APHRODITE].includes(textgen_settings.type))) {
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, VLLM, APHRODITE].includes(textgen_settings.type))) {
if (!Array.isArray(data.choices)) {
return swipes;
}
@ -7922,6 +7926,11 @@ const CONNECT_API_MAP = {
button: '#api_button_textgenerationwebui',
type: textgen_types.MANCER,
},
'vllm': {
selected: 'textgenerationwebui',
button: '#api_button_textgenerationwebui',
type: textgen_types.VLLM,
},
'aphrodite': {
selected: 'textgenerationwebui',
button: '#api_button_textgenerationwebui',
@ -8354,6 +8363,33 @@ function addDebugFunctions() {
localStorage.setItem('eventTracing', localStorage.getItem('eventTracing') === 'true' ? 'false' : 'true');
toastr.info('Event tracing is now ' + (localStorage.getItem('eventTracing') === 'true' ? 'enabled' : 'disabled'));
});
registerDebugFunction('copySetup', 'Copy ST setup to clipboard [WIP]', 'Useful data when reporting bugs', async () => {
const getContextContents = getContext();
const getSettingsContents = settings;
//console.log(getSettingsContents);
const logMessage = `
\`\`\`
API: ${getSettingsContents.main_api}
API Type: ${getSettingsContents[getSettingsContents.main_api + '_settings'].type}
API server: ${getSettingsContents.api_server}
Model: ${getContextContents.onlineStatus}
Context Preset: ${power_user.context.preset}
Instruct Preset: ${power_user.instruct.preset}
API Settings: ${JSON.stringify(getSettingsContents[getSettingsContents.main_api + '_settings'], null, 2)}
\`\`\`
`;
//console.log(getSettingsContents)
//console.log(logMessage);
try {
await navigator.clipboard.writeText(logMessage);
toastr.info('Your ST API setup data has been copied to the clipboard.');
} catch (error) {
toastr.error('Failed to copy ST Setup to clipboard:', error);
}
});
}
jQuery(async function () {
@ -8896,6 +8932,7 @@ jQuery(async function () {
$('#api_button_textgenerationwebui').on('click', async function (e) {
const keys = [
{ id: 'api_key_mancer', secret: SECRET_KEYS.MANCER },
{ id: 'api_key_vllm', secret: SECRET_KEYS.VLLM },
{ id: 'api_key_aphrodite', secret: SECRET_KEYS.APHRODITE },
{ id: 'api_key_tabby', secret: SECRET_KEYS.TABBY },
{ id: 'api_key_togetherai', secret: SECRET_KEYS.TOGETHERAI },
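The new CONNECT_API_MAP entry above is what makes 'vllm' addressable by name; a hypothetical consumer sketch (the real lookup code is outside this diff):

// Hypothetical: resolve an API name to its UI panel and Connect button.
function connectByName(name) {
    const entry = CONNECT_API_MAP[name]; // e.g. CONNECT_API_MAP['vllm']
    $('#main_api').val(entry.selected).trigger('change'); // switch to textgenerationwebui
    $(entry.button).trigger('click'); // press #api_button_textgenerationwebui
}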


@ -14,7 +14,6 @@
display: flex;
height: calc(100vh - var(--topBarBlockSize));
width: 100vw;
position: relative;
overflow: hidden;
}


@ -114,6 +114,7 @@ const max_4k = 4095;
const max_8k = 8191;
const max_16k = 16383;
const max_32k = 32767;
const max_64k = 65535;
const max_128k = 128 * 1000;
const max_200k = 200 * 1000;
const max_1mil = 1000 * 1000;
@ -899,7 +900,7 @@ function getPromptRole(role) {
/**
* Populate a chat conversation by adding prompts to the conversation and managing system and user prompts.
*
* @param {PromptCollection} prompts - PromptCollection containing all prompts where the key is the prompt identifier and the value is the prompt object.
* @param {import('./PromptManager.js').PromptCollection} prompts - PromptCollection containing all prompts where the key is the prompt identifier and the value is the prompt object.
* @param {ChatCompletion} chatCompletion - An instance of ChatCompletion class that will be populated with the prompts.
* @param {Object} options - An object with optional settings.
* @param {string} options.bias - A bias to be added in the conversation.
@ -911,7 +912,7 @@ function getPromptRole(role) {
* @param {object[]} options.messageExamples - Array containing all message examples.
* @returns {Promise<void>}
*/
async function populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, quietImage, type, cyclePrompt, messages, messageExamples } = {}) {
async function populateChatCompletion(prompts, chatCompletion, { bias, quietPrompt, quietImage, type, cyclePrompt, messages, messageExamples }) {
// Helper function for preparing a prompt, that already exists within the prompt collection, for completion
const addToChatCompletion = (source, target = null) => {
// We need the prompts array to determine a position for the source.
@ -1046,21 +1047,22 @@ async function populateChatCompletion(prompts, chatCompletion, { bias, quietProm
/**
* Combines system prompts with prompt manager prompts
*
* @param {string} Scenario - The scenario or context of the dialogue.
* @param {string} charPersonality - Description of the character's personality.
* @param {string} name2 - The second name to be used in the messages.
* @param {string} worldInfoBefore - The world info to be added before the main conversation.
* @param {string} worldInfoAfter - The world info to be added after the main conversation.
* @param {string} charDescription - Description of the character.
* @param {string} quietPrompt - The quiet prompt to be used in the conversation.
* @param {string} bias - The bias to be added in the conversation.
* @param {Object} extensionPrompts - An object containing additional prompts.
* @param {string} systemPromptOverride
* @param {string} jailbreakPromptOverride
* @param {string} personaDescription
* @param {Object} options - An object with optional settings.
* @param {string} options.Scenario - The scenario or context of the dialogue.
* @param {string} options.charPersonality - Description of the character's personality.
* @param {string} options.name2 - The second name to be used in the messages.
* @param {string} options.worldInfoBefore - The world info to be added before the main conversation.
* @param {string} options.worldInfoAfter - The world info to be added after the main conversation.
* @param {string} options.charDescription - Description of the character.
* @param {string} options.quietPrompt - The quiet prompt to be used in the conversation.
* @param {string} options.bias - The bias to be added in the conversation.
* @param {Object} options.extensionPrompts - An object containing additional prompts.
* @param {string} options.systemPromptOverride
* @param {string} options.jailbreakPromptOverride
* @param {string} options.personaDescription
* @returns {Object} prompts - The prepared and merged system and user-defined prompts.
*/
function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, worldInfoBefore, worldInfoAfter, charDescription, quietPrompt, bias, extensionPrompts, systemPromptOverride, jailbreakPromptOverride, personaDescription } = {}) {
function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, worldInfoBefore, worldInfoAfter, charDescription, quietPrompt, bias, extensionPrompts, systemPromptOverride, jailbreakPromptOverride, personaDescription }) {
const scenarioText = Scenario && oai_settings.scenario_format ? substituteParams(oai_settings.scenario_format) : '';
const charPersonalityText = charPersonality && oai_settings.personality_format ? substituteParams(oai_settings.personality_format) : '';
const groupNudge = substituteParams(oai_settings.group_nudge_prompt);
@ -1178,12 +1180,16 @@ function preparePromptsForChatCompletion({ Scenario, charPersonality, name2, wor
* @param {string} content.bias - The bias to be added in the conversation.
* @param {string} content.type - The type of the chat, can be 'impersonate'.
* @param {string} content.quietPrompt - The quiet prompt to be used in the conversation.
* @param {string} content.quietImage - Image prompt for extras
* @param {string} content.cyclePrompt - The last prompt used for chat message continuation.
* @param {Array} content.extensionPrompts - An array of additional prompts.
* @param {string} content.systemPromptOverride - The system prompt override.
* @param {string} content.jailbreakPromptOverride - The jailbreak prompt override.
* @param {string} content.personaDescription - The persona description.
* @param {object} content.extensionPrompts - An array of additional prompts.
* @param {object[]} content.messages - An array of messages to be used as chat history.
* @param {string[]} content.messageExamples - An array of messages to be used as dialogue examples.
* @param dryRun - Whether this is a live call or not.
* @returns {(*[]|boolean)[]} An array where the first element is the prepared chat and the second element is a boolean flag.
* @returns {Promise<(any[]|boolean)[]>} An array where the first element is the prepared chat and the second element is a boolean flag.
*/
export async function prepareOpenAIMessages({
name2,
@ -1203,7 +1209,7 @@ export async function prepareOpenAIMessages({
personaDescription,
messages,
messageExamples,
} = {}, dryRun) {
}, dryRun) {
// Without a character selected, there is no way to accurately calculate tokens
if (!promptManager.activeCharacter && dryRun) return [null, false];
@ -3728,7 +3734,11 @@ async function onModelChange() {
}
if (oai_settings.chat_completion_source === chat_completion_sources.MISTRALAI) {
$('#openai_max_context').attr('max', max_32k);
if (oai_settings.mistralai_model.includes('mixtral-8x22b')) {
$('#openai_max_context').attr('max', max_64k);
} else {
$('#openai_max_context').attr('max', max_32k);
}
oai_settings.openai_max_context = Math.min(oai_settings.openai_max_context, Number($('#openai_max_context').attr('max')));
$('#openai_max_context').val(oai_settings.openai_max_context).trigger('input');
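Note that several signatures in this file (getMessageFromTemplate, populateChatCompletion, preparePromptsForChatCompletion, prepareOpenAIMessages) drop their `= {}` parameter defaults, so the options bag is now mandatory; a call sketch using the names from the JSDoc above:

// Omitting the options object would now throw a TypeError on destructuring.
await populateChatCompletion(prompts, chatCompletion, {
    bias, quietPrompt, quietImage, type, cyclePrompt, messages, messageExamples,
});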


@ -309,6 +309,7 @@ class PresetManager {
'mancer_model',
'togetherai_model',
'ollama_model',
'vllm_model',
'aphrodite_model',
'server_urls',
'type',


@ -3,6 +3,7 @@ import { callPopup, getRequestHeaders } from '../script.js';
export const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',
@ -38,6 +39,7 @@ const INPUT_MAP = {
[SECRET_KEYS.AI21]: '#api_key_ai21',
[SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie',
[SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite',
[SECRET_KEYS.VLLM]: '#api_key_vllm',
[SECRET_KEYS.APHRODITE]: '#api_key_aphrodite',
[SECRET_KEYS.TABBY]: '#api_key_tabby',
[SECRET_KEYS.MISTRALAI]: '#api_key_mistralai',


@ -1665,6 +1665,7 @@ function modelCallback(_, model) {
{ id: 'model_infermaticai_select', api: 'textgenerationwebui', type: textgen_types.INFERMATICAI },
{ id: 'model_dreamgen_select', api: 'textgenerationwebui', type: textgen_types.DREAMGEN },
{ id: 'mancer_model', api: 'textgenerationwebui', type: textgen_types.MANCER },
{ id: 'vllm_model', api: 'textgenerationwebui', type: textgen_types.VLLM },
{ id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
{ id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
{ id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },


@ -7,6 +7,7 @@ let mancerModels = [];
let togetherModels = [];
let infermaticAIModels = [];
let dreamGenModels = [];
let vllmModels = [];
let aphroditeModels = [];
export let openRouterModels = [];
@ -156,6 +157,28 @@ export async function loadOpenRouterModels(data) {
calculateOpenRouterCost();
}
export async function loadVllmModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid vLLM models data', data);
return;
}
vllmModels = data;
if (!data.find(x => x.id === textgen_settings.vllm_model)) {
textgen_settings.vllm_model = data[0]?.id || '';
}
$('#vllm_model').empty();
for (const model of data) {
const option = document.createElement('option');
option.value = model.id;
option.text = model.id;
option.selected = model.id === textgen_settings.vllm_model;
$('#vllm_model').append(option);
}
}
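loadVllmModels only needs objects with an `id` field, which matches the `data` array of an OpenAI-compatible /v1/models response (the model names below are illustrative):

// Illustrative payload shape consumed by loadVllmModels.
loadVllmModels([
    { id: 'meta-llama/Meta-Llama-3-8B-Instruct' },
    { id: 'mistralai/Mistral-7B-Instruct-v0.2' },
]);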
export async function loadAphroditeModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid Aphrodite models data', data);
@ -224,6 +247,12 @@ function onOpenRouterModelSelect() {
setGenerationParamsFromPreset({ max_length: model.context_length });
}
function onVllmModelSelect() {
const modelId = String($('#vllm_model').val());
textgen_settings.vllm_model = modelId;
$('#api_button_textgenerationwebui').trigger('click');
}
function onAphroditeModelSelect() {
const modelId = String($('#aphrodite_model').val());
textgen_settings.aphrodite_model = modelId;
@ -310,6 +339,20 @@ function getOpenRouterModelTemplate(option) {
`));
}
function getVllmModelTemplate(option) {
const model = vllmModels.find(x => x.id === option?.element?.value);
if (!option.id || !model) {
return option.text;
}
return $((`
<div class="flex-container flexFlowColumn">
<div><strong>${DOMPurify.sanitize(model.id)}</strong></div>
</div>
`));
}
function getAphroditeModelTemplate(option) {
const model = aphroditeModels.find(x => x.id === option?.element?.value);
@ -397,6 +440,10 @@ export function getCurrentOpenRouterModelTokenizer() {
switch (model?.architecture?.tokenizer) {
case 'Llama2':
return tokenizers.LLAMA;
case 'Llama3':
return tokenizers.LLAMA3;
case 'Yi':
return tokenizers.YI;
case 'Mistral':
return tokenizers.MISTRAL;
default:
@ -426,6 +473,7 @@ jQuery(function () {
$('#ollama_model').on('change', onOllamaModelSelect);
$('#openrouter_model').on('change', onOpenRouterModelSelect);
$('#ollama_download_model').on('click', downloadOllamaModel);
$('#vllm_model').on('change', onVllmModelSelect);
$('#aphrodite_model').on('change', onAphroditeModelSelect);
if (!isMobile()) {
@ -470,6 +518,13 @@ jQuery(function () {
width: '100%',
templateResult: getOpenRouterModelTemplate,
});
$('#vllm_model').select2({
placeholder: 'Select a model',
searchInputPlaceholder: 'Search models...',
searchInputCssClass: 'text_pole',
width: '100%',
templateResult: getVllmModelTemplate,
});
$('#aphrodite_model').select2({
placeholder: 'Select a model',
searchInputPlaceholder: 'Search models...',


@ -28,6 +28,7 @@ export {
export const textgen_types = {
OOBA: 'ooba',
MANCER: 'mancer',
VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@ -39,7 +40,7 @@ export const textgen_types = {
OPENROUTER: 'openrouter',
};
const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
const LLAMACPP_DEFAULT_ORDER = [
'top_k',
@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api';
const SERVER_INPUTS = {
[textgen_types.OOBA]: '#textgenerationwebui_api_url_text',
[textgen_types.VLLM]: '#vllm_api_url_text',
[textgen_types.APHRODITE]: '#aphrodite_api_url_text',
[textgen_types.TABBY]: '#tabby_api_url_text',
[textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text',
@ -135,8 +137,8 @@ const settings = {
samplers: LLAMACPP_DEFAULT_ORDER,
//n_aphrodite: 1,
//best_of_aphrodite: 1,
ignore_eos_token_aphrodite: false,
spaces_between_special_tokens_aphrodite: true,
ignore_eos_token: false,
spaces_between_special_tokens: true,
//logits_processors_aphrodite: [],
//log_probs_aphrodite: 0,
//prompt_log_probs_aphrodite: 0,
@ -146,6 +148,7 @@ const settings = {
infermaticai_model: '',
ollama_model: '',
openrouter_model: 'openrouter/auto',
vllm_model: '',
aphrodite_model: '',
dreamgen_model: 'opus-v1-xl/text',
legacy_api: false,
@ -208,8 +211,8 @@ const setting_names = [
'legacy_api',
//'n_aphrodite',
//'best_of_aphrodite',
'ignore_eos_token_aphrodite',
'spaces_between_special_tokens_aphrodite',
'ignore_eos_token',
'spaces_between_special_tokens',
//'logits_processors_aphrodite',
//'log_probs_aphrodite',
//'prompt_log_probs_aphrodite'
@ -454,18 +457,6 @@ function loadTextGenSettings(data, loadedSettings) {
showTypeSpecificControls(settings.type);
BIAS_CACHE.delete(BIAS_KEY);
displayLogitBias(settings.logit_bias, BIAS_KEY);
//this is needed because showTypeSpecificControls() does not handle NOT declarations
if (settings.type === textgen_types.APHRODITE) {
$('[data-forAphro="False"]').each(function () {
$(this).hide();
});
} else {
$('[data-forAphro="False"]').each(function () {
if ($(this).css('display') !== 'none') { //if it wasn't already hidden by showTypeSpecificControls
$(this).show();
}
});
}
registerDebugFunction('change-mancer-url', 'Change Mancer base URL', 'Change Mancer API server base URL', () => {
const result = prompt(`Enter Mancer base URL\nDefault: ${MANCER_SERVER_DEFAULT}`, MANCER_SERVER);
@ -587,27 +578,19 @@ jQuery(function () {
const type = String($(this).val());
settings.type = type;
if (settings.type === textgen_types.APHRODITE) {
//this is needed because showTypeSpecificControls() does not handle NOT declarations
$('[data-forAphro="False"]').each(function () {
$(this).hide();
});
if ([VLLM, APHRODITE, INFERMATICAI].includes(settings.type)) {
$('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1
$('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample'
$('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list to do this'
//special handling for Aphrodite topK -1 disable state
//special handling for vLLM/Aphrodite topK -1 disable state
$('#top_k_textgenerationwebui').attr('min', -1);
if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) {
settings['top_k'] = -1;
$('#top_k_textgenerationwebui').val('-1').trigger('input');
}
} else {
//this is needed because showTypeSpecificControls() does not handle NOT declarations
$('[data-forAphro="False"]').each(function () {
$(this).show();
});
$('#mirostat_mode_textgenerationwebui').attr('step', 1);
//undo special Aphrodite setup for topK
//undo special vLLM/Aphrodite setup for topK
$('#top_k_textgenerationwebui').attr('min', 0);
if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) {
settings['top_k'] = 0;
@ -636,7 +619,7 @@ jQuery(function () {
$('#samplerResetButton').off('click').on('click', function () {
const inputs = {
'temp_textgenerationwebui': 1,
'top_k_textgenerationwebui': 0,
'top_k_textgenerationwebui': [INFERMATICAI, APHRODITE, VLLM].includes(settings.type) ? -1 : 0,
'top_p_textgenerationwebui': 1,
'min_p_textgenerationwebui': 0,
'rep_pen_textgenerationwebui': 1,
@ -711,9 +694,10 @@ jQuery(function () {
const value = Number($(this).val());
$(`#${id}_counter_textgenerationwebui`).val(value);
settings[id] = value;
//special handling for aphrodite using -1 as disabled instead of 0
//special handling for vLLM/Aphrodite using -1 as disabled instead of 0
if ($(this).attr('id') === 'top_k_textgenerationwebui' &&
settings.type === textgen_types.APHRODITE &&
(settings.type === textgen_types.VLLM ||
settings.type === textgen_types.APHRODITE) &&
value === 0) {
settings[id] = -1;
$(this).val(-1);
@ -869,6 +853,7 @@ export function parseTextgenLogprobs(token, logprobs) {
switch (settings.type) {
case TABBY:
case VLLM:
case APHRODITE:
case MANCER:
case OOBA: {
@ -947,7 +932,7 @@ function toIntArray(string) {
return string.split(',').map(x => parseInt(x)).filter(x => !isNaN(x));
}
function getModel() {
export function getTextGenModel() {
switch (settings.type) {
case OOBA:
if (settings.custom_model) {
@ -964,6 +949,8 @@ function getModel() {
return settings.dreamgen_model;
case OPENROUTER:
return settings.openrouter_model;
case VLLM:
return settings.vllm_model;
case APHRODITE:
return settings.aphrodite_model;
case OLLAMA:
@ -987,7 +974,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
const canMultiSwipe = !isContinue && !isImpersonate && type !== 'quiet';
let params = {
'prompt': finalPrompt,
'model': getModel(),
'model': getTextGenModel(),
'max_new_tokens': maxTokens,
'max_tokens': maxTokens,
'logprobs': power_user.request_token_probabilities ? 10 : undefined,
@ -1061,11 +1048,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'ignore_eos': settings.ban_eos_token,
'n_probs': power_user.request_token_probabilities ? 10 : undefined,
};
const vllmParams = {
'n': canMultiSwipe ? settings.n : 1,
'best_of': canMultiSwipe ? settings.n : 1,
'ignore_eos': settings.ignore_eos_token,
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
'seed': settings.seed,
};
const aphroditeParams = {
'n': canMultiSwipe ? settings.n : 1,
'best_of': canMultiSwipe ? settings.n : 1,
'ignore_eos': settings.ignore_eos_token_aphrodite,
'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite,
'ignore_eos': settings.ignore_eos_token,
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
'grammar': settings.grammar_string,
//'logits_processors': settings.logits_processors_aphrodite,
//'logprobs': settings.log_probs_aphrodite,
@ -1087,10 +1081,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
delete params.dynatemp_high;
}
if (settings.type === APHRODITE) {
params = Object.assign(params, aphroditeParams);
} else {
params = Object.assign(params, nonAphroditeParams);
switch (settings.type) {
case VLLM:
params = Object.assign(params, vllmParams);
break;
case APHRODITE:
params = Object.assign(params, aphroditeParams);
break;
default:
params = Object.assign(params, nonAphroditeParams);
break;
}
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
@ -1119,4 +1121,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
return params;
}
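With the switch above, a vLLM request ends up carrying the shared params merged with vllmParams; a trimmed, illustrative result (only keys that appear in this diff):

// Illustrative merged payload for settings.type === VLLM.
{
    prompt: finalPrompt,
    model: getTextGenModel(),              // settings.vllm_model
    max_tokens: maxTokens,
    n: canMultiSwipe ? settings.n : 1,     // multiple swipes per generation
    best_of: canMultiSwipe ? settings.n : 1,
    ignore_eos: settings.ignore_eos_token,
    spaces_between_special_tokens: settings.spaces_between_special_tokens,
    seed: settings.seed,
}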


@ -4,7 +4,7 @@ import { chat_completion_sources, model_list, oai_settings } from './openai.js';
import { groups, selected_group } from './group-chats.js';
import { getStringHash } from './utils.js';
import { kai_flags } from './kai-settings.js';
import { textgen_types, textgenerationwebui_settings as textgen_settings, getTextGenServer } from './textgen-settings.js';
import { textgen_types, textgenerationwebui_settings as textgen_settings, getTextGenServer, getTextGenModel } from './textgen-settings.js';
import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, openRouterModels } from './textgen-models.js';
const { OOBA, TABBY, KOBOLDCPP, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
@ -24,6 +24,8 @@ export const tokenizers = {
YI: 8,
API_TEXTGENERATIONWEBUI: 9,
API_KOBOLD: 10,
CLAUDE: 11,
LLAMA3: 12,
BEST_MATCH: 99,
};
@ -31,6 +33,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
tokenizers.LLAMA,
tokenizers.MISTRAL,
tokenizers.YI,
tokenizers.LLAMA3,
// uncomment when NovelAI releases Kayra and Clio weights, lol
//tokenizers.NERD,
//tokenizers.NERD2,
@ -78,6 +81,16 @@ const TOKENIZER_URLS = {
decode: '/api/tokenizers/yi/decode',
count: '/api/tokenizers/yi/encode',
},
[tokenizers.CLAUDE]: {
encode: '/api/tokenizers/claude/encode',
decode: '/api/tokenizers/claude/decode',
count: '/api/tokenizers/claude/encode',
},
[tokenizers.LLAMA3]: {
encode: '/api/tokenizers/llama3/encode',
decode: '/api/tokenizers/llama3/decode',
count: '/api/tokenizers/llama3/encode',
},
[tokenizers.API_TEXTGENERATIONWEBUI]: {
encode: '/api/tokenizers/remote/textgenerationwebui/encode',
count: '/api/tokenizers/remote/textgenerationwebui/encode',
@ -211,6 +224,16 @@ export function getTokenizerBestMatch(forApi) {
}
}
if (forApi === 'textgenerationwebui') {
const model = String(getTextGenModel() || online_status).toLowerCase();
if (model.includes('llama3') || model.includes('llama-3')) {
return tokenizers.LLAMA3;
}
if (model.includes('mistral') || model.includes('mixtral')) {
return tokenizers.MISTRAL;
}
}
return tokenizers.LLAMA;
}
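The substring checks above resolve the tokenizer loosely by model name; for example (names illustrative):

// 'Meta-Llama-3-70B-Instruct' -> contains 'llama-3' -> tokenizers.LLAMA3
// 'mixtral-8x22b-instruct'    -> contains 'mixtral' -> tokenizers.MISTRAL
// anything unrecognized falls through to tokenizers.LLAMA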
@ -421,6 +444,7 @@ export function getTokenizerModel() {
const gpt2Tokenizer = 'gpt2';
const claudeTokenizer = 'claude';
const llamaTokenizer = 'llama';
const llama3Tokenizer = 'llama3';
const mistralTokenizer = 'mistral';
const yiTokenizer = 'yi';
@ -458,6 +482,9 @@ export function getTokenizerModel() {
if (model?.architecture?.tokenizer === 'Llama2') {
return llamaTokenizer;
}
else if (model?.architecture?.tokenizer === 'Llama3') {
return llama3Tokenizer;
}
else if (model?.architecture?.tokenizer === 'Mistral') {
return mistralTokenizer;
}
@ -498,10 +525,13 @@ export function getTokenizerModel() {
}
if (oai_settings.chat_completion_source === chat_completion_sources.PERPLEXITY) {
if (oai_settings.perplexity_model.includes('llama-3') || oai_settings.perplexity_model.includes('llama3')) {
return llama3Tokenizer;
}
if (oai_settings.perplexity_model.includes('llama')) {
return llamaTokenizer;
}
if (oai_settings.perplexity_model.includes('mistral')) {
if (oai_settings.perplexity_model.includes('mistral') || oai_settings.perplexity_model.includes('mixtral')) {
return mistralTokenizer;
}
}


@ -57,6 +57,7 @@ let world_info_recursive = false;
let world_info_overflow_alert = false;
let world_info_case_sensitive = false;
let world_info_match_whole_words = false;
let world_info_use_group_scoring = false;
let world_info_character_strategy = world_info_insertion_strategy.character_first;
let world_info_budget_cap = 0;
const saveWorldDebounced = debounce(async (name, data) => await _save(name, data), debounce_timeout.relaxed);
@ -80,7 +81,17 @@ const MAX_SCAN_DEPTH = 1000;
*/
class WorldInfoBuffer {
// Typedef area
/** @typedef {{scanDepth?: number, caseSensitive?: boolean, matchWholeWords?: boolean}} WIScanEntry The entry that triggered the scan */
/**
* @typedef {object} WIScanEntry The entry that triggered the scan
* @property {number} [scanDepth] The depth of the scan
* @property {boolean} [caseSensitive] If the scan is case sensitive
* @property {boolean} [matchWholeWords] If the scan should match whole words
* @property {boolean} [useGroupScoring] If the scan should use group scoring
* @property {number} [uid] The UID of the entry that triggered the scan
* @property {string[]} [key] The primary keys to scan for
* @property {string[]} [keysecondary] The secondary keys to scan for
* @property {number} [selectiveLogic] The logic to use for selective activation
*/
// End typedef area
/**
@ -244,6 +255,58 @@ class WorldInfoBuffer {
cleanExternalActivations() {
WorldInfoBuffer.externalActivations.splice(0, WorldInfoBuffer.externalActivations.length);
}
/**
* Gets the match score for the given entry.
* @param {WIScanEntry} entry Entry to check
* @returns {number} The number of key activations for the given entry
*/
getScore(entry) {
const bufferState = this.get(entry);
let numberOfPrimaryKeys = 0;
let numberOfSecondaryKeys = 0;
let primaryScore = 0;
let secondaryScore = 0;
// Increment score for every key found in the buffer
if (Array.isArray(entry.key)) {
numberOfPrimaryKeys = entry.key.length;
for (const key of entry.key) {
if (this.matchKeys(bufferState, key, entry)) {
primaryScore++;
}
}
}
// Increment score for every secondary key found in the buffer
if (Array.isArray(entry.keysecondary)) {
numberOfSecondaryKeys = entry.keysecondary.length;
for (const key of entry.keysecondary) {
if (this.matchKeys(bufferState, key, entry)) {
secondaryScore++;
}
}
}
// No keys == no score
if (!numberOfPrimaryKeys) {
return 0;
}
// Only positive logic influences the score
if (numberOfSecondaryKeys > 0) {
switch (entry.selectiveLogic) {
// AND_ANY: Add both scores
case world_info_logic.AND_ANY:
return primaryScore + secondaryScore;
// AND_ALL: Add both scores if all secondary keys are found, otherwise only primary score
case world_info_logic.AND_ALL:
return secondaryScore === numberOfSecondaryKeys ? primaryScore + secondaryScore : primaryScore;
}
}
return primaryScore;
}
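// A worked sketch of the rules above, assuming every key is present in the
// scanned buffer: an entry with key: ['dragon', 'wyrm'] and
// keysecondary: ['cave'] scores 3 under AND_ANY (2 primary + 1 secondary);
// under AND_ALL the secondary score only counts once all secondary keys
// match, otherwise only the primary score (2) is returned.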
}
export function getWorldInfoSettings() {
@ -259,6 +322,7 @@ export function getWorldInfoSettings() {
world_info_match_whole_words,
world_info_character_strategy,
world_info_budget_cap,
world_info_use_group_scoring,
};
}
@ -322,12 +386,18 @@ function setWorldInfoSettings(settings, data) {
world_info_character_strategy = Number(settings.world_info_character_strategy);
if (settings.world_info_budget_cap !== undefined)
world_info_budget_cap = Number(settings.world_info_budget_cap);
if (settings.world_info_use_group_scoring !== undefined)
world_info_use_group_scoring = Boolean(settings.world_info_use_group_scoring);
// Migrate old settings
if (world_info_budget > 100) {
world_info_budget = 25;
}
if (world_info_use_group_scoring === undefined) {
world_info_use_group_scoring = false;
}
// Reset selected world from old string and delete old keys
// TODO: Remove next release
const existingWorldInfo = settings.world_info;
@ -357,6 +427,7 @@ function setWorldInfoSettings(settings, data) {
$('#world_info_overflow_alert').prop('checked', world_info_overflow_alert);
$('#world_info_case_sensitive').prop('checked', world_info_case_sensitive);
$('#world_info_match_whole_words').prop('checked', world_info_match_whole_words);
$('#world_info_use_group_scoring').prop('checked', world_info_use_group_scoring);
$(`#world_info_character_strategy option[value='${world_info_character_strategy}']`).prop('selected', true);
$('#world_info_character_strategy').val(world_info_character_strategy);
@ -379,6 +450,7 @@ function setWorldInfoSettings(settings, data) {
});
$('#world_info_sort_order').val(localStorage.getItem(SORT_ORDER_KEY) || '0');
$('#world_info').trigger('change');
$('#world_editor_select').trigger('change');
eventSource.on(event_types.CHAT_CHANGED, () => {
@ -785,7 +857,7 @@ function displayWorldEntries(name, data, navigation = navigation_option.none) {
// Apply the filter and do the chosen sorting
entriesArray = worldInfoFilter.applyFilters(entriesArray);
entriesArray = sortEntries(entriesArray)
entriesArray = sortEntries(entriesArray);
// Run the callback for printing this
typeof callback === 'function' && callback(entriesArray);
@ -1015,6 +1087,7 @@ const originalDataKeyMap = {
'keysecondary': 'secondary_keys',
'selective': 'selective',
'matchWholeWords': 'extensions.match_whole_words',
'useGroupScoring': 'extensions.use_group_scoring',
'caseSensitive': 'extensions.case_sensitive',
'scanDepth': 'extensions.scan_depth',
'automationId': 'extensions.automation_id',
@ -1708,6 +1781,19 @@ function getWorldEntry(name, data, entry) {
});
matchWholeWordsSelect.val((entry.matchWholeWords === null || entry.matchWholeWords === undefined) ? 'null' : entry.matchWholeWords ? 'true' : 'false').trigger('input');
// use group scoring select
const useGroupScoringSelect = template.find('select[name="useGroupScoring"]');
useGroupScoringSelect.data('uid', entry.uid);
useGroupScoringSelect.on('input', function () {
const uid = $(this).data('uid');
const value = $(this).val();
data.entries[uid].useGroupScoring = value === 'null' ? null : value === 'true';
setOriginalDataValue(data, uid, 'extensions.use_group_scoring', data.entries[uid].useGroupScoring);
saveWorldInfo(name, data);
});
useGroupScoringSelect.val((entry.useGroupScoring === null || entry.useGroupScoring === undefined) ? 'null' : entry.useGroupScoring ? 'true' : 'false').trigger('input');
// automation id
const automationIdInput = template.find('input[name="automationId"]');
automationIdInput.data('uid', entry.uid);
@ -1888,6 +1974,7 @@ const newEntryTemplate = {
scanDepth: null,
caseSensitive: null,
matchWholeWords: null,
useGroupScoring: null,
automationId: '',
role: 0,
};
@ -2331,7 +2418,7 @@ async function checkWorldInfo(chat, maxContext) {
const textToScanTokens = await getTokenCountAsync(allActivatedText);
const probabilityChecksBefore = failedProbabilityChecks.size;
filterByInclusionGroups(newEntries, allActivatedEntries);
filterByInclusionGroups(newEntries, allActivatedEntries, buffer);
console.debug('-- PROBABILITY CHECKS BEGIN --');
for (const entry of newEntries) {
@ -2451,12 +2538,50 @@ async function checkWorldInfo(chat, maxContext) {
return { worldInfoBefore, worldInfoAfter, WIDepthEntries, allActivatedEntries };
}
/**
* Only leaves entries with the highest key matching score in each group.
* @param {Record<string, WIScanEntry[]>} groups The groups to filter
* @param {WorldInfoBuffer} buffer The buffer to use for scoring
* @param {(entry: WIScanEntry) => void} removeEntry The function to remove an entry
*/
function filterGroupsByScoring(groups, buffer, removeEntry) {
for (const [key, group] of Object.entries(groups)) {
// Group scoring is disabled both globally and for the group entries
if (!world_info_use_group_scoring && !group.some(x => x.useGroupScoring)) {
console.debug(`Skipping group scoring for group '${key}'`);
continue;
}
const scores = group.map(entry => buffer.getScore(entry));
const maxScore = Math.max(...scores);
console.debug(`Group '${key}' max score: ${maxScore}`);
//console.table(group.map((entry, i) => ({ uid: entry.uid, key: JSON.stringify(entry.key), score: scores[i] })));
for (let i = 0; i < group.length; i++) {
const isScored = group[i].useGroupScoring ?? world_info_use_group_scoring;
if (!isScored) {
continue;
}
if (scores[i] < maxScore) {
console.debug(`Removing score loser from inclusion group '${key}' entry '${group[i].uid}'`, group[i]);
removeEntry(group[i]);
group.splice(i, 1);
scores.splice(i, 1);
i--;
}
}
}
}
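// A minimal sketch of the net effect, assuming one scored group with scores
// [2, 1, 2]: the entry scoring 1 is handed to removeEntry() and spliced out,
// so only the max-score entries go on to the inclusion group roll below.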
/**
* Filters entries by inclusion groups.
* @param {object[]} newEntries Entries activated on current recursion level
* @param {Set<object>} allActivatedEntries Set of all activated entries
* @param {WorldInfoBuffer} buffer The buffer to use for scanning
*/
function filterByInclusionGroups(newEntries, allActivatedEntries) {
function filterByInclusionGroups(newEntries, allActivatedEntries, buffer) {
console.debug('-- INCLUSION GROUP CHECKS BEGIN --');
const grouped = newEntries.filter(x => x.group).reduce((acc, item) => {
if (!acc[item.group]) {
@ -2483,6 +2608,8 @@ function filterByInclusionGroups(newEntries, allActivatedEntries) {
}
}
filterGroupsByScoring(grouped, buffer, removeEntry);
for (const [key, group] of Object.entries(grouped)) {
console.debug(`Checking inclusion group '${key}' with ${group.length} entries`, group);
@ -2560,6 +2687,7 @@ function convertAgnaiMemoryBook(inputObj) {
scanDepth: null,
caseSensitive: null,
matchWholeWords: null,
useGroupScoring: null,
automationId: '',
role: extension_prompt_roles.SYSTEM,
};
@ -2596,6 +2724,7 @@ function convertRisuLorebook(inputObj) {
scanDepth: null,
caseSensitive: null,
matchWholeWords: null,
useGroupScoring: null,
automationId: '',
role: extension_prompt_roles.SYSTEM,
};
@ -2637,6 +2766,7 @@ function convertNovelLorebook(inputObj) {
scanDepth: null,
caseSensitive: null,
matchWholeWords: null,
useGroupScoring: null,
automationId: '',
role: extension_prompt_roles.SYSTEM,
};
@ -2679,6 +2809,7 @@ function convertCharacterBook(characterBook) {
scanDepth: entry.extensions?.scan_depth ?? null,
caseSensitive: entry.extensions?.case_sensitive ?? null,
matchWholeWords: entry.extensions?.match_whole_words ?? null,
useGroupScoring: entry.extensions?.use_group_scoring ?? null,
automationId: entry.extensions?.automation_id ?? '',
role: entry.extensions?.role ?? extension_prompt_roles.SYSTEM,
vectorized: entry.extensions?.vectorized ?? false,
@ -3057,6 +3188,11 @@ jQuery(() => {
saveSettingsDebounced();
});
$('#world_info_use_group_scoring').on('change', function () {
world_info_use_group_scoring = !!$(this).prop('checked');
saveSettingsDebounced();
});
$('#world_info_budget_cap').on('input', function () {
world_info_budget_cap = Number($(this).val());
$('#world_info_budget_cap_counter').val(world_info_budget_cap);
@ -3114,6 +3250,23 @@ jQuery(() => {
allowClear: true,
closeOnSelect: false,
});
// Subscribe world loading to the select2 multiselect items (We need to target the specific select2 control)
$('#world_info + span.select2-container').on('click', function (event) {
if ($(event.target).hasClass('select2-selection__choice__display')) {
event.preventDefault();
// select2 still bubbles the event to open the dropdown, so we close it here
$('#world_info').select2('close');
const name = $(event.target).text();
const selectedIndex = world_names.indexOf(name);
if (selectedIndex !== -1) {
$('#world_editor_select').val(selectedIndex).trigger('change');
console.log('Quick selection of world', name);
}
}
});
}
$('#WorldInfo').on('scroll', () => {

View File

@ -155,17 +155,19 @@ body {
border-radius: 10px;
background-clip: content-box;
border: 2px solid transparent;
border-top: 20px solid transparent;
min-height: 40px;
}
body.movingUI ::-webkit-scrollbar-thumb:vertical {
border-top: 20px solid transparent;
}
::-webkit-scrollbar-thumb:horizontal {
background-color: var(--grey7070a);
box-shadow: inset 0 0 0 1px var(--black50a);
border-radius: 10px;
background-clip: content-box;
border: 2px solid transparent;
/* border-left: 20px solid transparent; */
min-width: 40px;
}
@ -487,8 +489,8 @@ body.reduced-motion #bg_custom {
}
#sheld {
display: grid;
grid-template-rows: auto min-content;
display: flex;
flex-direction: column;
/* -1px to give sheld some wiggle room to bounce off topbar when moving */
height: calc(100vh - var(--topBarBlockSize) - 1px);
height: calc(100svh - var(--topBarBlockSize) - 1px);
@ -582,6 +584,7 @@ body .panelControlBar {
text-shadow: 0px 0px calc(var(--shadowWidth) * 1px) var(--SmartThemeShadowColor);
flex-direction: column;
z-index: 30;
flex-grow: 1;
}
#form_sheld {

View File

@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
}
/**
* Gets the headers for the vLLM API.
* @param {import('./users').UserDirectoryList} directories User directories
* @returns {object} Headers for the request
*/
function getVllmHeaders(directories) {
const apiKey = readSecret(directories, SECRET_KEYS.VLLM);
return apiKey ? ({
'Authorization': `Bearer ${apiKey}`,
}) : {};
}
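// A minimal sketch, assuming a key is stored under SECRET_KEYS.VLLM:
//   getVllmHeaders(directories) -> { 'Authorization': 'Bearer <api_key_vllm>' }
// Without a stored key it returns {}, so the result can always be merged into
// the base headers.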
/**
* Gets the headers for the Aphrodite API.
* @param {import('./users').UserDirectoryList} directories User directories
@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
function setAdditionalHeaders(request, args, server) {
const headerGetters = {
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
[TEXTGEN_TYPES.VLLM]: getVllmHeaders,
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,

View File

@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
const TEXTGEN_TYPES = {
OOBA: 'ooba',
MANCER: 'mancer',
VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
'stop',
];
// https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
const VLLM_KEYS = [
'model',
'prompt',
'best_of',
'echo',
'frequency_penalty',
'logit_bias',
'logprobs',
'max_tokens',
'n',
'presence_penalty',
'seed',
'stop',
'stream',
'suffix',
'temperature',
'top_p',
'user',
'use_beam_search',
'top_k',
'min_p',
'repetition_penalty',
'length_penalty',
'early_stopping',
'stop_token_ids',
'ignore_eos',
'min_tokens',
'skip_special_tokens',
'spaces_between_special_tokens',
'truncate_prompt_tokens',
'include_stop_str_in_output',
'response_format',
'guided_json',
'guided_regex',
'guided_choice',
'guided_grammar',
'guided_decoding_backend',
'guided_whitespace_pattern',
];
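// Downstream this list acts as a whitelist on the request body; a minimal
// sketch, assuming a body with an extra client-side field:
//   _.pickBy({ model: 'x', prompt: 'y', api_type: 'vllm' },
//       (_, key) => VLLM_KEYS.includes(key))
//   -> { model: 'x', prompt: 'y' }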
module.exports = {
DEFAULT_USER,
DEFAULT_AVATAR,
@ -318,4 +362,5 @@ module.exports = {
DREAMGEN_KEYS,
OPENROUTER_HEADERS,
OPENROUTER_KEYS,
VLLM_KEYS,
};

View File

@ -5,7 +5,7 @@ const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages } = require('../../prompt-converters');
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt, convertCohereMessages, convertMistralMessages } = require('../../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
@ -465,35 +465,7 @@ async function sendMistralAIRequest(request, response) {
}
try {
//must send a user role as last message
const messages = Array.isArray(request.body.messages) ? request.body.messages : [];
//large seems to be throwing a 500 error if we don't make the first message a user role, most likely a bug since the other models won't do this
if (request.body.model.includes('large'))
messages[0].role = 'user';
const lastMsg = messages[messages.length - 1];
if (messages.length > 0 && lastMsg && (lastMsg.role === 'system' || lastMsg.role === 'assistant')) {
if (lastMsg.role === 'assistant' && lastMsg.name) {
lastMsg.content = lastMsg.name + ': ' + lastMsg.content;
} else if (lastMsg.role === 'system') {
lastMsg.content = '[INST] ' + lastMsg.content + ' [/INST]';
}
lastMsg.role = 'user';
}
//system prompts can be stacked at the start, but any further system prompts after the first user/assistant message will break the model
let encounteredNonSystemMessage = false;
messages.forEach(msg => {
if ((msg.role === 'user' || msg.role === 'assistant') && !encounteredNonSystemMessage) {
encounteredNonSystemMessage = true;
}
if (encounteredNonSystemMessage && msg.role === 'system') {
msg.role = 'user';
//unsure if the instruct version is what they've deployed on their endpoints and if this will make a difference or not.
//it should be better than just sending the message as a user role without context though
msg.content = '[INST] ' + msg.content + ' [/INST]';
}
});
const messages = convertMistralMessages(request.body.messages, request.body.model, request.body.char_name, request.body.user_name);
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
@ -758,7 +730,11 @@ router.post('/bias', jsonParser, async function (request, response) {
if (sentencepieceTokenizers.includes(model)) {
const tokenizer = getSentencepiceTokenizer(model);
const instance = await tokenizer?.get();
encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text));
if (!instance) {
console.warn('Tokenizer not initialized:', model);
return response.send({});
}
encodeFunction = (text) => new Uint32Array(instance.encodeIds(text));
} else {
const tokenizer = getTiktokenTokenizer(model);
encodeFunction = (tokenizer.encode.bind(tokenizer));

View File

@ -4,7 +4,7 @@ const _ = require('lodash');
const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1 } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.LLAMACPP:
@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
url += '/v1/generate';
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.TABBY:
@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
args.body = JSON.stringify({
model: request.body.model,

View File

@ -437,6 +437,7 @@ function convertWorldInfoToCharacterBook(name, entries) {
prevent_recursion: entry.preventRecursion ?? false,
scan_depth: entry.scanDepth ?? null,
match_whole_words: entry.matchWholeWords ?? null,
use_group_scoring: entry.useGroupScoring ?? false,
case_sensitive: entry.caseSensitive ?? null,
automation_id: entry.automationId ?? '',
role: entry.role ?? 0,

View File

@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',

View File

@ -142,6 +142,7 @@ const spp_nerd_v2 = new SentencePieceTokenizer('src/tokenizers/nerdstash_v2.mode
const spp_mistral = new SentencePieceTokenizer('src/tokenizers/mistral.model');
const spp_yi = new SentencePieceTokenizer('src/tokenizers/yi.model');
const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
const sentencepieceTokenizers = [
'llama',
@ -285,6 +286,10 @@ function getTokenizerModel(requestModel) {
return 'claude';
}
if (requestModel.includes('llama3') || requestModel.includes('llama-3')) {
return 'llama3';
}
if (requestModel.includes('llama')) {
return 'llama';
}
@ -313,12 +318,12 @@ function getTiktokenTokenizer(model) {
}
/**
* Counts the tokens for the given messages using the Claude tokenizer.
* Counts the tokens for the given messages using the WebTokenizer and Claude prompt conversion.
* @param {Tokenizer} tokenizer Web tokenizer
* @param {object[]} messages Array of messages
* @returns {number} Number of tokens
*/
function countClaudeTokens(tokenizer, messages) {
function countWebTokenizerTokens(tokenizer, messages) {
// Should be fine if we use the old conversion method instead of the messages API one, I think?
const convertedPrompt = convertClaudePrompt(messages, false, '', false, false, '', false);
@ -449,6 +454,67 @@ function createTiktokenDecodingHandler(modelId) {
};
}
/**
* Creates an API handler for encoding WebTokenizer tokens.
* @param {WebTokenizer} tokenizer WebTokenizer instance
* @returns {TokenizationHandler} Handler function
*/
function createWebTokenizerEncodingHandler(tokenizer) {
/**
* Request handler for encoding WebTokenizer tokens.
* @param {import('express').Request} request
* @param {import('express').Response} response
*/
return async function (request, response) {
try {
if (!request.body) {
return response.sendStatus(400);
}
const text = request.body.text || '';
const instance = await tokenizer?.get();
if (!instance) throw new Error('Failed to load the Web tokenizer');
const tokens = Array.from(instance.encode(text));
const chunks = getWebTokenizersChunks(instance, tokens);
return response.send({ ids: tokens, count: tokens.length, chunks });
} catch (error) {
console.log(error);
return response.send({ ids: [], count: 0, chunks: [] });
}
};
}
/**
* Creates an API handler for decoding WebTokenizer tokens.
* @param {WebTokenizer} tokenizer WebTokenizer instance
* @returns {TokenizationHandler} Handler function
*/
function createWebTokenizerDecodingHandler(tokenizer) {
/**
* Request handler for decoding WebTokenizer tokens.
* @param {import('express').Request} request
* @param {import('express').Response} response
* @returns {Promise<any>}
*/
return async function (request, response) {
try {
if (!request.body) {
return response.sendStatus(400);
}
const ids = request.body.ids || [];
const instance = await tokenizer?.get();
if (!instance) throw new Error('Failed to load the Web tokenizer');
const chunks = getWebTokenizersChunks(instance, ids);
const text = instance.decode(new Int32Array(ids));
return response.send({ text, chunks });
} catch (error) {
console.log(error);
return response.send({ text: '', chunks: [] });
}
};
}
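// A minimal sketch of the decode endpoints wired up below, assuming the
// tokenizer file loaded successfully:
//   POST /api/tokenizers/llama3/decode  { ids: [...] }
//   -> { text: '...', chunks: [...] }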
const router = express.Router();
router.post('/ai21/count', jsonParser, async function (req, res) {
@ -501,17 +567,26 @@ router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandl
router.post('/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
router.post('/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
router.post('/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(claude_tokenizer));
router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
router.post('/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
router.post('/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
router.post('/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(claude_tokenizer));
router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
router.post('/openai/encode', jsonParser, async function (req, res) {
try {
const queryModel = String(req.query.model || '');
if (queryModel.includes('llama3') || queryModel.includes('llama-3')) {
const handler = createWebTokenizerEncodingHandler(llama3_tokenizer);
return handler(req, res);
}
if (queryModel.includes('llama')) {
const handler = createSentencepieceEncodingHandler(spp_llama);
return handler(req, res);
@ -528,12 +603,8 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
}
if (queryModel.includes('claude')) {
const text = req.body.text || '';
const instance = await claude_tokenizer.get();
if (!instance) throw new Error('Failed to load the Claude tokenizer');
const tokens = Object.values(instance.encode(text));
const chunks = getWebTokenizersChunks(instance, tokens);
return res.send({ ids: tokens, count: tokens.length, chunks });
const handler = createWebTokenizerEncodingHandler(claude_tokenizer);
return handler(req, res);
}
const model = getTokenizerModel(queryModel);
@ -549,6 +620,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
try {
const queryModel = String(req.query.model || '');
if (queryModel.includes('llama3') || queryModel.includes('llama-3')) {
const handler = createWebTokenizerDecodingHandler(llama3_tokenizer);
return handler(req, res);
}
if (queryModel.includes('llama')) {
const handler = createSentencepieceDecodingHandler(spp_llama);
return handler(req, res);
@ -565,11 +641,8 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
}
if (queryModel.includes('claude')) {
const ids = req.body.ids || [];
const instance = await claude_tokenizer.get();
if (!instance) throw new Error('Failed to load the Claude tokenizer');
const chunkText = instance.decode(new Int32Array(ids));
return res.send({ text: chunkText });
const handler = createWebTokenizerDecodingHandler(claude_tokenizer);
return handler(req, res);
}
const model = getTokenizerModel(queryModel);
@ -592,7 +665,14 @@ router.post('/openai/count', jsonParser, async function (req, res) {
if (model === 'claude') {
const instance = await claude_tokenizer.get();
if (!instance) throw new Error('Failed to load the Claude tokenizer');
num_tokens = countClaudeTokens(instance, req.body);
num_tokens = countWebTokenizerTokens(instance, req.body);
return res.send({ 'token_count': num_tokens });
}
if (model === 'llama3' || model === 'llama-3') {
const instance = await llama3_tokenizer.get();
if (!instance) throw new Error('Failed to load the Llama3 tokenizer');
num_tokens = countWebTokenizerTokens(instance, req.body);
return res.send({ 'token_count': num_tokens });
}
@ -720,6 +800,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
url += '/tokenize';
args.body = JSON.stringify({ 'content': text });
break;
case TEXTGEN_TYPES.VLLM:
return response.send({ error: true });
case TEXTGEN_TYPES.APHRODITE:
url += '/v1/tokenize';
args.body = JSON.stringify({ 'prompt': text });
@ -753,7 +835,7 @@ module.exports = {
TEXT_COMPLETION_MODELS,
getTokenizerModel,
getTiktokenTokenizer,
countClaudeTokens,
countWebTokenizerTokens,
getSentencepiceTokenizer,
sentencepieceTokenizers,
router,

View File

@ -2,6 +2,7 @@ require('./polyfill.js');
/**
* Convert a prompt from the ChatML objects to the format used by Claude.
* Mainly deprecated. Only used for counting tokens.
* @param {object[]} messages Array of messages
* @param {boolean} addAssistantPostfix Add Assistant postfix.
* @param {string} addAssistantPrefill Add Assistant prefill after the assistant postfix.
@ -354,6 +355,71 @@ function convertGooglePrompt(messages, model, useSysPrompt = false, charName = '
return { contents: contents, system_instruction: system_instruction };
}
/**
* Convert a prompt from the ChatML objects to the format used by MistralAI.
* @param {object[]} messages Array of messages
* @param {string} model Model name
* @param {string} charName Character name
* @param {string} userName User name
* @returns {object[]} Messages in MistralAI format
*/
function convertMistralMessages(messages, model, charName = '', userName = '') {
if (!Array.isArray(messages)) {
return [];
}
//large seems to be throwing a 500 error if we don't make the first message a user role, most likely a bug since the other models won't do this
if (model.includes('large')) {
messages[0].role = 'user';
}
//must send a user role as last message
const lastMsg = messages[messages.length - 1];
if (messages.length > 0 && lastMsg && (lastMsg.role === 'system' || lastMsg.role === 'assistant')) {
if (lastMsg.role === 'assistant' && lastMsg.name) {
lastMsg.content = lastMsg.name + ': ' + lastMsg.content;
} else if (lastMsg.role === 'system') {
lastMsg.content = '[INST] ' + lastMsg.content + ' [/INST]';
}
lastMsg.role = 'user';
}
//system prompts can be stacked at the start, but any further system prompts after the first user/assistant message will break the model
let encounteredNonSystemMessage = false;
messages.forEach(msg => {
if (msg.role === 'system' && msg.name === 'example_assistant') {
if (charName) {
msg.content = `${charName}: ${msg.content}`;
}
delete msg.name;
}
if (msg.role === 'system' && msg.name === 'example_user') {
if (userName) {
msg.content = `${userName}: ${msg.content}`;
}
delete msg.name;
}
if (msg.name) {
msg.content = `${msg.name}: ${msg.content}`;
delete msg.name;
}
if ((msg.role === 'user' || msg.role === 'assistant') && !encounteredNonSystemMessage) {
encounteredNonSystemMessage = true;
}
if (encounteredNonSystemMessage && msg.role === 'system') {
msg.role = 'user';
//unsure if the instruct version is what they've deployed on their endpoints and if this will make a difference or not.
//it should be better than just sending the message as a user role without context though
msg.content = '[INST] ' + msg.content + ' [/INST]';
}
});
return messages;
}
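// A minimal sketch of the trailing-message fix above (hypothetical contents):
//   convertMistralMessages([
//       { role: 'user', content: 'Hi' },
//       { role: 'system', content: 'Stay in character.' },
//   ], 'mistral-small-latest')
//   -> [{ role: 'user', content: 'Hi' },
//       { role: 'user', content: '[INST] Stay in character. [/INST]' }]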
/**
* Convert a prompt from the ChatML objects to the format used by Text Completion API.
* @param {object[]} messages Array of messages
@ -385,4 +451,5 @@ module.exports = {
convertGooglePrompt,
convertTextCompletionPrompt,
convertCohereMessages,
convertMistralMessages,
};

File diff suppressed because one or more lines are too long