Allow EOS unbanning

2025-06-05 21:59:24 +02:00 · 2023-08-29 20:51:09 +02:00
parent d77acf17eb
commit 49fa63052f
4 changed files with 52 additions and 12 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -930,7 +930,7 @@ tags = [
 api_version = None  # This gets set automatically so don't change this value
 api_v1 = KoboldAPISpec(
-    version="1.2.3",
+    version="1.2.4",
    prefixes=["/api/v1", "/api/latest"],
    tags=tags,
 )
@@ -8161,6 +8161,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
    frmtrmblln: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, replaces all occurrences of two or more consecutive newlines in the output with one newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
    frmtrmspch: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes `#/@%{}+=~|\^<>` from the output.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
    singleline: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes everything after the first line of the output, including the newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
    use_default_badwordids: bool = fields.Boolean(load_default=True, metadata={"description": "Ban tokens that commonly worsen the writing experience for continuous story writing"})
    disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
    frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
    quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
@@ -8169,6 +8170,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
    sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
    stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."}, validate=[validate.Length(max=10)])
 class GenerationResultSchema(KoboldSchema):
    text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."})
@@ -8311,6 +8313,7 @@ def _generate_text(body: GenerationInputSchema):
        "sampler_order": ("koboldai_vars", "sampler_order", None),
        "sampler_full_determinism": ("koboldai_vars", "full_determinism", None),
        "stop_sequence": ("koboldai_vars", "stop_sequence", None),
        "use_default_badwordids": ("koboldai_vars", "use_default_badwordids", None),
    }
    saved_settings = {}
    set_aibusy(1)
--- a/gensettings.py
+++ b/gensettings.py
@@ -396,6 +396,22 @@ gensettingstf = [
    "name": "output_streaming",
    "ui_level": 1
 	},
    {
 	"uitype": "toggle",
 	"unit": "bool",
 	"label": "Ban Bad Tokens",
 	"id": "setusedefaultbadwordids",
 	"min": 0,
 	"max": 1,
 	"step": 1,
 	"default": 1,
 	"tooltip": "Ban tokens that commonly worsen the writing experience for continuous story writing.",
    "menu_path": "Settings",
    "sub_path": "Sampling",
    "classname": "model",
    "name": "use_default_badwordids",
    "ui_level": 0
 	},
    {
 	"uitype": "toggle",
 	"unit": "bool",
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -693,6 +693,7 @@ class model_settings(settings):
        self._koboldai_vars = koboldai_vars
        self.alt_multi_gen = False
        self.bit_8_available = None
        self.use_default_badwordids = True
        self.supported_gen_modes = []
    def reset_for_model_load(self):
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -330,19 +330,39 @@ class HFTorchInferenceModel(HFInferenceModel):
        if seed is not None:
            torch.manual_seed(seed)
        if utils.koboldai_vars.use_default_badwordids:
            self.active_badwordids = self.badwordsids + additional_bad_words_ids
        else:
            if additional_bad_words_ids:
                self.active_badwordids = additional_bad_words_ids
            else:
                self.active_badwordids = None
        with torch.no_grad():
            start_time = time.time()
-            genout = self.model.generate(
+            if self.active_badwordids: ## I know duplicating this call is ugly, but HF checks whether bad_words_ids is present and, if it is, accepts nothing but actual token bans (which I can't guarantee would be universal enough), so the argument is omitted entirely when there is nothing to ban. - Henk
-                input_ids=gen_in,
+                genout = self.model.generate(
-                do_sample=True,
+                    input_ids=gen_in,
-                max_length=min(
+                    do_sample=True,
-                    len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
+                    max_length=min(
-                ),
+                        len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
-                repetition_penalty=1.0,
+                    ),
-                bad_words_ids=self.badwordsids + additional_bad_words_ids,
+                    repetition_penalty=1.0,
-                use_cache=True,
+                    bad_words_ids=self.active_badwordids,
-                num_return_sequences=batch_count,
+                    use_cache=True,
-            )
+                    num_return_sequences=batch_count,
                )
            else:
                 genout = self.model.generate(
                    input_ids=gen_in,
                    do_sample=True,
                    max_length=min(
                        len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
                    ),
                    repetition_penalty=1.0,
                    use_cache=True,
                    num_return_sequences=batch_count,
                )               
        logger.debug(
            "torch_raw_generate: run generator {}s".format(time.time() - start_time)
        )