Added an alternative repetition penalty calculation (logarithmic instead of linear application) as an option.

This commit is contained in:
ebolam
2023-01-10 08:45:55 -05:00
parent 70a25ed6db
commit 9b1138bafa
5 changed files with 55 additions and 20 deletions
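
For context on what the option changes: the existing code divides a repeated token's positive logits by the penalty and multiplies its negative ones, while the new mode subtracts log(penalty) from the logit. Because exp(logit - log(p)) equals exp(logit)/p, the log form always divides the token's unnormalized softmax probability by the penalty, whichever sign the logit has. A minimal sketch (not part of the commit) of that difference:

import numpy as np

logit, p = -1.5, 1.2
# log mode: subtracting log(p) always divides the unnormalized probability by p
assert np.isclose(np.exp(logit - np.log(p)), np.exp(logit) / p)
# linear mode multiplies a negative logit instead, a larger drop in this case
print(logit - np.log(p), logit * p)   # ≈ -1.682 vs -1.8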

View File

@@ -2180,7 +2180,7 @@ def patch_transformers():
                 return old_call(self, *args, **kwargs)
             return args[1]
         cls.__call__ = new_call
-    dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
+    dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range", "use_alt_rep_pen"), ("rep_pen", "rep_pen_slope", "rep_pen_range", "use_alt_rep_pen"), cond=lambda x: x[0] != 1.0)
     dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
     dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
     dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
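
For reference, dynamic_processor_wrap (partly visible in the context lines above) swaps the processor's __call__ for a wrapper that copies the listed koboldai_vars attributes onto the processor before every call, so adding "use_alt_rep_pen" to both tuples is enough for the toggle to reach AdvancedRepetitionPenaltyLogitsProcessor at generation time. A simplified sketch of that wrapping pattern, assuming the module-level koboldai_vars settings object used elsewhere in this commit:

def dynamic_processor_wrap(cls, field_names, var_names, cond=None):
    old_call = cls.__call__
    def new_call(self, *args, **kwargs):
        # refresh the processor's fields from the live settings object
        values = [getattr(koboldai_vars, v) for v in var_names]
        for field, value in zip(field_names, values):
            setattr(self, field, value)
        # only apply the processor when the condition holds (e.g. rep_pen != 1.0)
        if cond is None or cond(values):
            return old_call(self, *args, **kwargs)
        return args[1]  # otherwise return the scores untouched
    cls.__call__ = new_call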

View File

@@ -160,6 +160,22 @@ gensettingstf = [
 "name": "rep_pen_slope",
 "ui_level": 1
 },
+{
+"uitype": "toggle",
+"unit": "bool",
+"label": "Alt Rep Pen",
+"id": "use_alt_rep_pen",
+"min": 0,
+"max": 1,
+"step": 1,
+"default": 0,
+"tooltip": "Applies repetition penalty as a logarithmic modifier rather than a linear modifier.",
+"menu_path": "Settings",
+"sub_path": "Repetition",
+"classname": "model",
+"name": "use_alt_rep_pen",
+"ui_level": 2
+},
 {
 "uitype": "slider",
 "unit": "int",

View File

@@ -720,6 +720,7 @@ class model_settings(settings):
         self.horde_wait_time = 0
         self.horde_queue_position = 0
         self.horde_queue_size = 0
+        self.use_alt_rep_pen = False

View File

@@ -201,15 +201,23 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
     # values by repetition_penalty (the academic publication that described
     # this technique actually just only divided, but that would cause tokens
     # with negative logits to become more likely, which is obviously wrong)
-    penalty_logits = np.where(
-        penalty_arange >= 0,
-        np.where(
-            penalty_logits > 0,
-            penalty_logits/repetition_penalty,
-            penalty_logits*repetition_penalty,
-        ),
-        penalty_logits,
-    )
+    if koboldai_vars.use_alt_rep_pen:
+        penalty_logits = np.where(
+            penalty_arange >= 0,
+            penalty_logits - np.log(repetition_penalty),
+            penalty_logits,
+        )
+    else:
+        penalty_logits = np.where(
+            penalty_arange >= 0,
+            np.where(
+                penalty_logits > 0,
+                penalty_logits/repetition_penalty,
+                penalty_logits*repetition_penalty,
+            ),
+            penalty_logits,
+        )
     # Finally, put those penalized logit values back into their original
     # positions in the logits array
     logits[tokens] = penalty_logits
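
A small standalone example (not part of the commit) running both branches above on toy values; note how the linear form multiplies the negative logit (a larger drop) while the log form lowers every repeated token by the same log(penalty):

import numpy as np

penalty_logits = np.array([2.0, -1.5, 0.5])   # logits of three repeated tokens
penalty_arange = np.array([0, 0, 0])          # all inside the penalty range
repetition_penalty = 1.2

linear = np.where(
    penalty_arange >= 0,
    np.where(penalty_logits > 0,
             penalty_logits / repetition_penalty,
             penalty_logits * repetition_penalty),
    penalty_logits,
)
alt = np.where(
    penalty_arange >= 0,
    penalty_logits - np.log(repetition_penalty),
    penalty_logits,
)
print(linear)  # ≈ [ 1.667 -1.8    0.417]
print(alt)     # ≈ [ 1.818 -1.682  0.318]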
@@ -389,15 +397,22 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
     # values by repetition_penalty (the academic publication that described
     # this technique actually just only divided, but that would cause tokens
     # with negative logits to become more likely, which is obviously wrong)
-    penalty_logits = jnp.where(
-        penalty_arange >= 0,
-        jnp.where(
-            penalty_logits > 0,
-            penalty_logits/repetition_penalty,
-            penalty_logits*repetition_penalty,
-        ),
-        penalty_logits,
-    )
+    if koboldai_vars.use_alt_rep_pen:
+        penalty_logits = jnp.where(
+            penalty_arange >= 0,
+            penalty_logits - jnp.log(repetition_penalty),
+            penalty_logits,
+        )
+    else:
+        penalty_logits = jnp.where(
+            penalty_arange >= 0,
+            jnp.where(
+                penalty_logits > 0,
+                penalty_logits/repetition_penalty,
+                penalty_logits*repetition_penalty,
+            ),
+            penalty_logits,
+        )
     # Finally, put those penalized logit values back into their original
     # positions in the logits array
     return logits.at[tokens].set(penalty_logits)
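
One detail on the static (JAX) path: if this function runs under jax.jit, as the _static naming suggests, koboldai_vars.use_alt_rep_pen is a plain Python bool, so the if/else is resolved once at trace time and only the chosen branch ends up in the compiled graph. A minimal standalone sketch (illustrative names) of that trace-time branching:

import jax
import jax.numpy as jnp

def penalize(logits, penalty, use_alt):
    if use_alt:  # Python-level branch, fixed at trace time
        return logits - jnp.log(penalty)
    return jnp.where(logits > 0, logits / penalty, logits * penalty)

# marking use_alt static bakes the chosen branch into the compiled function
penalize_jit = jax.jit(penalize, static_argnums=2)
print(penalize_jit(jnp.array([2.0, -1.5]), 1.2, True))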

View File

@@ -51,7 +51,10 @@ class AdvancedRepetitionPenaltyLogitsProcessor(LogitsWarper):
         self.penalty = _penalty[..., -clipped_penalty_range:]
         score = torch.gather(scores, 1, input_ids)
-        score = torch.where(score <= 0, score * self.penalty, score / self.penalty)
+        if self.use_alt_rep_pen:
+            score = score - torch.log(self.penalty)
+        else:
+            score = torch.where(score <= 0, score * self.penalty, score / self.penalty)
         scores.scatter_(1, input_ids, score)
         return scores
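
For readers unfamiliar with the gather/scatter_ pattern used here, a small standalone illustration (toy values, not repository code) of what the warper does to the score matrix in the new log mode:

import torch

scores = torch.tensor([[0.5, 2.0, -1.5, 0.1]])   # logits over a 4-token vocab
input_ids = torch.tensor([[1, 2]])                # token ids seen in the context
penalty = torch.tensor(1.2)

score = torch.gather(scores, 1, input_ids)        # -> [[ 2.0, -1.5]]
score = score - torch.log(penalty)                # log-mode penalty
scores.scatter_(1, input_ids, score)              # write penalized values back in place
print(scores)  # tokens 1 and 2 are each lowered by log(1.2) ≈ 0.182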