Added an alternative repetition penalty calculation (logarithmic instead of linear application) as an option.

This commit is contained in:
ebolam
2023-01-10 08:45:55 -05:00
parent 70a25ed6db
commit 9b1138bafa
5 changed files with 55 additions and 20 deletions
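
For context on what the option changes: the existing code divides a repeated token's positive logits by the penalty and multiplies its negative ones, while the new mode subtracts log(penalty) from the logit. Because exp(logit - log(p)) equals exp(logit)/p, the log form always divides the token's unnormalized softmax probability by the penalty, whichever sign the logit has. A minimal sketch (not part of the commit) of that difference:

import numpy as np

logit, p = -1.5, 1.2
# log mode: subtracting log(p) always divides the unnormalized probability by p
assert np.isclose(np.exp(logit - np.log(p)), np.exp(logit) / p)
# linear mode multiplies a negative logit instead, a larger drop in this case
print(logit - np.log(p), logit * p)   # ≈ -1.682 vs -1.8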

View File

@@ -2180,7 +2180,7 @@ def patch_transformers():
                 return old_call(self, *args, **kwargs)
             return args[1]
         cls.__call__ = new_call
-    dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
+    dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range", "use_alt_rep_pen"), ("rep_pen", "rep_pen_slope", "rep_pen_range", "use_alt_rep_pen"), cond=lambda x: x[0] != 1.0)
     dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
     dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
     dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
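
For reference, dynamic_processor_wrap (partly visible in the context lines above) swaps the processor's __call__ for a wrapper that copies the listed koboldai_vars attributes onto the processor before every call, so adding "use_alt_rep_pen" to both tuples is enough for the toggle to reach AdvancedRepetitionPenaltyLogitsProcessor at generation time. A simplified sketch of that wrapping pattern, assuming the module-level koboldai_vars settings object used elsewhere in this commit:

def dynamic_processor_wrap(cls, field_names, var_names, cond=None):
    old_call = cls.__call__
    def new_call(self, *args, **kwargs):
        # refresh the processor's fields from the live settings object
        values = [getattr(koboldai_vars, v) for v in var_names]
        for field, value in zip(field_names, values):
            setattr(self, field, value)
        # only apply the processor when the condition holds (e.g. rep_pen != 1.0)
        if cond is None or cond(values):
            return old_call(self, *args, **kwargs)
        return args[1]  # otherwise return the scores untouched
    cls.__call__ = new_call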

View File

@@ -160,6 +160,22 @@ gensettingstf = [
 "name": "rep_pen_slope",
 "ui_level": 1
 },
+{
+"uitype": "toggle",
+"unit": "bool",
+"label": "Alt Rep Pen",
+"id": "use_alt_rep_pen",
+"min": 0,
+"max": 1,
+"step": 1,
+"default": 0,
+"tooltip": "Applies repetition penalty as a logarithmic modifier rather than a linear modifier.",
+"menu_path": "Settings",
+"sub_path": "Repetition",
+"classname": "model",
+"name": "use_alt_rep_pen",
+"ui_level": 2
+},
 {
 "uitype": "slider",
 "unit": "int",

View File

@@ -720,6 +720,7 @@ class model_settings(settings):
         self.horde_wait_time = 0
         self.horde_queue_position = 0
         self.horde_queue_size = 0
+        self.use_alt_rep_pen = False

View File

@@ -201,15 +201,23 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
     # values by repetition_penalty (the academic publication that described
     # this technique actually just only divided, but that would cause tokens
     # with negative logits to become more likely, which is obviously wrong)
-    penalty_logits = np.where(
-        penalty_arange >= 0,
-        np.where(
-            penalty_logits > 0,
-            penalty_logits/repetition_penalty,
-            penalty_logits*repetition_penalty,
-        ),
-        penalty_logits,
-    )
+    if koboldai_vars.use_alt_rep_pen:
+        penalty_logits = np.where(
+            penalty_arange >= 0,
+            penalty_logits - np.log(repetition_penalty),
+            penalty_logits,
+        )
+    else:
+        penalty_logits = np.where(
+            penalty_arange >= 0,
+            np.where(
+                penalty_logits > 0,
+                penalty_logits/repetition_penalty,
+                penalty_logits*repetition_penalty,
+            ),
+            penalty_logits,
+        )
     # Finally, put those penalized logit values back into their original
     # positions in the logits array
     logits[tokens] = penalty_logits
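
A small standalone example (not part of the commit) running both branches above on toy values; note how the linear form multiplies the negative logit (a larger drop) while the log form lowers every repeated token by the same log(penalty):

import numpy as np

penalty_logits = np.array([2.0, -1.5, 0.5])   # logits of three repeated tokens
penalty_arange = np.array([0, 0, 0])          # all inside the penalty range
repetition_penalty = 1.2

linear = np.where(
    penalty_arange >= 0,
    np.where(penalty_logits > 0,
             penalty_logits / repetition_penalty,
             penalty_logits * repetition_penalty),
    penalty_logits,
)
alt = np.where(
    penalty_arange >= 0,
    penalty_logits - np.log(repetition_penalty),
    penalty_logits,
)
print(linear)  # ≈ [ 1.667 -1.8    0.417]
print(alt)     # ≈ [ 1.818 -1.682  0.318]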
@@ -389,15 +397,22 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
     # values by repetition_penalty (the academic publication that described
     # this technique actually just only divided, but that would cause tokens
     # with negative logits to become more likely, which is obviously wrong)
-    penalty_logits = jnp.where(
-        penalty_arange >= 0,
-        jnp.where(
-            penalty_logits > 0,
-            penalty_logits/repetition_penalty,
-            penalty_logits*repetition_penalty,
-        ),
-        penalty_logits,
-    )
+    if koboldai_vars.use_alt_rep_pen:
+        penalty_logits = jnp.where(
+            penalty_arange >= 0,
+            penalty_logits - jnp.log(repetition_penalty),
+            penalty_logits,
+        )
+    else:
+        penalty_logits = jnp.where(
+            penalty_arange >= 0,
+            jnp.where(
+                penalty_logits > 0,
+                penalty_logits/repetition_penalty,
+                penalty_logits*repetition_penalty,
+            ),
+            penalty_logits,
+        )
     # Finally, put those penalized logit values back into their original
     # positions in the logits array
     return logits.at[tokens].set(penalty_logits)
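
One detail on the static (JAX) path: if this function runs under jax.jit, as the _static naming suggests, koboldai_vars.use_alt_rep_pen is a plain Python bool, so the if/else is resolved once at trace time and only the chosen branch ends up in the compiled graph. A minimal standalone sketch (illustrative names) of that trace-time branching:

import jax
import jax.numpy as jnp

def penalize(logits, penalty, use_alt):
    if use_alt:  # Python-level branch, fixed at trace time
        return logits - jnp.log(penalty)
    return jnp.where(logits > 0, logits / penalty, logits * penalty)

# marking use_alt static bakes the chosen branch into the compiled function
penalize_jit = jax.jit(penalize, static_argnums=2)
print(penalize_jit(jnp.array([2.0, -1.5]), 1.2, True))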

View File

@@ -51,7 +51,10 @@ class AdvancedRepetitionPenaltyLogitsProcessor(LogitsWarper):
         self.penalty = _penalty[..., -clipped_penalty_range:]
         score = torch.gather(scores, 1, input_ids)
-        score = torch.where(score <= 0, score * self.penalty, score / self.penalty)
+        if self.use_alt_rep_pen:
+            score = score - torch.log(self.penalty)
+        else:
+            score = torch.where(score <= 0, score * self.penalty, score / self.penalty)
         scores.scatter_(1, input_ids, score)
         return scores
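
For readers unfamiliar with the gather/scatter_ pattern used here, a small standalone illustration (toy values, not repository code) of what the warper does to the score matrix in the new log mode:

import torch

scores = torch.tensor([[0.5, 2.0, -1.5, 0.1]])   # logits over a 4-token vocab
input_ids = torch.tensor([[1, 2]])                # token ids seen in the context
penalty = torch.tensor(1.2)

score = torch.gather(scores, 1, input_ids)        # -> [[ 2.0, -1.5]]
score = score - torch.log(penalty)                # log-mode penalty
scores.scatter_(1, input_ids, score)              # write penalized values back in place
print(scores)  # tokens 1 and 2 are each lowered by log(1.2) ≈ 0.182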