Merge commit '8a38b258f497281af06fcb0c2559f382b419b938' into overhaul-merge

Gnome Ann 2022-06-14 18:36:37 -04:00
commit 18218a99bc
6 changed files with 107 additions and 7 deletions

View File

@@ -220,6 +220,7 @@ class vars:
    temp = 0.5 # Default generator temperature
    top_p = 0.9 # Default generator top_p
    top_k = 0 # Default generator top_k
+   top_a = 0.0 # Default generator top-a
    tfs = 1.0 # Default generator tfs (tail-free sampling)
    typical = 1.0 # Default generator typical sampling threshold
    numseqs = 1 # Number of sequences to ask the generator to create
@@ -657,6 +658,8 @@ def loadmodelsettings():
        vars.tfs = js["tfs"]
    if("typical" in js):
        vars.typical = js["typical"]
+   if("top_a" in js):
+       vars.top_a = js["top_a"]
    if("rep_pen" in js):
        vars.rep_pen = js["rep_pen"]
    if("rep_pen_slope" in js):
@@ -693,6 +696,7 @@ def savesettings():
    js["top_k"] = vars.top_k
    js["tfs"] = vars.tfs
    js["typical"] = vars.typical
+   js["top_a"] = vars.top_a
    js["rep_pen"] = vars.rep_pen
    js["rep_pen_slope"] = vars.rep_pen_slope
    js["rep_pen_range"] = vars.rep_pen_range
@@ -773,6 +777,8 @@ def processsettings(js):
        vars.tfs = js["tfs"]
    if("typical" in js):
        vars.typical = js["typical"]
+   if("top_a" in js):
+       vars.top_a = js["top_a"]
    if("rep_pen" in js):
        vars.rep_pen = js["rep_pen"]
    if("rep_pen_slope" in js):
@@ -1268,7 +1274,7 @@ def patch_transformers():
    # Patch transformers to use our custom logit warpers
    from transformers import LogitsProcessorList, LogitsWarper, LogitsProcessor, TopKLogitsWarper, TopPLogitsWarper, TemperatureLogitsWarper, RepetitionPenaltyLogitsProcessor
-   from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper
+   from warpers import AdvancedRepetitionPenaltyLogitsProcessor, TailFreeLogitsWarper, TypicalLogitsWarper, TopALogitsWarper

    def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
        old_call = cls.__call__
@@ -1288,6 +1294,7 @@ def patch_transformers():
        cls.__call__ = new_call
    dynamic_processor_wrap(AdvancedRepetitionPenaltyLogitsProcessor, ("penalty", "penalty_slope", "penalty_range"), ("rep_pen", "rep_pen_slope", "rep_pen_range"), cond=lambda x: x[0] != 1.0)
    dynamic_processor_wrap(TopKLogitsWarper, "top_k", "top_k", cond=lambda x: x > 0)
+   dynamic_processor_wrap(TopALogitsWarper, "top_a", "top_a", cond=lambda x: x > 0.0)
    dynamic_processor_wrap(TopPLogitsWarper, "top_p", "top_p", cond=lambda x: x < 1.0)
    dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
    dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
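The body of dynamic_processor_wrap is mostly elided in this hunk. A minimal sketch of the pattern it implements, reconstructed from the visible old_call/new_call lines (treat the details as assumptions, not the verbatim helper):

```python
class vars:  # stand-in for KoboldAI's settings class from the first hunk
    top_a = 0.0

def dynamic_processor_wrap(cls, field_name, var_name, cond=None):
    # Rebind cls.__call__ so the warper reads its parameter(s) from the
    # global `vars` object at generation time instead of construction time.
    old_call = cls.__call__
    def new_call(self, *args, **kwargs):
        if isinstance(field_name, str):
            setattr(self, field_name, getattr(vars, var_name))
            value = getattr(self, field_name)
        else:  # tuple case, e.g. the repetition-penalty triple above
            for f, v in zip(field_name, var_name):
                setattr(self, f, getattr(vars, v))
            value = tuple(getattr(self, f) for f in field_name)
        # `cond` marks the "enabled" range, e.g. top_a > 0.0 for top-a;
        # when it fails, the scores pass through untouched.
        if cond is None or cond(value):
            return old_call(self, *args, **kwargs)
        return args[1]  # __call__(self, input_ids, scores) -> scores
    cls.__call__ = new_call
```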
@@ -1334,6 +1341,7 @@ def patch_transformers():
    def new_get_logits_warper(beams: int = 1,) -> LogitsProcessorList:
        warper_list = LogitsProcessorList()
        warper_list.append(TopKLogitsWarper(top_k=1, min_tokens_to_keep=1 + (beams > 1)))
+       warper_list.append(TopALogitsWarper(top_a=0.5, min_tokens_to_keep=1 + (beams > 1)))
        warper_list.append(TopPLogitsWarper(top_p=0.5, min_tokens_to_keep=1 + (beams > 1)))
        warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
        warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
@@ -1962,6 +1970,7 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
        "top_k": int(vars.top_k),
        "tfs": float(vars.tfs),
        "typical": float(vars.typical),
+       "top_a": float(vars.top_a),
        "repetition_penalty": float(vars.rep_pen),
        "rpslope": float(vars.rep_pen_slope),
        "rprange": int(vars.rep_pen_range),
@@ -2384,6 +2393,7 @@ def lua_has_setting(setting):
        "settopk",
        "settfs",
        "settypical",
+       "settopa",
        "setreppen",
        "setreppenslope",
        "setreppenrange",
@@ -2403,6 +2413,7 @@ def lua_has_setting(setting):
        "top_k",
        "tfs",
        "typical",
+       "topa",
        "reppen",
        "reppenslope",
        "reppenrange",
@@ -2437,6 +2448,7 @@ def lua_get_setting(setting):
    if(setting in ("settopk", "topk", "top_k")): return vars.top_k
    if(setting in ("settfs", "tfs")): return vars.tfs
    if(setting in ("settypical", "typical")): return vars.typical
+   if(setting in ("settopa", "topa")): return vars.top_a
    if(setting in ("setreppen", "reppen")): return vars.rep_pen
    if(setting in ("setreppenslope", "reppenslope")): return vars.rep_pen_slope
    if(setting in ("setreppenrange", "reppenrange")): return vars.rep_pen_range
@@ -2472,6 +2484,7 @@ def lua_set_setting(setting, v):
    if(setting in ("settopk", "topk")): vars.top_k = v
    if(setting in ("settfs", "tfs")): vars.tfs = v
    if(setting in ("settypical", "typical")): vars.typical = v
+   if(setting in ("settopa", "topa")): vars.top_a = v
    if(setting in ("setreppen", "reppen")): vars.rep_pen = v
    if(setting in ("setreppenslope", "reppenslope")): vars.rep_pen_slope = v
    if(setting in ("setreppenrange", "reppenrange")): vars.rep_pen_range = v
@@ -2862,6 +2875,11 @@ def get_message(msg):
        emit('from_server', {'cmd': 'setlabeltypical', 'data': msg['data']}, broadcast=True)
        settingschanged()
        refresh_settings()
+   elif(msg['cmd'] == 'settopa'):
+       vars.top_a = float(msg['data'])
+       emit('from_server', {'cmd': 'setlabeltopa', 'data': msg['data']}, broadcast=True)
+       settingschanged()
+       refresh_settings()
    elif(msg['cmd'] == 'setreppen'):
        vars.rep_pen = float(msg['data'])
        emit('from_server', {'cmd': 'setlabelreppen', 'data': msg['data']}, broadcast=True)
@@ -3988,6 +4006,7 @@ def sendtocolab(txt, min, max):
        'top_k': vars.top_k,
        'tfs': vars.tfs,
        'typical': vars.typical,
+       'topa': vars.top_a,
        'numseqs': vars.numseqs,
        'retfultxt': False
    }
@@ -4125,6 +4144,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None):
        top_k=vars.top_k,
        tfs=vars.tfs,
        typical=vars.typical,
+       top_a=vars.top_a,
        numseqs=vars.numseqs,
        repetition_penalty=vars.rep_pen,
        rpslope=vars.rep_pen_slope,
@@ -4311,6 +4331,7 @@ def refresh_settings():
    emit('from_server', {'cmd': 'updatetopk', 'data': vars.top_k}, broadcast=True)
    emit('from_server', {'cmd': 'updatetfs', 'data': vars.tfs}, broadcast=True)
    emit('from_server', {'cmd': 'updatetypical', 'data': vars.typical}, broadcast=True)
+   emit('from_server', {'cmd': 'updatetopa', 'data': vars.top_a}, broadcast=True)
    emit('from_server', {'cmd': 'updatereppen', 'data': vars.rep_pen}, broadcast=True)
    emit('from_server', {'cmd': 'updatereppenslope', 'data': vars.rep_pen_slope}, broadcast=True)
    emit('from_server', {'cmd': 'updatereppenrange', 'data': vars.rep_pen_range}, broadcast=True)

View File

@@ -867,6 +867,7 @@ return function(_python, _bridged)
---@field settopk integer
---@field settfs number
---@field settypical number
+---@field settopa number
---@field setreppen number
---@field setreppenslope number
---@field setreppenrange number
@@ -884,6 +885,7 @@ return function(_python, _bridged)
---@field top_k integer
---@field tfs number
---@field typical number
+---@field topa number
---@field reppen number
---@field reppenslope number
---@field reppenrange number

View File

@@ -64,6 +64,17 @@ gensettingstf = [
        "step": 0.05,
        "default": 1.0,
        "tooltip": "Alternative sampling method described in the paper \"Typical Decoding for Natural Language Generation\" (10.48550/ARXIV.2202.00666). The paper suggests 0.2 as a good value for this setting. Set this setting to 1 to disable its effect."
    },
+   {
+       "uitype": "slider",
+       "unit": "float",
+       "label": "Top a Sampling",
+       "id": "settopa",
+       "min": 0.0,
+       "max": 1.0,
+       "step": 0.01,
+       "default": 0.0,
+       "tooltip": "Alternative sampling method that reduces the randomness of the AI whenever the probability of one token is much higher than all the others. Higher values have a stronger effect. Set this setting to 0 to disable its effect."
+   },
    {
        "uitype": "slider",

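The cut-off this slider controls is the top_a * m^2 rule implemented by the samplers later in this commit, where m is the highest token probability. A quick arithmetic illustration with invented numbers:

```python
# Invented probabilities, only to show how the top-a cut-off scales.
top_a = 0.2
# Peaked distribution: best token at 0.9 -> cut-off 0.2 * 0.9**2 = 0.162,
# so every candidate below 0.162 is dropped and output is near-greedy.
print(top_a * 0.9 ** 2)  # 0.162
# Flat distribution: best token at 0.3 -> cut-off 0.2 * 0.3**2 = 0.018,
# so almost all candidates survive and sampling stays diverse.
print(top_a * 0.3 ** 2)  # 0.018
```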
View File

@@ -2231,6 +2231,10 @@ $(document).ready(function(){
            // Send current typical value to input
            $("#settypicalcur").val(msg.data);
            $("#settypical").val(parseFloat(msg.data)).trigger("change");
+       } else if(msg.cmd == "updatetopa") {
+           // Send current top a value to input
+           $("#settopacur").val(msg.data);
+           $("#settopa").val(parseFloat(msg.data)).trigger("change");
        } else if(msg.cmd == "updatereppen") {
            // Send current rep pen value to input
            $("#setreppencur").val(msg.data);
@@ -2270,6 +2274,9 @@ $(document).ready(function(){
        } else if(msg.cmd == "setlabeltypical") {
            // Update setting label with value from server
            $("#settypicalcur").val(msg.data);
+       } else if(msg.cmd == "setlabeltopa") {
+           // Update setting label with value from server
+           $("#settopacur").val(msg.data);
        } else if(msg.cmd == "setlabelreppen") {
            // Update setting label with value from server
            $("#setreppencur").val(msg.data);

View File

@@ -70,6 +70,7 @@ def settings_callback() -> dict:
        "top_k": 0,
        "tfs": 1.0,
        "typical": 1.0,
+       "top_a": 0.0,
        "repetition_penalty": 1.0,
        "rpslope": 0.0,
        "rprange": 0,
@@ -158,10 +159,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
        logits[tokens] = penalty_logits
    return logits

-def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
+def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
    '''
-   This gets called by generate_loop_fn to apply a series of 5 filters
-   to the logits (top-k, then top-p, then TFS, then typical, then temperature)
+   This gets called by generate_loop_fn to apply a series of 6 filters
+   to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
    before picking one token using the modified logits
    '''
@@ -182,6 +183,20 @@ def kobold_sample_dynamic(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, ty
        return np.where(indices_to_remove, -np.inf, logits)
    if top_k > 0:
        logits = top_k_filter(logits)
+   # Top-a (remove all tokens that have softmax probability less than
+   # a*m^2 where m is the maximum softmax probability)
+   def top_a_filter(logits):
+       # Replace every element in the logits array
+       # with e (Euler's number) to the power of that element, and divide
+       # each element of the new array by the sum of the elements in the
+       # new array
+       probabilities = np.array(jax.nn.softmax(logits), copy=True)
+       # Find the largest probability
+       probs_max = probabilities.max()
+       # Remove tokens
+       return np.where(probabilities < probs_max * probs_max * top_a, -np.inf, logits)
+   if top_a > 0.0:
+       logits = top_a_filter(logits)
    # Top-p (after sorting the remaining tokens again in descending order of
    # logit, remove the ones that have cumulative softmax probability
    # greater than p)
@@ -332,10 +347,10 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
    # positions in the logits array
    return logits.at[tokens].set(penalty_logits)

-def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0):
+def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
    '''
-   This gets called by generate_loop_fn to apply a series of 5 filters
-   to the logits (top-k, then top-p, then TFS, then typical, then temperature)
+   This gets called by generate_loop_fn to apply a series of 6 filters
+   to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
    before picking one token using the modified logits
    '''
@@ -355,6 +370,19 @@ def kobold_sample_static(key, logits, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typ
        )
        return jnp.where(indices_to_remove, -jnp.inf, logits)
    logits = jax.lax.cond(top_k > 0, top_k_filter, lambda x: x, logits)
+   # Top-a (remove all tokens that have softmax probability less than
+   # a*m^2 where m is the maximum softmax probability)
+   def top_a_filter(logits):
+       # Replace every element in the logits array
+       # with e (Euler's number) to the power of that element, and divide
+       # each element of the new array by the sum of the elements in the
+       # new array
+       probabilities = jax.nn.softmax(logits)
+       # Find the largest probability
+       probs_max = probabilities.max()
+       # Remove tokens
+       return jnp.where(probabilities < probs_max * probs_max * top_a, -jnp.inf, logits)
+   logits = jax.lax.cond(top_a > 0.0, top_a_filter, lambda x: x, logits)
    # Top-p (after sorting the remaining tokens again in descending order of
    # logit, remove the ones that have cumulative softmax probability
    # greater than p)
@@ -806,6 +834,7 @@ def infer_static(
    top_k=0,
    tfs=1.0,
    typical=1.0,
+   top_a=0.0,
    repetition_penalty=1.0,
    rpslope=0.0,
    rprange=0,
@@ -829,6 +858,7 @@ def infer_static(
        "top_p": top_p * np.ones(total_batch),
        "tfs": tfs * np.ones(total_batch),
        "typical": typical * np.ones(total_batch),
+       "top_a": top_a * np.ones(total_batch),
        "repetition_penalty": repetition_penalty * np.ones(total_batch),
        "rpslope": rpslope * np.ones(total_batch),
        "rprange": np.full(total_batch, rprange, dtype=np.uint32),

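As a standalone illustration of the filter added above, here is the same rule in plain NumPy with invented logits (a reading aid, not part of the commit):

```python
import numpy as np

def top_a_filter(logits, top_a):
    # Softmax (subtract the max first for numerical stability)
    exps = np.exp(logits - logits.max())
    probabilities = exps / exps.sum()
    # Cut-off is top_a * m^2, where m is the largest probability
    probs_max = probabilities.max()
    return np.where(probabilities < probs_max * probs_max * top_a, -np.inf, logits)

logits = np.array([5.0, 2.0, 1.5, -1.0])  # probs ~= [0.92, 0.05, 0.03, 0.002]
print(top_a_filter(logits, top_a=0.2))
# [ 5. -inf -inf -inf]: one token dominates, so the tail becomes unsampleable
```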
View File

@@ -148,3 +148,32 @@ class TypicalLogitsWarper(LogitsWarper):
        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
        scores = scores.masked_fill(indices_to_remove, self.filter_value)
        return scores
+
+class TopALogitsWarper(LogitsWarper):
+    def __init__(self, top_a: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+        top_a = float(top_a)
+        if top_a < 0 or top_a > 1.0:
+            raise ValueError(f"`top_a` has to be a float >= 0 and <= 1, but is {top_a}")
+        self.top_a = top_a
+        self.filter_value = filter_value
+        self.min_tokens_to_keep = min_tokens_to_keep
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+        if self.top_a == 0.0:  # a value of 0 disables top-a sampling
+            return scores
+        sorted_logits, sorted_indices = torch.sort(scores, descending=True)
+        probs = sorted_logits.softmax(dim=-1)
+        # Remove tokens with probability less than top_a * (max(probs))^2
+        probs_max = probs[..., 0, None]
+        sorted_indices_to_remove = probs < probs_max * probs_max * self.top_a
+        if self.min_tokens_to_keep > 1:
+            # Keep at least min_tokens_to_keep tokens
+            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = 0
+        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+        scores = scores.masked_fill(indices_to_remove, self.filter_value)
+        return scores
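For reference, a minimal hand-driven usage sketch of the new warper; the tensors are invented, and only TopALogitsWarper itself comes from this commit:

```python
import torch
from transformers import LogitsProcessorList
from warpers import TopALogitsWarper

warpers = LogitsProcessorList()
warpers.append(TopALogitsWarper(top_a=0.2, min_tokens_to_keep=1))

# One sequence over a toy 5-token vocabulary; the first logit dominates
input_ids = torch.zeros((1, 1), dtype=torch.long)
scores = torch.tensor([[8.0, 3.0, 2.5, 1.0, -2.0]])
print(warpers(input_ids, scores))
# tensor([[8., -inf, -inf, -inf, -inf]])
```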