Merge branch 'united' into mkultra

vfbd 2022-08-23 21:29:29 -04:00
commit 51135e192b
6 changed files with 55 additions and 41 deletions
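
In effect, this merge pulls in united's change that makes repetition penalty an ordinary sampler with ID 6: the default sampler order becomes [6, 0, 1, 2, 3, 4, 5], six-element orders from older saved settings are upgraded by prepending 6, the TPU (MTJ) backend applies the penalty inside the sampler loop instead of unconditionally before it, and the sampler UI gains a "Repetition Penalty" entry.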

aiserver.py

@@ -964,7 +964,10 @@ def loadmodelsettings():
     if("nobreakmodel" in js):
         vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
-        vars.sampler_order = js["sampler_order"]
+        sampler_order = vars.sampler_order
+        if(len(sampler_order) < 7):
+            sampler_order = [6] + sampler_order
+        vars.sampler_order = sampler_order
     if("temp" in js):
         vars.temp = js["temp"]
     if("top_p" in js):
@@ -1095,7 +1098,10 @@ def processsettings(js):
     if("andepth" in js):
         vars.andepth = js["andepth"]
     if("sampler_order" in js):
-        vars.sampler_order = js["sampler_order"]
+        sampler_order = vars.sampler_order
+        if(len(sampler_order) < 7):
+            sampler_order = [6] + sampler_order
+        vars.sampler_order = sampler_order
     if("temp" in js):
         vars.temp = js["temp"]
     if("top_p" in js):
@@ -1732,8 +1738,6 @@ def patch_transformers():
     dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0)
     dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0)
     dynamic_processor_wrap(TemperatureLogitsWarper, "temperature", "temp", cond=lambda x: x != 1.0)
-    RepetitionPenaltyLogitsProcessor.__init__ = AdvancedRepetitionPenaltyLogitsProcessor.__init__
-    RepetitionPenaltyLogitsProcessor.__call__ = AdvancedRepetitionPenaltyLogitsProcessor.__call__
 
     class LuaLogitsProcessor(LogitsProcessor):
@@ -1810,9 +1814,13 @@ def patch_transformers():
             self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1)))
             self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1)))
             self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5))
+            self.__warper_list.append(AdvancedRepetitionPenaltyLogitsProcessor())
 
         def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs):
-            for k in vars.sampler_order:
+            sampler_order = vars.sampler_order[:]
+            if len(sampler_order) < 7:  # Add repetition penalty at beginning if it's not present
+                sampler_order = [6] + sampler_order
+            for k in sampler_order:
                 scores = self.__warper_list[k](input_ids, scores, *args, **kwargs)
             return scores
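
The "prepend 6" upgrade rule above reappears throughout this commit: a sampler order saved before this change has six entries (0 through 5), so the repetition penalty sampler is added at the front, which matches where the penalty was applied before this change. A minimal standalone sketch of the rule, with illustrative names that are not KoboldAI's own:

REPETITION_PENALTY_ID = 6  # sampler ID introduced by this commit

def upgrade_sampler_order(order):
    """Prepend the repetition penalty sampler to pre-change six-element
    orders; seven-element orders pass through unchanged."""
    order = order[:]  # copy so the caller's list is untouched
    if len(order) < 7:
        order = [REPETITION_PENALTY_ID] + order
    return order

assert upgrade_sampler_order([0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]
assert upgrade_sampler_order([6, 0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]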
@@ -1945,7 +1953,7 @@ def reset_model_settings():
     vars.badwordsids = []
     vars.fp32_model  = False  # Whether or not the most recently loaded HF model was in fp32 format
     vars.modeldim    = -1     # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-    vars.sampler_order = [0, 1, 2, 3, 4, 5]
+    vars.sampler_order = [6, 0, 1, 2, 3, 4, 5]
     vars.newlinemode = "n"
     vars.revision = None
@@ -2558,8 +2566,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         vars.compiling = False
 
     def tpumtjgenerate_settings_callback() -> dict:
+        sampler_order = vars.sampler_order[:]
+        if len(sampler_order) < 7:  # Add repetition penalty at beginning if it's not present
+            sampler_order = [6] + sampler_order
         return {
-            "sampler_order": vars.sampler_order,
+            "sampler_order": sampler_order,
             "top_p": float(vars.top_p),
             "temp": float(vars.temp),
             "top_k": int(vars.top_k),
@@ -3666,12 +3677,16 @@ def get_message(msg):
         sendUSStatItems()
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
+        sampler_order_min_length = 6
+        sampler_order_max_length = 7
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
-        if(len(sampler_order) != len(vars.sampler_order)):
-            raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}")
+        if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
+            raise ValueError(f"Sampler order must be a list of length greater than or equal to {sampler_order_min_length} and less than or equal to {sampler_order_max_length}, but got a list of length {len(sampler_order)}")
         if(not all(isinstance(e, int) for e in sampler_order)):
             raise ValueError(f"Sampler order must be a list of ints, but got a list with at least one non-int element")
+        if(min(sampler_order) != 0 or max(sampler_order) != len(sampler_order) - 1 or len(set(sampler_order)) != len(sampler_order)):
+            raise ValueError(f"Sampler order list of length {len(sampler_order)} must be a permutation of the first {len(sampler_order)} nonnegative integers")
         vars.sampler_order = sampler_order
         settingschanged()
     elif(msg['cmd'] == 'list_model'):
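
The replacement validation accepts both the old six-element and the new seven-element orders, and the added permutation check rejects duplicates and out-of-range IDs. A self-contained sketch of the same checks (the helper name is hypothetical):

def validate_sampler_order(sampler_order):
    if not isinstance(sampler_order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
    if not (6 <= len(sampler_order) <= 7):
        raise ValueError(f"Sampler order must have 6 or 7 elements, but got {len(sampler_order)}")
    if not all(isinstance(e, int) for e in sampler_order):
        raise ValueError("Sampler order must be a list of ints")
    # Equivalent to the min/max/set test in the diff above.
    if sorted(sampler_order) != list(range(len(sampler_order))):
        raise ValueError(f"Sampler order must be a permutation of the first {len(sampler_order)} nonnegative integers")

validate_sampler_order([6, 0, 1, 2, 3, 4, 5])  # passes
# validate_sampler_order([0, 1, 2, 3, 4, 4])   # raises: not a permutation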
@@ -4624,7 +4639,7 @@ def _generate(txt, minimum, maximum, found_entries):
             gen_in,
             do_sample=True,
             max_length=int(2e9),
-            repetition_penalty=1.1,
+            repetition_penalty=1.0,
             bad_words_ids=vars.badwordsids,
             use_cache=True,
             num_return_sequences=numseqs
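
With AdvancedRepetitionPenaltyLogitsProcessor now appended to the warper list (and the monkey-patching of transformers' RepetitionPenaltyLogitsProcessor removed above), repetition_penalty=1.0 here is deliberate: 1.0 is the neutral value at which transformers skips its built-in penalty, so the penalty is applied exactly once, at whatever position ID 6 occupies in the sampler order.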

static/application.js

@@ -256,7 +256,7 @@ function addSetting(ob) {
         }
     });
-    if (!$("#input-token-usage")[0].checked) {
+    if (!$("#setshowbudget")[0].checked) {
         for (const el of document.getElementsByClassName("input-token-usage")) {
             el.classList.add("hidden");
         }
@@ -1306,12 +1306,13 @@ function buildSamplerList(samplers) {
         "Tail-free Sampling",
         "Typical Sampling",
         "Temperature",
+        "Repetition Penalty",
     ]
     for(i=0; i<samplers.length; i++) {
         samplerslist.append("<div class=\"flex\">\
             <div class=\"samplerslistitem flex-row-container\" sid=\""+samplers[i]+"\">\
                 <div class=\"flex-row\">\
-                    <div>"+samplers_lookup_table[samplers[i]]+"</div>\
+                    <div>"+(samplers[i] < samplers_lookup_table.length ? samplers_lookup_table[samplers[i]] : "Unknown sampler #" + samplers[i])+"</div>\
                 </div>\
             </div>\
         </div>");

static/custom.css

@@ -473,7 +473,7 @@ body.connected #popupfooter, #popupfooter.always-available {
 }
 
 #samplerslist {
-    height: 300px;
+    height: 310px;
     overflow-y: scroll;
     overflow-wrap: anywhere;
 }

tpu_mtj_backend.py

@@ -176,7 +176,7 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
     logits[tokens] = penalty_logits
     return logits
 
-def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
+def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
     '''
     This gets called by generate_loop_fn to apply a series of 6 filters
     to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
@@ -312,6 +312,7 @@ def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = Non
         if k == 3 and tfs < 1.0: logits = tail_free_filter(logits)
         if k == 4 and typical < 1.0: logits = typical_filter(logits)
         if k == 5 and temp != 1.0: logits = temp_filter(logits)
+        if k == 6 and rpargs[1] != 1.0: logits = apply_repetition_penalty_dynamic(logits, *rpargs)
     # Finally, pick one token using the softmax thingy again (it gives
     # an array whose elements sum to 1 so it can be used nicely as a
     # probability distribution)
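
Here rpargs packs the arguments of apply_repetition_penalty_dynamic (everything except logits) into one tuple so the penalty can run at whatever position ID 6 occupies; rpargs[1] is repetition_penalty, so the filter is skipped when the penalty is a no-op 1.0. A toy sketch of the dispatch, using a simplified two-element rpargs and a stand-in penalty function:

import numpy as np

def apply_penalty(logits, tokens, repetition_penalty):
    # Stand-in for apply_repetition_penalty_dynamic: divide positive
    # logits and multiply negative ones for every already-seen token.
    hit = logits[tokens]
    logits[tokens] = np.where(hit > 0, hit / repetition_penalty, hit * repetition_penalty)
    return logits

logits = np.array([2.0, -1.0, 0.5])
rpargs = (np.array([0, 1]), 2.0)  # (generated tokens, repetition_penalty)
for k in (6, 5):                  # the sampler order decides when k == 6 fires
    if k == 6 and rpargs[1] != 1.0:
        logits = apply_penalty(logits, *rpargs)
print(logits)  # [ 1.  -2.   0.5]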
@@ -362,7 +363,7 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate
     # positions in the logits array
     return logits.at[tokens].set(penalty_logits)
 
-def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
+def kobold_sample_static(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
     '''
     This gets called by generate_loop_fn to apply a series of 6 filters
     to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
@@ -497,6 +498,7 @@ def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None
         logits = jax.lax.cond(jnp.logical_and(k == 3, tfs < 1.0), tail_free_filter, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), typical_filter, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), temp_filter, lambda x: x, logits)
+        logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: apply_repetition_penalty_static(*x), lambda x: x[0], (logits, *rpargs))
     # Finally, pick one token using the softmax thingy again (it gives
     # an array whose elements sum to 1 so it can be used nicely as a
     # probability distribution)
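
The static path is JIT-compiled, so the same dispatch must go through jax.lax.cond: both branches receive one common operand and must return values of identical shape, which is why logits is bundled together with rpargs into a tuple and the no-op branch simply returns x[0]. A minimal sketch of the pattern, again with a toy penalty and a simplified operand:

import jax
import jax.numpy as jnp

def penalize(x):
    # Stand-in for apply_repetition_penalty_static(*x).
    logits, tokens, rep_pen = x
    hit = logits[tokens]
    return logits.at[tokens].set(jnp.where(hit > 0, hit / rep_pen, hit * rep_pen))

logits = jnp.array([2.0, -1.0, 0.5])
tokens = jnp.array([0, 1])
rep_pen = jnp.float32(2.0)
k = 6

logits = jax.lax.cond(
    jnp.logical_and(k == 6, rep_pen != 1.0),
    penalize,        # true branch: apply the penalty
    lambda x: x[0],  # false branch: return logits untouched
    (logits, tokens, rep_pen),
)
print(logits)  # [ 1.  -2.   0.5]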
@@ -513,17 +515,6 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_
         # Get the pseudo-random number generator key that will
         # be used by kobold_sample_dynamic to randomly pick a token
         sample_key, new_key = jax.random.split(sample_key, num=2)
-        # Apply repetition penalty to all tokens that are
-        # currently inside the "generated" array
-        logits = apply_repetition_penalty_dynamic(
-            logits,
-            generated,
-            repetition_penalty,
-            generated_index,
-            gen_length,
-            rpslope,
-            rprange,
-        )
         # Remove any tokens in the badwords list by setting
         # their logits to negative infinity which effectively
         # makes their probabilities of being chosen zero
@@ -535,6 +526,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_
         next_token = kobold_sample_dynamic(
             sample_key,
             logits,
+            (
+                generated,
+                repetition_penalty,
+                generated_index,
+                gen_length,
+                rpslope,
+                rprange,
+            ),
             **sampler_options,
         )
         # Remember what token was picked
@@ -606,18 +605,6 @@ class PenalizingCausalTransformer(CausalTransformer):
             assert logits.shape == (1, config["n_vocab"])
             # Flatten it into a 1D array to make it easier to use
             logits = logits[0]
-            # Apply repetition penalty to all tokens that are
-            # currently inside the "generated" array
-            if repetition_penalty is not None:
-                logits = apply_repetition_penalty_static(
-                    logits,
-                    generated,
-                    repetition_penalty,
-                    generated_index,
-                    gen_length,
-                    rpslope,
-                    rprange,
-                )
             # Remove any tokens in the badwords list by setting
             # their logits to negative infinity which effectively
             # makes their probabilities of being chosen zero
@@ -629,6 +616,14 @@ class PenalizingCausalTransformer(CausalTransformer):
             next_token = kobold_sample_static(
                 sample_key,
                 logits,
+                (
+                    generated,
+                    repetition_penalty,
+                    generated_index,
+                    gen_length,
+                    rpslope,
+                    rprange,
+                ),
                 **sampler_options,
             )
             # Remember what token was picked
@@ -863,6 +858,9 @@ def infer_static(
     maps.thread_resources.env = thread_resources_env
     if sampler_order is None:
         sampler_order = utils.default_sampler_order.copy()
+    sampler_order = sampler_order[:]
+    if len(sampler_order) < 7:  # Add repetition penalty at beginning if it's not present
+        sampler_order = [6] + sampler_order
     sampler_order = np.uint32(sampler_order)
     total_batch = 1
     tokens = context

utils.py

@@ -34,7 +34,7 @@ layers_module_names: Optional[List[str]] = None
 module_names: Optional[List[str]] = None
 named_buffers: Optional[List[tuple]] = None
 
-default_sampler_order = [0, 1, 2, 3, 4, 5]
+default_sampler_order = [6, 0, 1, 2, 3, 4, 5]
 
 emit = None

warpers.py

@@ -28,10 +28,10 @@ SOFTWARE.
 '''
 
 import torch
-from transformers import LogitsWarper, LogitsProcessor
+from transformers import LogitsWarper
 
-class AdvancedRepetitionPenaltyLogitsProcessor(LogitsProcessor):
+class AdvancedRepetitionPenaltyLogitsProcessor(LogitsWarper):
     def __init__(self, *args, **kwargs):
         pass
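
Rebasing AdvancedRepetitionPenaltyLogitsProcessor on LogitsWarper (and dropping the now-unused LogitsProcessor import) matches its new role: it is appended to the warper list in aiserver.py alongside the other LogitsWarper instances. In transformers both base classes expose the same __call__(input_ids, scores) interface, so the reclassification changes where the penalty runs, not how it computes.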