diff --git a/aiserver.py b/aiserver.py index f6f335ef..b51219b6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -964,7 +964,10 @@ def loadmodelsettings(): if("nobreakmodel" in js): vars.nobreakmodel = js["nobreakmodel"] if("sampler_order" in js): - vars.sampler_order = js["sampler_order"] + sampler_order = js["sampler_order"] + if(len(sampler_order) < 7): + sampler_order = [6] + sampler_order + vars.sampler_order = sampler_order if("temp" in js): vars.temp = js["temp"] if("top_p" in js): @@ -1095,7 +1098,10 @@ def processsettings(js): if("andepth" in js): vars.andepth = js["andepth"] if("sampler_order" in js): - vars.sampler_order = js["sampler_order"] + sampler_order = js["sampler_order"] + if(len(sampler_order) < 7): + sampler_order = [6] + sampler_order + vars.sampler_order = sampler_order if("temp" in js): vars.temp = js["temp"] if("top_p" in js): @@ -1732,8 +1738,6 @@ def patch_transformers(): dynamic_processor_wrap(TailFreeLogitsWarper, "tfs", "tfs", cond=lambda x: x < 1.0) dynamic_processor_wrap(TypicalLogitsWarper, "typical", "typical", cond=lambda x: x < 1.0) dynamic_processor_wrap(TemperatureLogitsWarper, "temperature", "temp", cond=lambda x: x != 1.0) - RepetitionPenaltyLogitsProcessor.__init__ = AdvancedRepetitionPenaltyLogitsProcessor.__init__ - RepetitionPenaltyLogitsProcessor.__call__ = AdvancedRepetitionPenaltyLogitsProcessor.__call__ class LuaLogitsProcessor(LogitsProcessor): @@ -1810,9 +1814,13 @@ def patch_transformers(): self.__warper_list.append(TailFreeLogitsWarper(tfs=0.5, min_tokens_to_keep=1 + (beams > 1))) self.__warper_list.append(TypicalLogitsWarper(typical=0.5, min_tokens_to_keep=1 + (beams > 1))) self.__warper_list.append(TemperatureLogitsWarper(temperature=0.5)) + self.__warper_list.append(AdvancedRepetitionPenaltyLogitsProcessor()) def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, *args, **kwargs): - for k in vars.sampler_order: + sampler_order = vars.sampler_order[:] + if len(sampler_order) < 7: # Add repetition
penalty at beginning if it's not present + sampler_order = [6] + sampler_order + for k in sampler_order: scores = self.__warper_list[k](input_ids, scores, *args, **kwargs) return scores @@ -1945,7 +1953,7 @@ def reset_model_settings(): vars.badwordsids = [] vars.fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format vars.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) - vars.sampler_order = [0, 1, 2, 3, 4, 5] + vars.sampler_order = [6, 0, 1, 2, 3, 4, 5] vars.newlinemode = "n" vars.revision = None @@ -2558,8 +2566,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal vars.compiling = False def tpumtjgenerate_settings_callback() -> dict: + sampler_order = vars.sampler_order[:] + if len(sampler_order) < 7: # Add repetition penalty at beginning if it's not present + sampler_order = [6] + sampler_order return { - "sampler_order": vars.sampler_order, + "sampler_order": sampler_order, "top_p": float(vars.top_p), "temp": float(vars.temp), "top_k": int(vars.top_k), @@ -3666,12 +3677,16 @@ def get_message(msg): sendUSStatItems() elif(msg['cmd'] == 'samplers'): sampler_order = msg["data"] + sampler_order_min_length = 6 + sampler_order_max_length = 7 if(not isinstance(sampler_order, list)): raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}") - if(len(sampler_order) != len(vars.sampler_order)): - raise ValueError(f"Sampler order must be a list of length {len(vars.sampler_order)}, but got a list of length {len(sampler_order)}") + if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)): + raise ValueError(f"Sampler order must be a list of length greater than or equal to {sampler_order_min_length} and less than or equal to {sampler_order_max_length}, but got a list of length {len(sampler_order)}") if(not all(isinstance(e, int) for e in sampler_order)): raise ValueError(f"Sampler order must be a list 
of ints, but got a list with at least one non-int element") + if(min(sampler_order) != 0 or max(sampler_order) != len(sampler_order) - 1 or len(set(sampler_order)) != len(sampler_order)): + raise ValueError(f"Sampler order list of length {len(sampler_order)} must be a permutation of the first {len(sampler_order)} nonnegative integers") vars.sampler_order = sampler_order settingschanged() elif(msg['cmd'] == 'list_model'): @@ -4624,7 +4639,7 @@ def _generate(txt, minimum, maximum, found_entries): gen_in, do_sample=True, max_length=int(2e9), - repetition_penalty=1.1, + repetition_penalty=1.0, bad_words_ids=vars.badwordsids, use_cache=True, num_return_sequences=numseqs diff --git a/static/application.js b/static/application.js index 06c426b4..9107e161 100644 --- a/static/application.js +++ b/static/application.js @@ -256,7 +256,7 @@ function addSetting(ob) { } }); - if (!$("#input-token-usage")[0].checked) { + if (!$("#setshowbudget")[0].checked) { for (const el of document.getElementsByClassName("input-token-usage")) { el.classList.add("hidden"); } @@ -1306,12 +1306,13 @@ function buildSamplerList(samplers) { "Tail-free Sampling", "Typical Sampling", "Temperature", + "Repetition Penalty", ] for(i=0; i\
\
\ -
"+samplers_lookup_table[samplers[i]]+"
\ +
"+(samplers[i] < samplers_lookup_table.length ? samplers_lookup_table[samplers[i]] : "Unknown sampler #" + samplers[i])+"
\
\
\ "); diff --git a/static/custom.css b/static/custom.css index af238dc7..d4bfe872 100644 --- a/static/custom.css +++ b/static/custom.css @@ -473,7 +473,7 @@ body.connected #popupfooter, #popupfooter.always-available { } #samplerslist { - height: 300px; + height: 310px; overflow-y: scroll; overflow-wrap: anywhere; } diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 7b0f6807..effb3de0 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -176,7 +176,7 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat logits[tokens] = penalty_logits return logits -def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0): +def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0): ''' This gets called by generate_loop_fn to apply a series of 6 filters to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature) @@ -312,6 +312,7 @@ def kobold_sample_dynamic(key, logits, sampler_order: Optional[np.ndarray] = Non if k == 3 and tfs < 1.0: logits = tail_free_filter(logits) if k == 4 and typical < 1.0: logits = typical_filter(logits) if k == 5 and temp != 1.0: logits = temp_filter(logits) + if k == 6 and rpargs[1] != 1.0: logits = apply_repetition_penalty_dynamic(logits, *rpargs) # Finally, pick one token using the softmax thingy again (it gives # an array whose elements sum to 1 so it can be used nicely as a # probability distribution) @@ -362,7 +363,7 @@ def apply_repetition_penalty_static(logits, tokens, repetition_penalty, generate # positions in the logits array return logits.at[tokens].set(penalty_logits) -def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0): +def kobold_sample_static(key, logits, rpargs, sampler_order: 
Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0): ''' This gets called by generate_loop_fn to apply a series of 6 filters to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature) @@ -497,6 +498,7 @@ def kobold_sample_static(key, logits, sampler_order: Optional[np.ndarray] = None logits = jax.lax.cond(jnp.logical_and(k == 3, tfs < 1.0), tail_free_filter, lambda x: x, logits) logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), typical_filter, lambda x: x, logits) logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), temp_filter, lambda x: x, logits) + logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: apply_repetition_penalty_static(*x), lambda x: x[0], (logits, *rpargs)) # Finally, pick one token using the softmax thingy again (it gives # an array whose elements sum to 1 so it can be used nicely as a # probability distribution) @@ -513,17 +515,6 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_ # Get the pseudo-random number generator key that will # be used by kobold_sample_dynamic to randomly pick a token sample_key, new_key = jax.random.split(sample_key, num=2) - # Apply repetition penalty to all tokens that are - # currently inside the "generated" array - logits = apply_repetition_penalty_dynamic( - logits, - generated, - repetition_penalty, - generated_index, - gen_length, - rpslope, - rprange, - ) # Remove any tokens in the badwords list by setting # their logits to negative infinity which effectively # makes their probabilities of being chosen zero @@ -535,6 +526,14 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_ next_token = kobold_sample_dynamic( sample_key, logits, + ( + generated, + repetition_penalty, + generated_index, + gen_length, + rpslope, + rprange, + ), **sampler_options, ) # Remember what token was picked @@ -606,18 +605,6 @@ class
PenalizingCausalTransformer(CausalTransformer): assert logits.shape == (1, config["n_vocab"]) # Flatten it into a 1D array to make it easier to use logits = logits[0] - # Apply repetition penalty to all tokens that are - # currently inside the "generated" array - if repetition_penalty is not None: - logits = apply_repetition_penalty_static( - logits, - generated, - repetition_penalty, - generated_index, - gen_length, - rpslope, - rprange, - ) # Remove any tokens in the badwords list by setting # their logits to negative infinity which effectively # makes their probabilities of being chosen zero @@ -629,6 +616,14 @@ class PenalizingCausalTransformer(CausalTransformer): next_token = kobold_sample_static( sample_key, logits, + ( + generated, + repetition_penalty, + generated_index, + gen_length, + rpslope, + rprange, + ), **sampler_options, ) # Remember what token was picked @@ -863,6 +858,9 @@ def infer_static( maps.thread_resources.env = thread_resources_env if sampler_order is None: sampler_order = utils.default_sampler_order.copy() + sampler_order = sampler_order[:] + if len(sampler_order) < 7: # Add repetition penalty at beginning if it's not present + sampler_order = [6] + sampler_order sampler_order = np.uint32(sampler_order) total_batch = 1 tokens = context diff --git a/utils.py b/utils.py index 8f4ec607..6578db6f 100644 --- a/utils.py +++ b/utils.py @@ -34,7 +34,7 @@ layers_module_names: Optional[List[str]] = None module_names: Optional[List[str]] = None named_buffers: Optional[List[tuple]] = None -default_sampler_order = [0, 1, 2, 3, 4, 5] +default_sampler_order = [6, 0, 1, 2, 3, 4, 5] emit = None diff --git a/warpers.py b/warpers.py index fb683f50..488a901e 100644 --- a/warpers.py +++ b/warpers.py @@ -28,10 +28,10 @@ SOFTWARE. 
''' import torch -from transformers import LogitsWarper, LogitsProcessor +from transformers import LogitsWarper -class AdvancedRepetitionPenaltyLogitsProcessor(LogitsProcessor): +class AdvancedRepetitionPenaltyLogitsProcessor(LogitsWarper): def __init__(self, *args, **kwargs): pass