diff --git a/aiserver.py b/aiserver.py
index f69747a3..db96e9e5 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -941,7 +941,7 @@ tags = [
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.6",
+    version="1.2.5",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1162,7 +1162,7 @@ def loadmodelsettings():
         koboldai_vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
            sampler_order = [6] + sampler_order
        koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
@@ -1192,12 +1192,6 @@ def loadmodelsettings():
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
         koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps_cutoff = js["eps_cutoff"]
-        koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta_cutoff = js["eta_cutoff"]
-        koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
     if("adventure" in js):
         koboldai_vars.adventure = js["adventure"]
     if("chatmode" in js):
@@ -1260,7 +1254,7 @@ def processsettings(js):
         koboldai_vars.andepth = js["andepth"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
            sampler_order = [6] + sampler_order
        koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
@@ -1281,10 +1275,6 @@ def processsettings(js):
         koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta = js["eta_cutoff"]
     if("genamt" in js):
         koboldai_vars.genamt = js["genamt"]
     if("max_length" in js):
@@ -2261,8 +2251,6 @@ def lua_has_setting(setting):
         "setreppen",
         "setreppenslope",
         "setreppenrange",
-        "seteps_cutoff",
-        "seteta_cutoff",
         "settknmax",
         "setwidepth",
         "setuseprompt",
@@ -2283,8 +2271,6 @@ def lua_has_setting(setting):
         "reppen",
         "reppenslope",
         "reppenrange",
-        "eps_cutoff",
-        "eta_cutoff",
         "tknmax",
         "widepth",
         "useprompt",
@@ -2323,8 +2309,6 @@ def lua_get_setting(setting):
     if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
-    if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
-    if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
     if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
     if(setting == "anotedepth"): return koboldai_vars.andepth
     if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2363,8 +2347,6 @@ def lua_set_setting(setting, v):
     if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
-    if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
-    if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
     if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
     if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
     if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2788,16 +2770,6 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
         settingschanged()
         refresh_settings()
-    elif(msg['cmd'] == 'seteps_cutoff'):
-        koboldai_vars.eps_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
-    elif(msg['cmd'] == 'seteta_cutoff'):
-        koboldai_vars.eta_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
     elif(msg['cmd'] == 'setoutput'):
         koboldai_vars.genamt = int(msg['data'])
         emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2949,7 +2921,7 @@ def get_message(msg):
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
         sampler_order_min_length = 6
-        sampler_order_max_length = 9
+        sampler_order_max_length = 7
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
         if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
@@ -3527,8 +3499,6 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         repetition_penalty=koboldai_vars.rep_pen,
         rpslope=koboldai_vars.rep_pen_slope,
         rprange=koboldai_vars.rep_pen_range,
-        eps_cutoff=koboldai_vars.eps_cutoff,
-        eta_cutoff=koboldai_vars.eta_cutoff,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
@@ -4176,8 +4146,6 @@ def refresh_settings():
         socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
         socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
         socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
-        socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
-        socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
         socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
         socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
         socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7176,7 +7144,7 @@ def UI_2_load_cookies():
 def UI_2_save_new_preset(data):
     preset = model_info()
     #Data to get from current settings
-    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
+    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
         preset[item] = getattr(koboldai_vars, item)
     #Data to get from UI
     for item in ['preset', 'description']:
@@ -8161,9 +8129,6 @@ class SamplerSettingsSchema(KoboldSchema):
     tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
     typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
     temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
-    eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
-    eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0,), metadata={"description": "Eta sampling value."})
-
 
 def soft_prompt_validator(soft_prompt: str):
     if len(soft_prompt.strip()) == 0:
@@ -8214,7 +8179,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
-    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
+    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
     sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
     sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
     stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
@@ -8332,7 +8297,7 @@ def _generate_text(body: GenerationInputSchema):
             torch.manual_seed(body.sampler_seed)
         koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
     if hasattr(body, "sampler_order"):
-        if len(body.sampler_order) < 9:
+        if len(body.sampler_order) < 7:
            body.sampler_order = [6] + body.sampler_order
     # This maps each property of the setting to use when sending the generate idempotently
     # To the object which typically contains it's value
@@ -8350,8 +8315,6 @@ def _generate_text(body: GenerationInputSchema):
         "tfs": ("koboldai_vars", "tfs", None),
         "typical": ("koboldai_vars", "typical", None),
         "temperature": ("koboldai_vars", "temp", None),
-        "eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
-        "eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
         "frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
         "frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
         "frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
@@ -10797,26 +10760,6 @@ class TemperatureSamplingSettingSchema(KoboldSchema):
         name = "temperature"
         example_yaml_value = "0.5"
 
-@config_endpoint_schema
-class EpsilonSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
-    class KoboldMeta:
-        route_name = "eps_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eps_cutoff"
-        name = "Epsilon sampling"
-        example_yaml_value = "0.0"
-
-@config_endpoint_schema
-class EtaSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0), required=True)
-    class KoboldMeta:
-        route_name = "eta_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eta_cutoff"
-        name = "Eta sampling"
-        example_yaml_value = "0.0"
-
 @config_endpoint_schema
 class GensPerActionSettingSchema(KoboldSchema):
     value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10925,7 +10868,7 @@ class SamplerOrderSettingSchema(KoboldSchema):
         obj = "koboldai_vars"
         var_name = "sampler_order"
         name = "sampler order"
-        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
+        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
 
 @config_endpoint_schema
 class SamplerFullDeterminismSettingSchema(KoboldSchema):
diff --git a/api_example.py b/api_example.py
index 0ae476d6..aa35f883 100644
--- a/api_example.py
+++ b/api_example.py
@@ -23,9 +23,7 @@ def get_prompt(user_msg):
         "top_k": 0, # Keep the X most probable tokens
         "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
         "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
-        "eps": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
-        "eta": 0.0, # Entropy adaptive epsilon, from the same work as epsilon
-        "sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
+        "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
         "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
         #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
         "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
diff --git a/bridge.lua b/bridge.lua
index 4fedc89e..9173302a 100644
--- a/bridge.lua
+++ b/bridge.lua
@@ -890,8 +890,6 @@ return function(_python, _bridged)
 ---@field reppen number
 ---@field reppenslope number
 ---@field reppenrange number
----@field eps_cutoff number
----@field eta_cutoff number
 ---@field tknmax integer
 ---@field widepth integer
 ---@field useprompt boolean
diff --git a/gensettings.py b/gensettings.py
index b761d03c..9b69af43 100644
--- a/gensettings.py
+++ b/gensettings.py
@@ -176,38 +176,6 @@ gensettingstf = [
         "name": "use_alt_rep_pen",
         "ui_level": 2
     },
-    {
-        "uitype": "slider",
-        "unit": "float",
-        "label": "Epsilon Sampling",
-        "id": "seteps_cutoff",
-        "min": 0.0,
-        "max": 9.0,
-        "step": 0.01,
-        "default": 0.0,
-        "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Put this value on 0 to disable its effect)",
-        "menu_path": "Settings",
-        "sub_path": "Sampling",
-        "classname": "model",
-        "name": "eps_cutoff",
-        "ui_level": 1
-    },
-    {
-        "uitype": "slider",
-        "unit": "float",
-        "label": "Eta Sampling",
-        "id": "seteta_cutoff",
-        "min": 0.0,
-        "max": 20,
-        "step": 0.01,
-        "default": 0.0,
-        "tooltip": "Slider is in units of 1e-4.Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output. (Put this value on 0 to disable its effect)",
-        "menu_path": "Settings",
-        "sub_path": "Sampling",
-        "classname": "model",
-        "name": "eta_cutoff",
-        "ui_level": 1
-    },
     {
         "uitype": "slider",
         "unit": "int",
@@ -1117,36 +1085,6 @@ gensettingsik =[{
         "classname": "model",
         "name": "tfs"
     },
-    {
-        "uitype": "slider",
-        "unit": "float",
-        "label": "Epsilon Sampling",
-        "id": "seteps_cutoff",
-        "min": 0.0,
-        "max": 9.0,
-        "step": 0.01,
-        "default": 0.0,
-        "tooltip": "Slider is in units of 1e-4.Discards tokens with probabilities under eps.",
-        "menu_path": "Settings",
-        "sub_path": "Sampling",
-        "classname": "model",
-        "name": "eps_cutoff",
-    },
-    {
-        "uitype": "slider",
-        "unit": "float",
-        "label": "Eta Sampling",
-        "id": "seteta_cutoff",
-        "min": 0.0,
-        "max": 20,
-        "step": 0.01,
-        "default": 0.0,
-        "tooltip": "Slider is in units of 1e-4.Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output.",
-        "menu_path": "Settings",
-        "sub_path": "Sampling",
-        "classname": "model",
-        "name": "eta_cutoff",
-    },
     {
         "uitype": "slider",
         "unit": "int",
diff --git a/koboldai_settings.py b/koboldai_settings.py
index f4419d58..9d2f5f4d 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -614,7 +614,7 @@ class settings(object):
         start_time = time.time()
         if key in self.__dict__ and key not in self.no_save_variables:
             if key == 'sampler_order':
-                if(len(value) < 9):
+                if(len(value) < 7):
                     value = [6] + value
             elif key == 'autosave':
                 autosave = value
@@ -669,9 +669,8 @@ class model_settings(settings):
                          'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition', 'badwordsids', 'uid_presets',
                          'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
     settings_name = "model"
-    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
-                        "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
-                        "sampler_order": [6,0,7,1,3,8,4,2,5]}
+    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
+                        "sampler_order": [6,0,1,2,3,4,5]}
     def __init__(self, socketio, koboldai_vars):
         self.enable_whitelist = False
         self._socketio = socketio
@@ -722,14 +721,12 @@ class model_settings(settings):
         self.top_a = 0.0 # Default generator top-a
         self.tfs = 1.0 # Default generator tfs (tail-free sampling)
         self.typical = 1.0 # Default generator typical sampling threshold
-        self.eps_cutoff = 0.0 # Default generator epsilon_cutoff
-        self.eta_cutoff = 0.0 # Default generator eta_cutoff
         self.numseqs = 1 # Number of sequences to ask the generator to create
         self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
         self.badwordsids = []
         self.fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
         self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-        self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
+        self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
         self.newlinemode = "n"
         self.presets = [] # Holder for presets
         self.selected_preset = ""
@@ -761,8 +758,6 @@ class model_settings(settings):
         self.top_a = 0.0
         self.tfs = 1.0
         self.typical = 1.0
-        self.eps_cutoff = 0.0
-        self.eta_cutoff = 0.0
         self.rep_pen_range = 1024
         self.rep_pen_slope = 0.7
 
@@ -2768,8 +2763,6 @@ default_preset = {
     "rep_pen": 1.1,
     "rep_pen_range": 1024,
     "rep_pen_slope": 0.7,
-    "eps_cutoff": 0.0,
-    "eta_cutoff": 0.0,
     "sampler_order": [
         6,
         0,
@@ -2777,9 +2770,7 @@ default_preset = {
         2,
         3,
         4,
-        5,
-        7,
-        8
+        5
     ]
 }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index e20e57c0..2bcb21a7 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -134,8 +134,6 @@ class GenerationSettings:
             "rep_pen",
             "rep_pen_slope",
             "rep_pen_range",
-            "eps_cutoff",
-            "eta_cutoff",
             "sampler_order",
         ]:
             setattr(
diff --git a/modeling/inference_models/hf_mtj/class.py b/modeling/inference_models/hf_mtj/class.py
index b3f9a749..a4600465 100644
--- a/modeling/inference_models/hf_mtj/class.py
+++ b/modeling/inference_models/hf_mtj/class.py
@@ -140,7 +140,7 @@ class model_backend(HFInferenceModel):
         def mtj_settings_callback() -> dict:
             sampler_order = utils.koboldai_vars.sampler_order[:]
             if (
-                len(sampler_order) < 9
+                len(sampler_order) < 7
             ): # Add repetition penalty at beginning if it's not present
                 sampler_order = [6] + sampler_order
             return {
@@ -154,8 +154,6 @@ class model_backend(HFInferenceModel):
                 "repetition_penalty": float(utils.koboldai_vars.rep_pen),
                 "rpslope": float(utils.koboldai_vars.rep_pen_slope),
                 "rprange": int(utils.koboldai_vars.rep_pen_range),
-                "eps_cutoff": float(utils.koboldai_vars.eps_cutoff),
-                "eta_cutoff": float(utils.koboldai_vars.eta_cutoff),
             }
 
         tpu_mtj_backend.socketio = utils.socketio
@@ -295,8 +293,6 @@ class model_backend(HFInferenceModel):
             tfs=gen_settings.tfs,
             typical=gen_settings.typical,
             top_a=gen_settings.top_a,
-            eps_cutoff=gen_settings.eps_cutoff,
-            eta_cutoff=gen_settings.eta_cutoff,
             numseqs=batch_count,
             repetition_penalty=gen_settings.rep_pen,
             rpslope=gen_settings.rep_pen_slope,
diff --git a/modeling/warpers.py b/modeling/warpers.py
index 4b59361a..b8035023 100644
--- a/modeling/warpers.py
+++ b/modeling/warpers.py
@@ -68,8 +68,6 @@ def update_settings():
     RepetitionPenalty.rep_pen_range = koboldai_vars.rep_pen_range
     RepetitionPenalty.rep_pen_slope = koboldai_vars.rep_pen_slope
     RepetitionPenalty.use_alt_rep_pen = koboldai_vars.use_alt_rep_pen
-    Epsilon.epsilon = koboldai_vars.eps_cutoff
-    Eta.eta = koboldai_vars.eta_cutoff
 
 
 class Warper:
@@ -105,8 +103,6 @@ class Warper:
             4: Typical,
             5: Temperature,
             6: RepetitionPenalty,
-            7: Epsilon,
-            8: Eta,
         }[warper_id]
 
     @classmethod
@@ -544,92 +540,3 @@ class RepetitionPenalty(Warper):
     @classmethod
     def value_is_valid(cls) -> bool:
         return cls.rep_pen != 1.0
-
-class Epsilon(Warper):
-    """
-    Eta sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    epsilon: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        # Probably the simplest sampler there is, just remove tokens with probs under a threshold
-        probs = scores.softmax(dim=-1)
-
-        indices_to_remove = probs < (cls.epsilon * 1e-4)
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        probabilities = np.array(jax.nn.softmax(scores), copy=True)
-
-        indices_to_remove = probabilities < (cls.epsilon * 1e-4)
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.epsilon > 0.0
-
-class Eta(Warper):
-    """
-    Eta sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    eta: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        shifted_logits = torch.log_softmax(scores, dim=-1)
-        probs = shifted_logits.exp()
-
-        neg_entropy = (probs * shifted_logits).nansum(dim=-1, keepdim=True)
-        epsilon = torch.min(torch.tensor(cls.eta * 1e-4), torch.sqrt(torch.tensor(cls.eta*1e-4))*torch.exp(neg_entropy))
-
-        indices_to_remove = probs < epsilon
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = np.exp(shifted_logits)
-        neg_entropy = np.nansum(probabilities * shifted_logits)
-        epsilon = min(cls.eta * 1e-4, np.sqrt(cls.eta*1e-4)*np.exp(neg_entropy))
-
-        indices_to_remove = probabilities < epsilon
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.eta > 0.0
-
diff --git a/static/application.js b/static/application.js
index 682165b7..0c689646 100644
--- a/static/application.js
+++ b/static/application.js
@@ -1315,8 +1315,6 @@ function buildSamplerList(samplers) {
 		"Typical Sampling",
 		"Temperature",
 		"Repetition Penalty",
-		"Epsilon Sampling",
-		"Eta Sampling"
 	]
 	for(i=0; i<samplers.length; i++) {
@@ -2642,14 +2640,6 @@ $(document).ready(function(){
 			// Send current rep pen value to input
 			$("#setreppenrangecur").val(msg.data);
 			$("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
-		} else if(msg.cmd == "updateeps_cutoff") {
-			// Send current epsilon value to input
-			$("#seteps_cutoffcurr").val(msg.data);
-			$("#seteps_cutoff").val(parseFloat(msg.data)).trigger("change");
-		} else if(msg.cmd == "updateeta_cutoff") {
-			// Send current eta value to input
-			$("#seteta_cutoffcur").val(msg.data);
-			$("#seteta_cutoff").val(parseFloat(msg.data)).trigger("change");
 		} else if(msg.cmd == "updateoutlen") {
 			// Send current output amt value to input
 			$("#setoutputcur").val(msg.data);
@@ -2689,12 +2679,6 @@ $(document).ready(function(){
 		} else if(msg.cmd == "setlabelreppenrange") {
 			// Update setting label with value from server
 			$("#setreppenrangecur").val(msg.data);
-		} else if(msg.cmd == "setlabeleps_cutoff") {
-			// Update setting label with value from server
-			$("#seteps_cutoffcur").val(msg.data);
-		} else if(msg.cmd == "setlabeleta_cutoff") {
-			// Update setting label with value from server
-			$("#seteta_cutoffcur").val(msg.data);
 		} else if(msg.cmd == "setlabeloutput") {
 			// Update setting label with value from server
 			$("#setoutputcur").val(msg.data);
diff --git a/static/koboldai.js b/static/koboldai.js
index e58e0593..038b6e87 100644
--- a/static/koboldai.js
+++ b/static/koboldai.js
@@ -250,8 +250,6 @@ map1.set('Tail Free Sampling', 3)
 map1.set('Typical Sampling', 4)
 map1.set('Temperature', 5)
 map1.set('Repetition Penalty', 6)
-map1.set('Epsilon Sampling', 7)
-map1.set('Eta Sampling', 8)
 const map2 = new Map()
 map2.set(0, 'Top K Sampling')
 map2.set(1, 'Top A Sampling')
@@ -260,8 +258,6 @@ map2.set(3, 'Tail Free Sampling')
 map2.set(4, 'Typical Sampling')
 map2.set(5, 'Temperature')
 map2.set(6, 'Repetition Penalty')
-map2.set(7, 'Epsilon Sampling')
-map2.set(8, 'Eta Sampling')
 var calc_token_usage_timeout;
 var game_text_scroll_timeout;
 var auto_loader_timeout;
diff --git a/templates/settings flyout.html b/templates/settings flyout.html
index a572d054..e394601d 100644
--- a/templates/settings flyout.html
+++ b/templates/settings flyout.html
@@ -176,8 +176,6 @@
 [sampler order list entry: Tail Free Sampling]
 [sampler order list entry: Typical Sampling]
 [sampler order list entry: Temperature]
-[sampler order list entry: Epsilon Sampling]
-[sampler order list entry: Eta Sampling]
 [sampler order move control: arrow_upward]
diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index 3d846178..c49e27da 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -104,8 +104,6 @@ def settings_callback() -> dict:
         "repetition_penalty": 1.0,
         "rpslope": 0.0,
         "rprange": 0,
-        "eps_cutoff": 0.0,
-        "eta_cutoff": 0.0,
     }
 
 def started_compiling_callback() -> None:
@@ -210,10 +208,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
     logits[tokens] = penalty_logits
     return logits
 
-def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0, eps_cutoff=0.0, eta_cutoff=0.0):
+def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
     for sid in jnp.array(sampler_order, int):
@@ -245,12 +243,10 @@ def kobold_sample_static(
     tfs=1.0,
     typical=1.0,
     top_a=0.0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0
 ):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
 
@@ -311,33 +307,6 @@ def kobold_sample_static(
             sorted_indices_to_remove,
         )
         return jnp.where(indices_to_remove, -jnp.inf, scores)
-
-    def sample_eps(scores: jnp.array) -> jnp.array:
-        probabilities = jax.nn.softmax(scores)
-        indices_to_remove = probabilities < (eps_cutoff * 1e-4)
-
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
-
-    def sample_eta(scores: jnp.array) -> jnp.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = jnp.exp(shifted_logits)
-        neg_entropy = jnp.nansum(probabilities * shifted_logits)
-
-        eps = jax.lax.min(eta_cutoff * 1e-4, jnp.sqrt(eta_cutoff*1e-4)*jnp.exp(neg_entropy))
-        indices_to_remove = probabilities < eps
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
 
 
     def sample_typical(scores: jnp.array) -> jnp.array:
@@ -442,8 +411,6 @@ def kobold_sample_static(
         logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), sample_typical, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), sample_temperature, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: sample_repetition_penalty(*x), lambda x: x[0], (logits, *rpargs))
-        logits = jax.lax.cond(jnp.logical_and(k == 7, eps_cutoff > 0.0), sample_eps, lambda x: x, logits)
-        logits = jax.lax.cond(jnp.logical_and(k == 8, eta_cutoff > 0.0), sample_eta, lambda x: x, logits)
     return jax.random.categorical(key, logits, -1).astype(jnp.uint32)
 
 pad_token_id = 50256
@@ -782,8 +749,6 @@ def infer_static(
     repetition_penalty=1.0,
     rpslope=0.0,
     rprange=0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0,
     numseqs=1,
     gen_len=80,
     soft_embeddings: Optional[np.array] = None,
@@ -794,7 +759,7 @@ def infer_static(
     if sampler_order is None:
         sampler_order = utils.default_sampler_order.copy()
     sampler_order = sampler_order[:]
-    if len(sampler_order) < 9: # Add repetition penalty at beginning if it's not present
+    if len(sampler_order) < 7: # Add repetition penalty at beginning if it's not present
         sampler_order = [6] + sampler_order
     sampler_order = np.uint32(sampler_order)
     total_batch = 1
@@ -816,9 +781,7 @@ def infer_static(
         "repetition_penalty": repetition_penalty * np.ones(total_batch),
         "rpslope": rpslope * np.ones(total_batch),
         "rprange": np.full(total_batch, rprange, dtype=np.uint32),
-        "top_k": np.full(total_batch, top_k, dtype=np.uint32),
-        "eps_cutoff": eps_cutoff * np.ones(total_batch),
-        "eta_cutoff": eta_cutoff * np.ones(total_batch)
+        "top_k": np.full(total_batch, top_k, dtype=np.uint32)
     }
     output = network.generate_static(
         batched_tokens,
diff --git a/utils.py b/utils.py
index dd909832..ab90ea6a 100644
--- a/utils.py
+++ b/utils.py
@@ -32,7 +32,7 @@ layers_module_names: Optional[List[str]] = None
 module_names: Optional[List[str]] = None
 named_buffers: Optional[List[tuple]] = None
 
-default_sampler_order = [6, 0, 1, 2, 3, 4, 5, 7 ,8]
+default_sampler_order = [6, 0, 1, 2, 3, 4, 5]
 
 emit = None