Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Undo broken samplers
aiserver.py (73 changed lines)
@@ -941,7 +941,7 @@ tags = [
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.6",
+    version="1.2.5",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1162,7 +1162,7 @@ def loadmodelsettings():
         koboldai_vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
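Note: before this commit the padding threshold was inconsistent across call sites (`< 8` here and in `processsettings`, `< 9` in `_generate_text`, `settings.__setattr__` and `infer_static` below); the revert settles all of them on `< 7`. A minimal standalone sketch of the rule, using a hypothetical helper name; the sampler IDs come from the UI maps later in this diff (0 Top K, 1 Top A, 2 Top P, 3 TFS, 4 Typical, 5 Temperature, 6 Repetition Penalty):

```python
def pad_sampler_order(sampler_order):
    # Hypothetical helper restating the padding rule above: legacy six-element
    # orders predate the repetition-penalty slot, so repetition penalty (ID 6)
    # is prepended to bring the list up to the full length of 7.
    if len(sampler_order) < 7:
        sampler_order = [6] + sampler_order
    return sampler_order

assert pad_sampler_order([0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]
assert pad_sampler_order([6, 0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]
```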
@@ -1192,12 +1192,6 @@ def loadmodelsettings():
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
         koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps_cutoff = js["eps_cutoff"]
-        koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta_cutoff = js["eta_cutoff"]
-        koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
     if("adventure" in js):
         koboldai_vars.adventure = js["adventure"]
     if("chatmode" in js):
@@ -1260,7 +1254,7 @@ def processsettings(js):
         koboldai_vars.andepth = js["andepth"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
@@ -1281,10 +1275,6 @@ def processsettings(js):
         koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta = js["eta_cutoff"]
     if("genamt" in js):
         koboldai_vars.genamt = js["genamt"]
     if("max_length" in js):
@@ -2261,8 +2251,6 @@ def lua_has_setting(setting):
         "setreppen",
         "setreppenslope",
         "setreppenrange",
-        "seteps_cutoff",
-        "seteta_cutoff",
         "settknmax",
         "setwidepth",
         "setuseprompt",
@@ -2283,8 +2271,6 @@ def lua_has_setting(setting):
         "reppen",
         "reppenslope",
         "reppenrange",
-        "eps_cutoff",
-        "eta_cutoff",
         "tknmax",
         "widepth",
         "useprompt",
@@ -2323,8 +2309,6 @@ def lua_get_setting(setting):
     if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
-    if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
-    if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
     if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
     if(setting == "anotedepth"): return koboldai_vars.andepth
     if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2363,8 +2347,6 @@ def lua_set_setting(setting, v):
     if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
-    if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
-    if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
     if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
     if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
     if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2788,16 +2770,6 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
         settingschanged()
         refresh_settings()
-    elif(msg['cmd'] == 'seteps_cutoff'):
-        koboldai_vars.eps_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
-    elif(msg['cmd'] == 'seteta_cutoff'):
-        koboldai_vars.eta_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
     elif(msg['cmd'] == 'setoutput'):
         koboldai_vars.genamt = int(msg['data'])
         emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2949,7 +2921,7 @@ def get_message(msg):
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
         sampler_order_min_length = 6
-        sampler_order_max_length = 9
+        sampler_order_max_length = 7
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
         if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
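The handler above now accepts orders of length 6 or 7 only. Combined with the permutation requirement stated in the `sampler_order` schema description further down, the full validation amounts to the following sketch (`validate_sampler_order` is a hypothetical name; the real `permutation_validator` body is not shown in this diff):

```python
def validate_sampler_order(sampler_order):
    # Restates the checks from the 'samplers' handler above plus the
    # permutation rule from the API schema description below.
    if not isinstance(sampler_order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
    if not (6 <= len(sampler_order) <= 7):
        raise ValueError("Sampler order must contain between 6 and 7 elements")
    if sorted(sampler_order) != list(range(len(sampler_order))):
        raise ValueError("Sampler order must be a permutation of the first N non-negative integers")
    return sampler_order
```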
@@ -3527,8 +3499,6 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         repetition_penalty=koboldai_vars.rep_pen,
         rpslope=koboldai_vars.rep_pen_slope,
         rprange=koboldai_vars.rep_pen_range,
-        eps_cutoff=koboldai_vars.eps_cutoff,
-        eta_cutoff=koboldai_vars.eta_cutoff,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
@@ -4176,8 +4146,6 @@ def refresh_settings():
     socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
-    socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
-    socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7176,7 +7144,7 @@ def UI_2_load_cookies():
 def UI_2_save_new_preset(data):
     preset = model_info()
     #Data to get from current settings
-    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
+    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
         preset[item] = getattr(koboldai_vars, item)
     #Data to get from UI
     for item in ['preset', 'description']:
@@ -8161,9 +8129,6 @@ class SamplerSettingsSchema(KoboldSchema):
     tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
     typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
     temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
-    eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
-    eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0,), metadata={"description": "Eta sampling value."})
-
 
 def soft_prompt_validator(soft_prompt: str):
     if len(soft_prompt.strip()) == 0:
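For context, the two deleted fields are ordinary marshmallow range-validated floats. A minimal sketch of how such a schema rejects out-of-range input (a standalone toy schema, not the project's own class):

```python
from marshmallow import Schema, ValidationError, fields, validate

class MiniSamplerSchema(Schema):
    # Mirrors the two fields removed above, for illustration only.
    eps_cutoff = fields.Float(validate=validate.Range(min=0, max=1000.0))
    eta_cutoff = fields.Float(validate=validate.Range(min=0))

try:
    MiniSamplerSchema().load({"eps_cutoff": 2000.0})
except ValidationError as err:
    print(err.messages)  # reports a range violation for eps_cutoff
```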
@@ -8214,7 +8179,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
-    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
+    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
     sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
     sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
     stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
@@ -8332,7 +8297,7 @@ def _generate_text(body: GenerationInputSchema):
         torch.manual_seed(body.sampler_seed)
         koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
     if hasattr(body, "sampler_order"):
-        if len(body.sampler_order) < 9:
+        if len(body.sampler_order) < 7:
             body.sampler_order = [6] + body.sampler_order
     # This maps each property of the setting to use when sending the generate idempotently
     # to the object which typically contains its value
@@ -8350,8 +8315,6 @@ def _generate_text(body: GenerationInputSchema):
|
||||
"tfs": ("koboldai_vars", "tfs", None),
|
||||
"typical": ("koboldai_vars", "typical", None),
|
||||
"temperature": ("koboldai_vars", "temp", None),
|
||||
"eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
|
||||
"eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
|
||||
"frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
|
||||
"frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
|
||||
"frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
|
||||
@@ -10797,26 +10760,6 @@ class TemperatureSamplingSettingSchema(KoboldSchema):
         name = "temperature"
         example_yaml_value = "0.5"
 
-@config_endpoint_schema
-class EpsilonSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
-    class KoboldMeta:
-        route_name = "eps_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eps_cutoff"
-        name = "Epsilon sampling"
-        example_yaml_value = "0.0"
-
-@config_endpoint_schema
-class EtaSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0), required=True)
-    class KoboldMeta:
-        route_name = "eta_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eta_cutoff"
-        name = "Eta sampling"
-        example_yaml_value = "0.0"
-
 @config_endpoint_schema
 class GensPerActionSettingSchema(KoboldSchema):
     value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10925,7 +10868,7 @@ class SamplerOrderSettingSchema(KoboldSchema):
         obj = "koboldai_vars"
         var_name = "sampler_order"
         name = "sampler order"
-        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
+        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
 
 @config_endpoint_schema
 class SamplerFullDeterminismSettingSchema(KoboldSchema):
@@ -23,9 +23,7 @@ def get_prompt(user_msg):
     "top_k": 0, # Keep the X most probable tokens
     "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
     "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
-    "eps": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
-    "eta": 0.0, # Entropy adaptive epsilon, from the same work as epsilon
-    "sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
+    "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
     "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
     #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
     "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
bridge.lua

@@ -890,8 +890,6 @@ return function(_python, _bridged)
 ---@field reppen number
 ---@field reppenslope number
 ---@field reppenrange number
----@field eps_cutoff number
----@field eta_cutoff number
 ---@field tknmax integer
 ---@field widepth integer
 ---@field useprompt boolean
gensettings.py

@@ -176,38 +176,6 @@ gensettingstf = [
     "name": "use_alt_rep_pen",
     "ui_level": 2
     },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Epsilon Sampling",
-    "id": "seteps_cutoff",
-    "min": 0.0,
-    "max": 9.0,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Put this value on 0 to disable its effect)",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eps_cutoff",
-    "ui_level": 1
-    },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Eta Sampling",
-    "id": "seteta_cutoff",
-    "min": 0.0,
-    "max": 20,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output. (Put this value on 0 to disable its effect)",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eta_cutoff",
-    "ui_level": 1
-    },
     {
     "uitype": "slider",
     "unit": "int",
@@ -1117,36 +1085,6 @@ gensettingsik =[{
     "classname": "model",
     "name": "tfs"
     },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Epsilon Sampling",
-    "id": "seteps_cutoff",
-    "min": 0.0,
-    "max": 9.0,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps.",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eps_cutoff",
-    },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Eta Sampling",
-    "id": "seteta_cutoff",
-    "min": 0.0,
-    "max": 20,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output.",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eta_cutoff",
-    },
     {
     "uitype": "slider",
     "unit": "int",
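Both deleted sliders store their value in units of 1e-4, and the warpers removed further down multiply by 1e-4 before comparing against token probabilities. A short illustration with an arbitrary value:

```python
ui_value = 3.0                  # hypothetical slider position for eps_cutoff
threshold = ui_value * 1e-4     # probability cutoff actually applied
# tokens whose softmax probability falls below 3e-4 would be discarded
```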
koboldai_settings.py

@@ -614,7 +614,7 @@ class settings(object):
         start_time = time.time()
         if key in self.__dict__ and key not in self.no_save_variables:
             if key == 'sampler_order':
-                if(len(value) < 9):
+                if(len(value) < 7):
                     value = [6] + value
             elif key == 'autosave':
                 autosave = value
@@ -669,9 +669,8 @@ class model_settings(settings):
                          'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
                          'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
     settings_name = "model"
-    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
-                        "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
-                        "sampler_order": [6,0,7,1,3,8,4,2,5]}
+    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
+                        "sampler_order": [6,0,1,2,3,4,5]}
     def __init__(self, socketio, koboldai_vars):
         self.enable_whitelist = False
         self._socketio = socketio
@@ -722,14 +721,12 @@ class model_settings(settings):
         self.top_a       = 0.0     # Default generator top-a
         self.tfs         = 1.0     # Default generator tfs (tail-free sampling)
         self.typical     = 1.0     # Default generator typical sampling threshold
-        self.eps_cutoff  = 0.0     # Default generator epsilon_cutoff
-        self.eta_cutoff  = 0.0     # Default generator eta_cutoff
         self.numseqs     = 1       # Number of sequences to ask the generator to create
         self.generated_tkns = 0    # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
         self.badwordsids = []
         self.fp32_model  = False   # Whether or not the most recently loaded HF model was in fp32 format
         self.modeldim    = -1      # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-        self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
+        self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
         self.newlinemode = "n"
         self.presets     = []      # Holder for presets
         self.selected_preset = ""
@@ -761,8 +758,6 @@ class model_settings(settings):
         self.top_a       = 0.0
         self.tfs         = 1.0
         self.typical     = 1.0
-        self.eps_cutoff  = 0.0
-        self.eta_cutoff  = 0.0
         self.rep_pen_range = 1024
         self.rep_pen_slope = 0.7
 
@@ -2768,8 +2763,6 @@ default_preset = {
     "rep_pen": 1.1,
     "rep_pen_range": 1024,
     "rep_pen_slope": 0.7,
-    "eps_cutoff": 0.0,
-    "eta_cutoff": 0.0,
     "sampler_order": [
         6,
         0,
@@ -2777,9 +2770,7 @@ default_preset = {
         2,
         3,
         4,
-        5,
-        7,
-        8
+        5
     ]
 }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
modeling/inference_model.py

@@ -134,8 +134,6 @@ class GenerationSettings:
             "rep_pen",
             "rep_pen_slope",
             "rep_pen_range",
-            "eps_cutoff",
-            "eta_cutoff",
             "sampler_order",
         ]:
             setattr(
modeling/inference_models/hf_mtj/class.py

@@ -140,7 +140,7 @@ class model_backend(HFInferenceModel):
         def mtj_settings_callback() -> dict:
             sampler_order = utils.koboldai_vars.sampler_order[:]
             if (
-                len(sampler_order) < 9
+                len(sampler_order) < 7
             ):  # Add repetition penalty at beginning if it's not present
                 sampler_order = [6] + sampler_order
             return {
@@ -154,8 +154,6 @@ class model_backend(HFInferenceModel):
                 "repetition_penalty": float(utils.koboldai_vars.rep_pen),
                 "rpslope": float(utils.koboldai_vars.rep_pen_slope),
                 "rprange": int(utils.koboldai_vars.rep_pen_range),
-                "eps_cutoff": float(utils.koboldai_vars.eps_cutoff),
-                "eta_cutoff": float(utils.koboldai_vars.eta_cutoff),
             }
 
         tpu_mtj_backend.socketio = utils.socketio
@@ -295,8 +293,6 @@ class model_backend(HFInferenceModel):
             tfs=gen_settings.tfs,
             typical=gen_settings.typical,
             top_a=gen_settings.top_a,
-            eps_cutoff=gen_settings.eps_cutoff,
-            eta_cutoff=gen_settings.eta_cutoff,
             numseqs=batch_count,
             repetition_penalty=gen_settings.rep_pen,
             rpslope=gen_settings.rep_pen_slope,
modeling/warpers.py

@@ -68,8 +68,6 @@ def update_settings():
     RepetitionPenalty.rep_pen_range = koboldai_vars.rep_pen_range
     RepetitionPenalty.rep_pen_slope = koboldai_vars.rep_pen_slope
     RepetitionPenalty.use_alt_rep_pen = koboldai_vars.use_alt_rep_pen
-    Epsilon.epsilon = koboldai_vars.eps_cutoff
-    Eta.eta = koboldai_vars.eta_cutoff
 
 
 class Warper:
@@ -105,8 +103,6 @@ class Warper:
             4: Typical,
             5: Temperature,
             6: RepetitionPenalty,
-            7: Epsilon,
-            8: Eta,
         }[warper_id]
 
     @classmethod
@@ -544,92 +540,3 @@ class RepetitionPenalty(Warper):
     @classmethod
     def value_is_valid(cls) -> bool:
         return cls.rep_pen != 1.0
-
-
-class Epsilon(Warper):
-    """
-    Epsilon sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    epsilon: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        # Probably the simplest sampler there is, just remove tokens with probs under a threshold
-        probs = scores.softmax(dim=-1)
-
-        indices_to_remove = probs < (cls.epsilon * 1e-4)
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        probabilities = np.array(jax.nn.softmax(scores), copy=True)
-
-        indices_to_remove = probabilities < (cls.epsilon * 1e-4)
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.epsilon > 0.0
-
-
-class Eta(Warper):
-    """
-    Eta sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    eta: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        shifted_logits = torch.log_softmax(scores, dim=-1)
-        probs = shifted_logits.exp()
-
-        neg_entropy = (probs * shifted_logits).nansum(dim=-1, keepdim=True)
-        epsilon = torch.min(torch.tensor(cls.eta * 1e-4), torch.sqrt(torch.tensor(cls.eta*1e-4))*torch.exp(neg_entropy))
-
-        indices_to_remove = probs < epsilon
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = np.exp(shifted_logits)
-        neg_entropy = np.nansum(probabilities * shifted_logits)
-        epsilon = min(cls.eta * 1e-4, np.sqrt(cls.eta*1e-4)*np.exp(neg_entropy))
-
-        indices_to_remove = probabilities < epsilon
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.eta > 0.0
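For reference, the math the two deleted warpers implemented, restated as a standalone NumPy sketch (hypothetical helper functions; both mirror the torch/jax code removed above, including the units-of-1e-4 convention):

```python
import numpy as np

def eps_cutoff_mask(logits, eps_cutoff):
    # Epsilon sampling: mark tokens whose probability falls below
    # eps_cutoff * 1e-4 for removal.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    return probs < (eps_cutoff * 1e-4)

def eta_cutoff_mask(logits, eta_cutoff):
    # Eta sampling: the threshold adapts to the entropy H of the distribution,
    # eps = min(eta, sqrt(eta) * exp(-H)) with eta = eta_cutoff * 1e-4, so
    # confident (low-entropy) distributions are pruned more aggressively.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    neg_entropy = np.nansum(probs * np.log(probs))
    eta = eta_cutoff * 1e-4
    return probs < min(eta, np.sqrt(eta) * np.exp(neg_entropy))
```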
static/application.js

@@ -1315,8 +1315,6 @@ function buildSamplerList(samplers) {
         "Typical Sampling",
         "Temperature",
         "Repetition Penalty",
-        "Epsilon Sampling",
-        "Eta Sampling"
     ]
     for(i=0; i<samplers.length; i++) {
         samplerslist.append("<div class=\"flex\">\
@@ -2642,14 +2640,6 @@ $(document).ready(function(){
             // Send current rep pen value to input
             $("#setreppenrangecur").val(msg.data);
             $("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
-        } else if(msg.cmd == "updateeps_cutoff") {
-            // Send current epsilon value to input
-            $("#seteps_cutoffcurr").val(msg.data);
-            $("#seteps_cutoff").val(parseFloat(msg.data)).trigger("change");
-        } else if(msg.cmd == "updateeta_cutoff") {
-            // Send current eta value to input
-            $("#seteta_cutoffcur").val(msg.data);
-            $("#seteta_cutoff").val(parseFloat(msg.data)).trigger("change");
         } else if(msg.cmd == "updateoutlen") {
             // Send current output amt value to input
             $("#setoutputcur").val(msg.data);
@@ -2689,12 +2679,6 @@ $(document).ready(function(){
         } else if(msg.cmd == "setlabelreppenrange") {
             // Update setting label with value from server
             $("#setreppenrangecur").val(msg.data);
-        } else if(msg.cmd == "setlabeleps_cutoff") {
-            // Update setting label with value from server
-            $("#seteps_cutoffcur").val(msg.data);
-        } else if(msg.cmd == "setlabeleta_cutoff") {
-            // Update setting label with value from server
-            $("#seteta_cutoffcur").val(msg.data);
         } else if(msg.cmd == "setlabeloutput") {
             // Update setting label with value from server
             $("#setoutputcur").val(msg.data);
static/koboldai.js

@@ -250,8 +250,6 @@ map1.set('Tail Free Sampling', 3)
 map1.set('Typical Sampling', 4)
 map1.set('Temperature', 5)
 map1.set('Repetition Penalty', 6)
-map1.set('Epsilon Sampling', 7)
-map1.set('Eta Sampling', 8)
 const map2 = new Map()
 map2.set(0, 'Top K Sampling')
 map2.set(1, 'Top A Sampling')
@@ -260,8 +258,6 @@ map2.set(3, 'Tail Free Sampling')
 map2.set(4, 'Typical Sampling')
 map2.set(5, 'Temperature')
 map2.set(6, 'Repetition Penalty')
-map2.set(7, 'Epsilon Sampling')
-map2.set(8, 'Eta Sampling')
 var calc_token_usage_timeout;
 var game_text_scroll_timeout;
 var auto_loader_timeout;
@@ -176,8 +176,6 @@
                 <li class="sample_order cursor" onclick="select_sample(this);">Tail Free Sampling</li>
                 <li class="sample_order cursor" onclick="select_sample(this);">Typical Sampling</li>
                 <li class="sample_order cursor" onclick="select_sample(this);">Temperature</li>
-                <li class="sample_order cursor" onclick="select_sample(this);">Epsilon Sampling</li>
-                <li class="sample_order cursor" onclick="select_sample(this);">Eta Sampling</li>
             </ul>
             <div style="display:flex;flex-direction:column;margin-top: 25px;">
                 <div class="material-icons-outlined cursor" onclick="move_sample('up');">arrow_upward</div>
tpu_mtj_backend.py

@@ -104,8 +104,6 @@ def settings_callback() -> dict:
         "repetition_penalty": 1.0,
         "rpslope": 0.0,
         "rprange": 0,
-        "eps_cutoff": 0.0,
-        "eta_cutoff": 0.0,
     }
 
 def started_compiling_callback() -> None:
@@ -210,10 +208,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generated_index, gen_length, rpslope, rprange):
     logits[tokens] = penalty_logits
     return logits
 
-def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0, eps_cutoff=0.0, eta_cutoff=0.0):
+def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
     for sid in jnp.array(sampler_order, int):
@@ -245,12 +243,10 @@ def kobold_sample_static(
     tfs=1.0,
     typical=1.0,
     top_a=0.0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0
 ):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
 
@@ -311,33 +307,6 @@ def kobold_sample_static(
             sorted_indices_to_remove,
         )
         return jnp.where(indices_to_remove, -jnp.inf, scores)
-
-    def sample_eps(scores: jnp.array) -> jnp.array:
-        probabilities = jax.nn.softmax(scores)
-        indices_to_remove = probabilities < (eps_cutoff * 1e-4)
-
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
-
-    def sample_eta(scores: jnp.array) -> jnp.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = jnp.exp(shifted_logits)
-        neg_entropy = jnp.nansum(probabilities * shifted_logits)
-
-        eps = jax.lax.min(eta_cutoff * 1e-4, jnp.sqrt(eta_cutoff*1e-4)*jnp.exp(neg_entropy))
-        indices_to_remove = probabilities < eps
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
 
 
     def sample_typical(scores: jnp.array) -> jnp.array:
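The removed JAX helpers differ from the torch versions in one detail called out in their comments: JIT-traced JAX code cannot branch on the whole mask with an `if`, so they always unmask the single most probable token using a functional index update. A NumPy restatement of that fallback (hypothetical helper name):

```python
import numpy as np

def keep_top1(indices_to_remove, probabilities):
    # Branch-free equivalent of the top-1 fallback in the deleted sample_eps /
    # sample_eta: the most probable token is always kept, so the cutoff can
    # never null out the entire distribution.
    mask = indices_to_remove.copy()
    mask[np.argmax(probabilities)] = False
    return mask
```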
@@ -442,8 +411,6 @@ def kobold_sample_static(
         logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), sample_typical, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), sample_temperature, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: sample_repetition_penalty(*x), lambda x: x[0], (logits, *rpargs))
-        logits = jax.lax.cond(jnp.logical_and(k == 7, eps_cutoff > 0.0), sample_eps, lambda x: x, logits)
-        logits = jax.lax.cond(jnp.logical_and(k == 8, eta_cutoff > 0.0), sample_eta, lambda x: x, logits)
     return jax.random.categorical(key, logits, -1).astype(jnp.uint32)
 
 pad_token_id = 50256
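Because `kobold_sample_static` runs inside the JIT-compiled generation loop, each sampler is gated through `jax.lax.cond` rather than a Python `if`, with a disabled sampler acting as the identity. The dispatch shape, restated as plain Python with hypothetical names:

```python
def apply_sampler_chain(logits, sampler_order, filters):
    # filters maps sampler ID -> (is_active, transform), mirroring the
    # jax.lax.cond chain above where an inactive sampler passes logits through.
    for sampler_id in sampler_order:
        is_active, transform = filters[sampler_id]
        if is_active:
            logits = transform(logits)
    return logits
```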
@@ -782,8 +749,6 @@ def infer_static(
     repetition_penalty=1.0,
     rpslope=0.0,
     rprange=0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0,
     numseqs=1,
     gen_len=80,
     soft_embeddings: Optional[np.array] = None,
@@ -794,7 +759,7 @@ def infer_static(
     if sampler_order is None:
         sampler_order = utils.default_sampler_order.copy()
     sampler_order = sampler_order[:]
-    if len(sampler_order) < 9:  # Add repetition penalty at beginning if it's not present
+    if len(sampler_order) < 7:  # Add repetition penalty at beginning if it's not present
         sampler_order = [6] + sampler_order
     sampler_order = np.uint32(sampler_order)
     total_batch = 1
@@ -816,9 +781,7 @@ def infer_static(
         "repetition_penalty": repetition_penalty * np.ones(total_batch),
         "rpslope": rpslope * np.ones(total_batch),
         "rprange": np.full(total_batch, rprange, dtype=np.uint32),
-        "top_k": np.full(total_batch, top_k, dtype=np.uint32),
-        "eps_cutoff": eps_cutoff * np.ones(total_batch),
-        "eta_cutoff": eta_cutoff * np.ones(total_batch)
+        "top_k": np.full(total_batch, top_k, dtype=np.uint32)
     }
     output = network.generate_static(
         batched_tokens,