Undo broken samplers

Henk
2023-10-14 20:38:35 +02:00
parent e3a43eef15
commit 829ea147fc
13 changed files with 23 additions and 313 deletions

View File

@@ -941,7 +941,7 @@ tags = [
api_version = None # This gets set automatically so don't change this value
api_v1 = KoboldAPISpec(
version="1.2.6",
version="1.2.5",
prefixes=["/api/v1", "/api/latest"],
tags=tags,
)
@@ -1162,7 +1162,7 @@ def loadmodelsettings():
koboldai_vars.nobreakmodel = js["nobreakmodel"]
if("sampler_order" in js):
sampler_order = js["sampler_order"]
if(len(sampler_order) < 8):
if(len(sampler_order) < 7):
sampler_order = [6] + sampler_order
koboldai_vars.sampler_order = sampler_order
if("temp" in js):
@@ -1192,12 +1192,6 @@ def loadmodelsettings():
if("rep_pen_range" in js):
koboldai_vars.rep_pen_range = js["rep_pen_range"]
koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
if("eps_cutoff" in js):
koboldai_vars.eps_cutoff = js["eps_cutoff"]
koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
if("eta_cutoff" in js):
koboldai_vars.eta_cutoff = js["eta_cutoff"]
koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
if("adventure" in js):
koboldai_vars.adventure = js["adventure"]
if("chatmode" in js):
@@ -1260,7 +1254,7 @@ def processsettings(js):
koboldai_vars.andepth = js["andepth"]
if("sampler_order" in js):
sampler_order = js["sampler_order"]
if(len(sampler_order) < 8):
if(len(sampler_order) < 7):
sampler_order = [6] + sampler_order
koboldai_vars.sampler_order = sampler_order
if("temp" in js):
@@ -1281,10 +1275,6 @@ def processsettings(js):
koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
if("rep_pen_range" in js):
koboldai_vars.rep_pen_range = js["rep_pen_range"]
if("eps_cutoff" in js):
koboldai_vars.eps = js["eps_cutoff"]
if("eta_cutoff" in js):
koboldai_vars.eta = js["eta_cutoff"]
if("genamt" in js):
koboldai_vars.genamt = js["genamt"]
if("max_length" in js):
@@ -2261,8 +2251,6 @@ def lua_has_setting(setting):
"setreppen",
"setreppenslope",
"setreppenrange",
"seteps_cutoff",
"seteta_cutoff",
"settknmax",
"setwidepth",
"setuseprompt",
@@ -2283,8 +2271,6 @@ def lua_has_setting(setting):
"reppen",
"reppenslope",
"reppenrange",
"eps_cutoff",
"eta_cutoff",
"tknmax",
"widepth",
"useprompt",
@@ -2323,8 +2309,6 @@ def lua_get_setting(setting):
if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
if(setting == "anotedepth"): return koboldai_vars.andepth
if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2363,8 +2347,6 @@ def lua_set_setting(setting, v):
if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2788,16 +2770,6 @@ def get_message(msg):
emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'seteps_cutoff'):
koboldai_vars.eps_cutoff = float(msg['data'])
emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'seteta_cutoff'):
koboldai_vars.eta_cutoff = float(msg['data'])
emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
settingschanged()
refresh_settings()
elif(msg['cmd'] == 'setoutput'):
koboldai_vars.genamt = int(msg['data'])
emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2949,7 +2921,7 @@ def get_message(msg):
elif(msg['cmd'] == 'samplers'):
sampler_order = msg["data"]
sampler_order_min_length = 6
sampler_order_max_length = 9
sampler_order_max_length = 7
if(not isinstance(sampler_order, list)):
raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
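
The handler above now caps the order at the seven remaining samplers. A self-contained sketch of the tightened length check, with hypothetical names:

def validate_sampler_order_length(sampler_order, min_len=6, max_len=7):
    if not isinstance(sampler_order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
    if not (min_len <= len(sampler_order) <= max_len):
        raise ValueError(f"Sampler order must be a list of length {min_len}-{max_len}")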
@@ -3527,8 +3499,6 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
repetition_penalty=koboldai_vars.rep_pen,
rpslope=koboldai_vars.rep_pen_slope,
rprange=koboldai_vars.rep_pen_range,
eps_cutoff=koboldai_vars.eps_cutoff,
eta_cutoff=koboldai_vars.eta_cutoff,
soft_embeddings=koboldai_vars.sp,
soft_tokens=soft_tokens,
sampler_order=koboldai_vars.sampler_order,
@@ -4176,8 +4146,6 @@ def refresh_settings():
socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7176,7 +7144,7 @@ def UI_2_load_cookies():
def UI_2_save_new_preset(data):
preset = model_info()
#Data to get from current settings
for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
preset[item] = getattr(koboldai_vars, item)
#Data to get from UI
for item in ['preset', 'description']:
@@ -8161,9 +8129,6 @@ class SamplerSettingsSchema(KoboldSchema):
tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0,), metadata={"description": "Eta sampling value."})
def soft_prompt_validator(soft_prompt: str):
if len(soft_prompt.strip()) == 0:
@@ -8214,7 +8179,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, generated output will not be displayed in the console."})
sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
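
The permutation_validator referenced in the sampler_order field above is defined elsewhere in the file and not shown in this diff; a plausible reconstruction of what it enforces, consistent with the corrected description:

from marshmallow import ValidationError

def permutation_validator(lst):
    # A valid order is a permutation of the first len(lst) non-negative integers.
    if sorted(lst) != list(range(len(lst))):
        raise ValidationError("Sampler order must be a permutation of the first N non-negative integers.")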
@@ -8332,7 +8297,7 @@ def _generate_text(body: GenerationInputSchema):
torch.manual_seed(body.sampler_seed)
koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
if hasattr(body, "sampler_order"):
if len(body.sampler_order) < 9:
if len(body.sampler_order) < 7:
body.sampler_order = [6] + body.sampler_order
# This maps each property of the setting to use when sending the generate idempotently
# To the object which typically contains its value
@@ -8350,8 +8315,6 @@ def _generate_text(body: GenerationInputSchema):
"tfs": ("koboldai_vars", "tfs", None),
"typical": ("koboldai_vars", "typical", None),
"temperature": ("koboldai_vars", "temp", None),
"eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
"eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
"frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
"frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
"frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
@@ -10797,26 +10760,6 @@ class TemperatureSamplingSettingSchema(KoboldSchema):
name = "temperature"
example_yaml_value = "0.5"
@config_endpoint_schema
class EpsilonSamplingSettingSchema(KoboldSchema):
value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
class KoboldMeta:
route_name = "eps_cutoff"
obj = "koboldai_vars"
var_name = "eps_cutoff"
name = "Epsilon sampling"
example_yaml_value = "0.0"
@config_endpoint_schema
class EtaSamplingSettingSchema(KoboldSchema):
value = fields.Float(validate=validate.Range(min=0), required=True)
class KoboldMeta:
route_name = "eta_cutoff"
obj = "koboldai_vars"
var_name = "eta_cutoff"
name = "Eta sampling"
example_yaml_value = "0.0"
@config_endpoint_schema
class GensPerActionSettingSchema(KoboldSchema):
value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10925,7 +10868,7 @@ class SamplerOrderSettingSchema(KoboldSchema):
obj = "koboldai_vars"
var_name = "sampler_order"
name = "sampler order"
example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
@config_endpoint_schema
class SamplerFullDeterminismSettingSchema(KoboldSchema):

View File

@@ -23,9 +23,7 @@ def get_prompt(user_msg):
"top_k": 0, # Keep the X most probable tokens
"top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
"typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
"eps": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
"eta": 0.0, # Entropy adaptive epsilon, from the same work as epsilon
"sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
"sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
"stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
#"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
"singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose

View File

@@ -890,8 +890,6 @@ return function(_python, _bridged)
---@field reppen number
---@field reppenslope number
---@field reppenrange number
---@field eps_cutoff number
---@field eta_cutoff number
---@field tknmax integer
---@field widepth integer
---@field useprompt boolean

View File

@@ -176,38 +176,6 @@ gensettingstf = [
"name": "use_alt_rep_pen",
"ui_level": 2
},
{
"uitype": "slider",
"unit": "float",
"label": "Epsilon Sampling",
"id": "seteps_cutoff",
"min": 0.0,
"max": 9.0,
"step": 0.01,
"default": 0.0,
"tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Put this value on 0 to disable its effect)",
"menu_path": "Settings",
"sub_path": "Sampling",
"classname": "model",
"name": "eps_cutoff",
"ui_level": 1
},
{
"uitype": "slider",
"unit": "float",
"label": "Eta Sampling",
"id": "seteta_cutoff",
"min": 0.0,
"max": 20,
"step": 0.01,
"default": 0.0,
"tooltip": "Slider is in units of 1e-4.Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output. (Put this value on 0 to disable its effect)",
"menu_path": "Settings",
"sub_path": "Sampling",
"classname": "model",
"name": "eta_cutoff",
"ui_level": 1
},
{
"uitype": "slider",
"unit": "int",
@@ -1117,36 +1085,6 @@ gensettingsik =[{
"classname": "model",
"name": "tfs"
},
{
"uitype": "slider",
"unit": "float",
"label": "Epsilon Sampling",
"id": "seteps_cutoff",
"min": 0.0,
"max": 9.0,
"step": 0.01,
"default": 0.0,
"tooltip": "Slider is in units of 1e-4.Discards tokens with probabilities under eps.",
"menu_path": "Settings",
"sub_path": "Sampling",
"classname": "model",
"name": "eps_cutoff",
},
{
"uitype": "slider",
"unit": "float",
"label": "Eta Sampling",
"id": "seteta_cutoff",
"min": 0.0,
"max": 20,
"step": 0.01,
"default": 0.0,
"tooltip": "Slider is in units of 1e-4.Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output.",
"menu_path": "Settings",
"sub_path": "Sampling",
"classname": "model",
"name": "eta_cutoff",
},
{
"uitype": "slider",
"unit": "int",

View File

@@ -614,7 +614,7 @@ class settings(object):
start_time = time.time()
if key in self.__dict__ and key not in self.no_save_variables:
if key == 'sampler_order':
if(len(value) < 9):
if(len(value) < 7):
value = [6] + value
elif key == 'autosave':
autosave = value
@@ -669,9 +669,8 @@ class model_settings(settings):
'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
settings_name = "model"
default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
"temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
"sampler_order": [6,0,7,1,3,8,4,2,5]}
default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
"sampler_order": [6,0,1,2,3,4,5]}
def __init__(self, socketio, koboldai_vars):
self.enable_whitelist = False
self._socketio = socketio
@@ -722,14 +721,12 @@ class model_settings(settings):
self.top_a = 0.0 # Default generator top-a
self.tfs = 1.0 # Default generator tfs (tail-free sampling)
self.typical = 1.0 # Default generator typical sampling threshold
self.eps_cutoff = 0.0 # Default generator epsilon_cutoff
self.eta_cutoff = 0.0 # Default generator eta_cutoff
self.numseqs = 1 # Number of sequences to ask the generator to create
self.generated_tkns = 0 # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
self.badwordsids = []
self.fp32_model = False # Whether or not the most recently loaded HF model was in fp32 format
self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
self.newlinemode = "n"
self.presets = [] # Holder for presets
self.selected_preset = ""
@@ -761,8 +758,6 @@ class model_settings(settings):
self.top_a = 0.0
self.tfs = 1.0
self.typical = 1.0
self.eps_cutoff = 0.0
self.eta_cutoff = 0.0
self.rep_pen_range = 1024
self.rep_pen_slope = 0.7
@@ -2768,8 +2763,6 @@ default_preset = {
"rep_pen": 1.1,
"rep_pen_range": 1024,
"rep_pen_slope": 0.7,
"eps_cutoff": 0.0,
"eta_cutoff": 0.0,
"sampler_order": [
6,
0,
@@ -2777,9 +2770,7 @@ default_preset = {
2,
3,
4,
5,
7,
8
5
]
}
badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting

View File

@@ -134,8 +134,6 @@ class GenerationSettings:
"rep_pen",
"rep_pen_slope",
"rep_pen_range",
"eps_cutoff",
"eta_cutoff",
"sampler_order",
]:
setattr(

View File

@@ -140,7 +140,7 @@ class model_backend(HFInferenceModel):
def mtj_settings_callback() -> dict:
sampler_order = utils.koboldai_vars.sampler_order[:]
if (
len(sampler_order) < 9
len(sampler_order) < 7
): # Add repetition penalty at beginning if it's not present
sampler_order = [6] + sampler_order
return {
@@ -154,8 +154,6 @@ class model_backend(HFInferenceModel):
"repetition_penalty": float(utils.koboldai_vars.rep_pen),
"rpslope": float(utils.koboldai_vars.rep_pen_slope),
"rprange": int(utils.koboldai_vars.rep_pen_range),
"eps_cutoff": float(utils.koboldai_vars.eps_cutoff),
"eta_cutoff": float(utils.koboldai_vars.eta_cutoff),
}
tpu_mtj_backend.socketio = utils.socketio
@@ -295,8 +293,6 @@ class model_backend(HFInferenceModel):
tfs=gen_settings.tfs,
typical=gen_settings.typical,
top_a=gen_settings.top_a,
eps_cutoff=gen_settings.eps_cutoff,
eta_cutoff=gen_settings.eta_cutoff,
numseqs=batch_count,
repetition_penalty=gen_settings.rep_pen,
rpslope=gen_settings.rep_pen_slope,

View File

@@ -68,8 +68,6 @@ def update_settings():
RepetitionPenalty.rep_pen_range = koboldai_vars.rep_pen_range
RepetitionPenalty.rep_pen_slope = koboldai_vars.rep_pen_slope
RepetitionPenalty.use_alt_rep_pen = koboldai_vars.use_alt_rep_pen
Epsilon.epsilon = koboldai_vars.eps_cutoff
Eta.eta = koboldai_vars.eta_cutoff
class Warper:
@@ -105,8 +103,6 @@ class Warper:
4: Typical,
5: Temperature,
6: RepetitionPenalty,
7: Epsilon,
8: Eta,
}[warper_id]
@classmethod
@@ -544,92 +540,3 @@ class RepetitionPenalty(Warper):
@classmethod
def value_is_valid(cls) -> bool:
return cls.rep_pen != 1.0
class Epsilon(Warper):
"""
Epsilon sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
"""
epsilon: float = 0.0
@classmethod
def torch(cls, scores: torch.Tensor) -> torch.Tensor:
# Probably the simplest sampler there is, just remove tokens with probs under a threshold
probs = scores.softmax(dim=-1)
indices_to_remove = probs < (cls.epsilon * 1e-4)
# hack to avoid nulling out all the logits for misconfigured sampler param
# equivalent to keep_min_k=1, as is default in hf transformers implementation
# implemented this way to be more easily modifiable to fallback to topk for a configurable k
if(torch.all(indices_to_remove)):
topk_prob = torch.max(probs)
indices_to_remove = probs < topk_prob
scores = scores.masked_fill(indices_to_remove, -torch.inf)
return scores
@classmethod
def jax_dynamic(cls, scores: np.array) -> np.array:
probabilities = np.array(jax.nn.softmax(scores), copy=True)
indices_to_remove = probabilities < (cls.epsilon * 1e-4)
if(np.all(indices_to_remove)):
topk_prob = np.max(probabilities)
indices_to_remove = probabilities < topk_prob
return np.where(
indices_to_remove, -np.inf, scores
)
@classmethod
def value_is_valid(cls) -> bool:
return cls.epsilon > 0.0
class Eta(Warper):
"""
Eta sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
"""
eta: float = 0.0
@classmethod
def torch(cls, scores: torch.Tensor) -> torch.Tensor:
shifted_logits = torch.log_softmax(scores, dim=-1)
probs = shifted_logits.exp()
neg_entropy = (probs * shifted_logits).nansum(dim=-1, keepdim=True)
epsilon = torch.min(torch.tensor(cls.eta * 1e-4), torch.sqrt(torch.tensor(cls.eta*1e-4))*torch.exp(neg_entropy))
indices_to_remove = probs < epsilon
# hack to avoid nulling out all the logits for misconfigured sampler param
# equivalent to keep_min_k=1, as is default in hf transformers implementation
# implemented this way to be more easily modifiable to fallback to topk for a configurable k
if(torch.all(indices_to_remove)):
topk_prob = torch.max(probs)
indices_to_remove = probs < topk_prob
scores = scores.masked_fill(indices_to_remove, -torch.inf)
return scores
@classmethod
def jax_dynamic(cls, scores: np.array) -> np.array:
shifted_logits = jax.nn.log_softmax(scores)
probabilities = np.exp(shifted_logits)
neg_entropy = np.nansum(probabilities * shifted_logits)
epsilon = min(cls.eta * 1e-4, np.sqrt(cls.eta*1e-4)*np.exp(neg_entropy))
indices_to_remove = probabilities < epsilon
if(np.all(indices_to_remove)):
topk_prob = np.max(probabilities)
indices_to_remove = probabilities < topk_prob
return np.where(
indices_to_remove, -np.inf, scores
)
@classmethod
def value_is_valid(cls) -> bool:
return cls.eta > 0.0
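
For the record, the removed Epsilon warper discarded tokens with probability below eps_cutoff * 1e-4, and the removed Eta warper computed an adaptive threshold from the entropy of the distribution. In LaTeX, with \eta = \text{eta\_cutoff} \cdot 10^{-4} and H(p) the Shannon entropy:

\epsilon_\eta = \min\!\left(\eta,\ \sqrt{\eta}\, e^{-H(p)}\right), \qquad H(p) = -\sum_i p_i \log p_i

which matches the neg_entropy term in the code above, since exp(neg_entropy) = e^{-H(p)}.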

View File

@@ -1315,8 +1315,6 @@ function buildSamplerList(samplers) {
"Typical Sampling",
"Temperature",
"Repetition Penalty",
"Epsilon Sampling",
"Eta Sampling"
]
for(i=0; i<samplers.length; i++) {
samplerslist.append("<div class=\"flex\">\
@@ -2642,14 +2640,6 @@ $(document).ready(function(){
// Send current rep pen value to input
$("#setreppenrangecur").val(msg.data);
$("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updateeps_cutoff") {
// Send current epsilon value to input
$("#seteps_cutoffcurr").val(msg.data);
$("#seteps_cutoff").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updateeta_cutoff") {
// Send current eta value to input
$("#seteta_cutoffcur").val(msg.data);
$("#seteta_cutoff").val(parseFloat(msg.data)).trigger("change");
} else if(msg.cmd == "updateoutlen") {
// Send current output amt value to input
$("#setoutputcur").val(msg.data);
@@ -2689,12 +2679,6 @@ $(document).ready(function(){
} else if(msg.cmd == "setlabelreppenrange") {
// Update setting label with value from server
$("#setreppenrangecur").val(msg.data);
} else if(msg.cmd == "setlabeleps_cutoff") {
// Update setting label with value from server
$("#seteps_cutoffcur").val(msg.data);
} else if(msg.cmd == "setlabeleta_cutoff") {
// Update setting label with value from server
$("#seteta_cutoffcur").val(msg.data);
} else if(msg.cmd == "setlabeloutput") {
// Update setting label with value from server
$("#setoutputcur").val(msg.data);

View File

@@ -250,8 +250,6 @@ map1.set('Tail Free Sampling', 3)
map1.set('Typical Sampling', 4)
map1.set('Temperature', 5)
map1.set('Repetition Penalty', 6)
map1.set('Epsilon Sampling', 7)
map1.set('Eta Sampling', 8)
const map2 = new Map()
map2.set(0, 'Top K Sampling')
map2.set(1, 'Top A Sampling')
@@ -260,8 +258,6 @@ map2.set(3, 'Tail Free Sampling')
map2.set(4, 'Typical Sampling')
map2.set(5, 'Temperature')
map2.set(6, 'Repetition Penalty')
map2.set(7, 'Epsilon Sampling')
map2.set(8, 'Eta Sampling')
var calc_token_usage_timeout;
var game_text_scroll_timeout;
var auto_loader_timeout;
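
For reference, the sampler ids that survive this commit map to the following names; this is a Python mirror of the JS maps above (ids 0-2 fall outside the hunk but follow the ordering used elsewhere in the codebase):

SAMPLER_NAMES = {
    0: "Top K Sampling",
    1: "Top A Sampling",
    2: "Top P Sampling",
    3: "Tail Free Sampling",
    4: "Typical Sampling",
    5: "Temperature",
    6: "Repetition Penalty",
}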

View File

@@ -176,8 +176,6 @@
<li class="sample_order cursor" onclick="select_sample(this);">Tail Free Sampling</li>
<li class="sample_order cursor" onclick="select_sample(this);">Typical Sampling</li>
<li class="sample_order cursor" onclick="select_sample(this);">Temperature</li>
<li class="sample_order cursor" onclick="select_sample(this);">Epsilon Sampling</li>
<li class="sample_order cursor" onclick="select_sample(this);">Eta Sampling</li>
</ul>
<div style="display:flex;flex-direction:column;margin-top: 25px;">
<div class="material-icons-outlined cursor" onclick="move_sample('up');">arrow_upward</div>

View File

@@ -104,8 +104,6 @@ def settings_callback() -> dict:
"repetition_penalty": 1.0,
"rpslope": 0.0,
"rprange": 0,
"eps_cutoff": 0.0,
"eta_cutoff": 0.0,
}
def started_compiling_callback() -> None:
@@ -210,10 +208,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generat
logits[tokens] = penalty_logits
return logits
def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0, eps_cutoff=0.0, eta_cutoff=0.0):
def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
'''
This gets called by generate_loop_fn to apply a series of 8 filters
to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
for sid in jnp.array(sampler_order, int):
@@ -245,12 +243,10 @@ def kobold_sample_static(
tfs=1.0,
typical=1.0,
top_a=0.0,
eps_cutoff=0.0,
eta_cutoff=0.0
):
'''
This gets called by generate_loop_fn to apply a series of 8 filters
to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
This gets called by generate_loop_fn to apply a series of 6 filters
to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
before picking one token using the modified logits
'''
@@ -311,33 +307,6 @@ def kobold_sample_static(
sorted_indices_to_remove,
)
return jnp.where(indices_to_remove, -jnp.inf, scores)
def sample_eps(scores: jnp.array) -> jnp.array:
probabilities = jax.nn.softmax(scores)
indices_to_remove = probabilities < (eps_cutoff * 1e-4)
# Seems like JAX doesn't like if-s, so it's done this way
topk_idx = jnp.argmax(probabilities)
indices_to_remove = indices_to_remove.at[topk_idx].set(False)
return jnp.where(
indices_to_remove, -jnp.inf, scores
)
def sample_eta(scores: jnp.array) -> jnp.array:
shifted_logits = jax.nn.log_softmax(scores)
probabilities = jnp.exp(shifted_logits)
neg_entropy = jnp.nansum(probabilities * shifted_logits)
eps = jax.lax.min(eta_cutoff * 1e-4, jnp.sqrt(eta_cutoff*1e-4)*jnp.exp(neg_entropy))
indices_to_remove = probabilities < eps
# Seems like JAX doesn't like if-s, so it's done this way
topk_idx = jnp.argmax(probabilities)
indices_to_remove = indices_to_remove.at[topk_idx].set(False)
return jnp.where(
indices_to_remove, -jnp.inf, scores
)
def sample_typical(scores: jnp.array) -> jnp.array:
@@ -442,8 +411,6 @@ def kobold_sample_static(
logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), sample_typical, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), sample_temperature, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: sample_repetition_penalty(*x), lambda x: x[0], (logits, *rpargs))
logits = jax.lax.cond(jnp.logical_and(k == 7, eps_cutoff > 0.0), sample_eps, lambda x: x, logits)
logits = jax.lax.cond(jnp.logical_and(k == 8, eta_cutoff > 0.0), sample_eta, lambda x: x, logits)
return jax.random.categorical(key, logits, -1).astype(jnp.uint32)
pad_token_id = 50256
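
The jax.lax.cond chain above replaces Python branching so the whole filter stack stays jittable; after this commit the conditions for ids 7 and 8 are simply gone. A toy, runnable sketch of the guard pattern for a single sampler:

import jax
import jax.numpy as jnp

def apply_temperature_if_active(k, logits, temp=0.5):
    # Sampler id 5 (temperature) fires only when temp != 1.0, mirroring
    # the dispatch above; any other id leaves the logits untouched here.
    return jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0),
                        lambda x: x / temp, lambda x: x, logits)

print(apply_temperature_if_active(5, jnp.array([1.0, 2.0, 3.0])))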
@@ -782,8 +749,6 @@ def infer_static(
repetition_penalty=1.0,
rpslope=0.0,
rprange=0,
eps_cutoff=0.0,
eta_cutoff=0.0,
numseqs=1,
gen_len=80,
soft_embeddings: Optional[np.array] = None,
@@ -794,7 +759,7 @@ def infer_static(
if sampler_order is None:
sampler_order = utils.default_sampler_order.copy()
sampler_order = sampler_order[:]
if len(sampler_order) < 9: # Add repetition penalty at beginning if it's not present
if len(sampler_order) < 7: # Add repetition penalty at beginning if it's not present
sampler_order = [6] + sampler_order
sampler_order = np.uint32(sampler_order)
total_batch = 1
@@ -816,9 +781,7 @@ def infer_static(
"repetition_penalty": repetition_penalty * np.ones(total_batch),
"rpslope": rpslope * np.ones(total_batch),
"rprange": np.full(total_batch, rprange, dtype=np.uint32),
"top_k": np.full(total_batch, top_k, dtype=np.uint32),
"eps_cutoff": eps_cutoff * np.ones(total_batch),
"eta_cutoff": eta_cutoff * np.ones(total_batch)
"top_k": np.full(total_batch, top_k, dtype=np.uint32)
}
output = network.generate_static(
batched_tokens,

View File

@@ -32,7 +32,7 @@ layers_module_names: Optional[List[str]] = None
module_names: Optional[List[str]] = None
named_buffers: Optional[List[tuple]] = None
default_sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
default_sampler_order = [6, 0, 1, 2, 3, 4, 5]
emit = None