Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Undo broken samplers
aiserver.py (73 changed lines)
@@ -941,7 +941,7 @@ tags = [
 api_version = None # This gets set automatically so don't change this value
 
 api_v1 = KoboldAPISpec(
-    version="1.2.6",
+    version="1.2.5",
     prefixes=["/api/v1", "/api/latest"],
     tags=tags,
 )
@@ -1162,7 +1162,7 @@ def loadmodelsettings():
         koboldai_vars.nobreakmodel = js["nobreakmodel"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
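Note: before this commit the padding threshold was inconsistent across call sites (`< 8` here and in `processsettings`, `< 9` in `_generate_text`, `settings.__setattr__` and `infer_static` below); the revert settles all of them on `< 7`. A minimal standalone sketch of the rule, using a hypothetical helper name; the sampler IDs come from the UI maps later in this diff (0 Top K, 1 Top A, 2 Top P, 3 TFS, 4 Typical, 5 Temperature, 6 Repetition Penalty):

```python
def pad_sampler_order(sampler_order):
    # Hypothetical helper restating the padding rule above: legacy six-element
    # orders predate the repetition-penalty slot, so repetition penalty (ID 6)
    # is prepended to bring the list up to the full length of 7.
    if len(sampler_order) < 7:
        sampler_order = [6] + sampler_order
    return sampler_order

assert pad_sampler_order([0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]
assert pad_sampler_order([6, 0, 1, 2, 3, 4, 5]) == [6, 0, 1, 2, 3, 4, 5]
```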
@@ -1192,12 +1192,6 @@ def loadmodelsettings():
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
         koboldai_vars.default_preset['rep_pen_range'] = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps_cutoff = js["eps_cutoff"]
-        koboldai_vars.default_preset['eps_cutoff'] = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta_cutoff = js["eta_cutoff"]
-        koboldai_vars.default_preset['eta_cutoff'] = js["eta_cutoff"]
     if("adventure" in js):
         koboldai_vars.adventure = js["adventure"]
     if("chatmode" in js):
@@ -1260,7 +1254,7 @@ def processsettings(js):
         koboldai_vars.andepth = js["andepth"]
     if("sampler_order" in js):
         sampler_order = js["sampler_order"]
-        if(len(sampler_order) < 8):
+        if(len(sampler_order) < 7):
             sampler_order = [6] + sampler_order
         koboldai_vars.sampler_order = sampler_order
     if("temp" in js):
@@ -1281,10 +1275,6 @@ def processsettings(js):
         koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
     if("rep_pen_range" in js):
         koboldai_vars.rep_pen_range = js["rep_pen_range"]
-    if("eps_cutoff" in js):
-        koboldai_vars.eps = js["eps_cutoff"]
-    if("eta_cutoff" in js):
-        koboldai_vars.eta = js["eta_cutoff"]
     if("genamt" in js):
         koboldai_vars.genamt = js["genamt"]
     if("max_length" in js):
@@ -2261,8 +2251,6 @@ def lua_has_setting(setting):
         "setreppen",
         "setreppenslope",
         "setreppenrange",
-        "seteps_cutoff",
-        "seteta_cutoff",
         "settknmax",
         "setwidepth",
         "setuseprompt",
@@ -2283,8 +2271,6 @@ def lua_has_setting(setting):
         "reppen",
         "reppenslope",
         "reppenrange",
-        "eps_cutoff",
-        "eta_cutoff",
         "tknmax",
         "widepth",
         "useprompt",
@@ -2323,8 +2309,6 @@ def lua_get_setting(setting):
     if(setting in ("setreppen", "reppen")): return koboldai_vars.rep_pen
     if(setting in ("setreppenslope", "reppenslope")): return koboldai_vars.rep_pen_slope
     if(setting in ("setreppenrange", "reppenrange")): return koboldai_vars.rep_pen_range
-    if(setting in ("seteps_cutoff", "eps_cutoff")): return koboldai_vars.eps_cutoff
-    if(setting in ("seteta_cutoff", "eta_cutoff")): return koboldai_vars.eta_cutoff
     if(setting in ("settknmax", "tknmax")): return koboldai_vars.max_length
     if(setting == "anotedepth"): return koboldai_vars.andepth
     if(setting in ("setwidepth", "widepth")): return koboldai_vars.widepth
@@ -2363,8 +2347,6 @@ def lua_set_setting(setting, v):
     if(setting in ("setreppen", "reppen")): koboldai_vars.rep_pen = v
     if(setting in ("setreppenslope", "reppenslope")): koboldai_vars.rep_pen_slope = v
     if(setting in ("setreppenrange", "reppenrange")): koboldai_vars.rep_pen_range = v
-    if(setting in ("seteps_cutoff", "eps_cutoff")): koboldai_vars.eps_cutoff = v
-    if(setting in ("seteta_cutoff", "eta_cutoff")): koboldai_vars.eta_cutoff = v
     if(setting in ("settknmax", "tknmax")): koboldai_vars.max_length = v; return True
     if(setting == "anotedepth"): koboldai_vars.andepth = v; return True
     if(setting in ("setwidepth", "widepth")): koboldai_vars.widepth = v; return True
@@ -2788,16 +2770,6 @@ def get_message(msg):
         emit('from_server', {'cmd': 'setlabelreppenrange', 'data': msg['data']}, broadcast=True, room="UI_1")
         settingschanged()
         refresh_settings()
-    elif(msg['cmd'] == 'seteps_cutoff'):
-        koboldai_vars.eps_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleps_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
-    elif(msg['cmd'] == 'seteta_cutoff'):
-        koboldai_vars.eta_cutoff = float(msg['data'])
-        emit('from_server', {'cmd': 'setlabeleta_cutoff', 'data': msg['data']}, broadcast=True, room="UI_1")
-        settingschanged()
-        refresh_settings()
     elif(msg['cmd'] == 'setoutput'):
         koboldai_vars.genamt = int(msg['data'])
         emit('from_server', {'cmd': 'setlabeloutput', 'data': msg['data']}, broadcast=True, room="UI_1")
@@ -2949,7 +2921,7 @@ def get_message(msg):
     elif(msg['cmd'] == 'samplers'):
         sampler_order = msg["data"]
         sampler_order_min_length = 6
-        sampler_order_max_length = 9
+        sampler_order_max_length = 7
         if(not isinstance(sampler_order, list)):
             raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
         if(not (sampler_order_min_length <= len(sampler_order) <= sampler_order_max_length)):
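The handler above now accepts orders of length 6 or 7 only. Combined with the permutation requirement stated in the `sampler_order` schema description further down, the full validation amounts to the following sketch (`validate_sampler_order` is a hypothetical name; the real `permutation_validator` body is not shown in this diff):

```python
def validate_sampler_order(sampler_order):
    # Restates the checks from the 'samplers' handler above plus the
    # permutation rule from the API schema description below.
    if not isinstance(sampler_order, list):
        raise ValueError(f"Sampler order must be a list, but got a {type(sampler_order)}")
    if not (6 <= len(sampler_order) <= 7):
        raise ValueError("Sampler order must contain between 6 and 7 elements")
    if sorted(sampler_order) != list(range(len(sampler_order))):
        raise ValueError("Sampler order must be a permutation of the first N non-negative integers")
    return sampler_order
```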
@@ -3527,8 +3499,6 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum):
         repetition_penalty=koboldai_vars.rep_pen,
         rpslope=koboldai_vars.rep_pen_slope,
         rprange=koboldai_vars.rep_pen_range,
-        eps_cutoff=koboldai_vars.eps_cutoff,
-        eta_cutoff=koboldai_vars.eta_cutoff,
         soft_embeddings=koboldai_vars.sp,
         soft_tokens=soft_tokens,
         sampler_order=koboldai_vars.sampler_order,
@@ -4176,8 +4146,6 @@ def refresh_settings():
     socketio.emit('from_server', {'cmd': 'updatereppen', 'data': koboldai_vars.rep_pen}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenslope', 'data': koboldai_vars.rep_pen_slope}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatereppenrange', 'data': koboldai_vars.rep_pen_range}, broadcast=True, room="UI_1")
-    socketio.emit('from_server', {'cmd': 'updateeps_cutoff', 'data': koboldai_vars.eps_cutoff}, broadcast=True, room="UI_1")
-    socketio.emit('from_server', {'cmd': 'updateeta_cutoff', 'data': koboldai_vars.eta_cutoff}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updateoutlen', 'data': koboldai_vars.genamt}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatetknmax', 'data': koboldai_vars.max_length}, broadcast=True, room="UI_1")
     socketio.emit('from_server', {'cmd': 'updatenumseq', 'data': koboldai_vars.numseqs}, broadcast=True, room="UI_1")
@@ -7176,7 +7144,7 @@ def UI_2_load_cookies():
 def UI_2_save_new_preset(data):
     preset = model_info()
     #Data to get from current settings
-    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical", "eps_cutoff", "eta_cutoff"]:
+    for item in ["genamt", "rep_pen", "rep_pen_range", "rep_pen_slope", "sampler_order", "temp", "tfs", "top_a", "top_k", "top_p", "typical"]:
         preset[item] = getattr(koboldai_vars, item)
     #Data to get from UI
     for item in ['preset', 'description']:
@@ -8161,9 +8129,6 @@ class SamplerSettingsSchema(KoboldSchema):
     tfs: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Tail free sampling value."})
     typical: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1), metadata={"description": "Typical sampling value."})
     temperature: Optional[float] = fields.Float(validate=validate.Range(min=0, min_inclusive=False), metadata={"description": "Temperature value."})
-    eps_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0, max=1000.0), metadata={"description": "Epsilon sampling value."})
-    eta_cutoff: Optional[float] = fields.Float(validate=validate.Range(min=0,), metadata={"description": "Eta sampling value."})
-
 
 def soft_prompt_validator(soft_prompt: str):
     if len(soft_prompt.strip()) == 0:
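For context, the two deleted fields are ordinary marshmallow range-validated floats. A minimal sketch of how such a schema rejects out-of-range input (a standalone toy schema, not the project's own class):

```python
from marshmallow import Schema, ValidationError, fields, validate

class MiniSamplerSchema(Schema):
    # Mirrors the two fields removed above, for illustration only.
    eps_cutoff = fields.Float(validate=validate.Range(min=0, max=1000.0))
    eta_cutoff = fields.Float(validate=validate.Range(min=0))

try:
    MiniSamplerSchema().load({"eps_cutoff": 2000.0})
except ValidationError as err:
    print(err.messages)  # reports a range violation for eps_cutoff
```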
@@ -8214,7 +8179,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
     frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
     quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
-    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 8 and the array must be a permutation of the first N non-negative integers."})
+    sampler_order: Optional[List[int]] = fields.List(fields.Integer(), validate=[validate.Length(min=6), permutation_validator], metadata={"description": "Sampler order to be used. If N is the length of this array, then N must be greater than or equal to 6 and the array must be a permutation of the first N non-negative integers."})
     sampler_seed: Optional[int] = fields.Integer(validate=validate.Range(min=0, max=2**64 - 1), metadata={"description": "RNG seed to use for sampling. If not specified, the global RNG will be used."})
     sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
     stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."})
@@ -8332,7 +8297,7 @@ def _generate_text(body: GenerationInputSchema):
         torch.manual_seed(body.sampler_seed)
         koboldai_vars.rng_states[body.sampler_seed] = tpu_mtj_backend.get_rng_state() if koboldai_vars.use_colab_tpu else torch.get_rng_state()
     if hasattr(body, "sampler_order"):
-        if len(body.sampler_order) < 9:
+        if len(body.sampler_order) < 7:
             body.sampler_order = [6] + body.sampler_order
     # This maps each property of the setting to use when sending the generate idempotently
     # to the object which typically contains its value
@@ -8350,8 +8315,6 @@ def _generate_text(body: GenerationInputSchema):
|
||||
"tfs": ("koboldai_vars", "tfs", None),
|
||||
"typical": ("koboldai_vars", "typical", None),
|
||||
"temperature": ("koboldai_vars", "temp", None),
|
||||
"eps_cutoff": ("koboldai_vars", "eps_cutoff", None),
|
||||
"eta_cutoff": ("koboldai_vars", "eta_cutoff", None),
|
||||
"frmtadsnsp": ("koboldai_vars", "frmtadsnsp", "input"),
|
||||
"frmttriminc": ("koboldai_vars", "frmttriminc", "output"),
|
||||
"frmtrmblln": ("koboldai_vars", "frmtrmblln", "output"),
|
||||
@@ -10797,26 +10760,6 @@ class TemperatureSamplingSettingSchema(KoboldSchema):
         name = "temperature"
         example_yaml_value = "0.5"
 
-@config_endpoint_schema
-class EpsilonSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0, max=1000), required=True)
-    class KoboldMeta:
-        route_name = "eps_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eps_cutoff"
-        name = "Epsilon sampling"
-        example_yaml_value = "0.0"
-
-@config_endpoint_schema
-class EtaSamplingSettingSchema(KoboldSchema):
-    value = fields.Float(validate=validate.Range(min=0), required=True)
-    class KoboldMeta:
-        route_name = "eta_cutoff"
-        obj = "koboldai_vars"
-        var_name = "eta_cutoff"
-        name = "Eta sampling"
-        example_yaml_value = "0.0"
-
 @config_endpoint_schema
 class GensPerActionSettingSchema(KoboldSchema):
     value = fields.Integer(validate=validate.Range(min=0, max=5), required=True)
@@ -10925,7 +10868,7 @@ class SamplerOrderSettingSchema(KoboldSchema):
         obj = "koboldai_vars"
         var_name = "sampler_order"
         name = "sampler order"
-        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5, 7, 8]"
+        example_yaml_value = "[6, 0, 1, 2, 3, 4, 5]"
 
 @config_endpoint_schema
 class SamplerFullDeterminismSettingSchema(KoboldSchema):
@@ -23,9 +23,7 @@ def get_prompt(user_msg):
     "top_k": 0, # Keep the X most probable tokens
     "top_p": 0.9, # Top P sampling / Nucleus Sampling, https://arxiv.org/pdf/1904.09751.pdf
     "typical": 1.0, # Typical Sampling, https://arxiv.org/pdf/2202.00666.pdf
-    "eps": 0.0, # Discard tokens with low probability, from https://arxiv.org/pdf/2210.15191.pdf
-    "eta": 0.0, # Entropy adaptive epsilon, from the same work as epsilon
-    "sampler_order": [6,0,7,1,3,8,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
+    "sampler_order": [6,0,1,3,4,2,5], # Order to apply the samplers, our default in this script is already the optimal one. KoboldAI Lite contains an easy list of what the
     "stop_sequence": [f"{user}"], # When should the AI stop generating? In this example we stop when it tries to speak on behalf of the user.
     #"sampler_seed": 1337, # Use specific seed for text generation? This helps with consistency across tests.
     "singleline": "False", # Only return a response that fits on a single line, this can help with chatbots but also makes them less verbose
bridge.lua

@@ -890,8 +890,6 @@ return function(_python, _bridged)
 ---@field reppen number
 ---@field reppenslope number
 ---@field reppenrange number
----@field eps_cutoff number
----@field eta_cutoff number
 ---@field tknmax integer
 ---@field widepth integer
 ---@field useprompt boolean
gensettings.py

@@ -176,38 +176,6 @@ gensettingstf = [
     "name": "use_alt_rep_pen",
     "ui_level": 2
     },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Epsilon Sampling",
-    "id": "seteps_cutoff",
-    "min": 0.0,
-    "max": 9.0,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps. (Put this value on 0 to disable its effect)",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eps_cutoff",
-    "ui_level": 1
-    },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Eta Sampling",
-    "id": "seteta_cutoff",
-    "min": 0.0,
-    "max": 20,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output. (Put this value on 0 to disable its effect)",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eta_cutoff",
-    "ui_level": 1
-    },
     {
     "uitype": "slider",
     "unit": "int",
@@ -1117,36 +1085,6 @@ gensettingsik =[{
     "classname": "model",
     "name": "tfs"
     },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Epsilon Sampling",
-    "id": "seteps_cutoff",
-    "min": 0.0,
-    "max": 9.0,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Discards tokens with probabilities under eps.",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eps_cutoff",
-    },
-    {
-    "uitype": "slider",
-    "unit": "float",
-    "label": "Eta Sampling",
-    "id": "seteta_cutoff",
-    "min": 0.0,
-    "max": 20,
-    "step": 0.01,
-    "default": 0.0,
-    "tooltip": "Slider is in units of 1e-4. Eta sampling is a variant of epsilon sampling that adaptively estimates eps based on entropy of the output.",
-    "menu_path": "Settings",
-    "sub_path": "Sampling",
-    "classname": "model",
-    "name": "eta_cutoff",
-    },
     {
     "uitype": "slider",
     "unit": "int",
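Both deleted sliders store their value in units of 1e-4, and the warpers removed further down multiply by 1e-4 before comparing against token probabilities. A short illustration with an arbitrary value:

```python
ui_value = 3.0                  # hypothetical slider position for eps_cutoff
threshold = ui_value * 1e-4     # probability cutoff actually applied
# tokens whose softmax probability falls below 3e-4 would be discarded
```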
koboldai_settings.py

@@ -614,7 +614,7 @@ class settings(object):
         start_time = time.time()
         if key in self.__dict__ and key not in self.no_save_variables:
             if key == 'sampler_order':
-                if(len(value) < 9):
+                if(len(value) < 7):
                     value = [6] + value
             elif key == 'autosave':
                 autosave = value
@@ -669,9 +669,8 @@ class model_settings(settings):
                          'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition',
                          'badwordsids', 'uid_presets', 'model', 'model_type', 'lazy_load', 'fp32_model', 'modeldim', 'horde_wait_time', 'horde_queue_position', 'horde_queue_size', 'newlinemode', 'tqdm_progress', 'tqdm_rem_time', '_tqdm']
     settings_name = "model"
-    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048,
-                        "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "eps_cutoff": 0.0, "eta_cutoff": 0.0,
-                        "sampler_order": [6,0,7,1,3,8,4,2,5]}
+    default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 1.0, "rep_pen_range": 2048, "temp": 0.5, "top_p": 0.9, "top_k": 0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0,
+                        "sampler_order": [6,0,1,2,3,4,5]}
     def __init__(self, socketio, koboldai_vars):
         self.enable_whitelist = False
         self._socketio = socketio
@@ -722,14 +721,12 @@ class model_settings(settings):
         self.top_a       = 0.0     # Default generator top-a
         self.tfs         = 1.0     # Default generator tfs (tail-free sampling)
         self.typical     = 1.0     # Default generator typical sampling threshold
-        self.eps_cutoff  = 0.0     # Default generator epsilon_cutoff
-        self.eta_cutoff  = 0.0     # Default generator eta_cutoff
         self.numseqs     = 1       # Number of sequences to ask the generator to create
         self.generated_tkns = 0    # If using a backend that supports Lua generation modifiers, how many tokens have already been generated, otherwise 0
         self.badwordsids = []
         self.fp32_model  = False   # Whether or not the most recently loaded HF model was in fp32 format
         self.modeldim    = -1      # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B)
-        self.sampler_order = [6, 0, 1, 2, 3, 4, 5, 7, 8]
+        self.sampler_order = [6, 0, 1, 2, 3, 4, 5]
         self.newlinemode = "n"
         self.presets     = []      # Holder for presets
         self.selected_preset = ""
@@ -761,8 +758,6 @@ class model_settings(settings):
         self.top_a       = 0.0
         self.tfs         = 1.0
         self.typical     = 1.0
-        self.eps_cutoff  = 0.0
-        self.eta_cutoff  = 0.0
         self.rep_pen_range = 1024
         self.rep_pen_slope = 0.7
 
@@ -2768,8 +2763,6 @@ default_preset = {
     "rep_pen": 1.1,
     "rep_pen_range": 1024,
     "rep_pen_slope": 0.7,
-    "eps_cutoff": 0.0,
-    "eta_cutoff": 0.0,
     "sampler_order": [
         6,
         0,
@@ -2777,9 +2770,7 @@ default_preset = {
         2,
         3,
         4,
-        5,
-        7,
-        8
+        5
     ]
 }
 badwordsids_default = [[6880], [50256], [42496], [4613], [17414], [22039], [16410], [27], [29], [38430], [37922], [15913], [24618], [28725], [58], [47175], [36937], [26700], [12878], [16471], [37981], [5218], [29795], [13412], [45160], [3693], [49778], [4211], [20598], [36475], [33409], [44167], [32406], [29847], [29342], [42669], [685], [25787], [7359], [3784], [5320], [33994], [33490], [34516], [43734], [17635], [24293], [9959], [23785], [21737], [28401], [18161], [26358], [32509], [1279], [38155], [18189], [26894], [6927], [14610], [23834], [11037], [14631], [26933], [46904], [22330], [25915], [47934], [38214], [1875], [14692], [41832], [13163], [25970], [29565], [44926], [19841], [37250], [49029], [9609], [44438], [16791], [17816], [30109], [41888], [47527], [42924], [23984], [49074], [33717], [31161], [49082], [30138], [31175], [12240], [14804], [7131], [26076], [33250], [3556], [38381], [36338], [32756], [46581], [17912], [49146]] # Tokenized array of badwords used to prevent AI artifacting
modeling/inference_model.py

@@ -134,8 +134,6 @@ class GenerationSettings:
             "rep_pen",
             "rep_pen_slope",
             "rep_pen_range",
-            "eps_cutoff",
-            "eta_cutoff",
             "sampler_order",
         ]:
             setattr(
modeling/inference_models/hf_mtj/class.py

@@ -140,7 +140,7 @@ class model_backend(HFInferenceModel):
         def mtj_settings_callback() -> dict:
             sampler_order = utils.koboldai_vars.sampler_order[:]
             if (
-                len(sampler_order) < 9
+                len(sampler_order) < 7
             ):  # Add repetition penalty at beginning if it's not present
                 sampler_order = [6] + sampler_order
             return {
@@ -154,8 +154,6 @@ class model_backend(HFInferenceModel):
                 "repetition_penalty": float(utils.koboldai_vars.rep_pen),
                 "rpslope": float(utils.koboldai_vars.rep_pen_slope),
                 "rprange": int(utils.koboldai_vars.rep_pen_range),
-                "eps_cutoff": float(utils.koboldai_vars.eps_cutoff),
-                "eta_cutoff": float(utils.koboldai_vars.eta_cutoff),
             }
 
         tpu_mtj_backend.socketio = utils.socketio
@@ -295,8 +293,6 @@ class model_backend(HFInferenceModel):
             tfs=gen_settings.tfs,
             typical=gen_settings.typical,
             top_a=gen_settings.top_a,
-            eps_cutoff=gen_settings.eps_cutoff,
-            eta_cutoff=gen_settings.eta_cutoff,
             numseqs=batch_count,
             repetition_penalty=gen_settings.rep_pen,
             rpslope=gen_settings.rep_pen_slope,
modeling/warpers.py

@@ -68,8 +68,6 @@ def update_settings():
     RepetitionPenalty.rep_pen_range = koboldai_vars.rep_pen_range
     RepetitionPenalty.rep_pen_slope = koboldai_vars.rep_pen_slope
     RepetitionPenalty.use_alt_rep_pen = koboldai_vars.use_alt_rep_pen
-    Epsilon.epsilon = koboldai_vars.eps_cutoff
-    Eta.eta = koboldai_vars.eta_cutoff
 
 
 class Warper:
@@ -105,8 +103,6 @@ class Warper:
             4: Typical,
             5: Temperature,
             6: RepetitionPenalty,
-            7: Epsilon,
-            8: Eta,
         }[warper_id]
 
     @classmethod
@@ -544,92 +540,3 @@ class RepetitionPenalty(Warper):
     @classmethod
     def value_is_valid(cls) -> bool:
         return cls.rep_pen != 1.0
-
-
-class Epsilon(Warper):
-    """
-    Epsilon sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    epsilon: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        # Probably the simplest sampler there is, just remove tokens with probs under a threshold
-        probs = scores.softmax(dim=-1)
-
-        indices_to_remove = probs < (cls.epsilon * 1e-4)
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        probabilities = np.array(jax.nn.softmax(scores), copy=True)
-
-        indices_to_remove = probabilities < (cls.epsilon * 1e-4)
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.epsilon > 0.0
-
-
-class Eta(Warper):
-    """
-    Eta sampling, as described in https://arxiv.org/pdf/2210.15191.pdf
-    """
-
-    eta: float = 0.0
-
-    @classmethod
-    def torch(cls, scores: torch.Tensor) -> torch.Tensor:
-        shifted_logits = torch.log_softmax(scores, dim=-1)
-        probs = shifted_logits.exp()
-
-        neg_entropy = (probs * shifted_logits).nansum(dim=-1, keepdim=True)
-        epsilon = torch.min(torch.tensor(cls.eta * 1e-4), torch.sqrt(torch.tensor(cls.eta*1e-4))*torch.exp(neg_entropy))
-
-        indices_to_remove = probs < epsilon
-
-        # hack to avoid nulling out all the logits for misconfigured sampler param
-        # equivalent to keep_min_k=1, as is default in hf transformers implementation
-        # implemented this way to be more easily modifiable to fallback to topk for a configurable k
-        if(torch.all(indices_to_remove)):
-            topk_prob = torch.max(probs)
-            indices_to_remove = probs < topk_prob
-
-        scores = scores.masked_fill(indices_to_remove, -torch.inf)
-        return scores
-
-    @classmethod
-    def jax_dynamic(cls, scores: np.array) -> np.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = np.exp(shifted_logits)
-        neg_entropy = np.nansum(probabilities * shifted_logits)
-        epsilon = min(cls.eta * 1e-4, np.sqrt(cls.eta*1e-4)*np.exp(neg_entropy))
-
-        indices_to_remove = probabilities < epsilon
-        if(np.all(indices_to_remove)):
-            topk_prob = np.max(probabilities)
-            indices_to_remove = probabilities < topk_prob
-
-        return np.where(
-            indices_to_remove, -np.inf, scores
-        )
-
-    @classmethod
-    def value_is_valid(cls) -> bool:
-        return cls.eta > 0.0
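For reference, the math the two deleted warpers implemented, restated as a standalone NumPy sketch (hypothetical helper functions; both mirror the torch/jax code removed above, including the units-of-1e-4 convention):

```python
import numpy as np

def eps_cutoff_mask(logits, eps_cutoff):
    # Epsilon sampling: mark tokens whose probability falls below
    # eps_cutoff * 1e-4 for removal.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    return probs < (eps_cutoff * 1e-4)

def eta_cutoff_mask(logits, eta_cutoff):
    # Eta sampling: the threshold adapts to the entropy H of the distribution,
    # eps = min(eta, sqrt(eta) * exp(-H)) with eta = eta_cutoff * 1e-4, so
    # confident (low-entropy) distributions are pruned more aggressively.
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    neg_entropy = np.nansum(probs * np.log(probs))
    eta = eta_cutoff * 1e-4
    return probs < min(eta, np.sqrt(eta) * np.exp(neg_entropy))
```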
static/application.js

@@ -1315,8 +1315,6 @@ function buildSamplerList(samplers) {
         "Typical Sampling",
         "Temperature",
         "Repetition Penalty",
-        "Epsilon Sampling",
-        "Eta Sampling"
     ]
     for(i=0; i<samplers.length; i++) {
         samplerslist.append("<div class=\"flex\">\
@@ -2642,14 +2640,6 @@ $(document).ready(function(){
             // Send current rep pen value to input
             $("#setreppenrangecur").val(msg.data);
             $("#setreppenrange").val(parseFloat(msg.data)).trigger("change");
-        } else if(msg.cmd == "updateeps_cutoff") {
-            // Send current epsilon value to input
-            $("#seteps_cutoffcurr").val(msg.data);
-            $("#seteps_cutoff").val(parseFloat(msg.data)).trigger("change");
-        } else if(msg.cmd == "updateeta_cutoff") {
-            // Send current eta value to input
-            $("#seteta_cutoffcur").val(msg.data);
-            $("#seteta_cutoff").val(parseFloat(msg.data)).trigger("change");
         } else if(msg.cmd == "updateoutlen") {
             // Send current output amt value to input
             $("#setoutputcur").val(msg.data);
@@ -2689,12 +2679,6 @@ $(document).ready(function(){
         } else if(msg.cmd == "setlabelreppenrange") {
             // Update setting label with value from server
             $("#setreppenrangecur").val(msg.data);
-        } else if(msg.cmd == "setlabeleps_cutoff") {
-            // Update setting label with value from server
-            $("#seteps_cutoffcur").val(msg.data);
-        } else if(msg.cmd == "setlabeleta_cutoff") {
-            // Update setting label with value from server
-            $("#seteta_cutoffcur").val(msg.data);
         } else if(msg.cmd == "setlabeloutput") {
             // Update setting label with value from server
             $("#setoutputcur").val(msg.data);
static/koboldai.js

@@ -250,8 +250,6 @@ map1.set('Tail Free Sampling', 3)
 map1.set('Typical Sampling', 4)
 map1.set('Temperature', 5)
 map1.set('Repetition Penalty', 6)
-map1.set('Epsilon Sampling', 7)
-map1.set('Eta Sampling', 8)
 const map2 = new Map()
 map2.set(0, 'Top K Sampling')
 map2.set(1, 'Top A Sampling')
@@ -260,8 +258,6 @@ map2.set(3, 'Tail Free Sampling')
 map2.set(4, 'Typical Sampling')
 map2.set(5, 'Temperature')
 map2.set(6, 'Repetition Penalty')
-map2.set(7, 'Epsilon Sampling')
-map2.set(8, 'Eta Sampling')
 var calc_token_usage_timeout;
 var game_text_scroll_timeout;
 var auto_loader_timeout;
@@ -176,8 +176,6 @@
                 <li class="sample_order cursor" onclick="select_sample(this);">Tail Free Sampling</li>
                 <li class="sample_order cursor" onclick="select_sample(this);">Typical Sampling</li>
                 <li class="sample_order cursor" onclick="select_sample(this);">Temperature</li>
-                <li class="sample_order cursor" onclick="select_sample(this);">Epsilon Sampling</li>
-                <li class="sample_order cursor" onclick="select_sample(this);">Eta Sampling</li>
             </ul>
             <div style="display:flex;flex-direction:column;margin-top: 25px;">
                 <div class="material-icons-outlined cursor" onclick="move_sample('up');">arrow_upward</div>
tpu_mtj_backend.py

@@ -104,8 +104,6 @@ def settings_callback() -> dict:
         "repetition_penalty": 1.0,
         "rpslope": 0.0,
         "rprange": 0,
-        "eps_cutoff": 0.0,
-        "eta_cutoff": 0.0,
     }
 
 def started_compiling_callback() -> None:
@@ -210,10 +208,10 @@ def apply_repetition_penalty_dynamic(logits, tokens, repetition_penalty, generated_index, gen_length, rpslope, rprange):
     logits[tokens] = penalty_logits
     return logits
 
-def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0, eps_cutoff=0.0, eta_cutoff=0.0):
+def kobold_sample_dynamic(key, logits, rpargs, sampler_order: Optional[np.ndarray] = None, top_p=0.9, temp=0.5, top_k=0, tfs=1.0, typical=1.0, top_a=0.0):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
     for sid in jnp.array(sampler_order, int):
@@ -245,12 +243,10 @@ def kobold_sample_static(
     tfs=1.0,
     typical=1.0,
     top_a=0.0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0
 ):
     '''
-    This gets called by generate_loop_fn to apply a series of 8 filters
-    to the logits (top-k, eps, top-a, top-p, TFS, eta, typical, temperature)
+    This gets called by generate_loop_fn to apply a series of 6 filters
+    to the logits (top-k, then top-a, then top-p, then TFS, then typical, then temperature)
     before picking one token using the modified logits
     '''
 
@@ -311,33 +307,6 @@ def kobold_sample_static(
             sorted_indices_to_remove,
         )
         return jnp.where(indices_to_remove, -jnp.inf, scores)
-
-    def sample_eps(scores: jnp.array) -> jnp.array:
-        probabilities = jax.nn.softmax(scores)
-        indices_to_remove = probabilities < (eps_cutoff * 1e-4)
-
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
-
-    def sample_eta(scores: jnp.array) -> jnp.array:
-        shifted_logits = jax.nn.log_softmax(scores)
-        probabilities = jnp.exp(shifted_logits)
-        neg_entropy = jnp.nansum(probabilities * shifted_logits)
-
-        eps = jax.lax.min(eta_cutoff * 1e-4, jnp.sqrt(eta_cutoff*1e-4)*jnp.exp(neg_entropy))
-        indices_to_remove = probabilities < eps
-        # Seems like JAX doesn't like if-s, so it's done this way
-        topk_idx = jnp.argmax(probabilities)
-        indices_to_remove = indices_to_remove.at[topk_idx].set(False)
-
-        return jnp.where(
-            indices_to_remove, -jnp.inf, scores
-        )
 
 
     def sample_typical(scores: jnp.array) -> jnp.array:
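The removed JAX helpers differ from the torch versions in one detail called out in their comments: JIT-traced JAX code cannot branch on the whole mask with an `if`, so they always unmask the single most probable token using a functional index update. A NumPy restatement of that fallback (hypothetical helper name):

```python
import numpy as np

def keep_top1(indices_to_remove, probabilities):
    # Branch-free equivalent of the top-1 fallback in the deleted sample_eps /
    # sample_eta: the most probable token is always kept, so the cutoff can
    # never null out the entire distribution.
    mask = indices_to_remove.copy()
    mask[np.argmax(probabilities)] = False
    return mask
```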
@@ -442,8 +411,6 @@ def kobold_sample_static(
         logits = jax.lax.cond(jnp.logical_and(k == 4, typical < 1.0), sample_typical, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 5, temp != 1.0), sample_temperature, lambda x: x, logits)
         logits = jax.lax.cond(jnp.logical_and(k == 6, rpargs[1] != 1.0), lambda x: sample_repetition_penalty(*x), lambda x: x[0], (logits, *rpargs))
-        logits = jax.lax.cond(jnp.logical_and(k == 7, eps_cutoff > 0.0), sample_eps, lambda x: x, logits)
-        logits = jax.lax.cond(jnp.logical_and(k == 8, eta_cutoff > 0.0), sample_eta, lambda x: x, logits)
     return jax.random.categorical(key, logits, -1).astype(jnp.uint32)
 
 pad_token_id = 50256
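Because `kobold_sample_static` runs inside the JIT-compiled generation loop, each sampler is gated through `jax.lax.cond` rather than a Python `if`, with a disabled sampler acting as the identity. The dispatch shape, restated as plain Python with hypothetical names:

```python
def apply_sampler_chain(logits, sampler_order, filters):
    # filters maps sampler ID -> (is_active, transform), mirroring the
    # jax.lax.cond chain above where an inactive sampler passes logits through.
    for sampler_id in sampler_order:
        is_active, transform = filters[sampler_id]
        if is_active:
            logits = transform(logits)
    return logits
```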
@@ -782,8 +749,6 @@ def infer_static(
     repetition_penalty=1.0,
     rpslope=0.0,
     rprange=0,
-    eps_cutoff=0.0,
-    eta_cutoff=0.0,
     numseqs=1,
     gen_len=80,
     soft_embeddings: Optional[np.array] = None,
@@ -794,7 +759,7 @@ def infer_static(
     if sampler_order is None:
         sampler_order = utils.default_sampler_order.copy()
     sampler_order = sampler_order[:]
-    if len(sampler_order) < 9:  # Add repetition penalty at beginning if it's not present
+    if len(sampler_order) < 7:  # Add repetition penalty at beginning if it's not present
         sampler_order = [6] + sampler_order
     sampler_order = np.uint32(sampler_order)
     total_batch = 1
@@ -816,9 +781,7 @@ def infer_static(
         "repetition_penalty": repetition_penalty * np.ones(total_batch),
         "rpslope": rpslope * np.ones(total_batch),
         "rprange": np.full(total_batch, rprange, dtype=np.uint32),
-        "top_k": np.full(total_batch, top_k, dtype=np.uint32),
-        "eps_cutoff": eps_cutoff * np.ones(total_batch),
-        "eta_cutoff": eta_cutoff * np.ones(total_batch)
+        "top_k": np.full(total_batch, top_k, dtype=np.uint32)
     }
     output = network.generate_static(
         batched_tokens,