diff --git a/aiserver.py b/aiserver.py index f8f7f85f..14298d68 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1,7 +1,7 @@ #!/usr/bin/python3 #==================================================================# # KoboldAI -# Version: 1.19.0 +# Version: 1.19.1 # By: The KoboldAI Community #==================================================================# @@ -723,7 +723,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.2.0", + version="1.2.1", prefixes=["/api/v1", "/api/latest"], tags=tags, ) @@ -873,6 +873,12 @@ def getmodelname(): modelname = koboldai_vars.model return modelname +#==================================================================# +# Get hidden size from model +#==================================================================# +def get_hidden_size_from_model(model): + return model.get_input_embeddings().embedding_dim + #==================================================================# # Breakmodel configuration functions #==================================================================# @@ -2702,9 +2708,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal return lazy_load_callback - def get_hidden_size_from_model(model): - return model.get_input_embeddings().embedding_dim - def maybe_low_cpu_mem_usage() -> Dict[str, Any]: if(packaging.version.parse(transformers_version) < packaging.version.parse("4.11.0")): logger.warning(f"Please upgrade to transformers 4.11.0 for lower RAM usage. You have transformers {transformers_version}.") @@ -3237,7 +3240,7 @@ def lua_startup(): except lupa.LuaError as e: print(colors.RED + "ERROR!" 
+ colors.END) koboldai_vars.lua_koboldbridge.obliterate_multiverse() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") exit(1) @@ -3297,7 +3300,7 @@ def load_lua_scripts(): if(koboldai_vars.serverstarted): emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True, room="UI_1") sendUSStatItems() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") if(koboldai_vars.serverstarted): @@ -3791,7 +3794,7 @@ def execute_inmod(): koboldai_vars.lua_running = False emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True, room="UI_1") sendUSStatItems() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") set_aibusy(0) @@ -3809,7 +3812,7 @@ def execute_outmod(): koboldai_vars.lua_running = False emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True, room="UI_1") sendUSStatItems() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") set_aibusy(0) @@ -5904,7 +5907,7 @@ def generate(txt, minimum, maximum, found_entries=None): 
koboldai_vars.lua_running = False emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True, room="UI_1") sendUSStatItems() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") else: @@ -6126,7 +6129,7 @@ def tpumtjgenerate(txt, minimum, maximum, found_entries=None): koboldai_vars.lua_running = False emit('from_server', {'cmd': 'errmsg', 'data': 'Lua script error; please check console.'}, broadcast=True, room="UI_1") sendUSStatItems() - logger.debug('LUA ERROR: ' + str(e).replace("\033", "")) + logger.error('LUA ERROR: ' + str(e).replace("\033", "")) logger.warning("Lua engine stopped; please open 'Userscripts' and press Load to reinitialize scripts.") socketio.emit("error", str(e), broadcast=True, room="UI_2") else: @@ -9792,7 +9795,7 @@ def prompt_validator(prompt: str): raise ValidationError("String does not match expected pattern.") class SubmissionInputSchema(KoboldSchema): - prompt: str = fields.String(required=True, validate=prompt_validator, metadata={"pattern": r"^.*\S.*$", "description": "This is the submission."}) + prompt: str = fields.String(required=True, validate=prompt_validator, metadata={"pattern": r"^[\S\s]*\S[\S\s]*$", "description": "This is the submission."}) disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, disables all input formatting options, overriding their individual enabled/disabled states."}) frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. 
When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action."}) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index f5527a96..e32a8fee 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1149,6 +1149,9 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo if param not in params: params[param] = default_params[param] + # Use an optimization that will allow us to avoid one extra transpose operation + params["transposed_linear"] = True + # Load tokenizer if koboldai_vars.model == "TPUMeshTransformerGPTNeoX": tokenizer = Tokenizer.from_file(os.path.join(path, "20B_tokenizer.json")) @@ -1308,8 +1311,9 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo tensor /= params["cores_per_replica"] if "vocab_pad" in transforms: tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"])) - if "no_transpose" not in transforms and tensor.ndim == 2: - tensor = tensor.T + # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config + #if "no_transpose" not in transforms and tensor.ndim == 2: + # tensor = tensor.T tensor.unsqueeze_(0) if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: tensor = tensor.bfloat16() diff --git a/userscripts/api_documentation.html b/userscripts/api_documentation.html index b581eb5f..462216c8 100644 --- a/userscripts/api_documentation.html +++ b/userscripts/api_documentation.html @@ -500,6 +500,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • @@ -574,6 +575,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • @@ -687,6 +689,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • diff --git a/userscripts/api_documentation.md b/userscripts/api_documentation.md index 198d272e..a50eb9c9 100644 --- a/userscripts/api_documentation.md +++ b/userscripts/api_documentation.md @@ -538,6 +538,7 @@ Computes the context that would be sent to the generator with the user's current * entries? (`KoboldWorldInfoEntry|table`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Defaults to all world info entries. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`; pass `false` to omit the author's note. ### Returns @@ -636,6 +637,7 @@ The same as calling `kobold.worldinfo:compute_context()` with this world info en * submission (`string`): String to use as simulated user's input after being formatted by input formatting. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`; pass `false` to omit the author's note. ### Returns @@ -819,6 +821,7 @@ Unlike `kobold.worldinfo:compute_context()`, this function doesn't include world * entries? 
(`KoboldWorldInfoEntry|table`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Entries that are not inside of the folder are still not included. Defaults to all world info entries in the folder. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the story. Defaults to `true`; pass `false` to omit the author's note. ### Returns