diff --git a/aiserver.py b/aiserver.py index d883640a..0d3ea356 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3046,6 +3046,8 @@ def lua_compute_context(submission, entries, folders, kwargs): force_use_txt=True, scan_story=kwargs["scan_story"] if kwargs["scan_story"] != None else True, ) + if kwargs["include_anote"] is not None and not kwargs["include_anote"]: + anotetxt = "" txt, _, _ = calcsubmitbudget( len(actions), winfo, diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index d992ba45..99efa05f 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1148,6 +1148,9 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo if param not in params: params[param] = default_params[param] + # Use an optimization that will allow us to avoid one extra transpose operation + params["transposed_linear"] = True + # Load tokenizer if vars.model == "TPUMeshTransformerGPTNeoX": tokenizer = Tokenizer.from_file(os.path.join(path, "20B_tokenizer.json")) @@ -1305,8 +1308,9 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo tensor /= params["cores_per_replica"] if "vocab_pad" in transforms: tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"])) - if "no_transpose" not in transforms and tensor.ndim == 2: - tensor = tensor.T + # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config + #if "no_transpose" not in transforms and tensor.ndim == 2: + # tensor = tensor.T tensor.unsqueeze_(0) if tensor.dtype is torch.float16 or tensor.dtype is torch.float32: tensor = tensor.bfloat16() diff --git a/userscripts/api_documentation.html b/userscripts/api_documentation.html index b581eb5f..462216c8 100644 --- a/userscripts/api_documentation.html +++ b/userscripts/api_documentation.html @@ -500,6 +500,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • @@ -574,6 +575,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • @@ -687,6 +689,7 @@
  • kwargs? (table<string, any>): Table of optional keyword arguments from the following list. Defaults to {}.
  • diff --git a/userscripts/api_documentation.md b/userscripts/api_documentation.md index 198d272e..a50eb9c9 100644 --- a/userscripts/api_documentation.md +++ b/userscripts/api_documentation.md @@ -538,6 +538,7 @@ Computes the context that would be sent to the generator with the user's current * entries? (`KoboldWorldInfoEntry|table`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Defaults to all world info entries. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the computed context. Defaults to `true`; pass `false` to omit the author's note. ### Returns @@ -636,6 +637,7 @@ The same as calling `kobold.worldinfo:compute_context()` with this world info en * submission (`string`): String to use as simulated user's input after being formatted by input formatting. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the computed context. Defaults to `true`; pass `false` to omit the author's note. ### Returns @@ -819,6 +821,7 @@ Unlike `kobold.worldinfo:compute_context()`, this function doesn't include world * entries? 
(`KoboldWorldInfoEntry|table`): A `KoboldWorldInfoEntry` or table thereof that indicates an allowed subset of world info entries to include in the context. Entries that are not inside of the folder are still not included. Defaults to all world info entries in the folder. * kwargs? (`table`): Table of optional keyword arguments from the following list. Defaults to `{}`. * scan_story? (`boolean`): Whether or not to scan the past few actions of the story for world info keys in addition to the submission like how world info normally behaves. If this is set to `false`, only the `submission` is scanned for world info keys. Defaults to `true`. + * include_anote? (`boolean`): Whether to include the author's note in the computed context. Defaults to `true`; pass `false` to omit the author's note. ### Returns