From efe268df605b3bb8f8ba4b2ac9fd161b672ef7e4 Mon Sep 17 00:00:00 2001
From: somebody
Date: Tue, 2 May 2023 20:14:10 -0500
Subject: [PATCH 1/3] Move overrides to better places

---
 modeling/inference_model.py           |  6 ------
 modeling/inference_models/hf.py       | 17 +++++++++++++++++
 modeling/inference_models/hf_torch.py |  9 ---------
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 8d0c5294..886c7e5e 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -197,12 +197,6 @@ class InferenceModel:
         Returns:
             AutoTokenizer: Tokenizer deemed fit for the location string. May be a fallback tokenizer.
         """
-        if utils.koboldai_vars.model_type == "xglm":
-            # Default to </s> newline mode if using XGLM
-            utils.koboldai_vars.newlinemode = "s"
-        elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
-            # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
-            utils.koboldai_vars.newlinemode = "ns"
 
         std_kwargs = {"revision": utils.koboldai_vars.revision, "cache_dir": "cache"}
 
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index eac5284f..63c0a40d 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -32,6 +32,23 @@ class HFInferenceModel(InferenceModel):
         if utils.koboldai_vars.newlinemode == "n":
             utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
 
+        # These are model specific tokenizer overrides if a model has bad defaults
+        if utils.koboldai_vars.model_type == "llama":
+            self.tokenizer.decode_with_prefix_space = True
+            self.tokenizer.add_bos_token = False
+        elif utils.koboldai_vars.model_type == "opt":
+            self.tokenizer._koboldai_header = self.tokenizer.encode("</s>")
+            self.tokenizer.add_bos_token = False
+            self.tokenizer.add_prefix_space = False
+
+        # Change newline behavior to match model quirks
+        if utils.koboldai_vars.model_type == "xglm":
+            # Default to </s> newline mode if using XGLM
+            utils.koboldai_vars.newlinemode = "s"
+        elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
+            # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
+            utils.koboldai_vars.newlinemode = "ns"
+
         return super()._post_load()
 
     def get_local_model_path(
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 1997e7fe..49cdfc0f 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -132,15 +132,6 @@ class HFTorchInferenceModel(HFInferenceModel):
         if not utils.koboldai_vars.model_type:
             utils.koboldai_vars.model_type = m_self.get_model_type()
 
-        # These are model specific overrides if a model has bad defaults
-        if utils.koboldai_vars.model_type == "llama":
-            m_self.tokenizer.decode_with_prefix_space = True
-            m_self.tokenizer.add_bos_token = False
-        elif utils.koboldai_vars.model_type == "opt":
-            m_self.tokenizer._koboldai_header = m_self.tokenizer.encode("</s>")
-            m_self.tokenizer.add_bos_token = False
-            m_self.tokenizer.add_prefix_space = False
-
         # Patch stopping_criteria
         class PTHStopper(StoppingCriteria):
             def __call__(

From a0f4ab5c6a0df7e7c57353e165fcad933121eae8 Mon Sep 17 00:00:00 2001
From: somebody
Date: Tue, 2 May 2023 20:23:36 -0500
Subject: [PATCH 2/3] Move bad token grabber until after newlinemode has been deduced

---
 modeling/inference_models/hf.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index 63c0a40d..013590ef 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -18,20 +18,6 @@ class HFInferenceModel(InferenceModel):
         self.tokenizer = None
 
     def _post_load(self) -> None:
-        # Clean up tokens that cause issues
-        if (
-            utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
-            and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
-        ):
-            utils.koboldai_vars.badwordsids = [
-                [v]
-                for k, v in self.tokenizer.get_vocab().items()
-                if any(c in str(k) for c in "[]")
-            ]
-
-        if utils.koboldai_vars.newlinemode == "n":
-            utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
-
         # These are model specific tokenizer overrides if a model has bad defaults
         if utils.koboldai_vars.model_type == "llama":
             self.tokenizer.decode_with_prefix_space = True
@@ -49,6 +35,20 @@ class HFInferenceModel(InferenceModel):
             # Handle but don't convert newlines if using Fairseq models that have newlines trained in them
             utils.koboldai_vars.newlinemode = "ns"
 
+        # Clean up tokens that cause issues
+        if (
+            utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
+            and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
+        ):
+            utils.koboldai_vars.badwordsids = [
+                [v]
+                for k, v in self.tokenizer.get_vocab().items()
+                if any(c in str(k) for c in "[]")
+            ]
+
+        if utils.koboldai_vars.newlinemode == "n":
+            utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])
+
         return super()._post_load()
 
     def get_local_model_path(

From 4b3b240bce94745069ba310c17b35bef677c6f7f Mon Sep 17 00:00:00 2001
From: somebody
Date: Tue, 2 May 2023 20:33:37 -0500
Subject: [PATCH 3/3] Move loadmodelsettings

---
 aiserver.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 2e9dcfe9..2977ebee 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1916,9 +1916,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     if koboldai_vars.model == "ReadOnly":
         koboldai_vars.noai = True
 
-    loadmodelsettings()
-    loadsettings()
-
     # TODO: InferKit
     if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai:
         pass
@@ -1984,6 +1981,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     if model:
         tokenizer = model.tokenizer
 
+    loadmodelsettings()
+    loadsettings()
+
     lua_startup()
     # Load scripts
     load_lua_scripts()
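
Reviewer's note, not part of the patches: below is a sketch of what
HFInferenceModel._post_load in modeling/inference_models/hf.py should look
like once patches 1/3 and 2/3 are applied, reconstructed from the hunks above
so the final ordering is easy to review: tokenizer overrides first, then the
newline mode quirks, and only then the bad-token cleanup, which depends on the
deduced newlinemode. Every name here (utils, koboldai_settings,
_koboldai_header) comes from the diffs themselves; treat this as a reading
aid, not an authoritative copy of the file.

    def _post_load(self) -> None:
        # Model specific tokenizer overrides if a model has bad defaults
        if utils.koboldai_vars.model_type == "llama":
            self.tokenizer.decode_with_prefix_space = True
            self.tokenizer.add_bos_token = False
        elif utils.koboldai_vars.model_type == "opt":
            # Stash a </s> header for OPT and disable BOS/prefix-space handling
            self.tokenizer._koboldai_header = self.tokenizer.encode("</s>")
            self.tokenizer.add_bos_token = False
            self.tokenizer.add_prefix_space = False

        # Newline behavior must be settled before the cleanup below reads it
        if utils.koboldai_vars.model_type == "xglm":
            # Default to </s> newline mode if using XGLM
            utils.koboldai_vars.newlinemode = "s"
        elif utils.koboldai_vars.model_type in ["opt", "bloom"]:
            # Handle but don't convert newlines for Fairseq-derived models
            utils.koboldai_vars.newlinemode = "ns"

        # Bad-token cleanup runs last so it sees the final newlinemode value
        if (
            utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default
            and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj")
        ):
            utils.koboldai_vars.badwordsids = [
                [v]
                for k, v in self.tokenizer.get_vocab().items()
                if any(c in str(k) for c in "[]")
            ]

        if utils.koboldai_vars.newlinemode == "n":
            utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id])

        return super()._post_load()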