From 9bd445c2a8d24a20b04aa905486c367455286ff9 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Tue, 23 May 2023 20:33:55 -0400
Subject: [PATCH] gpt2 fixed

---
 modeling/inference_models/generic_hf_torch/class.py |  2 +-
 modeling/inference_models/hf.py                     | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index 572337e2..bbd42096 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -59,7 +59,7 @@ class model_backend(HFTorchInferenceModel):
             "low_cpu_mem_usage": True,
         }
 
-        if utils.koboldai_vars.model_type == "gpt2":
+        if self.model_type == "gpt2":
             # We must disable low_cpu_mem_usage when using a GPT-2 model
             # because GPT-2 is not compatible with this feature yet.
             tf_kwargs.pop("low_cpu_mem_usage", None)
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index 2417bffb..7b8f356c 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -61,6 +61,7 @@ class HFInferenceModel(InferenceModel):
         else:
             self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache")
             layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
+        layer_count = None if hasattr(self, "get_model_type") and self.get_model_type() == "gpt2" else layer_count  # Skip layers if we're a GPT-2 model, as it doesn't support breakmodel
         if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
             if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
                 with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
@@ -143,15 +144,13 @@ class HFInferenceModel(InferenceModel):
         return requested_parameters
 
     def set_input_parameters(self, parameters):
-        if self.hf_torch:
+        if self.hf_torch and hasattr(self, "get_model_type") and self.get_model_type() != "gpt2":
             import breakmodel
             layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
             if layer_count is not None and layer_count >= 0 and not self.nobreakmodel:
                 gpu_count = torch.cuda.device_count()
                 layers = []
-                logger.info(parameters)
                 for i in range(gpu_count):
-                    logger.info(parameters["{}_Layers".format(i)])
                     if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric():
                         layers.append(int(parameters["{}_Layers".format(i)]))
                     elif isinstance(parameters["{}_Layers".format(i)], str):
@@ -170,8 +169,13 @@ class HFInferenceModel(InferenceModel):
                 self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0
                 self.model_type = self.get_model_type()
                 self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel
+                self.lazy_load = True
+                logger.debug("Model type: {}".format(self.model_type))
{}".format(self.model_type)) else: + logger.debug("Disabling breakmodel and lazyload") self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.breakmodel = False + self.lazy_load = False self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -199,6 +203,7 @@ class HFInferenceModel(InferenceModel): pass def _post_load(self) -> None: + utils.koboldai_vars.badwordsids = koboldai_settings.badwordsids_default self.model_type = str(self.model_config.model_type) # These are model specific tokenizer overrides if a model has bad defaults if self.model_type == "llama":