From b81f61b8209c54d1325ff9a0803d01b62f226f38 Mon Sep 17 00:00:00 2001
From: somebody
Date: Wed, 21 Jun 2023 18:35:56 -0500
Subject: [PATCH] Clean debug

---
 modeling/inference_models/hf_torch.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index a10a48f3..6bcd88cd 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -110,6 +110,9 @@ class HFTorchInferenceModel(HFInferenceModel):
         self.breakmodel_config.gpu_blocks = self.layers
         self.breakmodel_config.disk_blocks = self.disk_layers
 
+        # HACK: Prevent get_auxiliary_device from returning cuda
+        utils.koboldai_vars.hascuda = self.usegpu
+
         return ret
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
@@ -117,6 +120,8 @@
             return torch.float32
         elif utils.args.cpu:
             return torch.float32
+        elif not self.usegpu:
+            return torch.float32
         return torch.float16
 
     def _apply_warpers(
@@ -316,11 +321,6 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         # Try to determine model type from either AutoModel or falling back to legacy
        try:
-            print(f"self.lazy_load {self.lazy_load}")
-            print(f"self.breakmodel {self.breakmodel}")
-            print(f"self.nobreakmodel {self.nobreakmodel}")
-            print(f"args.cpu {utils.args.cpu}")
-
             if self.lazy_load:
                 with lazy_loader.use_lazy_load(dematerialized_modules=True):
                     metamodel = AutoModelForCausalLM.from_config(self.model_config)
@@ -344,6 +344,13 @@
                 **tf_kwargs,
             )
 
+            if not self.lazy_load:
+                # We need to move the model to the desired device
+                if (not self.usegpu) or torch.cuda.device_count() <= 0:
+                    model = model.to("cpu")
+                else:
+                    model = model.to("cuda")
+
             return model
         except Exception as e:
             traceback_string = traceback.format_exc().lower()