Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)

Commit: Clean debug
@@ -110,6 +110,9 @@ class HFTorchInferenceModel(HFInferenceModel):
         self.breakmodel_config.gpu_blocks = self.layers
         self.breakmodel_config.disk_blocks = self.disk_layers
 
+        # HACK: Prevent get_auxiliary_device from returning cuda
+        utils.koboldai_vars.hascuda = self.usegpu
+
         return ret
 
     def _get_target_dtype(self) -> Union[torch.float16, torch.float32]:
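Note on the first hunk: the new assignment ties utils.koboldai_vars.hascuda to self.usegpu, so downstream device selection stops treating CUDA as usable just because torch can see a GPU. A minimal sketch of the intended effect, assuming a hypothetical helper that keys off hascuda the way the hack expects (the real get_auxiliary_device may differ):

    import torch

    def get_auxiliary_device_sketch(koboldai_vars) -> str:
        # Hypothetical: return "cuda" only when the caller opted into GPU use
        # (hascuda now mirrors self.usegpu) and a CUDA device actually exists.
        if getattr(koboldai_vars, "hascuda", False) and torch.cuda.is_available():
            return "cuda"
        return "cpu"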
@@ -117,6 +120,8 @@ class HFTorchInferenceModel(HFInferenceModel):
             return torch.float32
         elif utils.args.cpu:
             return torch.float32
+        elif not self.usegpu:
+            return torch.float32
         return torch.float16
 
     def _apply_warpers(
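Note on the second hunk: _get_target_dtype gains one more branch, so a model that is not run on the GPU (not self.usegpu) also gets float32; float16 remains only for true GPU runs. An illustrative standalone sketch of the combined rule, with the conditions passed in as plain booleans (parameter names are mine; the first branch's condition is cut off by the diff context, so it appears here as a generic cpu_primary flag):

    import torch

    def pick_target_dtype(cpu_primary: bool, cpu_flag: bool, usegpu: bool) -> torch.dtype:
        # float32 whenever inference will run on CPU; float16 only for GPU runs.
        if cpu_primary or cpu_flag or not usegpu:
            return torch.float32
        return torch.float16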
@@ -316,11 +321,6 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
-            print(f"self.lazy_load {self.lazy_load}")
-            print(f"self.breakmodel {self.breakmodel}")
-            print(f"self.nobreakmodel {self.nobreakmodel}")
-            print(f"args.cpu {utils.args.cpu}")
-
             if self.lazy_load:
                 with lazy_loader.use_lazy_load(dematerialized_modules=True):
                     metamodel = AutoModelForCausalLM.from_config(self.model_config)
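Note on the third hunk: this is the "Clean debug" part of the commit, removing the temporary print statements that were added while diagnosing load behaviour. If similar tracing is wanted again, one hedged alternative is to emit it at debug level through a logger so it can be silenced without another cleanup commit; a sketch using the standard logging module (KoboldAI's own logger utilities may differ):

    import logging

    logger = logging.getLogger("hf_torch")

    def trace_load_flags(lazy_load: bool, breakmodel: bool, nobreakmodel: bool, cpu: bool) -> None:
        # Debug-level tracing instead of bare print(); silent unless DEBUG is enabled.
        logger.debug("lazy_load=%s breakmodel=%s nobreakmodel=%s cpu=%s",
                     lazy_load, breakmodel, nobreakmodel, cpu)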
@@ -344,6 +344,13 @@ class HFTorchInferenceModel(HFInferenceModel):
                 **tf_kwargs,
             )
 
+            if not self.lazy_load:
+                # We need to move the model to the desired device
+                if (not self.usegpu) or torch.cuda.device_count() <= 0:
+                    model = model.to("cpu")
+                else:
+                    model = model.to("cuda")
+
             return model
         except Exception as e:
             traceback_string = traceback.format_exc().lower()
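Note on the fourth hunk: when lazy loading is off, the freshly loaded model is now moved explicitly, to CPU if GPU use is disabled or no CUDA device is visible, and to CUDA otherwise. A minimal standalone sketch of the same placement rule (helper name is illustrative, not from the repository):

    import torch
    from torch import nn

    def place_model(model: nn.Module, usegpu: bool) -> nn.Module:
        # CPU unless GPU use was requested and at least one CUDA device is present.
        if (not usegpu) or torch.cuda.device_count() <= 0:
            return model.to("cpu")
        return model.to("cuda")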