diff --git a/aiserver.py b/aiserver.py
index 2c2eff1b..27cafd59 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3172,7 +3172,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             else:
                 raise RuntimeError(f"4-bit load failed. Model type {koboldai_vars.model_type} not supported in 4-bit")
 
-            model = model.float()
+            model = model.half()
         else:
             try:
                 tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
diff --git a/repos/gptq b/repos/gptq
index 0748a680..5d07f25a 160000
--- a/repos/gptq
+++ b/repos/gptq
@@ -1 +1 @@
-Subproject commit 0748a680e95ab0a9f8860953a5d705a01070d1cc
+Subproject commit 5d07f25a30f8602aedb3e69f11de07624e486ce9
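
The aiserver.py change replaces the post-load cast model.float() (fp32) with model.half() (fp16), so any parameters the 4-bit GPTQ loader leaves unquantized occupy half the memory. Below is a minimal sketch of that effect; it uses a plain torch.nn.Linear as a hypothetical stand-in for the loaded model (the real code casts the object returned by the GPTQ loader, not a Linear layer):

import torch

# Hypothetical stand-in for the loaded model; only illustrates the dtype cast.
model = torch.nn.Linear(4096, 4096)

def param_bytes(m: torch.nn.Module) -> int:
    # Total size of all parameters in bytes.
    return sum(p.numel() * p.element_size() for p in m.parameters())

print(param_bytes(model))   # fp32: ~67.1 MB
model = model.half()        # cast floating-point parameters/buffers to fp16
print(param_bytes(model))   # fp16: ~33.6 MB, half the footprint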