Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Fix high VRAM usage caused by workaround for scalar type error
@@ -3172,7 +3172,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         else:
             raise RuntimeError(f"4-bit load failed. Model type {koboldai_vars.model_type} not supported in 4-bit")
 
-        model = model.float()
+        model = model.half()
     else:
         try:
             tokenizer = AutoTokenizer.from_pretrained(koboldai_vars.custmodpth, revision=koboldai_vars.revision, cache_dir="cache", use_fast=False)
|
Submodule repos/gptq updated: 0748a680e9...5d07f25a30
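
For context (not part of the commit): torch.Tensor.float() casts weights to 32-bit floats, while .half() casts to 16-bit floats, so the earlier workaround roughly doubled the VRAM needed to hold the model weights. A minimal sketch of the arithmetic, assuming a hypothetical 6B-parameter model (the parameter count is an illustrative assumption, not taken from the commit):

import torch

# Per-element sizes of the two dtypes swapped in the diff above.
fp32_bytes = torch.tensor([], dtype=torch.float32).element_size()  # 4 bytes
fp16_bytes = torch.tensor([], dtype=torch.float16).element_size()  # 2 bytes

n_params = 6_000_000_000  # hypothetical model size, for illustration only

print(f"model.float(): ~{n_params * fp32_bytes / 2**30:.1f} GiB of weights")  # ~22.4 GiB
print(f"model.half():  ~{n_params * fp16_bytes / 2**30:.1f} GiB of weights")  # ~11.2 GiB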