Better fallback

onesome
2023-04-25 22:28:07 -05:00
parent 6e3aebc1ea
commit 9579298df7
5 changed files with 38 additions and 51 deletions


@@ -12,7 +12,6 @@ import random
 import shutil
 import eventlet
-from modeling.inference_model import SuperLegacyModelError
 eventlet.monkey_patch(all=True, thread=False, os=False)
 import os, inspect
 os.system("")
@@ -1910,7 +1909,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     # TODO: InferKit
     if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai:
-        print(":P")
+        pass
     elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]:
         koboldai_vars.colaburl = url or koboldai_vars.colaburl
         koboldai_vars.usegpu = False
@@ -1946,31 +1945,17 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         except:
             pass
-        try:
-            from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel
-            model = GenericHFTorchInferenceModel(
-                koboldai_vars.model,
-                lazy_load=koboldai_vars.lazy_load,
-                low_mem=args.lowmem
-            )
-            model.load(
-                save_model=not (args.colab or args.cacheonly) or args.savemodel,
-                initial_load=initial_load,
-            )
-        except SuperLegacyModelError:
-            from modeling.inference_models.legacy_gpt2_hf import CustomGPT2HFTorchInferenceModel
-            model = CustomGPT2HFTorchInferenceModel(
-                koboldai_vars.model,
-                lazy_load=koboldai_vars.lazy_load,
-                low_mem=args.lowmem
-            )
-            model.load(
-                save_model=not (args.colab or args.cacheonly) or args.savemodel,
-                initial_load=initial_load,
-            )
+        from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel
+        model = GenericHFTorchInferenceModel(
+            koboldai_vars.model,
+            lazy_load=koboldai_vars.lazy_load,
+            low_mem=args.lowmem
+        )
+        model.load(
+            save_model=not (args.colab or args.cacheonly) or args.savemodel,
+            initial_load=initial_load,
+        )
         logger.info(f"Pipeline created: {koboldai_vars.model}")
     else:
         # TPU
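
The hunk above removes the try/except SuperLegacyModelError fallback from load_model(), so this call site now constructs GenericHFTorchInferenceModel unconditionally; presumably the remaining changed files in this commit (not shown here) move the legacy-GPT-2 handling closer to the model classes themselves. Purely as a self-contained sketch of that pattern, with the fallback owned by a single loading entry point instead of every caller, the classes and the small load_model() wrapper below are hypothetical stand-ins, not the repository's code:

# Illustrative only: LegacyModelError, PrimaryLoader, LegacyLoader and this
# load_model() are hypothetical stand-ins, not classes from this repository.

class LegacyModelError(Exception):
    """Raised when a checkpoint needs the legacy loading path."""


class PrimaryLoader:
    def load(self, path: str) -> str:
        # Pretend checkpoints ending in ".legacy" are too old for this path.
        if path.endswith(".legacy"):
            raise LegacyModelError(path)
        return f"primary:{path}"


class LegacyLoader:
    def load(self, path: str) -> str:
        return f"legacy:{path}"


def load_model(path: str) -> str:
    # The fallback lives in one place; callers never see LegacyModelError.
    try:
        return PrimaryLoader().load(path)
    except LegacyModelError:
        return LegacyLoader().load(path)


if __name__ == "__main__":
    print(load_model("model.bin"))     # primary:model.bin
    print(load_model("model.legacy"))  # legacy:model.legacy

With the fallback pushed down like this, the caller shrinks to a single construct-and-load sequence, which is what the new lines in the hunk show.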