Tokenizer Fixes (Slow first to keep coherency)
parent 07896867b2
commit 11455697ef

aiserver.py | 21
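Every hunk below makes the same change: the old leading attempt to load the fast AutoTokenizer (which silently passed on failure) is dropped, and the slow tokenizer (use_fast=False) is now tried first, with the fast AutoTokenizer and then GPT2TokenizerFast as fallbacks. A minimal sketch of that fallback order, assuming a standard transformers install; load_tokenizer and model_path are illustrative names only, standing in for the vars.custmodpth / vars.model paths used in the real code:

# Rough sketch only: load_tokenizer and model_path are made-up names,
# not identifiers from this commit. The order mirrors the new fallback chain.
from transformers import AutoTokenizer, GPT2TokenizerFast

def load_tokenizer(model_path, revision=None):
    try:
        # Slow (Python) tokenizer first
        return AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir="cache", use_fast=False)
    except Exception:
        pass
    try:
        # Then the fast (Rust-backed) tokenizer
        return AutoTokenizer.from_pretrained(model_path, revision=revision, cache_dir="cache")
    except Exception:
        # Last resort in these hunks: treat the model as GPT-2-like
        return GPT2TokenizerFast.from_pretrained(model_path, revision=revision, cache_dir="cache")

Preferring the slow tokenizer when both are available is the "slow first to keep coherency" of the commit title: where the two implementations disagree, the slow one is taken as the reference.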
aiserver.py
@@ -2489,12 +2489,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
             lowmem = {}
         if(os.path.isdir(vars.custmodpth)):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -2507,12 +2506,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                     raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
                 model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
         elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -2538,12 +2536,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                     return old_rebuild_tensor(storage, storage_offset, shape, stride)
                 torch._utils._rebuild_tensor = new_rebuild_tensor

-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
tpu_mtj_backend.py
@@ -1350,12 +1350,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
     print("\n", flush=True)
     with torch_lazy_loader.use_lazy_torch_load(callback=callback, dematerialized_modules=True):
         if(os.path.isdir(vars.custmodpth)):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1366,12 +1365,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             except Exception as e:
                 model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
         elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1382,12 +1380,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             except Exception as e:
                 model = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
         else:
-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
             try:
                 tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
-            except Exception as e:
-                try:
-                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+                except Exception as e:
+                    try:
+                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
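A hypothetical spot check of the coherency concern behind the reordering above, assuming a standard transformers install; the model id "gpt2" is only an example and is not named by this commit:

from transformers import AutoTokenizer

text = "You feel a sudden urge to go north."
slow = AutoTokenizer.from_pretrained("gpt2", use_fast=False)
fast = AutoTokenizer.from_pretrained("gpt2", use_fast=True)

# If the two decodes ever disagree for a model, preferring the slow
# tokenizer (as the reordered fallback does) keeps generation consistent
# with the reference implementation.
print(slow.decode(slow.encode(text)))
print(fast.decode(fast.encode(text)))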