Tokenizer Fixes (Slow first to keep coherency)

2025-06-05 21:59:24 +02:00 · 2022-09-27 17:57:18 +02:00
parent 07896867b2
commit 11455697ef
2 changed files with 30 additions and 36 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -2489,12 +2489,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                    if(vars.lazy_load):  # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
                        lowmem = {}
                    if(os.path.isdir(vars.custmodpth)):
-                        try:
-                            tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
-                        except Exception as e:
-                            pass
                        try:
                            tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
+                        except Exception as e:
+                            try:
+                                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
                            except Exception as e:
                                try:
                                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -2507,12 +2506,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                                raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
                            model     = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
                    elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
-                        try:
-                            tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
-                        except Exception as e:
-                            pass
                        try:
                            tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
+                        except Exception as e:
+                            try:
+                                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
                            except Exception as e:
                                try:
                                    tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -2538,12 +2536,11 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                            return old_rebuild_tensor(storage, storage_offset, shape, stride)
                        torch._utils._rebuild_tensor = new_rebuild_tensor

-                        try:
-                            tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
-                        except Exception as e:
-                            pass
                        try:
                            tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
+                        except Exception as e:
+                            try:
+                                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
                            except Exception as e:
                                try:
                                    tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1350,12 +1350,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
    print("\n", flush=True)
    with torch_lazy_loader.use_lazy_torch_load(callback=callback, dematerialized_modules=True):
        if(os.path.isdir(vars.custmodpth)):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
            try:
                tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", use_fast=False)
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    try:
                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
@@ -1366,12 +1365,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
            except Exception as e:
                model     = GPTNeoForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
        elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))):
-            try:
-                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
            try:
                tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", use_fast=False)
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    try:
                        tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
@@ -1382,12 +1380,11 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
            except Exception as e:
                model     = GPTNeoForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
        else:
-            try:
-                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
-            except Exception as e:
-                pass
            try:
                tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", use_fast=False)
+            except Exception as e:
+                try:
+                    tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
                except Exception as e:
                    try:
                        tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")