From 922394c68fd872d87edf79a420fbf41ce509c72e Mon Sep 17 00:00:00 2001
From: vfbd
Date: Wed, 22 Jun 2022 11:23:03 -0400
Subject: [PATCH 1/2] Don't blacklist </s> token in "s" newline mode

---
 aiserver.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 4bbd89b0..71cd67dc 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1995,7 +1995,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             shutil.rmtree("cache/")
 
         if(vars.badwordsids is vars.badwordsids_default and vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
-            vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]")]
+            vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]") if vars.newlinemode != "s" or str(k) != "</s>"]
 
         patch_causallm(model)
 
@@ -2162,7 +2162,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
         tokenizer = tpu_mtj_backend.tokenizer
         if(vars.badwordsids is vars.badwordsids_default and vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
-            vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]")]
+            vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]") if vars.newlinemode != "s" or str(k) != "</s>"]
 
     else:
         loadsettings()

From 53034ee533227e6b650aabbd3f6e599829a5ee8d Mon Sep 17 00:00:00 2001
From: vfbd
Date: Wed, 22 Jun 2022 12:07:36 -0400
Subject: [PATCH 2/2] Delete all torch tensors before loading model

---
 aiserver.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/aiserver.py b/aiserver.py
index 71cd67dc..6afee9a6 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1528,6 +1528,14 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     model = None
     generator = None
     model_config = None
+    for tensor in gc.get_objects():
+        try:
+            if torch.is_tensor(tensor):
+                with torch.no_grad():
+                    tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype))
+        except:
+            pass
+    gc.collect()
     try:
         torch.cuda.empty_cache()
     except:
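
Note on PATCH 1/2: when vars.newlinemode is "s", the model's </s> token is
substituted for newlines, so leaving it in vars.badwordsids would ban the
model from ever emitting a line break. Below is a minimal sketch of the same
comprehension against a made-up toy vocabulary (the dict contents are
illustrative, not taken from any real tokenizer):

    # Hypothetical token-string -> token-ID vocabulary
    vocab = {"hello": 0, "<pad>": 1, "[SEP]": 2, "</s>": 3, "world": 4}
    newlinemode = "s"  # in "s" mode, </s> doubles as the newline token

    # Ban every token containing <, >, [ or ], but spare </s> in "s" mode
    badwordsids = [
        [v]
        for k, v in vocab.items()
        if any(c in str(k) for c in "<>[]")
        if newlinemode != "s" or str(k) != "</s>"
    ]

    print(badwordsids)  # [[1], [2]] -- <pad> and [SEP] banned, </s> kept

With newlinemode set to anything other than "s", the second condition is
always true and </s> is banned along with the other bracketed tokens, which
matches the pre-patch behaviour.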
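
Note on PATCH 2/2: `del model` alone only frees memory once every reference
to each tensor is gone, and stray references (closures, caches, tracebacks)
can keep the old weights alive while the next model loads. Tensor.set_()
sidesteps that: pointing a tensor at a fresh zero-element storage frees the
old storage immediately, no matter how many references to the tensor object
remain. A standalone CPU-only sketch of the same trick (the 64 Mi-element
buffer size is arbitrary):

    import gc

    import torch

    x = torch.zeros(64 * 1024 * 1024)  # ~256 MiB of float32
    alias = x  # a second reference that would normally keep the buffer alive

    for obj in gc.get_objects():  # snapshot of every GC-tracked object
        try:
            if torch.is_tensor(obj):
                with torch.no_grad():
                    # Swap in an empty storage; the old one is freed at once
                    obj.set_(torch.tensor((), device=obj.device, dtype=obj.dtype))
        except Exception:
            pass
    gc.collect()

    print(x.numel(), alias.numel())  # 0 0 -- the big buffer is gone

The try/except in the patched loop matters: gc.get_objects() returns
arbitrary objects, some of which raise on inspection, and some tensors can
refuse set_(). The torch.cuda.empty_cache() that follows in the patched code
then returns the freed blocks from PyTorch's caching allocator to the GPU
driver.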