From d788d5228ddfae3a3b8000259558d1d8cb71b377 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Tue, 20 Sep 2022 16:58:31 -0400
Subject: [PATCH] Better GPU management of summarizer

---
 aiserver.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index 04245558..329c0ed5 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -8212,7 +8212,7 @@ def generate_image_in_background():
             koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
     
     #Try GPU accel
-    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 6000000000:
+    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
         koboldai_vars.summarizer.to(0)
         device=0
     else:
@@ -8229,6 +8229,8 @@ def generate_image_in_background():
         transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
         logger.debug("Time to summarize: {}".format(time.time()-start_time))
         #move model back to CPU to save precious vram
+        torch.cuda.empty_cache()
+        logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
         koboldai_vars.summarizer.to("cpu")
         torch.cuda.empty_cache()