From d788d5228ddfae3a3b8000259558d1d8cb71b377 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Tue, 20 Sep 2022 16:58:31 -0400
Subject: [PATCH] Better GPU management of summarizer

---
 aiserver.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/aiserver.py b/aiserver.py
index 04245558..329c0ed5 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -8212,7 +8212,7 @@ def generate_image_in_background():
             koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
     
     #Try GPU accel
-    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 6000000000:
+    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
         koboldai_vars.summarizer.to(0)
         device=0
     else:
@@ -8229,6 +8229,8 @@ def generate_image_in_background():
         transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
         logger.debug("Time to summarize: {}".format(time.time()-start_time))
         #move model back to CPU to save precious vram
+        torch.cuda.empty_cache()
+        logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
         koboldai_vars.summarizer.to("cpu")
         torch.cuda.empty_cache()