Better GPU management of summarizer

Author: ebolam
Date: 2022-09-20 16:58:31 -04:00
parent 8f5066c83c
commit d788d5228d


@@ -8212,7 +8212,7 @@ def generate_image_in_background():
         koboldai_vars.summarizer.save_pretrained("models/{}".format(args.summarizer_model.replace('/', '_')), max_shard_size="500MiB")
     #Try GPU accel
-    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 6000000000:
+    if koboldai_vars.hascuda and torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_reserved(0) >= 1645778560:
         koboldai_vars.summarizer.to(0)
         device=0
     else:
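
For context, here is a minimal standalone sketch of the gating logic this hunk tightens, assuming only that torch and a loaded summarizer model are available. The helper name pick_summarizer_device and the constant SUMMARIZER_VRAM_BYTES are illustrative, not part of the codebase; the constant matches the diff's new value (1645778560 bytes, roughly 1.5 GiB).

import torch

# Illustrative threshold taken from the diff's new value (~1.5 GiB).
SUMMARIZER_VRAM_BYTES = 1645778560

def pick_summarizer_device(model, has_cuda):
    """Hypothetical helper mirroring the hunk's check: move the model to
    GPU 0 only if enough VRAM looks free, otherwise keep it on the CPU.
    Returns the chosen device."""
    if has_cuda:
        # Estimate free VRAM as total capacity minus what this process's
        # caching allocator has reserved, as the diff does.
        free = (torch.cuda.get_device_properties(0).total_memory
                - torch.cuda.memory_reserved(0))
        if free >= SUMMARIZER_VRAM_BYTES:
            model.to(0)
            return 0
    model.to("cpu")
    return "cpu"

Note that total_memory minus memory_reserved(0) only accounts for this process's own caching allocator, so the check is optimistic when other processes also hold VRAM on the device.
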
@@ -8229,6 +8229,8 @@ def generate_image_in_background():
     transformers.generation_utils.GenerationMixin._get_stopping_criteria = temp
     logger.debug("Time to summarize: {}".format(time.time()-start_time))
     #move model back to CPU to save precious vram
+    torch.cuda.empty_cache()
+    logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
     koboldai_vars.summarizer.to("cpu")
     torch.cuda.empty_cache()
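
The second hunk's intent, sketched as a hypothetical standalone helper (assuming a torch model and the standard library logging API rather than the project's own logger): empty the CUDA cache before logging so memory_reserved reflects what the summarizer itself still holds, then offload the weights and drop the cache again.

import logging
import torch

logger = logging.getLogger(__name__)

def release_summarizer_vram(model):
    # Return cached-but-unused blocks to the driver first, so the reserved
    # figure logged below approximates what the summarizer's weights occupy.
    torch.cuda.empty_cache()
    logger.debug("VRAM used by summarization: {}".format(torch.cuda.memory_reserved(0)))
    # Move the weights back to system RAM, then release the now-empty cache
    # so other models can claim the VRAM.
    model.to("cpu")
    torch.cuda.empty_cache()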