Separate Low Memory Mode

In 1.16 we had significantly faster loading speeds because we did not do as much memory conservation; it's time to give users the choice. If you want the original, faster behavior and have the memory for it, run KoboldAI as usual. Otherwise, run play-lowmem.bat or aiserver.py with --lowmem. For Colab, low-memory mode is still the default behavior, to avoid breaking models that would otherwise load fine.
2025-06-05 21:59:24 +02:00 · 2022-02-18 16:21:28 +01:00
parent 4c84d731db
commit a47e93cee7
2 changed files with 4 additions and 1 deletions
--- a/aiserver.py
+++ b/aiserver.py
@ -479,6 +479,7 @@ parser.add_argument("--colab", action='store_true', help="Optimize for Google Co
 parser.add_argument("--nobreakmodel", action='store_true', help="Disables Breakmodel support completely.")
 parser.add_argument("--unblock", action='store_true', default=False, help="Unblocks the KoboldAI port to be accessible from other machines without optimizing for remote play (It is recommended to use --host instead)")
 parser.add_argument("--quiet", action='store_true', default=False, help="If present will suppress any story related text from showing on the console")
+parser.add_argument("--lowmem", action='store_true', help="Extra Low Memory loading for the GPU, slower but memory does not peak to twice the usage")

 args: argparse.Namespace = None
 if(os.environ.get("KOBOLDAI_ARGS") is not None):
@ -495,6 +496,7 @@ if args.colab:
    args.override_delete = True;
    args.nobreakmodel = True;
    args.quiet = True;
+    args.lowmem = True;

 if args.quiet:
    vars.quiet = True
@ -960,7 +962,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme
        
        @contextlib.contextmanager
        def maybe_use_float16(always_use=False):
-            if(always_use or (vars.hascuda and (vars.usegpu or vars.breakmodel))):
+            if(always_use or (vars.hascuda and args.lowmem and (vars.usegpu or vars.breakmodel))):
                original_dtype = torch.get_default_dtype()
                torch.set_default_dtype(torch.float16)
                yield True
--- a/play-lowmem.bat
+++ b/play-lowmem.bat
@ -0,0 +1 @@
+play --lowmem %*