From 5cdeb79752c40b878c9c1b38a13f6cdb35901a30 Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 24 Sep 2022 12:54:20 -0500
Subject: [PATCH] Final touches

---
 aiserver.py | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index e3d2897d..628a0d17 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -4826,9 +4826,6 @@ def calcsubmit(txt):
         # Send it!
         ikrequest(subtxt)
 
-def __debug(*args):
-    print("[DBG] ", *args)
-
 def core_generate(text: list, min: int, max: int, found_entries: set):
     # This generation function is tangled with koboldai_vars intentionally. It
     # is meant for the story and nothing else.
@@ -4866,13 +4863,11 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
 
     koboldai_vars._prompt = koboldai_vars.prompt
 
-    __debug("generate core", text)
     with torch.no_grad():
         already_generated = 0
         numseqs = koboldai_vars.numseqs
 
         while True:
-            __debug("generate loop start", text)
             # The reason this is a loop is due to how Dynamic WI works. We
             # cannot simply add the WI to the context mid-generation, so we
             # stop early, and then insert WI, then continue generating. That
@@ -4888,15 +4883,12 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
                 bypass_hf_maxlength=True,
             )
 
-            __debug("generate result", result.__dict__)
-
             genout = result.encoded
 
-            already_generated += len(genout[0]) - 1 # - len(gen_in[0])
+            already_generated += len(genout[0]) - 1
             assert already_generated <= koboldai_vars.genamt
 
             if result.is_whole_generation:
-                __debug("Outa here")
                 break
 
             # Generation stopped; why?
@@ -4953,13 +4945,9 @@ def core_generate(text: list, min: int, max: int, found_entries: set):
                 )
                 genout = torch.cat((soft_tokens.tile(koboldai_vars.numseqs, 1), genout), dim=-1)
                 assert genout.shape[-1] + koboldai_vars.genamt - already_generated <= koboldai_vars.max_length
-            # diff = genout.shape[-1] - gen_in.shape[-1]
-            # minimum += diff
-            # maximum += diff
             gen_in = genout
             numseqs = 1
 
-    __debug("final out", genout, "already_gen", already_generated)
     return genout, already_generated
 
 class GenerationResult:
@@ -4975,10 +4963,8 @@ class GenerationResult:
         # Controls if we should trim output by prompt length
         output_includes_prompt: bool = False,
     ):
-        # Shave prompt off of encoded response. Decoded does not return prompt.
-        # TODO: Does MTJ generation shave this off automatically? Test it!
-        print("shape", out_batches.shape)
-
+        # Shave prompt off of encoded response when needed (HF). Decoded does
+        # not return prompt.
         if output_includes_prompt:
             self.encoded = out_batches[:, len(prompt) - 1:]
         else:
@@ -5065,7 +5051,6 @@ def tpu_raw_generate(
     # Mostly lifted from apiactionsubmit_tpumtjgenerate
     soft_tokens = tpumtjgetsofttokens()
 
-    __debug("we are generating with", prompt_tokens, "batch", batch_count, "soft tokens", soft_tokens)
     genout = tpool.execute(
         tpu_mtj_backend.infer_static,