Add safeguards for token budget and text formatting

* Error messages are now shown when the memory, author's note, etc. exceed the token budget by themselves
* Formatting options no longer break if there are empty chunks in the story (although there shouldn't be any in the first place)
* The number of generated tokens is now tracked from Python
2025-06-05 21:59:24 +02:00 · 2021-12-26 18:29:54 -05:00
parent 6183ecd669
commit 8742453f95
3 changed files with 82 additions and 50 deletions
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -268,7 +268,7 @@ class PenalizingCausalTransformer(CausalTransformer):


 def infer(
-    context: str,
+    context: np.array,
    top_p=0.9,
    temp=0.5,
    top_k=0,
@@ -281,7 +281,7 @@ def infer(
 ) -> List[str]:
    maps.thread_resources.env = thread_resources_env
    total_batch = 1
-    tokens = np.uint32(tokenizer.encode(context, max_length=params["seq"] - (soft_tokens.shape[0] if soft_tokens is not None else 0), truncation=True))
+    tokens = context
    if(soft_tokens is not None):
        tokens = np.uint32(np.concatenate((soft_tokens, tokens)))
    provided_ctx = tokens.shape[0]