Add safeguards for token budget and text formatting

* Error messages are now shown when memory, author's note, etc. exceed
  the token budget by themselves
* Formatting options no longer break if there are empty chunks in the
  story (although there shouldn't be any in the first place)
* The number of generated tokens is now tracked from Python
This commit is contained in:
Gnome Ann
2021-12-26 18:29:54 -05:00
parent 6183ecd669
commit 8742453f95
3 changed files with 82 additions and 50 deletions

View File

@@ -268,7 +268,7 @@ class PenalizingCausalTransformer(CausalTransformer):
def infer(
context: str,
context: np.array,
top_p=0.9,
temp=0.5,
top_k=0,
@@ -281,7 +281,7 @@ def infer(
) -> List[str]:
maps.thread_resources.env = thread_resources_env
total_batch = 1
tokens = np.uint32(tokenizer.encode(context, max_length=params["seq"] - (soft_tokens.shape[0] if soft_tokens is not None else 0), truncation=True))
tokens = context
if(soft_tokens is not None):
tokens = np.uint32(np.concatenate((soft_tokens, tokens)))
provided_ctx = tokens.shape[0]