mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Add safeguards for token budget and text formatting
* Error messages are now shown when memory, author's note, etc. exceed the token budget by themselves
* Formatting options no longer break if there are empty chunks in the story (although there shouldn't be any in the first place)
* The number of generated tokens is now tracked from Python
This commit is contained in:
@@ -268,7 +268,7 @@ class PenalizingCausalTransformer(CausalTransformer):
|
||||
|
||||
|
||||
def infer(
|
||||
context: str,
|
||||
context: np.array,
|
||||
top_p=0.9,
|
||||
temp=0.5,
|
||||
top_k=0,
|
||||
@@ -281,7 +281,7 @@ def infer(
|
||||
) -> List[str]:
|
||||
maps.thread_resources.env = thread_resources_env
|
||||
total_batch = 1
|
||||
tokens = np.uint32(tokenizer.encode(context, max_length=params["seq"] - (soft_tokens.shape[0] if soft_tokens is not None else 0), truncation=True))
|
||||
tokens = context
|
||||
if(soft_tokens is not None):
|
||||
tokens = np.uint32(np.concatenate((soft_tokens, tokens)))
|
||||
provided_ctx = tokens.shape[0]
|
||||
|
Reference in New Issue
Block a user