diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py index 614a3de1..811f8da1 100644 --- a/modeling/inference_models/exllama/class.py +++ b/modeling/inference_models/exllama/class.py @@ -300,8 +300,12 @@ class model_backend(InferenceModel): self._post_token_gen(self.generator.sequence) + utils.koboldai_vars.generated_tkns += 1 + if token.item() == self.tokenizer.eos_token_id: break + utils.koboldai_vars.generated_tkns = max_new + return GenerationResult( model=self, out_batches=np.array(