From cbbcc6250e3d5dd678763973dbcad2f5c55a31c5 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Thu, 12 Oct 2023 20:42:42 -0400
Subject: [PATCH] Fix for exllama (v1 and v2) showing 2x status (0-200%) on
 generation

---
 modeling/inference_models/exllama/class.py   | 3 ++-
 modeling/inference_models/exllamav2/class.py | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index f688d611..569f6d61 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -340,7 +340,8 @@ class model_backend(InferenceModel):
 
         self._post_token_gen(self.generator.sequence)
 
-        utils.koboldai_vars.generated_tkns += 1
+        #This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
+        #utils.koboldai_vars.generated_tkns += 1
 
         # Apply stoppers
         do_stop = False
diff --git a/modeling/inference_models/exllamav2/class.py b/modeling/inference_models/exllamav2/class.py
index 15b91c8d..dd97e83f 100644
--- a/modeling/inference_models/exllamav2/class.py
+++ b/modeling/inference_models/exllamav2/class.py
@@ -315,7 +315,8 @@ class model_backend(InferenceModel):
 
         self._post_token_gen(self.generator.sequence_ids)
 
-        utils.koboldai_vars.generated_tkns += 1
+        #This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
+        #utils.koboldai_vars.generated_tkns += 1
 
         # Apply stoppers
         do_stop = False