Fix for exllama (v1 and v2) reporting double progress (0–200%) during generation

This commit is contained in:
ebolam
2023-10-12 20:42:42 -04:00
parent 334eec6127
commit cbbcc6250e
2 changed files with 4 additions and 2 deletions

View File

@@ -340,7 +340,8 @@ class model_backend(InferenceModel):
self._post_token_gen(self.generator.sequence)
utils.koboldai_vars.generated_tkns += 1
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
#utils.koboldai_vars.generated_tkns += 1
# Apply stoppers
do_stop = False

View File

@@ -315,7 +315,8 @@ class model_backend(InferenceModel):
self._post_token_gen(self.generator.sequence_ids)
utils.koboldai_vars.generated_tkns += 1
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
#utils.koboldai_vars.generated_tkns += 1
# Apply stoppers
do_stop = False