Fix for exllama (v1 and v2) reporting double progress (0–200%) during generation

This commit is contained in:
ebolam
2023-10-12 20:42:42 -04:00
parent 334eec6127
commit cbbcc6250e
2 changed files with 4 additions and 2 deletions

View File

@@ -340,7 +340,8 @@ class model_backend(InferenceModel):
self._post_token_gen(self.generator.sequence)
utils.koboldai_vars.generated_tkns += 1
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
#utils.koboldai_vars.generated_tkns += 1
# Apply stoppers
do_stop = False

View File

@@ -315,7 +315,8 @@ class model_backend(InferenceModel):
self._post_token_gen(self.generator.sequence_ids)
utils.koboldai_vars.generated_tkns += 1
#This is taken care of in the core stopper class that's called below. If you're not using core stoppers then it should remain here
#utils.koboldai_vars.generated_tkns += 1
# Apply stoppers
do_stop = False