diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py index 508c6a79..2540d3f4 100644 --- a/modeling/inference_models/exllama/class.py +++ b/modeling/inference_models/exllama/class.py @@ -91,7 +91,7 @@ class model_backend(InferenceModel): self.capabilties = ModelCapabilities( embedding_manipulation=False, post_token_hooks=True, - stopper_hooks=False, + stopper_hooks=True, post_token_probs=False, ) @@ -305,6 +305,15 @@ class model_backend(InferenceModel): trim_count = 1 break + # Apply stoppers + do_stop = False + for stopper in self.stopper_hooks: + do_stop = stopper(self, self.generator.sequence) + if do_stop: + break + if do_stop: + break + utils.koboldai_vars.generated_tkns = max_new - trim_count if trim_count > 0: seq = self.generator.sequence[:, gen_in.size(1):-trim_count]