Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Merge pull request #63 from pi6am/feat/exllama-stoppers
Add stopper hook support to exllama
@@ -91,7 +91,7 @@ class model_backend(InferenceModel):
         self.capabilties = ModelCapabilities(
             embedding_manipulation=False,
             post_token_hooks=True,
-            stopper_hooks=False,
+            stopper_hooks=True,
             post_token_probs=False,
         )
 
@@ -305,6 +305,15 @@ class model_backend(InferenceModel):
                     trim_count = 1
                     break
 
+            # Apply stoppers
+            do_stop = False
+            for stopper in self.stopper_hooks:
+                do_stop = stopper(self, self.generator.sequence)
+                if do_stop:
+                    break
+            if do_stop:
+                break
+
         utils.koboldai_vars.generated_tkns = max_new - trim_count
         if trim_count > 0:
             seq = self.generator.sequence[:, gen_in.size(1):-trim_count]
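A minimal sketch of what a stopper hook might look like, based only on how this diff invokes the hooks: each entry in self.stopper_hooks is called as stopper(self, self.generator.sequence), and a truthy return value ends generation early. The hook name, the 512-token threshold, and the registration line below are illustrative assumptions, not part of the commit.

# Hypothetical stopper hook; the (backend, sequence) call signature matches
# how the generation loop above invokes entries of self.stopper_hooks.
def stop_on_long_sequence(model, sequence):
    # Request a stop once the current sequence holds 512 or more tokens.
    # The threshold is an arbitrary example value.
    return sequence.shape[-1] >= 512

# Assumed registration: the diff only shows the hooks being iterated, so
# appending a callable to the backend's stopper_hooks list is an assumption.
# backend.stopper_hooks.append(stop_on_long_sequence)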