Resample to work around a bug in torch.multinomial

There is a bug in PyTorch 2.0.1 that allows torch.multinomial to sometimes choose elements that have zero probability. Since this is uncommon, we can continue to use torch.multinomial as long as we verify that the results are valid. If they aren't, sample again (up to 100 attempts) until the probability of each selected token is positive.
2025-06-05 21:59:24 +02:00 · 2023-08-26 22:26:26 -07:00
parent b1895de518
commit b7e38b4757
1 changed file with 9 additions and 2 deletions
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -293,7 +293,14 @@ class model_backend(InferenceModel):

            scores = torch.softmax(scores, dim=-1)

-            token = torch.multinomial(scores, 1)
+            # Work around a bug in torch.multinomial (https://github.com/pytorch/pytorch/issues/48841)
+            # With low probability, multinomial can return an element with zero weight. Since this is
+            # rare, resample (at most 100 attempts) until every selected token has non-zero probability.
+            for _ in range(100):
+                token = torch.multinomial(scores, 1)
+                # Verify that all selected tokens correspond to positive probabilities.
+                if (scores.gather(1, token) > 0).all():
+                    break

            self.generator.gen_accept_token(token)

@@ -301,7 +308,7 @@ class model_backend(InferenceModel):

            utils.koboldai_vars.generated_tkns += 1

-            if token.item() == self.tokenizer.eos_token_id:
+            if (token == self.tokenizer.eos_token_id).any():
                trim_count = 1
                break