diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index 2540d3f4..3fb8d252 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -293,7 +293,14 @@ class model_backend(InferenceModel):
 
                 scores = torch.softmax(scores, dim=-1)
 
-                token = torch.multinomial(scores, 1)
+                # Work around a bug in torch.multinomial (https://github.com/pytorch/pytorch/issues/48841)
+                # With low probability, multinomial can return an element with zero weight. Since this
+                # happens infrequently, just sample repeatedly until all tokens have non-zero probability.
+                for _ in range(100):
+                    token = torch.multinomial(scores, 1)
+                    # Verify that all selected tokens correspond to positive probabilities.
+                    if (scores.gather(1, token) > 0).all():
+                        break
 
                 self.generator.gen_accept_token(token)
@@ -301,7 +308,7 @@ class model_backend(InferenceModel):
 
                 utils.koboldai_vars.generated_tkns += 1
 
-                if token.item() == self.tokenizer.eos_token_id:
+                if (token == self.tokenizer.eos_token_id).any():
                     trim_count = 1
                     break
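
For context, the retry pattern introduced by this patch can be exercised in isolation. Below is a minimal, self-contained sketch; the `probs` tensor is an illustrative stand-in for the post-softmax `scores` in the patch, and the retry cap of 100 simply mirrors the patched loop:

    import torch

    # Illustrative probability rows containing explicit zero-weight entries,
    # standing in for the post-softmax `scores` tensor (shape: batch x vocab).
    probs = torch.tensor([
        [0.7, 0.3, 0.0],
        [0.0, 0.5, 0.5],
    ])

    # Same workaround as the patch: torch.multinomial can, very rarely, return
    # a zero-weight element (https://github.com/pytorch/pytorch/issues/48841),
    # so resample until every drawn index carries positive probability.
    for _ in range(100):
        token = torch.multinomial(probs, 1)  # one sample per row, shape (2, 1)
        if (probs.gather(1, token) > 0).all():
            break

    # Each row of `token` now indexes a strictly positive entry of `probs`.
    print(token)

Capping the loop at 100 iterations guarantees termination even if the check never passes; since the bad draw is infrequent, the loop almost always exits on the first pass, so the overhead in the common case is a single `gather` and comparison.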