From ffa7b227349cd6f512b3dcf9397f84e60d07b031 Mon Sep 17 00:00:00 2001
From: somebody
Date: Thu, 27 Apr 2023 20:28:04 -0500
Subject: [PATCH] Experiment

---
 modeling/tokenizer.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/modeling/tokenizer.py b/modeling/tokenizer.py
index 17e402fb..26b619f6 100644
--- a/modeling/tokenizer.py
+++ b/modeling/tokenizer.py
@@ -29,15 +29,16 @@ class GenericTokenizer:
         return ret.ids
 
     def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
+        return self.tokenizer.decode(tokens)
         if isinstance(tokens, torch.Tensor):
             tokens = tokens.cpu().tolist()
 
         if isinstance(tokens, int):
             tokens = [tokens]
 
-        for t in tokens:
-            if t not in self.valid_tokens:
-                print(f"WHAT ON EARTH IS {t}")
+        # Sometimes soft token placeholders aren't in the vocab, which causes
+        # errors on decode. Obviously we can't express these tokens as text so
+        # we can probably slice 'em out without too much issue
         tokens = [t for t in tokens if t in self.valid_tokens]
 
         return self.tokenizer.decode(tokens)
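
For context, below is a minimal, self-contained sketch of the behaviour the added comment describes: token IDs the vocab doesn't know about (such as soft-prompt placeholders) are sliced out before the wrapped tokenizer's decode is called, so decode doesn't error on tokens it can't express as text. ToyTokenizer and FilteringTokenizer are illustrative stand-ins for the patched GenericTokenizer and the tokenizer it wraps, and valid_tokens is assumed to be built from the wrapped vocab's IDs. Note that the sketch applies the filtering unconditionally, whereas the return added at the top of decode in the diff above exits before the filtering below it is reached.

from typing import List, Union

import torch


class ToyTokenizer:
    """Hypothetical stand-in for the wrapped tokenizer; only what decode() needs."""

    def __init__(self) -> None:
        self.vocab = {"hello": 0, "world": 1, "!": 2}
        self._inverse = {v: k for k, v in self.vocab.items()}

    def decode(self, ids: List[int]) -> str:
        # Raises KeyError on unknown IDs, much like decoding a soft-token
        # placeholder can blow up in a real tokenizer.
        return " ".join(self._inverse[i] for i in ids)


class FilteringTokenizer:
    """Sketch of the decode path the patch comment describes: drop IDs the
    vocab doesn't know about (e.g. soft-prompt placeholders) before decoding."""

    def __init__(self, tokenizer: ToyTokenizer) -> None:
        self.tokenizer = tokenizer
        # Assumption: valid_tokens is the set of IDs the wrapped vocab knows.
        self.valid_tokens = set(tokenizer.vocab.values())

    def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
        if isinstance(tokens, torch.Tensor):
            tokens = tokens.cpu().tolist()

        if isinstance(tokens, int):
            tokens = [tokens]

        # Out-of-vocab IDs can't be rendered as text, so slice them out
        # rather than letting the underlying decode raise.
        tokens = [t for t in tokens if t in self.valid_tokens]

        return self.tokenizer.decode(tokens)


if __name__ == "__main__":
    tok = FilteringTokenizer(ToyTokenizer())
    # 50277 stands in for a soft-prompt placeholder ID outside the vocab.
    print(tok.decode(torch.tensor([0, 50277, 1, 2])))  # -> "hello world !"

Building valid_tokens once up front keeps the per-call cost of the filter to a set-membership check per token ID.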