From 4559112551b6a679e747f1f94d0f51ceb21c8934 Mon Sep 17 00:00:00 2001
From: somebody
Date: Thu, 27 Apr 2023 19:51:10 -0500
Subject: [PATCH] Potential fix

---
 modeling/stoppers.py  | 1 -
 modeling/tokenizer.py | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/modeling/stoppers.py b/modeling/stoppers.py
index 14cfe752..94c09e85 100644
--- a/modeling/stoppers.py
+++ b/modeling/stoppers.py
@@ -122,7 +122,6 @@ class Stoppers:
         input_ids: torch.LongTensor,
     ) -> bool:
 
-        print(f"[stop_sequence_stopper] Input ids: {input_ids}")
         data = [model.tokenizer.decode(x) for x in input_ids]
         # null_character = model.tokenizer.encode(chr(0))[0]
         if "completed" not in model.gen_state:
diff --git a/modeling/tokenizer.py b/modeling/tokenizer.py
index cd35758b..4cb5a092 100644
--- a/modeling/tokenizer.py
+++ b/modeling/tokenizer.py
@@ -28,11 +28,10 @@ class GenericTokenizer:
         return ret.ids
 
     def decode(self, tokens: Union[int, List[int], torch.Tensor]) -> str:
-        print(f"[decode] Tokens: {tokens}")
         if isinstance(tokens, torch.Tensor):
             tokens = tokens.cpu().tolist()
 
         if isinstance(tokens, int):
             tokens = [tokens]
 
-        return self.tokenizer.decode(tokens)
+        return self.tokenizer.decode(tokens, skip_special_tokens=True)