Fix the Lua tokenizer API

Gnome Ann
2021-12-11 21:24:34 -05:00
parent 67974947b2
commit 8e6a62259e
2 changed files with 19 additions and 8 deletions


@@ -923,7 +923,12 @@ def load_lua_scripts():
#  Decode tokens into a string using current tokenizer
#==================================================================#
def lua_decode(tokens):
    tokens = list(tokens.values())
    assert type(tokens) is list
    if("tokenizer" not in globals()):
        from transformers import GPT2TokenizerFast
        global tokenizer
        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
    return tokenizer.decode(tokens)
#==================================================================#
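Note on the tokens.values() call above: a Lua array that crosses the scripting bridge typically arrives in Python as a dict keyed by its 1-based indices rather than as a list, which is presumably why lua_decode() converts it first. A minimal sketch of that conversion, assuming such a marshalled table (the variable name and token IDs below are illustrative, not from the source):

    # A Lua array like {15496, 995} usually arrives as {1: 15496, 2: 995}
    lua_style_tokens = {1: 15496, 2: 995}
    # dicts preserve insertion order on Python 3.7+, so token order survives
    tokens = list(lua_style_tokens.values())  # -> [15496, 995]
    assert type(tokens) is list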
@@ -931,6 +936,11 @@ def lua_decode(tokens):
#==================================================================#
def lua_encode(string):
    assert type(string) is str
    if("tokenizer" not in globals()):
        from transformers import GPT2TokenizerFast
        global tokenizer
        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
    return tokenizer.encode(string, max_length=int(4e9), truncation=True)
#==================================================================#
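The max_length=int(4e9) / truncation=True pair looks like a way to effectively disable truncation (the cap is far larger than any real input) while still giving transformers an explicit truncation policy for long sequences. A hedged round-trip sketch of the two helpers above, assuming transformers is installed and the GPT-2 vocabulary can be loaded; the dict comprehension simulates a table marshalled from Lua:

    ids = lua_encode("Hello world")  # e.g. [15496, 995] under GPT-2 BPE
    # rebuild a Lua-style 1-indexed table, since lua_decode() expects a dict
    text = lua_decode({i + 1: t for i, t in enumerate(ids)})
    assert text == "Hello world"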