Make sure tokenizer is initialized when used in read-only mode

This commit is contained in:
Gnome Ann 2021-12-31 17:13:11 -05:00
parent a580f18a14
commit 7241188408
1 changed file with 5 additions and 0 deletions

View File

@@ -2333,6 +2333,11 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
lnsp = vars.sp.shape[0] if vars.sp is not None else 0 lnsp = vars.sp.shape[0] if vars.sp is not None else 0
if("tokenizer" not in globals()):
from transformers import GPT2TokenizerFast
global tokenizer
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
# Calculate token budget # Calculate token budget
prompttkns = tokenizer.encode(vars.comregex_ai.sub('', vars.prompt), max_length=int(2e9), truncation=True) prompttkns = tokenizer.encode(vars.comregex_ai.sub('', vars.prompt), max_length=int(2e9), truncation=True)
lnprompt = len(prompttkns) lnprompt = len(prompttkns)