Don't use Fast tokenizers when we don't have to

Henk 2022-09-27 18:26:13 +02:00
parent 11455697ef
commit 60d09899ea

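For context on what the commit swaps: GPT2TokenizerFast is backed by the Rust `tokenizers` library, while plain GPT2Tokenizer is the pure-Python implementation of the same GPT-2 byte-level BPE, so the change swaps the backing implementation without changing the token IDs produced. A minimal sketch of the distinction, using only the standard Hugging Face API (none of this code is from the repository):

    from transformers import AutoTokenizer, GPT2Tokenizer, GPT2TokenizerFast

    slow = GPT2Tokenizer.from_pretrained("gpt2")       # pure-Python BPE
    fast = GPT2TokenizerFast.from_pretrained("gpt2")   # Rust-backed BPE
    assert slow.is_fast is False and fast.is_fast is True
    # Both implementations tokenize identically:
    assert slow.encode("Hello world") == fast.encode("Hello world")

    # The generic form of the same choice, for any model:
    tok = AutoTokenizer.from_pretrained("gpt2", use_fast=False)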

@@ -59,7 +59,7 @@ from utils import debounce
 import utils
 import structures
 import torch
-from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, modeling_utils
+from transformers import StoppingCriteria, GPT2Tokenizer, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer, PreTrainedModel, modeling_utils
 from transformers import __version__ as transformers_version
 import transformers
 try:
@@ -2096,7 +2096,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     global generator
     global torch
     global model_config
-    global GPT2TokenizerFast
+    global GPT2Tokenizer
     global tokenizer
     if(initial_load):
         use_breakmodel_args = True
@@ -2445,7 +2445,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                 if("out of memory" in traceback.format_exc().lower()):
                     raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.")
                 raise e
-            tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+            tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
             vars.modeldim = get_hidden_size_from_model(model)
             # Is CUDA available? If so, use GPU, otherwise fall back to CPU
             if(vars.hascuda and vars.usegpu):
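One practical consequence for local model folders like the vars.custmodpth loaded above: the slow GPT2Tokenizer reads the classic BPE files, vocab.json and merges.txt, whereas the fast variant can also consume a single tokenizer.json. A hedged sketch of that requirement (model_dir is a hypothetical path, not a name from the diff):

    import os
    from transformers import GPT2Tokenizer

    model_dir = "models/my-finetune"  # hypothetical folder, standing in for vars.custmodpth
    # The slow tokenizer needs the classic GPT-2 BPE files to be present:
    for name in ("vocab.json", "merges.txt"):
        assert os.path.exists(os.path.join(model_dir, name)), f"missing {name}"
    tokenizer = GPT2Tokenizer.from_pretrained(model_dir, cache_dir="cache")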
@@ -2496,9 +2496,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             tokenizer = AutoTokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
         except Exception as e:
             try:
-                tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache")
             except Exception as e:
-                tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
         try:
             model = AutoModelForCausalLM.from_pretrained(vars.custmodpth, revision=vars.revision, cache_dir="cache", **lowmem)
         except Exception as e:
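The three-step fallback in this hunk (the model's own tokenizer via AutoTokenizer, then a slow GPT-2 tokenizer read from the same path, then the stock "gpt2" vocabulary) can be read as a single helper. This is a sketch distilled from the diff, not a function that exists in the codebase:

    from transformers import AutoTokenizer, GPT2Tokenizer

    def load_tokenizer_with_fallback(path, revision=None):
        # 1) Prefer whatever tokenizer the model ships with.
        try:
            return AutoTokenizer.from_pretrained(path, revision=revision, cache_dir="cache")
        except Exception:
            pass
        # 2) Fall back to a slow GPT-2 tokenizer read from the same path.
        try:
            return GPT2Tokenizer.from_pretrained(path, revision=revision, cache_dir="cache")
        except Exception:
            # 3) Last resort: the stock "gpt2" vocabulary.
            return GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache")

The next two hunks apply exactly the same pattern to the local models/ mirror and to vars.model.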
@@ -2513,9 +2513,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
         except Exception as e:
             try:
-                tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache")
             except Exception as e:
-                tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
         try:
             model = AutoModelForCausalLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=vars.revision, cache_dir="cache", **lowmem)
         except Exception as e:
@@ -2543,9 +2543,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
         except Exception as e:
             try:
-                tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache")
             except Exception as e:
-                tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+                tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
         try:
             model = AutoModelForCausalLM.from_pretrained(vars.model, revision=vars.revision, cache_dir="cache", **lowmem)
         except Exception as e:
@@ -2625,8 +2625,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             logger.info(f"Pipeline created: {vars.model}")
         else:
-            from transformers import GPT2TokenizerFast
-            tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+            from transformers import GPT2Tokenizer
+            tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
     else:
         from transformers import PreTrainedModel
         from transformers import modeling_utils
@@ -2722,12 +2722,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     # If we're running Colab or OAI, we still need a tokenizer.
     if(vars.model in ("Colab", "API", "CLUSTER")):
-        from transformers import GPT2TokenizerFast
-        tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
+        from transformers import GPT2Tokenizer
+        tokenizer = GPT2Tokenizer.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache")
         loadsettings()
     elif(vars.model == "OAI"):
-        from transformers import GPT2TokenizerFast
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+        from transformers import GPT2Tokenizer
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
         loadsettings()
     # Load the TPU backend if requested
     elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
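The Colab/API/CLUSTER/OAI branches load a tokenizer even though generation happens remotely, because the client still needs local token counts to trim the prompt to the context budget before sending it. A rough sketch of that use, with illustrative names (prompt, budget are not from the diff):

    from transformers import GPT2Tokenizer

    tokenizer = GPT2Tokenizer.from_pretrained("gpt2", cache_dir="cache")
    prompt = "It was a dark and stormy night."
    budget = 2048                              # illustrative context limit
    ids = tokenizer.encode(prompt)
    trimmed = tokenizer.decode(ids[-budget:])  # keep only the newest tokens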
@@ -2982,9 +2982,9 @@ def lua_decode(tokens):
         tokens = list(tokens.values())
     assert type(tokens) is list
     if("tokenizer" not in globals()):
-        from transformers import GPT2TokenizerFast
+        from transformers import GPT2Tokenizer
         global tokenizer
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
     return utils.decodenewlines(tokenizer.decode(tokens))
 #==================================================================#
@@ -2994,9 +2994,9 @@ def lua_decode(tokens):
 def lua_encode(string):
     assert type(string) is str
     if("tokenizer" not in globals()):
-        from transformers import GPT2TokenizerFast
+        from transformers import GPT2Tokenizer
         global tokenizer
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
     return tokenizer.encode(utils.encodenewlines(string), max_length=int(4e9), truncation=True)
 #==================================================================#
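lua_decode, lua_encode, and calcsubmitbudget below all share a lazy-initialization idiom: if no model has populated the module-level tokenizer yet, fall back to the stock "gpt2" vocabulary on first use. Distilled into one sketch (the helper name is invented; the codebase inlines this check instead, and revision/cache arguments are simplified):

    def _ensure_tokenizer():
        global tokenizer
        if "tokenizer" not in globals():
            # Slow, pure-Python tokenizer, as of this commit.
            from transformers import GPT2Tokenizer
            tokenizer = GPT2Tokenizer.from_pretrained("gpt2", cache_dir="cache")
        return tokenizer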
@@ -4565,9 +4565,9 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None,
         lnsp = vars.sp_length
     if("tokenizer" not in globals()):
-        from transformers import GPT2TokenizerFast
+        from transformers import GPT2Tokenizer
         global tokenizer
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
+        tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=vars.revision, cache_dir="cache")
     lnheader = len(tokenizer._koboldai_header)