Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
GPT-NeoX-20B support in Colab TPU instances
aiserver.py (34 changed lines)
@@ -317,7 +317,7 @@ def getmodelname():
     if(args.configname):
         modelname = args.configname
         return modelname
-    if(vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ")):
+    if(vars.model in ("NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
         modelname = os.path.basename(os.path.normpath(vars.custmodpth))
         return modelname
     else:
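Note: for locally stored models this derives the display name from the model folder. A minimal sketch of why both os.path calls are needed (the path below is hypothetical):

    import os

    path = "/content/drive/MyDrive/gpt-neox-20b/"     # hypothetical model folder
    os.path.basename(path)                     # "" because of the trailing slash
    os.path.basename(os.path.normpath(path))   # "gpt-neox-20b"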
@@ -699,7 +699,7 @@ def spRequest(filename):
     vars.sp_length = tensor.shape[-2]
     vars.spmeta["n_tokens"] = vars.sp_length
 
-    if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ",)):
+    if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
         rows = tensor.shape[0]
         padding_amount = tpu_mtj_backend.params["seq"] - (tpu_mtj_backend.params["seq"] % -tpu_mtj_backend.params["cores_per_replica"]) - rows
         tensor = np.pad(tensor, ((0, padding_amount), (0, 0)))
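Note: the padding_amount expression relies on Python's floored modulo: for positive x and n, x % -n lies in (-n, 0], so x - (x % -n) rounds x up to a multiple of n. A minimal sketch with assumed parameter values:

    def soft_prompt_padding(seq, cores_per_replica, rows):
        # Round seq up to a multiple of cores_per_replica, then pad
        # the soft prompt's row count up to that target.
        target = seq - (seq % -cores_per_replica)
        return target - rows

    soft_prompt_padding(2048, 8, 20)  # 2028 -> tensor padded to 2048 rows
    soft_prompt_padding(2049, 8, 20)  # 2036 -> tensor padded to 2056 rows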
@@ -790,7 +790,7 @@ else:
     getModelSelection(mainmenu)
 
 # If transformers model was selected & GPU available, ask to use CPU or GPU
-if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
+if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
     vars.allowsp = True
     # Test for GPU support
     import torch
@@ -830,7 +830,7 @@ if(vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMe
         print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
         vars.model_type = "gpt_neo"
 
-if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
+if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
     loadmodelsettings()
     loadsettings()
     print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="")
@@ -1032,7 +1032,7 @@ socketio = SocketIO(app, async_method="eventlet")
 print("{0}OK!{1}".format(colors.GREEN, colors.END))
 
 # Start transformers and create pipeline
-if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]):
+if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
     if(not vars.noai):
         print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END))
         from transformers import StoppingCriteria, GPT2TokenizerFast, GPT2LMHeadModel, GPTNeoForCausalLM, GPTNeoModel, AutoModelForCausalLM, AutoTokenizer
@@ -1050,7 +1050,7 @@ if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "Go
         if not vars.lazy_load:
             return
 
-        from tqdm import tqdm
+        from tqdm.auto import tqdm
 
         if "breakmodel" in globals():
             gpu_blocks = breakmodel.gpu_blocks
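Note: tqdm.auto matters in Colab: it renders the notebook widget progress bar when one is available and falls back to the plain console bar elsewhere, whereas the top-level tqdm import always uses the console bar. A quick illustration:

    from tqdm.auto import tqdm

    # Notebook: ipywidgets progress bar; terminal: plain text bar.
    for _ in tqdm(range(100), desc="demo"):
        pass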
@@ -1553,9 +1553,9 @@ else:
         tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", cache_dir="cache/")
         loadsettings()
 # Load the TPU backend if requested
-elif(vars.use_colab_tpu or vars.model == "TPUMeshTransformerGPTJ"):
+elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
     print("{0}Initializing Mesh Transformer JAX, please wait...{1}".format(colors.PURPLE, colors.END))
-    if vars.model == "TPUMeshTransformerGPTJ" and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
+    if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
         raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
     import tpu_mtj_backend
     tpu_mtj_backend.vars = vars
@@ -1567,7 +1567,7 @@ else:
     vars.allowsp = True
     loadmodelsettings()
     loadsettings()
-    tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model != "TPUMeshTransformerGPTJ" and vars.use_colab_tpu, **vars.modelconfig)
+    tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
     vars.modeldim = int(tpu_mtj_backend.params["d_model"])
     tokenizer = tpu_mtj_backend.tokenizer
 else:
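Note: the reworked hf_checkpoint flag keeps both native Mesh Transformer JAX checkpoint formats off the Hugging Face conversion path. A sketch of the resulting truth table (the helper name is hypothetical):

    MTJ_NATIVE = ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")

    def is_hf_checkpoint(model, use_colab_tpu):
        # Native MTJ checkpoints are never converted, even on a Colab TPU.
        return model not in MTJ_NATIVE and use_colab_tpu

    is_hf_checkpoint("TPUMeshTransformerGPTNeoX", True)   # False
    is_hf_checkpoint("EleutherAI/gpt-neo-2.7B", True)     # True
    is_hf_checkpoint("EleutherAI/gpt-neo-2.7B", False)    # False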
@@ -2098,7 +2098,7 @@ def lua_get_modeltype():
         return "readonly"
     if(vars.model in ("Colab", "OAI", "InferKit")):
         return "api"
-    if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ",) and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
+    if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))):
         hidden_size = get_hidden_size_from_model(model)
     if(vars.model in ("gpt2",) or (vars.model_type == "gpt2" and hidden_size == 768)):
         return "gpt2"
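Note: the hidden_size read here feeds the model-family checks that follow this hunk (the visible case maps hidden size 768 to "gpt2"). For orientation, a hedged sketch using the publicly documented hidden sizes of these architectures, not values taken from this commit:

    # Well-known hidden sizes from the published model configs:
    HIDDEN_TO_FAMILY = {
        768:  "gpt2",      # GPT-2 small
        4096: "gptj",      # GPT-J 6B
        6144: "gptneox",   # GPT-NeoX 20B
    }

    def guess_family(hidden_size):
        return HIDDEN_TO_FAMILY.get(hidden_size, "unknown")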
@@ -2127,7 +2127,7 @@ def lua_get_modelbackend():
         return "readonly"
     if(vars.model in ("Colab", "OAI", "InferKit")):
         return "api"
-    if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ",)):
+    if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
         return "mtj"
     return "transformers"
 
@@ -2136,7 +2136,7 @@ def lua_get_modelbackend():
 #==================================================================#
 @bridged_kwarg()
 def lua_is_custommodel():
-    return vars.model in ("GPT2Custom", "NeoCustom", "TPUMeshTransformerGPTJ")
+    return vars.model in ("GPT2Custom", "NeoCustom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")
 
 #==================================================================#
 #
@@ -3074,22 +3074,22 @@ def calcsubmit(txt):
     if(vars.model != "InferKit"):
         subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt)
         if(actionlen == 0):
-            if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ"]):
+            if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                 generate(subtxt, min, max, found_entries=found_entries)
             elif(vars.model == "Colab"):
                 sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
             elif(vars.model == "OAI"):
                 oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
-            elif(vars.use_colab_tpu or vars.model == "TPUMeshTransformerGPTJ"):
+            elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
                 tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
         else:
-            if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ"]):
+            if(not vars.use_colab_tpu and vars.model not in ["Colab", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
                 generate(subtxt, min, max, found_entries=found_entries)
             elif(vars.model == "Colab"):
                 sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
             elif(vars.model == "OAI"):
                 oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max)
-            elif(vars.use_colab_tpu or vars.model == "TPUMeshTransformerGPTJ"):
+            elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
                 tpumtjgenerate(subtxt, min, max, found_entries=found_entries)
 
     # For InferKit web API
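Note: the first-turn (actionlen == 0) branch and the continuation branch now share the same four-way dispatch. A condensed sketch of the routing order (the helper name is hypothetical):

    def route_generation(model, use_colab_tpu):
        # Mirrors the if/elif chain in calcsubmit after this commit.
        if not use_colab_tpu and model not in ("Colab", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
            return "generate"        # local transformers generation
        if model == "Colab":
            return "sendtocolab"     # remote Colab instance
        if model == "OAI":
            return "oairequest"      # OpenAI-style API
        if use_colab_tpu or model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"):
            return "tpumtjgenerate"  # Mesh Transformer JAX on TPU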
@@ -5105,7 +5105,7 @@ if(path.exists("settings/" + getmodelname().replace('/', '_') + ".settings")):
     file.close()
 
 # Precompile TPU backend if required
-if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ",)):
+if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")):
     soft_tokens = tpumtjgetsofttokens()
     if(vars.dynamicscan or (not vars.nogenmod and vars.has_genmod)):
         threading.Thread(
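Note: this final hunk extends TPU backend precompilation to NeoX models. The warning text earlier in the file shows that a model can be forced from the command line with --model (e.g. --model GPT2Custom), so launching with --model TPUMeshTransformerGPTNeoX and pointing the custom model path (vars.custmodpth) at a GPT-NeoX-20B checkpoint folder should follow the same flow as the existing GPT-J TPU path. This usage is inferred from the diff rather than stated in the commit.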