Merge pull request #29 from VE-FORBRYDERNE/hidden-size

Fix hidden size detection for GPTJForCausalLM
This commit is contained in:
henk717 2021-11-17 22:30:24 +01:00 committed by GitHub
commit e71271933a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 5 additions and 2 deletions

View File

@@ -609,17 +609,20 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly"]):
try: try:
return int(model.transformer.hidden_size) return int(model.transformer.hidden_size)
except: except:
return int(model.transformer.embed_dim) try:
return int(model.transformer.embed_dim)
except:
return int(model.lm_head.in_features)
# If custom GPT Neo model was chosen # If custom GPT Neo model was chosen
if(vars.model == "NeoCustom"): if(vars.model == "NeoCustom"):
model_config = open(vars.custmodpth + "/config.json", "r") model_config = open(vars.custmodpth + "/config.json", "r")
js = json.load(model_config) js = json.load(model_config)
vars.modeldim = int(js['hidden_size'])
if("model_type" in js): if("model_type" in js):
model = AutoModelForCausalLM.from_pretrained(vars.custmodpth) model = AutoModelForCausalLM.from_pretrained(vars.custmodpth)
else: else:
model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth) model = GPTNeoForCausalLM.from_pretrained(vars.custmodpth)
vars.modeldim = get_hidden_size_from_model(model)
tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth) tokenizer = GPT2Tokenizer.from_pretrained(vars.custmodpth)
# Is CUDA available? If so, use GPU, otherwise fall back to CPU # Is CUDA available? If so, use GPU, otherwise fall back to CPU
if(vars.hascuda): if(vars.hascuda):