Merge pull request #48 from VE-FORBRYDERNE/patch

Disable `low_cpu_mem_usage` when using GPT-2
This commit is contained in:
henk717 2021-12-21 02:45:44 +01:00 committed by GitHub
commit 41d7c2acfe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 10 additions and 3 deletions

View File

@@ -846,7 +846,7 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme
model_config = open(vars.custmodpth + "/config.json", "r") model_config = open(vars.custmodpth + "/config.json", "r")
js = json.load(model_config) js = json.load(model_config)
with(maybe_use_float16()): with(maybe_use_float16()):
model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, cache_dir="cache/", **maybe_low_cpu_mem_usage()) model = GPT2LMHeadModel.from_pretrained(vars.custmodpth, cache_dir="cache/")
tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache/") tokenizer = GPT2TokenizerFast.from_pretrained(vars.custmodpth, cache_dir="cache/")
vars.modeldim = get_hidden_size_from_model(model) vars.modeldim = get_hidden_size_from_model(model)
# Is CUDA available? If so, use GPU, otherwise fall back to CPU # Is CUDA available? If so, use GPU, otherwise fall back to CPU
@@ -858,17 +858,24 @@ if(not vars.model in ["InferKit", "Colab", "OAI", "ReadOnly", "TPUMeshTransforme
generator = model.generate generator = model.generate
# If base HuggingFace model was chosen # If base HuggingFace model was chosen
else: else:
lowmem = maybe_low_cpu_mem_usage()
# We must disable low_cpu_mem_usage (by setting lowmem to {}) if
# using a GPT-2 model because GPT-2 is not compatible with this
# feature yet
if("/" not in vars.model and vars.model.lower().startswith("gpt2")):
lowmem = {}
# Is CUDA available? If so, use GPU, otherwise fall back to CPU # Is CUDA available? If so, use GPU, otherwise fall back to CPU
if(os.path.isdir(vars.model.replace('/', '_'))): if(os.path.isdir(vars.model.replace('/', '_'))):
with(maybe_use_float16()): with(maybe_use_float16()):
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/") tokenizer = GPT2TokenizerFast.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/")
model = AutoModelForCausalLM.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/", **maybe_low_cpu_mem_usage()) model = AutoModelForCausalLM.from_pretrained(vars.model.replace('/', '_'), cache_dir="cache/", **lowmem)
else: else:
print("Model does not exist locally, attempting to download from Huggingface...") print("Model does not exist locally, attempting to download from Huggingface...")
tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, cache_dir="cache/") tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, cache_dir="cache/")
with(maybe_use_float16()): with(maybe_use_float16()):
model = AutoModelForCausalLM.from_pretrained(vars.model, cache_dir="cache/", **maybe_low_cpu_mem_usage()) model = AutoModelForCausalLM.from_pretrained(vars.model, cache_dir="cache/", **lowmem)
model = model.half() model = model.half()
import shutil import shutil
shutil.rmtree("cache/") shutil.rmtree("cache/")