Merge pull request #103 from VE-FORBRYDERNE/neox

Divide GPT-NeoX replicated bias layers by 4 again instead of by 8
This commit is contained in:
henk717 2022-03-21 02:19:32 +01:00 committed by GitHub
commit 38d78d10db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 1 addition and 1 deletion

View File

@@ -885,7 +885,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
 original_shape = shards[0][key].shape
 for checkpoint_shard in range(checkpoint_shards):
 if key in ("attention.dense.bias", "mlp.dense_4h_to_h.bias"):
-shards[checkpoint_shard][key] /= config["cores_per_replica"]
+shards[checkpoint_shard][key] /= output_shards
 if key != "word_embeddings.weight" and shards[checkpoint_shard][key].ndim == 2:
 shards[checkpoint_shard][key] = shards[checkpoint_shard][key].T
 tensor = shards[checkpoint_shard][key]