mirror of
				https://github.com/KoboldAI/KoboldAI-Client.git
				synced 2025-06-05 21:59:24 +02:00 
			
		
		
		
	Merge pull request #103 from VE-FORBRYDERNE/neox
Divide GPT-NeoX replicated bias layers by 4 again instead of by 8
This commit is contained in:
@@ -885,7 +885,7 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
             original_shape = shards[0][key].shape
             for checkpoint_shard in range(checkpoint_shards):
                 if key in ("attention.dense.bias", "mlp.dense_4h_to_h.bias"):
-                    shards[checkpoint_shard][key] /= config["cores_per_replica"]
+                    shards[checkpoint_shard][key] /= output_shards
                 if key != "word_embeddings.weight" and shards[checkpoint_shard][key].ndim == 2:
                     shards[checkpoint_shard][key] = shards[checkpoint_shard][key].T
                 tensor = shards[checkpoint_shard][key]
		Reference in New Issue
	
	Block a user