mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-06-05 21:59:24 +02:00
Add TPU support for OPT-350M
The 350M model seems to have a different structure than the other OPT models.
This commit is contained in:
@@ -3,13 +3,16 @@
|
||||
"mtj_pe": "fixed",
|
||||
"mtj_config_map": {
|
||||
"do_layer_norm_before": ["do_layer_norm_before", true],
|
||||
"d_embed": "word_embed_proj_dim",
|
||||
"d_model": "hidden_size",
|
||||
"n_heads": "num_attention_heads",
|
||||
"layers": "num_hidden_layers"
|
||||
},
|
||||
"static_weights": {
|
||||
"decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
|
||||
"decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}}
|
||||
"decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}},
|
||||
"decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}},
|
||||
"decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}}
|
||||
},
|
||||
"layer_weights": {
|
||||
"decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
|
||||
|
Reference in New Issue
Block a user