Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-02-23 15:07:53 +01:00)
Upload maps/opt.json and update requirements
parent: 376e76f5da
commit: f5e689a725
maps/opt.json (new file, 32 lines)
@@ -0,0 +1,32 @@
{
    "mtj_compat": "opt",
    "mtj_pe": "fixed",
    "mtj_config_map": {
        "do_layer_norm_before": ["do_layer_norm_before", true],
        "d_model": "hidden_size",
        "n_heads": "num_attention_heads",
        "layers": "num_hidden_layers"
    },
    "static_weights": {
        "decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
        "decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}}
    },
    "layer_weights": {
        "decoder.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
        "decoder.layers.{layer}.self_attn.q_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear", "param": "b"}},
        "decoder.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}},
        "decoder.layers.{layer}.self_attn.v_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "b"}},
        "decoder.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}},
        "decoder.layers.{layer}.self_attn.k_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "b"}},
        "decoder.layers.{layer}.self_attn.out_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
        "decoder.layers.{layer}.self_attn.out_proj.bias": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "b", "transforms": ["divide_by_shards"]}},
        "decoder.layers.{layer}.fc1.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
        "decoder.layers.{layer}.fc1.bias": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "b"}},
        "decoder.layers.{layer}.fc2.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
        "decoder.layers.{layer}.fc2.bias": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "b", "transforms": ["divide_by_shards"]}},
        "decoder.layers.{layer}.self_attn_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
        "decoder.layers.{layer}.self_attn_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "offset"}},
        "decoder.layers.{layer}.final_layer_norm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}},
        "decoder.layers.{layer}.final_layer_norm.bias": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "offset"}}
    }
}
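For context, a map file like this drives the Hugging Face-to-MTJ weight conversion: each checkpoint key is looked up to find the MTJ module and parameter it feeds, plus any transforms to apply first, with "{layer}" filled in per decoder layer. Below is a minimal sketch of how such a map could be consumed; the helper name and lookup loop are illustrative assumptions, not the project's actual loader.

    import json

    with open("maps/opt.json") as f:
        spec = json.load(f)

    def mtj_target(hf_key, num_layers):
        # Return (module, param, transforms) for a checkpoint key, or None.
        # Static weights match exactly; layer weights match after filling
        # in the {layer} placeholder for each decoder layer.
        entry = spec["static_weights"].get(hf_key)
        if entry is not None:
            mtj = entry["mtj"]
            return mtj["module"], mtj["param"], mtj.get("transforms", [])
        for layer in range(num_layers):
            for template, value in spec["layer_weights"].items():
                if template.format(layer=layer) == hf_key:
                    mtj = value["mtj"]
                    return (mtj["module"].format(layer=layer),
                            mtj["param"], mtj.get("transforms", []))
        return None

    # Example: mtj_target("decoder.layers.0.self_attn.q_proj.weight", 24)
    # -> ("layer_0/~/linear", "w", [])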
@@ -1,4 +1,4 @@
-transformers>=4.17
+transformers>=4.19
 Flask
 Flask-SocketIO
 requests
@@ -5,9 +5,9 @@ requests
 optax >= 0.0.5, <= 0.0.9
 dm-haiku == 0.0.5
 jax == 0.2.21
-transformers >= 4.17
+transformers >= 4.19
 progressbar2
-git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
+git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck-staging
 flask
 Flask-SocketIO
 flask-cloudflared >= 0.0.5
@@ -1200,6 +1200,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
 
+# MTJ requires certain mathematical operations to be performed
+# on tensors in order for them to be in the correct format
 if "remove_first_two_rows" in transforms:
     tensor = tensor[2:]
 if "divide_by_shards" in transforms:
     tensor /= params["cores_per_replica"]
 if "vocab_pad" in transforms:
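To make the transform names concrete, here is a self-contained sketch of what each one does to a weight tensor. The function name, the numpy usage, and the padding target are assumptions for illustration, not KoboldAI's actual code, which applies these steps to the real checkpoint tensors inside load_model; "no_transpose", which skips the default transpose of linear weights, is handled where the tensor is copied and is omitted here.

    import numpy as np

    def apply_transforms(tensor, transforms, cores_per_replica, pad_multiple=8):
        # Illustrative only: signature and pad_multiple are assumptions.
        if "remove_first_two_rows" in transforms:
            # OPT's learned position embeddings reserve the first two rows
            # (an offset inherited from fairseq), so MTJ drops them.
            tensor = tensor[2:]
        if "divide_by_shards" in transforms:
            # Biases added on every model-parallel shard are pre-divided so
            # that summing across shards reproduces the original value.
            tensor = tensor / cores_per_replica
        if "vocab_pad" in transforms:
            # Pad the vocabulary dimension up to a multiple that splits
            # evenly across shards (the exact target is an assumption here).
            rows = tensor.shape[0]
            target = -(-rows // pad_multiple) * pad_multiple
            tensor = np.pad(tensor, ((0, target - rows), (0, 0)))
        return tensor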