From a7e3ef71aaf7258cd02f122fbbef9dfdfe4770d9 Mon Sep 17 00:00:00 2001
From: Gnome Ann <>
Date: Tue, 21 Jun 2022 16:36:26 -0400
Subject: [PATCH 1/2] Add final layer norm to OPT

---
 maps/opt.json | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/maps/opt.json b/maps/opt.json
index c99ae19f..59d41cd5 100644
--- a/maps/opt.json
+++ b/maps/opt.json
@@ -12,6 +12,8 @@
         "decoder.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
         "decoder.project_in.weight": {"mtj": {"module": "embedding_shard", "param": "project_in"}},
         "decoder.embed_positions.weight": {"mtj": {"module": "embedding_shard", "param": "pos_embs", "transforms": ["no_transpose", "remove_first_two_rows"]}},
+        "decoder.final_layer_norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
+        "decoder.final_layer_norm.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}},
         "decoder.project_out.weight": {"mtj": {"module": "projection_shard", "param": "project_out"}}
     },
     "layer_weights": {

From 33a2a318db56f3564c683fd4434127469cf237ed Mon Sep 17 00:00:00 2001
From: Gnome Ann <>
Date: Tue, 21 Jun 2022 17:16:01 -0400
Subject: [PATCH 2/2] Fix 20B TPU model

---
 tpu_mtj_backend.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py
index bc228998..db31b902 100644
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1119,6 +1119,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 return old_encode(s).ids
             return encode
         tokenizer.encode = new_encode(tokenizer.encode)
+        tokenizer._koboldai_header = []
     elif not hf_checkpoint:
         if not isinstance(params["tokenizer_class"], str) or not any(params["tokenizer_class"].endswith(s) for s in ("Tokenizer", "TokenizerFast")):
             raise ValueError("`tokenizer_class` must be a string ending in 'Tokenizer' or 'TokenizerFast'")