From fca7f8659fb32ce3a70d10854b2f6876c5e26c19 Mon Sep 17 00:00:00 2001 From: henk717 Date: Sat, 29 Jan 2022 18:09:53 +0100 Subject: [PATCH] Badwords unification TPU's no longer use hardcoded badwords but instead use the var --- aiserver.py | 1 + tpu_mtj_backend.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index e738e2ad..50194802 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1058,6 +1058,7 @@ else: print("{0}Initializing Mesh Transformer JAX, please wait...{1}".format(colors.PURPLE, colors.END)) assert vars.model == "TPUMeshTransformerGPTJ" and vars.custmodpth and os.path.isdir(vars.custmodpth) import tpu_mtj_backend + tpu_mtj_backend.vars = vars tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback tpu_mtj_backend.compiling_callback = tpumtjgenerate_compiling_callback diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index 653f8cf1..40059425 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -450,7 +450,7 @@ class PenalizingCausalTransformer(CausalTransformer): compiling_callback() numseqs = numseqs_aux.shape[0] # These are the tokens that we don't want the AI to ever write - self.badwords = jnp.array([6880, 50256, 42496, 4613, 17414, 22039, 16410, 27, 29, 38430, 37922, 15913, 24618, 28725, 58, 47175, 36937, 26700, 12878, 16471, 37981, 5218, 29795, 13412, 45160, 3693, 49778, 4211, 20598, 36475, 33409, 44167, 32406, 29847, 29342, 42669, 685, 25787, 7359, 3784, 5320, 33994, 33490, 34516, 43734, 17635, 24293, 9959, 23785, 21737, 28401, 18161, 26358, 32509, 1279, 38155, 18189, 26894, 6927, 14610, 23834, 11037, 14631, 26933, 46904, 22330, 25915, 47934, 38214, 1875, 14692, 41832, 13163, 25970, 29565, 44926, 19841, 37250, 49029, 9609, 44438, 16791, 17816, 30109, 41888, 47527, 42924, 23984, 49074, 33717, 31161, 49082, 30138, 31175, 12240, 14804, 7131, 26076, 33250, 3556, 38381, 36338, 32756, 46581, 17912, 49146]) + self.badwords = jnp.array(vars.badwordsids).squeeze() @hk.transform def generate_sample(context, ctx_length): # Give the initial context to the transformer @@ -827,7 +827,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", **kwargs) global badwords # These are the tokens that we don't want the AI to ever write - badwords = jnp.array([6880, 50256, 42496, 4613, 17414, 22039, 16410, 27, 29, 38430, 37922, 15913, 24618, 28725, 58, 47175, 36937, 26700, 12878, 16471, 37981, 5218, 29795, 13412, 45160, 3693, 49778, 4211, 20598, 36475, 33409, 44167, 32406, 29847, 29342, 42669, 685, 25787, 7359, 3784, 5320, 33994, 33490, 34516, 43734, 17635, 24293, 9959, 23785, 21737, 28401, 18161, 26358, 32509, 1279, 38155, 18189, 26894, 6927, 14610, 23834, 11037, 14631, 26933, 46904, 22330, 25915, 47934, 38214, 1875, 14692, 41832, 13163, 25970, 29565, 44926, 19841, 37250, 49029, 9609, 44438, 16791, 17816, 30109, 41888, 47527, 42924, 23984, 49074, 33717, 31161, 49082, 30138, 31175, 12240, 14804, 7131, 26076, 33250, 3556, 38381, 36338, 32756, 46581, 17912, 49146]) + badwords = jnp.array(vars.badwordsids).squeeze() if not path.endswith("/"): path += "/"