diff --git a/aiserver.py b/aiserver.py
index 9ba299c4..b8a90b2d 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1,7 +1,7 @@
 #!/usr/bin/python3
 #==================================================================#
 # KoboldAI
-# Version: 1.19.1
+# Version: 1.19.2
 # By: The KoboldAI Community
 #==================================================================#
 
@@ -171,6 +171,7 @@ model_menu = {
         ["NSFW Models", "nsfwlist", "", True],
         ["Untuned OPT", "optlist", "", True],
         ["Untuned GPT-Neo/J", "gptneolist", "", True],
+        ["Untuned Pythia", "pythialist", "", True],
         ["Untuned Fairseq Dense", "fsdlist", "", True],
         ["Untuned Bloom", "bloomlist", "", True],
         ["Untuned XGLM", "xglmlist", "", True],
@@ -201,6 +202,7 @@ model_menu = {
         ["OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB", False],
         ["Janeway FSD 6.7B", "KoboldAI/fairseq-dense-6.7B-Janeway", "16GB", False],
         ["Janeway Neo 6B", "KoboldAI/GPT-J-6B-Janeway", "16GB", False],
+        ["Qilin Lit 6B (SFW)", "rexwang8/qilin-lit-6b", "16GB", False],
         ["Janeway Neo 2.7B", "KoboldAI/GPT-Neo-2.7B-Janeway", "8GB", False],
         ["Janeway FSD 2.7B", "KoboldAI/fairseq-dense-2.7B-Janeway", "8GB", False],
         ["Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB", False],
@@ -230,12 +232,31 @@ model_menu = {
         ],
     'gptneolist': [
         ["GPT-NeoX 20B", "EleutherAI/gpt-neox-20b", "64GB", False],
+        ["Pythia 13B (NeoX, Same dataset)", "EleutherAI/pythia-13b", "32GB", False],
         ["GPT-J 6B", "EleutherAI/gpt-j-6B", "16GB", False],
         ["GPT-Neo 2.7B", "EleutherAI/gpt-neo-2.7B", "8GB", False],
         ["GPT-Neo 1.3B", "EleutherAI/gpt-neo-1.3B", "6GB", False],
+        ["Pythia 800M (NeoX, Same dataset)", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M (NeoX, Same dataset)", "EleutherAI/pythia-350m", "2GB", False],
         ["GPT-Neo 125M", "EleutherAI/gpt-neo-125M", "2GB", False],
         ["Return to Main Menu", "mainmenu", "", True],
         ],
+    'pythialist': [
+        ["Pythia 13B Deduped", "EleutherAI/pythia-13b-deduped", "32GB", False],
+        ["Pythia 13B", "EleutherAI/pythia-13b", "32GB", False],
+        ["Pythia 6.7B Deduped", "EleutherAI/pythia-6.7b-deduped", "16GB", False],
+        ["Pythia 6.7B", "EleutherAI/pythia-6.7b", "16GB", False],
+        ["Pythia 1.3B Deduped", "EleutherAI/pythia-1.3b-deduped", "6GB", False],
+        ["Pythia 1.3B", "EleutherAI/pythia-1.3b", "6GB", False],
+        ["Pythia 800M", "EleutherAI/pythia-800m", "4GB", False],
+        ["Pythia 350M Deduped", "EleutherAI/pythia-350m-deduped", "2GB", False],
+        ["Pythia 350M", "EleutherAI/pythia-350m", "2GB", False],
+        ["Pythia 125M Deduped", "EleutherAI/pythia-125m-deduped", "2GB", False],
+        ["Pythia 125M", "EleutherAI/pythia-125m", "2GB", False],
+        ["Pythia 19M Deduped", "EleutherAI/pythia-19m-deduped", "1GB", False],
+        ["Pythia 19M", "EleutherAI/pythia-19m", "1GB", False],
+        ["Return to Main Menu", "mainmenu", "", True],
+        ],
     'gpt2list': [
         ["GPT-2 XL", "gpt2-xl", "6GB", False],
         ["GPT-2 Large", "gpt2-large", "4GB", False],
@@ -1166,7 +1187,7 @@ def loadmodelsettings():
         if("nobreakmodel" in js):
             koboldai_vars.nobreakmodel = js["nobreakmodel"]
         if("sampler_order" in js):
-            sampler_order = koboldai_vars.sampler_order
+            sampler_order = js["sampler_order"]
             if(len(sampler_order) < 7):
                 sampler_order = [6] + sampler_order
             koboldai_vars.sampler_order = sampler_order
@@ -1247,6 +1268,119 @@ def loadsettings():
         with open("settings/" + getmodelname().replace('/', '_') + ".v2_settings", "r") as file:
             getattr(koboldai_vars, "_model_settings").from_json(file.read())
 
+        processsettings(js)
+        file.close()
+    if(path.exists(get_config_filename())):
+        # Read file contents into JSON object
+        file = open(get_config_filename(), "r")
+        js = json.load(file)
+
+        processsettings(js)
+        file.close()
+
+def processsettings(js):
+# Copy file contents to vars
+    if("apikey" in js):
+        # If the model is the HORDE, then previously saved API key in settings
+        # Will always override a new key set.
+        if koboldai_vars.model != "CLUSTER" or koboldai_vars.apikey == '':
+            koboldai_vars.apikey = js["apikey"]
+    if("andepth" in js):
+        koboldai_vars.andepth = js["andepth"]
+    if("sampler_order" in js):
+        sampler_order = js["sampler_order"]
+        if(len(sampler_order) < 7):
+            sampler_order = [6] + sampler_order
+        koboldai_vars.sampler_order = sampler_order
+    if("temp" in js):
+        koboldai_vars.temp = js["temp"]
+    if("top_p" in js):
+        koboldai_vars.top_p = js["top_p"]
+    if("top_k" in js):
+        koboldai_vars.top_k = js["top_k"]
+    if("tfs" in js):
+        koboldai_vars.tfs = js["tfs"]
+    if("typical" in js):
+        koboldai_vars.typical = js["typical"]
+    if("top_a" in js):
+        koboldai_vars.top_a = js["top_a"]
+    if("rep_pen" in js):
+        koboldai_vars.rep_pen = js["rep_pen"]
+    if("rep_pen_slope" in js):
+        koboldai_vars.rep_pen_slope = js["rep_pen_slope"]
+    if("rep_pen_range" in js):
+        koboldai_vars.rep_pen_range = js["rep_pen_range"]
+    if("genamt" in js):
+        koboldai_vars.genamt = js["genamt"]
+    if("max_length" in js):
+        koboldai_vars.max_length = js["max_length"]
+    if("ikgen" in js):
+        koboldai_vars.ikgen = js["ikgen"]
+    if("formatoptns" in js):
+        koboldai_vars.formatoptns = js["formatoptns"]
+    if("numseqs" in js):
+        koboldai_vars.numseqs = js["numseqs"]
+    if("widepth" in js):
+        koboldai_vars.widepth = js["widepth"]
+    if("useprompt" in js):
+        koboldai_vars.useprompt = js["useprompt"]
+    if("adventure" in js):
+        koboldai_vars.adventure = js["adventure"]
+    if("chatmode" in js):
+        koboldai_vars.chatmode = js["chatmode"]
+    if("chatname" in js):
+        koboldai_vars.chatname = js["chatname"]
+    if("dynamicscan" in js):
+        koboldai_vars.dynamicscan = js["dynamicscan"]
+    if("nopromptgen" in js):
+        koboldai_vars.nopromptgen = js["nopromptgen"]
+    if("rngpersist" in js):
+        koboldai_vars.rngpersist = js["rngpersist"]
+    if("nogenmod" in js):
+        koboldai_vars.nogenmod = js["nogenmod"]
+    if("fulldeterminism" in js):
+        koboldai_vars.full_determinism = js["fulldeterminism"]
+    if("autosave" in js):
+        koboldai_vars.autosave = js["autosave"]
+    if("newlinemode" in js):
+        koboldai_vars.newlinemode = js["newlinemode"]
+    if("welcome" in js):
+        koboldai_vars.welcome = js["welcome"]
+    if("output_streaming" in js):
+        koboldai_vars.output_streaming = js["output_streaming"]
+    if("show_probs" in js):
+        koboldai_vars.show_probs = js["show_probs"]
+    if("show_budget" in js):
+        koboldai_vars.show_budget = js["show_budget"]
+
+    if("seed" in js):
+        koboldai_vars.seed = js["seed"]
+        if(koboldai_vars.seed is not None):
+            koboldai_vars.seed_specified = True
+        else:
+            koboldai_vars.seed_specified = False
+    else:
+        koboldai_vars.seed_specified = False
+
+    if("antemplate" in js):
+        koboldai_vars.setauthornotetemplate = js["antemplate"]
+        if(not koboldai_vars.gamestarted):
+            koboldai_vars.authornotetemplate = koboldai_vars.setauthornotetemplate
+
+    if("userscripts" in js):
+        koboldai_vars.userscripts = []
+        for userscript in js["userscripts"]:
+            if type(userscript) is not str:
+                continue
+            userscript = userscript.strip()
+            if len(userscript) != 0 and all(q not in userscript for q in ("..", ":")) and all(userscript[0] not in q for q in ("/", "\\")) and os.path.exists(fileops.uspath(userscript)):
+                koboldai_vars.userscripts.append(userscript)
+
type(js["corescript"]) is str and all(q not in js["corescript"] for q in ("..", ":")) and all(js["corescript"][0] not in q for q in ("/", "\\"))): + koboldai_vars.corescript = js["corescript"] + else: + koboldai_vars.corescript = "default.lua" + #==================================================================# # Load a soft prompt from a file #==================================================================# @@ -2802,6 +2936,15 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if utils.num_shards is None or utils.current_shard >= utils.num_shards: if utils.offload_index: for name, tensor in utils.named_buffers: + dtype = tensor.dtype + if convert_to_float16 and breakmodel.primary_device != "cpu" and vars.hascuda and (vars.breakmodel or vars.usegpu): + dtype = torch.float16 + if breakmodel.primary_device == "cpu" or (not vars.usegpu and not vars.breakmodel): + dtype = torch.float32 + if name in model_dict and model_dict[name].dtype is not dtype: + model_dict[name] = model_dict[name].to(dtype) + if tensor.dtype is not dtype: + tensor = tensor.to(dtype) if name not in utils.offload_index: accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index) accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache") @@ -2972,7 +3115,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if not args.colab or args.savemodel: import shutil tokenizer.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_'))) - if(koboldai_vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16 + if(koboldai_vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)): # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case model = model.half() model.save_pretrained("models/{}".format(koboldai_vars.model.replace('/', '_')), max_shard_size="500MiB") else: # For fp16 models, we can just copy the model files directly diff --git a/environments/finetuneanon.yml b/environments/finetuneanon.yml deleted file mode 100644 index 85d5ea66..00000000 --- a/environments/finetuneanon.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: koboldai -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - colorama - - flask-socketio - - flask-session - - pytorch - - cudatoolkit=11.1 - - tensorflow-gpu - - python=3.8.* - - eventlet - - markdown - - bleach=4.1.0 - - pip - - git=2.35.1 - - marshmallow>=3.13 - - apispec-webframeworks - - loguru - - pip: - - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b - - flask-cloudflared - - flask-ngrok - - lupa==1.10 diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 90cbd1de..c5f43d6f 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -28,7 +28,8 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers>=4.24.0 + - transformers==4.24.0 + - huggingface_hub>=0.10.1 - accelerate - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session diff --git a/environments/rocm-finetune.yml b/environments/rocm-finetune.yml deleted file mode 100644 index fc56eb4f..00000000 --- a/environments/rocm-finetune.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: koboldai-ft -channels: - - conda-forge - - defaults -dependencies: - - colorama - - flask-socketio - - flask-session - - python=3.8.* - - eventlet - - markdown - - bleach=4.1.0 - - pip - - git=2.35.1 - - 
marshmallow>=3.13 - - apispec-webframeworks - - loguru - - pip: - - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html - - torch - - torchvision==0.11.1 - - flask-cloudflared - - git+https://github.com/finetuneanon/transformers@gpt-neo-localattention3-rp-b - - flask-ngrok - - lupa==1.10 diff --git a/environments/rocm.yml b/environments/rocm.yml index cb0edcbc..b07869bd 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -28,7 +28,8 @@ dependencies: - flask-cloudflared - flask-ngrok - lupa==1.10 - - transformers>=4.24.0 + - ttransformers==4.24.0 + - huggingface_hub>=0.10.1 - accelerate - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html diff --git a/fileops.py b/fileops.py index 213e0c3e..165fe022 100644 --- a/fileops.py +++ b/fileops.py @@ -86,7 +86,7 @@ def uspath(filename): def getstoryfiles(): list = [] for file in listdir("stories"): - if file.endswith(".json"): + if file.endswith(".json") and not file.endswith(".v2.json"): ob = {} ob["name"] = file.replace(".json", "") f = open("stories/"+file, "r") diff --git a/maps/gptj.json b/maps/gptj.json index 8e0bc9da..08b22130 100644 --- a/maps/gptj.json +++ b/maps/gptj.json @@ -9,11 +9,11 @@ }, "static_weights": { "transformer.wte.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, - "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b"}}, + "transformer.wte.bias": {"mtj": {"module": "embedding_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}}, "transformer.ln_f.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, "transformer.ln_f.bias": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "offset"}}, "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}}, - "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b"}} + "lm_head.bias": {"mtj": {"module": "projection_shard/~/linear", "param": "b", "transforms": ["vocab_pad"]}} }, "layer_weights": { "transformer.h.{layer}.attn.bias": {}, diff --git a/play.bat b/play.bat index a44f0afa..c3d96045 100644 --- a/play.bat +++ b/play.bat @@ -2,6 +2,8 @@ cd /D %~dp0 SET CONDA_SHLVL= +rmdir /S /Q flask_session + TITLE KoboldAI - Server SET /P M==4.20.1 +transformers==4.24.0 +huggingface_hub>=0.10.1 Flask Flask-SocketIO requests diff --git a/requirements_mtj.txt b/requirements_mtj.txt index e8817e6f..69a5f25d 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,8 @@ requests dm-haiku == 0.0.5 jax == 0.2.21 jaxlib >= 0.1.69, <= 0.3.7 -transformers >=4.20.1 +transformers == 4.24.0 +huggingface_hub >= 0.10.1 progressbar2 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck flask diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index f29adda4..cbe67168 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -1180,7 +1180,8 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo params[param] = default_params[param] # Use an optimization that will allow us to avoid one extra transpose operation - params["transposed_linear"] = True + if hf_checkpoint: + params["transposed_linear"] = True # Load tokenizer if koboldai_vars.model == "TPUMeshTransformerGPTNeoX": @@ -1376,7 +1377,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo if "divide_by_shards" in transforms: tensor /= params["cores_per_replica"] if "vocab_pad" in transforms: - tensor 
-                    tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
+                    tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
                 # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
                 #if "no_transpose" not in transforms and tensor.ndim == 2:
                 #    tensor = tensor.T