From a9ef4751420d24864ef9d13f0ae934d82592f741 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 3 May 2023 17:57:38 -0500 Subject: [PATCH 001/102] Lock safetensors in version jail Let's have breaking changes when we expect them --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 669d5d6d..2ac51431 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -33,7 +33,7 @@ dependencies: - lupa==1.10 - transformers==4.28.0 - huggingface_hub==0.12.1 - - safetensors + - safetensors==0.3.1 - accelerate==0.18.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session diff --git a/environments/rocm.yml b/environments/rocm.yml index dc2dd40e..1329612b 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -32,7 +32,7 @@ dependencies: - lupa==1.10 - transformers==4.28.0 - huggingface_hub==0.12.1 - - safetensors + - safetensors==0.3.1 - accelerate==0.18.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html diff --git a/requirements.txt b/requirements.txt index 6407303c..cb567b08 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,4 +36,4 @@ pytest==7.2.2 pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 -safetensors \ No newline at end of file +safetensors==0.3.1 From 35b56117e6423dfcdedeffc13e4308da4b6342bc Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 3 May 2023 18:51:01 -0500 Subject: [PATCH 002/102] Basic PEFT support --- aiserver.py | 1 + environments/huggingface.yml | 1 + environments/rocm.yml | 1 + modeling/inference_models/hf_torch.py | 33 ++++++++++++++++++++++++++- models/peft/README.txt | 2 ++ requirements.txt | 1 + 6 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 models/peft/README.txt diff --git a/aiserver.py b/aiserver.py index 8d481b75..6af7f2b1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1345,6 +1345,7 @@ def general_startup(override_args=None): parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation") parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)") + parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)") parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles") parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. 
This value increases the amount of logging seen in your screen") diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 2ac51431..b4df45ec 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -45,3 +45,4 @@ dependencies: - ftfy - pydub - diffusers + - peft==0.3.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index 1329612b..a33a8f96 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -41,3 +41,4 @@ dependencies: - ftfy - pydub - diffusers + - peft==0.3.0 diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 49cdfc0f..324cf953 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -22,6 +22,7 @@ from transformers import ( AutoModelForCausalLM, LogitsProcessorList, ) +from peft import PeftModel, PeftConfig import utils import modeling.lazy_loader as lazy_loader @@ -211,6 +212,31 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample + # PEFT Loading. This MUST be done after all save_pretrained calls are + # finished on the main model. + if utils.args.peft: + peft_local_path = os.path.join("models/peft", utils.args.peft.replace("/", "_")) + logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") + + peft_installed_locally = True + possible_peft_locations = [peft_local_path, utils.args.peft] + + for i, location in enumerate(possible_peft_locations): + try: + m_self.model = PeftModel.from_pretrained(m_self.model, location) + logger.debug(f"Loaded PEFT at '{location}'") + break + except ValueError: + peft_installed_locally = False + if i == len(possible_peft_locations) - 1: + raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") + except RuntimeError: + raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") + + if not peft_installed_locally: + logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") + m_self.model.save_pretrained(peft_local_path) + return super()._post_load() def _raw_generate( @@ -238,8 +264,13 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() + + # HEED & BEWARE: All arguments passed to self.model.generate MUST be + # kwargs; see https://github.com/huggingface/peft/issues/232. If they + # aren't, PeftModel will EXPLODE!!!! But nothing will happen without + # a PEFT loaded so it's sneaky. genout = self.model.generate( - gen_in, + input_ids=gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length diff --git a/models/peft/README.txt b/models/peft/README.txt new file mode 100644 index 00000000..fc7b72c4 --- /dev/null +++ b/models/peft/README.txt @@ -0,0 +1,2 @@ +PEFT models will be stored in this directory when downloaded. +Please don't be too mean to this directory. 
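For reference, the switch in _raw_generate above to generate(input_ids=gen_in, ...) is needed because PeftModel.generate only handles keyword arguments correctly in peft 0.3.0 (see https://github.com/huggingface/peft/issues/232); with no PEFT loaded the positional form still works, which is why the problem is easy to miss. A minimal, self-contained sketch of the same load-and-generate pattern, using a placeholder base model and a hypothetical adapter path rather than anything KoboldAI itself ships, looks roughly like this:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "EleutherAI/gpt-neo-125m"   # placeholder base model
adapter_path = "models/peft/my_adapter"     # hypothetical local adapter directory

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = AutoModelForCausalLM.from_pretrained(base_model_id)

# Wrap the base model with the adapter weights, as _post_load() does above.
model = PeftModel.from_pretrained(model, adapter_path)

prompt_ids = tokenizer("Hello", return_tensors="pt").input_ids
with torch.no_grad():
    # Pass the prompt as a keyword argument; a positional argument breaks
    # PeftModel.generate in peft 0.3.0.
    output = model.generate(input_ids=prompt_ids, do_sample=True, max_length=32)
print(tokenizer.decode(output[0]))
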
diff --git a/requirements.txt b/requirements.txt index cb567b08..800877ad 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,3 +37,4 @@ pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 safetensors==0.3.1 +peft==0.3.0 From 91463a4d9790a8f09b0e573161a4e513e2db2b26 Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Thu, 4 May 2023 01:47:41 +0100 Subject: [PATCH 003/102] feat: llama config and updated mtj requirement --- maps/llama.json | 35 +++++++++++++++++++++++++++++++++++ requirements_mtj.txt | 2 +- 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 maps/llama.json diff --git a/maps/llama.json b/maps/llama.json new file mode 100644 index 00000000..c1da6491 --- /dev/null +++ b/maps/llama.json @@ -0,0 +1,35 @@ +{ + "mtj_compat": "llama", + "mtj_pe": "neox_rotary", + "mtj_config_map": { + "norm": ["norm", "layernorm-nobias"], + "pe_rotary_dims": ["pe_rotary_dims", 128], + "d_model": "hidden_size", + "n_heads": "num_attention_heads", + "n_vocab": "vocab_size", + "layers": "num_hidden_layers", + "seq": "max_position_embeddings", + "tokenizer_class": ["tokenizer_class", "LlamaTokenizer"], + "tokenizer": ["tokenizer", "llama"] + }, + "static_weights": { + "model.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}}, + "model.norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}}, + "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}} + }, + "layer_weights": { + "transformer.h.{layer}.attn.attention.bias": {}, + "transformer.h.{layer}.attn.attention.masked_bias": {}, + "model.layers.{layer}.self_attn.rotary_emb.inv_freq": {}, + "model.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}}, + "model.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}}, + "model.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}}, + "model.layers.{layer}.self_attn.o_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}}, + "model.layers.{layer}.mlp.gate_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}}, + "model.layers.{layer}.mlp.down_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}}, + "model.layers.{layer}.mlp.up_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_6", "param": "w"}}, + "model.layers.{layer}.input_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}}, + "model.layers.{layer}.post_attention_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}} + } + } + \ No newline at end of file diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 19da3910..ef9bb2b4 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -9,7 +9,7 @@ transformers == 4.28.0 chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 -git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck +git+https://github.com/Zurnaz/mesh-transformer-jax.git@llama_tpu Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 From 33745669dd8a08d3c5d743de67c0ed169a5d1dce Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 13:14:58 +0200 Subject: [PATCH 004/102] Pytorch 2.0 --- environments/huggingface.yml | 6 ++++-- requirements.txt | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git 
a/environments/huggingface.yml b/environments/huggingface.yml index 669d5d6d..b8d640fb 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -1,6 +1,7 @@ name: koboldai channels: - pytorch + - nvidia - conda-forge - defaults dependencies: @@ -9,9 +10,9 @@ dependencies: - flask-socketio=5.3.2 - flask-session=0.4.0 - python-socketio=5.7.2 - - pytorch=1.11.* + - pytorch=2.0.* - python=3.8.* - - cudatoolkit=11.1 + - pytorch-cuda=11.8 - eventlet=0.33.3 - dnspython=2.2.1 - markdown @@ -45,3 +46,4 @@ dependencies: - ftfy - pydub - diffusers + - peft diff --git a/requirements.txt b/requirements.txt index 6407303c..584e7377 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 requests -torch >= 1.9, < 1.13 +torch == 2.0.* flask-cloudflared==0.0.10 flask-ngrok flask-cors @@ -36,4 +36,5 @@ pytest==7.2.2 pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 -safetensors \ No newline at end of file +safetensors +peft \ No newline at end of file From b1722081a505019a269bedb1a6177ab4874da765 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 15:12:59 +0200 Subject: [PATCH 005/102] AMD Pytorch 2.0 --- environments/rocm.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index dc2dd40e..6213089b 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -24,8 +24,8 @@ dependencies: - Pillow - psutil - pip: - - --extra-index-url https://download.pytorch.org/whl/rocm5.2 - - torch==1.13.1+rocm5.2 + - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 + - torch==2.0.* - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors @@ -41,3 +41,4 @@ dependencies: - ftfy - pydub - diffusers + - peft From 33969b5845fec660d206ba9deba79de5500d4e6c Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 17:23:01 +0200 Subject: [PATCH 006/102] Basic HF code execution support --- aiserver.py | 12 +++++++++--- koboldai_settings.py | 5 +++-- modeling/inference_models/hf_torch.py | 1 + 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 8d481b75..32823dd0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1345,7 +1345,8 @@ def general_startup(override_args=None): parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6") parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation") parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)") - + parser.add_argument("--trust_remote_code", action='store_true', default=False, help="Allow Huggingface Models to Execute Code (Insecure!)") + parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles") parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen") parser.add_argument('-q', '--quiesce', action='count', default=0, help="The default logging level is ERROR or higher. 
This value decreases the amount of logging seen in your screen") @@ -1469,8 +1470,13 @@ def general_startup(override_args=None): allowed_ips = sorted(allowed_ips, key=lambda ip: int(''.join([i.zfill(3) for i in ip.split('.')]))) print(f"Allowed IPs: {allowed_ips}") - - + if args.trust_remote_code: + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False diff --git a/koboldai_settings.py b/koboldai_settings.py index dfccd4ef..d8416df2 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1207,12 +1207,12 @@ class system_settings(settings): local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui', 'sp', '_horde_pid', 'inference_config', 'image_pipeline', - 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui'] + 'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code'] no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold', 'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy', 'serverstarted', 'inference_config', 'image_pipeline', 'summarizer', 'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model', - 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch'] + 'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code'] settings_name = "system" def __init__(self, socketio, koboldai_var): self._socketio = socketio @@ -1298,6 +1298,7 @@ class system_settings(settings): self.seen_messages = [] self.git_repository = "" self.git_branch = "" + self.trust_remote_code = False @dataclass diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 49cdfc0f..ca1f1cdf 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -265,6 +265,7 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" + tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: From d508b4a3199df05eafa559ac021abbd6e88fb574 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 19:50:56 +0200 Subject: [PATCH 007/102] More max_context_length flexibility --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 32823dd0..28a0e298 100644 --- a/aiserver.py +++ b/aiserver.py @@ -8292,7 +8292,7 @@ class GenerationInputSchema(SamplerSettingsSchema): use_userscripts: bool = fields.Boolean(load_default=False, metadata={"description": "Whether or not to use the userscripts from the KoboldAI GUI when generating text."}) soft_prompt: Optional[str] = fields.String(metadata={"description": "Soft prompt to use when generating. 
If set to the empty string or any other string containing no non-whitespace characters, uses no soft prompt."}, validate=[soft_prompt_validator, validate.Regexp(r"^[^/\\]*$")]) max_length: int = fields.Integer(validate=validate.Range(min=1, max=512), metadata={"description": "Number of tokens to generate."}) - max_context_length: int = fields.Integer(validate=validate.Range(min=512, max=2048), metadata={"description": "Maximum number of tokens to send to the model."}) + max_context_length: int = fields.Integer(validate=validate.Range(min=1), metadata={"description": "Maximum number of tokens to send to the model."}) n: int = fields.Integer(validate=validate.Range(min=1, max=5), metadata={"description": "Number of outputs to generate."}) disable_output_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all output formatting options default to `false` instead of the value in the KoboldAI GUI."}) frmttriminc: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes some characters from the end of the output such that the output doesn't end in the middle of a sentence. If the output is less than one sentence long, does nothing.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) From 2730879c61273a4484d66a5fcfc195d45d7af015 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 5 May 2023 21:28:06 +0200 Subject: [PATCH 008/102] Better warning until something more robust is in --- aiserver.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 28a0e298..0a467aa3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1472,10 +1472,10 @@ def general_startup(override_args=None): if args.trust_remote_code: logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") - logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!") + logger.warning("You are not protected from Model Viruses in this mode!") + logger.warning("Exit the program now to abort execution!") + logger.warning("Only use this mode with models that you trust and verified!") + time.sleep(25) koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False From b7db709c4729cc2ec522fc987a4fe6f0126f4bea Mon Sep 17 00:00:00 2001 From: somebody Date: Sat, 6 May 2023 11:16:09 -0500 Subject: [PATCH 009/102] PEFT: Change directory structure to be inside model --- modeling/inference_models/hf_torch.py | 10 +++++++++- models/peft/README.txt | 2 -- 2 files changed, 9 insertions(+), 3 deletions(-) delete mode 100644 models/peft/README.txt diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index d4d5320b..c46195ba 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -215,7 +215,15 @@ class HFTorchInferenceModel(HFInferenceModel): # PEFT Loading. This MUST be done after all save_pretrained calls are # finished on the main model. 
if utils.args.peft: - peft_local_path = os.path.join("models/peft", utils.args.peft.replace("/", "_")) + local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") + + # Make PEFT dir if it doesn't exist + try: + os.makedirs(local_peft_dir) + except FileExistsError: + pass + + peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") peft_installed_locally = True diff --git a/models/peft/README.txt b/models/peft/README.txt deleted file mode 100644 index fc7b72c4..00000000 --- a/models/peft/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -PEFT models will be stored in this directory when downloaded. -Please don't be too mean to this directory. From bb206f598ee114de000bbfaa99371834c944740b Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 6 May 2023 18:55:26 +0200 Subject: [PATCH 010/102] Don't load peft when unused --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c46195ba..990fabfc 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -22,7 +22,6 @@ from transformers import ( AutoModelForCausalLM, LogitsProcessorList, ) -from peft import PeftModel, PeftConfig import utils import modeling.lazy_loader as lazy_loader @@ -215,6 +214,7 @@ class HFTorchInferenceModel(HFInferenceModel): # PEFT Loading. This MUST be done after all save_pretrained calls are # finished on the main model. if utils.args.peft: + from peft import PeftModel, PeftConfig local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") # Make PEFT dir if it doesn't exist From cb4af7e56e67ac877a0d396b51559d4a48b7d986 Mon Sep 17 00:00:00 2001 From: henk717 Date: Mon, 8 May 2023 17:23:49 +0200 Subject: [PATCH 011/102] Update requirements_mtj.txt --- requirements_mtj.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_mtj.txt b/requirements_mtj.txt index ef9bb2b4..1b40fded 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -32,4 +32,5 @@ ansi2html flask_compress ijson ftfy -pydub \ No newline at end of file +pydub +sentencepiece From d53726bed610d03ec4b3edf3613c72f3754a7fba Mon Sep 17 00:00:00 2001 From: Bogdan Drema Date: Mon, 8 May 2023 18:24:34 +0100 Subject: [PATCH 012/102] fix: tpu tokenizers errors --- modeling/inference_models/hf.py | 2 +- modeling/inference_models/hf_mtj.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index cd609fed..37f473ca 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -59,7 +59,7 @@ class HFInferenceModel(InferenceModel): token_ids = [first] elif len(token_ids) > 0: first = int(token_ids[0]) - elif token_ids: + elif token_ids is not None and len(token_ids) > 0: first = token_ids[0] result = original_decode(self, token_ids, *args, **kwargs) if first is not None and first in has_prefix_space: diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..d7035cbf 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -17,6 +17,7 @@ from modeling.inference_model import ( ModelCapabilities, ) from modeling.inference_models.hf import HFInferenceModel +from modeling.tokenizer import GenericTokenizer # This file shouldn't be imported unless using the 
TPU assert utils.koboldai_vars.use_colab_tpu @@ -193,8 +194,7 @@ class HFMTJInferenceModel(HFInferenceModel): utils.koboldai_vars.modeldim = int( tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]) ) - - self.tokenizer = tpu_mtj_backend.tokenizer + self.tokenizer = GenericTokenizer(tpu_mtj_backend.tokenizer) if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default From a9e342ca64f8376e85d92beb9e65d246ec3997a8 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 8 May 2023 17:10:47 -0500 Subject: [PATCH 013/102] Fix TPU API errors --- aiserver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aiserver.py b/aiserver.py index e744d18e..ef49f05c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3708,6 +3708,7 @@ def apiactionsubmit_tpumtjgenerate(txt, minimum, maximum): soft_tokens=soft_tokens, sampler_order=koboldai_vars.sampler_order, ) + genout = np.array(genout) genout = [utils.applyoutputformatting(utils.decodenewlines(tokenizer.decode(txt))) for txt in genout] return genout From 9fdc2f73a63e1f6fd64fdad06f37aef1f97b0adc Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 9 May 2023 20:59:10 +0200 Subject: [PATCH 014/102] ROCM Downgrade for stability --- environments/rocm.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index 51b3e852..81e32a58 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -25,7 +25,7 @@ dependencies: - psutil - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 - - torch==2.0.* + - torch==1.13.* - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors From 702f59b2dbd458ccc9426cee0226740870a62b36 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 9 May 2023 22:10:01 +0200 Subject: [PATCH 015/102] Downgrade ROCM properly --- environments/rocm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/environments/rocm.yml b/environments/rocm.yml index 81e32a58..a33a8f96 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -24,8 +24,8 @@ dependencies: - Pillow - psutil - pip: - - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 - - torch==1.13.* + - --extra-index-url https://download.pytorch.org/whl/rocm5.2 + - torch==1.13.1+rocm5.2 - flask-cloudflared==0.0.10 - flask-ngrok - flask-cors From 71aee4dbd8f1d429e0ebd27dbf98bfd6fcf6c52c Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 10 May 2023 16:30:46 -0400 Subject: [PATCH 016/102] First concept of model plugins with a conceptual UI. Completely breaks UI2 model loading. 
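In outline, every module in modeling/inference_models/ now exposes a model_loader class that the UI drives through three hooks. A simplified sketch of that contract, trimmed from the diff below with UI wiring and error handling omitted, is:

import importlib
import os

class model_loader:
    """Minimal shape of a backend plugin."""

    def is_valid(self, model_name, model_path, menu_path):
        # Can this backend load the selected model?
        return False

    def get_requested_parameters(self, model_name, model_path, menu_path):
        # Describe the extra inputs (text boxes, dropdowns, layer sliders)
        # the UI should render before loading, as a list of dicts.
        return []

    def set_input_parameters(self, **kwargs):
        # Receive the values the user entered for those inputs.
        pass

# Discovery, as done at startup in aiserver.py: import every .py file in
# the folder and instantiate its model_loader.
model_loaders = {}
plugin_dir = "./modeling/inference_models"
if os.path.isdir(plugin_dir):
    for module in os.listdir(plugin_dir):
        if os.path.isfile(os.path.join(plugin_dir, module)) and module.endswith(".py"):
            name = module[:-3]
            code = importlib.import_module(f"modeling.inference_models.{name}")
            model_loaders[name] = code.model_loader()
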
--- aiserver.py | 123 ++-- modeling/inference_model.py | 9 + modeling/inference_models/api.py | 26 +- modeling/inference_models/basic_api.py | 29 +- modeling/inference_models/generic_hf_torch.py | 8 +- modeling/inference_models/hf.py | 190 ------ modeling/inference_models/hf_mtj.py | 22 +- modeling/inference_models/horde.py | 88 ++- modeling/inference_models/openai.py | 85 ++- modeling/inference_models/parents/hf.py | 219 +++++++ .../{ => parents}/hf_torch.py | 56 +- modeling/inference_models/rwkv.py | 26 +- static/koboldai.css | 44 ++ static/koboldai.js | 548 +++++++++--------- templates/popups.html | 30 +- templates/templates.html | 19 + 16 files changed, 912 insertions(+), 610 deletions(-) delete mode 100644 modeling/inference_models/hf.py create mode 100644 modeling/inference_models/parents/hf.py rename modeling/inference_models/{ => parents}/hf_torch.py (94%) diff --git a/aiserver.py b/aiserver.py index e744d18e..e7227c81 100644 --- a/aiserver.py +++ b/aiserver.py @@ -168,6 +168,7 @@ class MenuFolder(MenuItem): "size": "", "isMenu": True, "isDownloaded": False, + "isDirectory": False } class MenuModel(MenuItem): @@ -200,8 +201,28 @@ class MenuModel(MenuItem): "size": self.vram_requirements, "isMenu": False, "isDownloaded": self.is_downloaded, + "isDirectory": False, } +class MenuPath(MenuItem): + def to_ui1(self) -> list: + return [ + self.label, + self.name, + "", + True, + ] + + def to_json(self) -> dict: + return { + "label": self.label, + "name": self.name, + "size": "", + "isMenu": True, + "isDownloaded": False, + "isDirectory": True, + "path": "./models" + } # AI models Menu # This is a dict of lists where they key is the menu name, and the list is the menu items. @@ -209,8 +230,8 @@ class MenuModel(MenuItem): # 3: the memory requirement for the model, 4: if the item is a menu or not (True/False) model_menu = { "mainmenu": [ - MenuModel("Load a model from its directory", "NeoCustom"), - MenuModel("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), + MenuPath("Load a model from its directory", "NeoCustom"), + MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuFolder("Load custom model from Hugging Face", "customhuggingface"), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), @@ -600,6 +621,15 @@ utils.socketio = socketio # Weird import position to steal koboldai_vars from utils from modeling.patches import patch_transformers +#Load all of the model importers +import importlib +model_loader_code = {} +model_loaders = {} +for module in os.listdir("./modeling/inference_models"): + if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': + model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + old_socketio_on = socketio.on def new_socketio_on(*a, **k): @@ -906,6 +936,8 @@ def sendModelSelection(menu="mainmenu", folder="./models"): ) def get_folder_path_info(base): + if base is None: + return [], [] if base == 'This PC': breadcrumbs = [['This PC', 'This PC']] paths = [["{}:\\".format(chr(i)), "{}:\\".format(chr(i))] for i in range(65, 91) if os.path.exists("{}:".format(chr(i)))] @@ -1932,25 +1964,25 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal koboldai_vars.breakmodel = False if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import BasicAPIInferenceModel - model = 
BasicAPIInferenceModel() + from modeling.inference_models.basic_api import model_loader + model = model_loader() elif koboldai_vars.model == "API": - from modeling.inference_models.api import APIInferenceModel - model = APIInferenceModel(koboldai_vars.colaburl.replace("/request", "")) + from modeling.inference_models.api import model_loader + model = model_loader(koboldai_vars.colaburl.replace("/request", "")) elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import HordeInferenceModel - model = HordeInferenceModel() + from modeling.inference_models.horde import model_loader + model = model_loader() elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import OpenAIAPIInferenceModel - model = OpenAIAPIInferenceModel() + from modeling.inference_models.openai import model_loader + model = model_loader() model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign elif "rwkv" in koboldai_vars.model: if koboldai_vars.use_colab_tpu: raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) + from modeling.inference_models.rwkv import model_loader + model = model_loader(koboldai_vars.model) model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch @@ -1961,8 +1993,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal except: pass - from modeling.inference_models.generic_hf_torch import GenericHFTorchInferenceModel - model = GenericHFTorchInferenceModel( + from modeling.inference_models.generic_hf_torch import model_loader + model = model_loader( koboldai_vars.model, lazy_load=koboldai_vars.lazy_load, low_mem=args.lowmem @@ -1975,8 +2007,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal logger.info(f"Pipeline created: {koboldai_vars.model}") else: # TPU - from modeling.inference_models.hf_mtj import HFMTJInferenceModel - model = HFMTJInferenceModel( + from modeling.inference_models.hf_mtj import model_loader + model = model_loader( koboldai_vars.model ) model.load( @@ -6430,7 +6462,9 @@ def UI_2_retry(data): @socketio.on('load_model_button') @logger.catch def UI_2_load_model_button(data): - sendModelSelection() + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":"mainmenu"}} for item in model_menu['mainmenu'] if item.should_show()]}) + + #==================================================================# # Event triggered when user clicks the a model @@ -6438,6 +6472,38 @@ def UI_2_load_model_button(data): @socketio.on('select_model') @logger.catch def UI_2_select_model(data): + logger.debug("Clicked on model entry: {}".format(data)) + if data["name"] in model_menu and data['ismenu'] == "true": + emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) + else: + #Get load methods + logger.debug("Asking for model info on potential model: {}".format(data)) + valid = False + if 'path' not in data or data['path'] == "": + valid_loaders = {} + for model_loader in model_loaders: + logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) + if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + 
valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + valid = True + if valid: + logger.debug("Valid Loaders: {}".format(valid_loaders)) + emit("selected_model_info", valid_loaders) + if not valid: + #Get directories + paths, breadcrumbs = get_folder_path_info(data['path']) + output = [] + for path in paths: + valid=False + for model_loader in model_loaders: + if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + valid=True + break + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) + + return + #We've selected a menu if data['model'] in model_menu: @@ -6462,26 +6528,9 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - if not os.path.exists("settings/"): - os.mkdir("settings") - changed = True - if os.path.exists("settings/" + data['model'].replace('/', '_') + ".breakmodel"): - with open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "r") as file: - file_data = file.read().split('\n')[:2] - if len(file_data) < 2: - file_data.append("0") - gpu_layers, disk_layers = file_data - if gpu_layers == data['gpu_layers'] and disk_layers == data['disk_layers']: - changed = False - if changed: - f = open("settings/" + data['model'].replace('/', '_') + ".breakmodel", "w") - f.write("{}\n{}".format(data['gpu_layers'], data['disk_layers'])) - f.close() - koboldai_vars.colaburl = data['url'] + "/request" - koboldai_vars.model = data['model'] - koboldai_vars.custmodpth = data['path'] - print("loading Model") - load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) + logger.info("loading Model") + logger.info(data) + #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# # Event triggered when load story is clicked diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..27ad46db 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,15 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + + def is_valid(self, model_name, model_path, menu_path, vram): + return True + + def requested_parameters(self, model_name, model_path, menu_path, vram): + return {} + + def define_input_parameters(self): + return def load(self, save_model: bool = False, initial_load: bool = False) -> None: """User-facing load function. 
Do not override this; try `_load()` instead.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index d25505b0..41088bc7 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -22,9 +22,31 @@ class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class APIInferenceModel(InferenceModel): - def __init__(self, base_url: str) -> None: +class model_loader(InferenceModel): + def __init__(self) -> None: super().__init__() + #self.base_url = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "API" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "base_url", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, base_url=""): self.base_url = base_url.rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index c96eb42c..d7fc0863 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -19,12 +19,37 @@ class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class BasicAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "Colab" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "colaburl", + "default": False, + "check": {"value": "", 'check': "!="}, + "tooltip": "The URL of the Colab KoboldAI API to connect to.", + "menu_path": "", + "extra_classes": "", + "refresh_model_inputs": False + }) + return requested_parameters + + def set_input_parameters(self, colaburl=""): + self.colaburl = colaburl + + def _initialize_model(self): + return def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("EleutherAI/gpt-neo-2.7B") @@ -68,7 +93,7 @@ class BasicAPIInferenceModel(InferenceModel): } # Create request - req = requests.post(utils.koboldai_vars.colaburl, json=reqdata) + req = requests.post(self.colaburl, json=reqdata) if req.status_code != 200: raise BasicAPIException(f"Bad status code {req.status_code}") diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index aa602b1a..366fbbb7 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -20,10 +20,14 @@ except ModuleNotFoundError as e: if not utils.koboldai_vars.use_colab_tpu: raise e -from modeling.inference_models.hf_torch import HFTorchInferenceModel +from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel -class GenericHFTorchInferenceModel(HFTorchInferenceModel): +class model_loader(HFTorchInferenceModel): + + def _initialize_model(self): + return + def _load(self, save_model: bool, 
initial_load: bool) -> None: utils.koboldai_vars.allowsp = True diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py deleted file mode 100644 index cd609fed..00000000 --- a/modeling/inference_models/hf.py +++ /dev/null @@ -1,190 +0,0 @@ -import os -from typing import Optional -from transformers import AutoConfig - -import utils -import koboldai_settings -from logger import logger -from modeling.inference_model import InferenceModel - - -class HFInferenceModel(InferenceModel): - def __init__(self, model_name: str) -> None: - super().__init__() - self.model_config = None - self.model_name = model_name - - self.model = None - self.tokenizer = None - - def _post_load(self) -> None: - # These are model specific tokenizer overrides if a model has bad defaults - if utils.koboldai_vars.model_type == "llama": - # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer - self.tokenizer.add_bos_token = False - - # HF transformers no longer supports decode_with_prefix_space - # We work around this by wrapping decode, encode, and __call__ - # with versions that work around the 'prefix space' misfeature - # of sentencepiece. - vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size)) - has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")} - - # Wrap 'decode' with a method that always returns text starting with a space - # when the head token starts with a space. This is what 'decode_with_prefix_space' - # used to do, and we implement it using the same technique (building a cache of - # tokens that should have a prefix space, and then prepending a space if the first - # token is in this set.) We also work around a bizarre behavior in which decoding - # a single token 13 behaves differently than decoding a squence containing only [13]. - original_decode = type(self.tokenizer.tokenizer).decode - def decode_wrapper(self, token_ids, *args, **kwargs): - first = None - # Note, the code below that wraps single-value token_ids in a list - # is to work around this wonky behavior: - # >>> t.decode(13) - # '<0x0A>' - # >>> t.decode([13]) - # '\n' - # Not doing this causes token streaming to receive <0x0A> characters - # instead of newlines. - if isinstance(token_ids, int): - first = token_ids - token_ids = [first] - elif hasattr(token_ids, 'dim'): # Check for e.g. torch.Tensor - # Tensors don't support the Python standard of 'empty is False' - # and the special case of dimension 0 tensors also needs to be - # handled separately. - if token_ids.dim() == 0: - first = int(token_ids.item()) - token_ids = [first] - elif len(token_ids) > 0: - first = int(token_ids[0]) - elif token_ids: - first = token_ids[0] - result = original_decode(self, token_ids, *args, **kwargs) - if first is not None and first in has_prefix_space: - result = " " + result - return result - # GenericTokenizer overrides __setattr__ so we need to use object.__setattr__ to bypass it - object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer)) - - # Wrap encode and __call__ to work around the 'prefix space' misfeature also. - # The problem is that "Bob" at the start of text is encoded as if it is - # " Bob". This creates a problem because it means you can't split text, encode - # the pieces, concatenate the tokens, decode them, and get the original text back. - # The workaround is to prepend a known token that (1) starts with a space; and - # (2) is not the prefix of any other token. 
After searching through the vocab - # " ," (space comma) is the only token containing only printable ascii characters - # that fits this bill. By prepending ',' to the text, the original encode - # method always returns [1919, ...], where the tail of the sequence is the - # actual encoded result we want without the prefix space behavior. - original_encode = type(self.tokenizer.tokenizer).encode - def encode_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_encode(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_encode(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer)) - - # Since 'encode' is documented as being deprecated, also override __call__. - # This doesn't appear to currently be used by KoboldAI, but doing so - # in case someone uses it in the future. - original_call = type(self.tokenizer.tokenizer).__call__ - def call_wrapper(self, text, *args, **kwargs): - if type(text) is str: - text = ',' + text - result = original_call(self, text, *args, **kwargs) - result = result[1:] - else: - result = original_call(self, text, *args, **kwargs) - return result - object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer)) - - elif utils.koboldai_vars.model_type == "opt": - self.tokenizer._koboldai_header = self.tokenizer.encode("") - self.tokenizer.add_bos_token = False - self.tokenizer.add_prefix_space = False - - # Change newline behavior to match model quirks - if utils.koboldai_vars.model_type == "xglm": - # Default to newline mode if using XGLM - utils.koboldai_vars.newlinemode = "s" - elif utils.koboldai_vars.model_type in ["opt", "bloom"]: - # Handle but don't convert newlines if using Fairseq models that have newlines trained in them - utils.koboldai_vars.newlinemode = "ns" - - # Clean up tokens that cause issues - if ( - utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default - and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") - ): - utils.koboldai_vars.badwordsids = [ - [v] - for k, v in self.tokenizer.get_vocab().items() - if any(c in str(k) for c in "[]") - ] - - if utils.koboldai_vars.newlinemode == "n": - utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) - - return super()._post_load() - - def get_local_model_path( - self, legacy: bool = False, ignore_existance: bool = False - ) -> Optional[str]: - """ - Returns a string of the model's path locally, or None if it is not downloaded. - If ignore_existance is true, it will always return a path. 
- """ - - if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: - model_path = utils.koboldai_vars.custmodpth - assert model_path - - # Path can be absolute or relative to models directory - if os.path.exists(model_path): - return model_path - - model_path = os.path.join("models", model_path) - - try: - assert os.path.exists(model_path) - except AssertionError: - logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") - raise - - return model_path - - basename = utils.koboldai_vars.model.replace("/", "_") - if legacy: - ret = basename - else: - ret = os.path.join("models", basename) - - if os.path.isdir(ret) or ignore_existance: - return ret - return None - - def init_model_config(self) -> None: - # Get the model_type from the config or assume a model type if it isn't present - try: - self.model_config = AutoConfig.from_pretrained( - self.get_local_model_path() or self.model_name, - revision=utils.koboldai_vars.revision, - cache_dir="cache", - ) - utils.koboldai_vars.model_type = self.model_config.model_type - except ValueError: - utils.koboldai_vars.model_type = { - "NeoCustom": "gpt_neo", - "GPT2Custom": "gpt2", - }.get(utils.koboldai_vars.model) - - if not utils.koboldai_vars.model_type: - logger.warning( - "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" - ) - utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 7661a67f..c99e9a05 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -16,19 +16,17 @@ from modeling.inference_model import ( GenerationSettings, ModelCapabilities, ) -from modeling.inference_models.hf import HFInferenceModel - -# This file shouldn't be imported unless using the TPU -assert utils.koboldai_vars.use_colab_tpu -import tpu_mtj_backend +from modeling.inference_models.parents.hf import HFInferenceModel -class HFMTJInferenceModel(HFInferenceModel): + + +class model_loader(HFInferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: - super().__init__(model_name) + super().__init__() self.model_config = None self.capabilties = ModelCapabilities( @@ -38,8 +36,13 @@ class HFMTJInferenceModel(HFInferenceModel): post_token_probs=False, uses_tpu=True, ) + + def is_valid(self, model_name, model_path, menu_path): + # This file shouldn't be imported unless using the TPU + return utils.koboldai_vars.use_colab_tpu and super().is_valid(model_name, model_path, menu_path) def setup_mtj(self) -> None: + import tpu_mtj_backend def mtj_warper_callback(scores) -> "np.array": scores_shape = scores.shape scores_list = scores.tolist() @@ -175,6 +178,7 @@ class HFMTJInferenceModel(HFInferenceModel): tpu_mtj_backend.settings_callback = mtj_settings_callback def _load(self, save_model: bool, initial_load: bool) -> None: + import tpu_mtj_backend self.setup_mtj() self.init_model_config() utils.koboldai_vars.allowsp = True @@ -207,6 +211,7 @@ class HFMTJInferenceModel(HFInferenceModel): ] def get_soft_tokens(self) -> np.array: + import tpu_mtj_backend soft_tokens = None if utils.koboldai_vars.sp is None: @@ -258,6 +263,7 @@ class HFMTJInferenceModel(HFInferenceModel): seed: Optional[int] = None, **kwargs, ) -> GenerationResult: + import tpu_mtj_backend warpers.update_settings() soft_tokens = self.get_soft_tokens() diff --git a/modeling/inference_models/horde.py 
b/modeling/inference_models/horde.py index c6294374..56e88205 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -21,13 +21,99 @@ class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class HordeInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__(self) -> None: super().__init__() + self.url = "https://horde.koboldai.net" + self.key = "0000000000" + self.models = self.get_cluster_models() + # Do not allow API to be served over the API self.capabilties = ModelCapabilities(api_host=False) + def is_valid(self, model_name, model_path, menu_path): + logger.debug("Horde Models: {}".format(self.models)) + return model_name == "CLUSTER" or model_name in [x['value'] for x in self.models] + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "URL", + "id": "url", + "default": self.url, + "tooltip": "URL to the horde.", + "menu_path": "", + "check": {"value": "", 'check': "!="}, + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": self.key, + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to Horde (0000000000 is anonymous).", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.models, + + }]) + return requested_parameters + + def set_input_parameters(self, url="", key="", model=""): + self.key = key.strip() + self.model = model + self.url = url + + def get_cluster_models(self): + # Get list of models from public cluster + logger.info("Retrieving engine list...") + try: + req = requests.get(f"{self.url}/api/v2/status/models?type=text") + except: + logger.init_err("KAI Horde Models", status="Failed") + logger.error("Provided KoboldAI Horde URL unreachable") + emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) + return + if not req.ok: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("KAI Horde Models", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") + return + + engines = req.json() + try: + engines = [{"text": en["name"], "value": en["name"]} for en in engines] + except: + logger.error(engines) + raise + logger.debug(engines) + + online_model = "" + + logger.init_ok("KAI Horde Models", status="OK") + + return engines + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( utils.koboldai_vars.cluster_requested_models[0] diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 1441ae2f..01c0c037 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -12,13 +12,96 @@ from modeling.inference_model import ( ) + class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class OpenAIAPIInferenceModel(InferenceModel): +class model_loader(InferenceModel): 
"""InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, key="", model=""): + self.key = key.strip() + self.model = model + + def get_oai_models(self): + if self.key == "": + return [] + if self.source == 'OAI': + url = "https://api.openai.com/v1/engines" + elif self.source == 'GooseAI': + url = "https://api.goose.ai/v1/engines" + else: + return + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer("gpt2") diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py new file mode 100644 index 00000000..54781296 --- /dev/null +++ b/modeling/inference_models/parents/hf.py @@ -0,0 +1,219 @@ +import os +from typing import Optional +from transformers import AutoConfig + +import utils +import koboldai_settings +from logger import logger +from modeling.inference_model import InferenceModel +import torch + + +class HFInferenceModel(InferenceModel): + def __init__(self) -> None: + super().__init__() + self.model_config = None + #self.model_name = model_name + + self.model = None + self.tokenizer = None + + def is_valid(self, model_name, model_path, menu_path): + try: + if model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + return True + except: + return False + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + + if 
model_path is not None and os.path.exists(model_path): + model_config = AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0: + if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): + with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: + data = [x for x in file.read().split("\n")[:2] if x != ''] + if len(data) < 2: + data.append("0") + break_values, disk_blocks = data + break_values = break_values.split(",") + else: + break_values = [layer_count] + disk_blocks = None + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [disk_blocks] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{} Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "CPU Layers", + "id": "CPU Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: + requested_parameters.append({ + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + + + return requested_parameters + + def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + self.layers = layers + self.disk_layers = disk_layers + self.use_gpu = use_gpu + + def _post_load(self) -> None: + # These are model specific tokenizer overrides if a model has bad defaults + if utils.koboldai_vars.model_type == "llama": + self.tokenizer.decode_with_prefix_space = True + self.tokenizer.add_bos_token = False + elif utils.koboldai_vars.model_type == "opt": + self.tokenizer._koboldai_header = self.tokenizer.encode("") + self.tokenizer.add_bos_token = False + self.tokenizer.add_prefix_space = False + + # Change newline behavior to match model quirks + if utils.koboldai_vars.model_type == "xglm": + # Default to newline mode if using XGLM + utils.koboldai_vars.newlinemode = "s" + elif utils.koboldai_vars.model_type in ["opt", "bloom"]: + # Handle but don't convert newlines if using Fairseq models that have newlines trained in them + utils.koboldai_vars.newlinemode = "ns" + + # Clean up tokens that cause issues + if ( + utils.koboldai_vars.badwordsids == koboldai_settings.badwordsids_default + and utils.koboldai_vars.model_type not in ("gpt2", "gpt_neo", "gptj") + ): + utils.koboldai_vars.badwordsids = [ + [v] + for k, v in self.tokenizer.get_vocab().items() + if any(c in str(k) for c in "[]") + ] + + if utils.koboldai_vars.newlinemode == "n": + utils.koboldai_vars.badwordsids.append([self.tokenizer.eos_token_id]) + + return super()._post_load() + + def get_local_model_path( + self, legacy: bool = False, ignore_existance: bool = False + ) -> Optional[str]: + """ + Returns a string of the model's path locally, or None if it is not downloaded. + If ignore_existance is true, it will always return a path. 
+ """ + + if self.model_name in ["NeoCustom", "GPT2Custom", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]: + model_path = utils.koboldai_vars.custmodpth + assert model_path + + # Path can be absolute or relative to models directory + if os.path.exists(model_path): + return model_path + + model_path = os.path.join("models", model_path) + + try: + assert os.path.exists(model_path) + except AssertionError: + logger.error(f"Custom model does not exist at '{utils.koboldai_vars.custmodpth}' or '{model_path}'.") + raise + + return model_path + + basename = utils.koboldai_vars.model.replace("/", "_") + if legacy: + ret = basename + else: + ret = os.path.join("models", basename) + + if os.path.isdir(ret) or ignore_existance: + return ret + return None + + def init_model_config(self) -> None: + # Get the model_type from the config or assume a model type if it isn't present + try: + self.model_config = AutoConfig.from_pretrained( + self.get_local_model_path() or self.model_name, + revision=utils.koboldai_vars.revision, + cache_dir="cache", + ) + utils.koboldai_vars.model_type = self.model_config.model_type + except ValueError: + utils.koboldai_vars.model_type = { + "NeoCustom": "gpt_neo", + "GPT2Custom": "gpt2", + }.get(utils.koboldai_vars.model) + + if not utils.koboldai_vars.model_type: + logger.warning( + "No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)" + ) + utils.koboldai_vars.model_type = "gpt_neo" diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/parents/hf_torch.py similarity index 94% rename from modeling/inference_models/hf_torch.py rename to modeling/inference_models/parents/hf_torch.py index 990fabfc..d8afafb1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -31,7 +31,7 @@ from modeling import warpers from modeling.warpers import Warper from modeling.stoppers import Stoppers from modeling.post_token_hooks import PostTokenHooks -from modeling.inference_models.hf import HFInferenceModel +from modeling.inference_models.parents.hf import HFInferenceModel from modeling.inference_model import ( GenerationResult, GenerationSettings, @@ -55,13 +55,13 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): def __init__( self, - model_name: str, - lazy_load: bool, - low_mem: bool, + #model_name: str, + #lazy_load: bool, + #low_mem: bool, ) -> None: - super().__init__(model_name) - self.lazy_load = lazy_load - self.low_mem = low_mem + super().__init__() + #self.lazy_load = lazy_load + #self.low_mem = low_mem self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -211,40 +211,6 @@ class HFTorchInferenceModel(HFInferenceModel): new_sample.old_sample = transformers.GenerationMixin.sample use_core_manipulations.sample = new_sample - # PEFT Loading. This MUST be done after all save_pretrained calls are - # finished on the main model. 
- if utils.args.peft: - from peft import PeftModel, PeftConfig - local_peft_dir = os.path.join(m_self.get_local_model_path(), "peft") - - # Make PEFT dir if it doesn't exist - try: - os.makedirs(local_peft_dir) - except FileExistsError: - pass - - peft_local_path = os.path.join(local_peft_dir, utils.args.peft.replace("/", "_")) - logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.") - - peft_installed_locally = True - possible_peft_locations = [peft_local_path, utils.args.peft] - - for i, location in enumerate(possible_peft_locations): - try: - m_self.model = PeftModel.from_pretrained(m_self.model, location) - logger.debug(f"Loaded PEFT at '{location}'") - break - except ValueError: - peft_installed_locally = False - if i == len(possible_peft_locations) - 1: - raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?") - except RuntimeError: - raise RuntimeError("Error while loading PeftModel. Are you using the correct model?") - - if not peft_installed_locally: - logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'") - m_self.model.save_pretrained(peft_local_path) - return super()._post_load() def _raw_generate( @@ -272,13 +238,8 @@ class HFTorchInferenceModel(HFInferenceModel): with torch.no_grad(): start_time = time.time() - - # HEED & BEWARE: All arguments passed to self.model.generate MUST be - # kwargs; see https://github.com/huggingface/peft/issues/232. If they - # aren't, PeftModel will EXPLODE!!!! But nothing will happen without - # a PEFT loaded so it's sneaky. genout = self.model.generate( - input_ids=gen_in, + gen_in, do_sample=True, max_length=min( len(prompt_tokens) + max_new, utils.koboldai_vars.max_length @@ -304,7 +265,6 @@ class HFTorchInferenceModel(HFInferenceModel): def _get_model(self, location: str, tf_kwargs: Dict): tf_kwargs["revision"] = utils.koboldai_vars.revision tf_kwargs["cache_dir"] = "cache" - tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code # If we have model hints for legacy model, use them rather than fall back. try: diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index 006bb8fd..d14d8c81 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -17,7 +17,7 @@ from torch.nn import functional as F os.environ["RWKV_JIT_ON"] = "1" # TODO: Include compiled kernel os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV + import utils from logger import logger @@ -55,13 +55,13 @@ MODEL_FILES = { } -class RWKVInferenceModel(InferenceModel): +class model_loader(InferenceModel): def __init__( self, - model_name: str, + #model_name: str, ) -> None: super().__init__() - self.model_name = model_name + #self.model_name = model_name self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -83,6 +83,23 @@ class RWKVInferenceModel(InferenceModel): ) self._old_stopping_criteria = None + def is_valid(self, model_name, model_path, menu_path): + try: + from rwkv.model import RWKV + valid = True + except: + valid = False + return valid and "rwkv" in model_name.lower() + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self): + return + + def _ensure_directory_structure(self) -> None: for path in ["models/rwkv", "models/rwkv/models"]: try: @@ -145,6 +162,7 @@ class RWKVInferenceModel(InferenceModel): # Now we load! 
# TODO: Breakmodel to strat + from rwkv.model import RWKV self.model = RWKV(model=model_path, strategy="cuda:0 fp16") def _apply_warpers( diff --git a/static/koboldai.css b/static/koboldai.css index 230f1cbf..f3dde4b7 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -347,6 +347,28 @@ border-top-right-radius: var(--tabs_rounding); } +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px 23px 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + .setting_minlabel { padding-top: 6px; grid-area: minlabel; @@ -3370,6 +3392,23 @@ textarea { } } +@keyframes pulse-red { + 0% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0.7); + } + + 70% { + transform: scale(1); + box-shadow: 0 0 0 10px rgba(255, 0, 0, 0); + } + + 100% { + transform: scale(0.95); + box-shadow: 0 0 0 0 rgba(255, 0, 0, 0); + } +} + @keyframes pulse-text { 0% { filter: blur(3px); @@ -3391,6 +3430,11 @@ textarea { } } +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; +} + .single_pulse { animation: pulse-text 0.5s 1; } diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..0656253f 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -15,6 +15,7 @@ socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -81,6 +82,7 @@ const on_colab = $el("#on_colab").textContent == "true"; let story_id = -1; var dirty_chunks = []; var initial_socketio_connection_occured = false; +var selected_model_data; // Each entry into this array should be an object that looks like: // {class: "class", key: "key", func: callback} @@ -1500,49 +1502,46 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function show_model_menu(data) { - //clear old options - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - document.getElementById("modelurl").classList.add("hidden"); - document.getElementById("use_gpu_div").classList.add("hidden"); - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("modellayers").classList.add("hidden"); - document.getElementById("oaimodel").classList.add("hidden"); - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); +function new_show_model_menu(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + 
document.getElementById("modelplugin").classList.add("hidden"); + var accept = document.getElementById("btn_loadmodelaccept"); + accept.disabled = false; //clear out the breadcrumbs var breadcrumbs = document.getElementById('loadmodellistbreadcrumbs') while (breadcrumbs.firstChild) { breadcrumbs.removeChild(breadcrumbs.firstChild); } - //add breadcrumbs - //console.log(data.breadcrumbs); - for (item of data.breadcrumbs) { - var button = document.createElement("button"); - button.classList.add("breadcrumbitem"); - button.setAttribute("model", data.menu); - button.setAttribute("folder", item[0]); - button.textContent = item[1]; - button.onclick = function () { - socket.emit('select_model', {'menu': "", 'model': this.getAttribute("model"), 'path': this.getAttribute("folder")}); - }; - breadcrumbs.append(button); - var span = document.createElement("span"); - span.textContent = "\\"; - breadcrumbs.append(span); - } + //add breadcrumbs + if ('breadcrumbs' in data) { + for (item of data.breadcrumbs) { + var button = document.createElement("button"); + button.classList.add("breadcrumbitem"); + button.setAttribute("model", data.menu); + button.setAttribute("folder", item[0]); + button.textContent = item[1]; + button.onclick = function () { + socket.emit('select_model', {'menu': "", 'name': this.getAttribute("model"), 'path': this.getAttribute("folder")}); + }; + breadcrumbs.append(button); + var span = document.createElement("span"); + span.textContent = "\\"; + breadcrumbs.append(span); + } + } //clear out the items var model_list = document.getElementById('loadmodellistcontent') while (model_list.firstChild) { model_list.removeChild(model_list.firstChild); } //add items - for (item of data.data) { + for (item of data.items) { var list_item = document.createElement("span"); list_item.classList.add("model_item"); @@ -1564,10 +1563,27 @@ function show_model_menu(data) { //create the actual item var popup_item = document.createElement("span"); popup_item.classList.add("model"); - popup_item.setAttribute("display_name", item.label); - popup_item.id = item.name; + for (const key in item) { + if (key == "name") { + popup_item.id = item[key]; + } + popup_item.setAttribute(key, item[key]); + } + + popup_item.onclick = function() { + var attributes = this.attributes; + var obj = {}; + + for (var i = 0, len = attributes.length; i < len; i++) { + obj[attributes[i].name] = attributes[i].value; + } + //put the model data on the accept button so we can send it to the server when you accept + var accept = document.getElementById("popup_accept"); + selected_model_data = obj; + //send the data to the server so it can figure out what data we need from the user for the model + socket.emit('select_model', obj); + } - popup_item.setAttribute("Menu", data.menu) //name text var text = document.createElement("span"); text.style="grid-area: item;"; @@ -1615,241 +1631,223 @@ function show_model_menu(data) { }); })(); - popup_item.onclick = function () { - var accept = document.getElementById("btn_loadmodelaccept"); - accept.classList.add("disabled"); - socket.emit("select_model", {"model": this.id, "menu": this.getAttribute("Menu"), "display_name": this.getAttribute("display_name")}); - var model_list = document.getElementById('loadmodellistcontent').getElementsByClassName("selected"); - for (model of model_list) { - model.classList.remove("selected"); - } - this.classList.add("selected"); - accept.setAttribute("selected_model", this.id); - accept.setAttribute("menu", this.getAttribute("Menu")); - 
accept.setAttribute("display_name", this.getAttribute("display_name")); - }; list_item.append(popup_item); - - model_list.append(list_item); } - var accept = document.getElementById("btn_loadmodelaccept"); - accept.disabled = true; - //finally, if they selected the custom hugging face menu we show the input box - if (data['menu'] == "customhuggingface") { - document.getElementById("custommodelname").classList.remove("hidden"); - } else { - document.getElementById("custommodelname").classList.add("hidden"); - } - - - // detect if we are in a model selection screen and show the reference - var refelement = document.getElementById("modelspecifier"); - var check = document.getElementById("mainmenu"); - if (check) { - refelement.classList.remove("hidden"); - } else { - refelement.classList.add("hidden"); - } openPopup("load-model"); + } + function selected_model_info(data) { + //clear out the loadmodelsettings + var loadmodelsettings = document.getElementById('loadmodelsettings') + while (loadmodelsettings.firstChild) { + loadmodelsettings.removeChild(loadmodelsettings.firstChild); + } var accept = document.getElementById("btn_loadmodelaccept"); - //hide or unhide key - if (data.key) { - document.getElementById("modelkey").classList.remove("hidden"); - document.getElementById("modelkey").value = data.key_value; - } else { - document.getElementById("modelkey").classList.add("hidden"); - document.getElementById("modelkey").value = ""; - } - //hide or unhide URL - if (data.url) { - document.getElementById("modelurl").classList.remove("hidden"); - } else { - document.getElementById("modelurl").classList.add("hidden"); - } - - //hide or unhide 8 bit mode - if (data.bit_8_available) { - document.getElementById("use_8_bit_div").classList.remove("hidden"); - } else { - document.getElementById("use_8_bit_div").classList.add("hidden"); - document.getElementById("use_8_bit").checked = false; - } - - //default URL loading - if (data.default_url != null) { - document.getElementById("modelurl").value = data.default_url; - } - - //change model loading on url if needed - if (data.models_on_url) { - document.getElementById("modelurl").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': document.getElementById("modelkey").value, 'url': this.value});}; - document.getElementById("modelkey").onchange = function () {socket.emit('get_cluster_models', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value, 'url': document.getElementById("modelurl").value});}; - } else { - document.getElementById("modelkey").ochange = function () {socket.emit('OAI_Key_Update', {'model': document.getElementById('btn_loadmodelaccept').getAttribute('selected_model'), 'key': this.value});}; - document.getElementById("modelurl").ochange = null; - } - - //show model select for APIs - if (data.show_online_model_select) { - document.getElementById("oaimodel").classList.remove("hidden"); - } else { - document.getElementById("oaimodel").classList.add("hidden"); - } - - //Multiple Model Select? 
- if (data.multi_online_models) { - document.getElementById("oaimodel").setAttribute("multiple", ""); - document.getElementById("oaimodel").options[0].textContent = "All" - } else { - document.getElementById("oaimodel").removeAttribute("multiple"); - document.getElementById("oaimodel").options[0].textContent = "Select Model(s)" - } - - //hide or unhide the use gpu checkbox - if (data.gpu) { - document.getElementById("use_gpu_div").classList.remove("hidden"); - } else { - document.getElementById("use_gpu_div").classList.add("hidden"); - } - //setup breakmodel - if (data.breakmodel) { - document.getElementById("modellayers").classList.remove("hidden"); - //setup model layer count - document.getElementById("gpu_layers_current").textContent = data.break_values.reduce((a, b) => a + b, 0); - document.getElementById("gpu_layers_max").textContent = data.layer_count; - document.getElementById("gpu_count").value = data.gpu_count; - - //create the gpu load bars - var model_layer_bars = document.getElementById('model_layer_bars'); - while (model_layer_bars.firstChild) { - model_layer_bars.removeChild(model_layer_bars.firstChild); - } - - //Add the bars - for (let i = 0; i < data.gpu_names.length; i++) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "GPU " + i + " " + data.gpu_names[i] + ": " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "gpu_layers_box_"+i; - input.value = data.break_values[i]; - input.onblur = function () { - document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.break_values[i]; - input.id = "gpu_layers_" + i; - input.onchange = function () { - document.getElementById(this.id.replace("gpu_layers", "gpu_layers_box")).value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - - model_layer_bars.append(div); - } - - //add the disk layers - if (data.disk_break) { - var div = document.createElement("div"); - div.classList.add("model_setting_container"); - //build GPU text - var span = document.createElement("span"); - span.classList.add("model_setting_label"); - span.textContent = "Disk cache: " - //build layer count box - var input = document.createElement("input"); - input.classList.add("model_setting_value"); - input.classList.add("setting_value"); - input.inputmode = "numeric"; - input.id = "disk_layers_box"; - input.value = data.disk_break_value; - input.onblur = function () { - 
document.getElementById(this.id.replace("_box", "")).value = this.value; - update_gpu_layers(); - } - span.append(input); - div.append(span); - //build layer count slider - var input = document.createElement("input"); - input.classList.add("model_setting_item"); - input.type = "range"; - input.min = 0; - input.max = data.layer_count; - input.step = 1; - input.value = data.disk_break_value; - input.id = "disk_layers"; - input.onchange = function () { - document.getElementById(this.id+"_box").value = this.value; - update_gpu_layers(); - } - div.append(input); - //build slider bar #s - //min - var span = document.createElement("span"); - span.classList.add("model_setting_minlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = 0; - span.append(span2); - div.append(span); - //max - var span = document.createElement("span"); - span.classList.add("model_setting_maxlabel"); - var span2 = document.createElement("span"); - span2.style="top: -4px; position: relative;"; - span2.textContent = data.layer_count; - span.append(span2); - div.append(span); - } - - model_layer_bars.append(div); - - update_gpu_layers(); - } else { - document.getElementById("modellayers").classList.add("hidden"); - accept.classList.remove("disabled"); - } accept.disabled = false; + modelplugin = document.getElementById("modelplugin"); + modelplugin.classList.remove("hidden"); + modelplugin.onchange = function () { + for (const area of document.getElementsByClassName("model_plugin_settings_area")) { + area.classList.add("hidden"); + } + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + //create the content + for (const [loader, items] of Object.entries(data)) { + model_area = document.createElement("DIV"); + model_area.id = loader + "_settings_area"; + model_area.classList.add("model_plugin_settings_area"); + model_area.classList.add("hidden"); + modelpluginoption = document.createElement("option"); + modelpluginoption.innerText = loader; + modelpluginoption.value = loader; + modelplugin.append(modelpluginoption); + + for (item of items) { + let new_setting = document.getElementById('blank_model_settings').cloneNode(true); + new_setting.id = loader; + new_setting.classList.remove("hidden"); + new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; + new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); + + onchange_event = function () { + //get check value: + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid) { + //if we are supposed to refresh when this value 
changes we'll resubmit + if (this.getAttribute("refresh_model_inputs") == "true") { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length) + accept.disabled = true; + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + if (document.getElementsByClassName("input_error").length > 0) { + accept.classList.add("disabled"); + accept.disabled = true; + } else { + accept.classList.remove("disabled"); + accept.disabled = false; + } + + } + if (item['uitype'] == "slider") { + var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); + slider_number.value = item['default']; + slider_number.id = loader + "|" + item['id'] + "_value_text"; + slider_number.onchange = function() { document.getElementById(this.id.replace("_text", "")).value = this.value;}; + + var slider = new_setting.querySelector('#blank_model_settings_slider'); + slider.value = item['default']; + slider.min = item['min']; + slider.max = item['max']; + slider.id = loader + "|" + item['id'] + "_value"; + if ('check' in item) { + slider.check_data = item['check']; + slider_number.check_data = item['check']; + } else { + slider.check_data = null; + slider_number.check_data = null; + } + slider.oninput = function() { document.getElementById(this.id+"_text").value = this.value;}; + slider.onchange = onchange_event; + slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; + new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + } + if (item['uitype'] == "toggle") { + var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle.id = loader + "|" + item['id'] + "_value"; + toggle.checked = item['default']; + toggle.onchange = onchange_event; + toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + toggle.check_data = item['check']; + } else { + toggle.check_data = null; + } + toggle.onchange(); + } else { + 
new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + } + if (item['uitype'] == "dropdown") { + var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); + select_element.id = loader + "|" + item['id'] + "_value"; + for (const dropdown_value of item['children']) { + new_option = document.createElement("option"); + new_option.value = dropdown_value['value']; + new_option.innerText = dropdown_value['text']; + select_element.append(new_option); + } + select_element.value = item['default']; + select_element.onchange = onchange_event; + select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + select_element.check_data = item['check']; + } else { + select_element.check_data = null; + } + select_element.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + } + if (item['uitype'] == "password") { + var password_item = new_setting.querySelector('#blank_model_settings_password'); + password_item.id = loader + "|" + item['id'] + "_value"; + password_item.value = item['default']; + password_item.onchange = onchange_event; + password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + password_item.check_data = item['check']; + } else { + password_item.check_data = null; + } + password_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + } + if (item['uitype'] == "text") { + var text_item = new_setting.querySelector('#blank_model_settings_text'); + text_item.id = loader + "|" + item['id'] + "_value"; + text_item.value = item['default']; + text_item.onchange = onchange_event; + text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if ('check' in item) { + text_item.check_data = item['check']; + } else { + text_item.check_data = null; + } + text_item.onchange(); + } else { + new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + } + + model_area.append(new_setting); + loadmodelsettings.append(model_area); + } + } + + //unhide the first plugin settings + console.log(document.getElementById("modelplugin").value + "_settings_area"); + if (document.getElementById(document.getElementById("modelplugin").value + "_settings_area")) { + document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); + } } @@ -1877,42 +1875,16 @@ function update_gpu_layers() { function load_model() { var accept = document.getElementById('btn_loadmodelaccept'); - gpu_layers = [] - disk_layers = 0; - if (!(document.getElementById("modellayers").classList.contains("hidden"))) { - for (let i=0; i < document.getElementById("gpu_count").value; i++) { - gpu_layers.push(document.getElementById("gpu_layers_"+i).value); - } - if (document.getElementById("disk_layers")) { - disk_layers = document.getElementById("disk_layers").value; - } - } - //Need to do different stuff with custom models - if ((accept.getAttribute('menu') == 'GPT2Custom') || (accept.getAttribute('menu') == 'NeoCustom')) { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("menu"); - var path = document.getElementById("btn_loadmodelaccept").getAttribute("display_name"); - } else { - var model = document.getElementById("btn_loadmodelaccept").getAttribute("selected_model"); - var 
path = ""; - } + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - let selected_models = []; - for (item of document.getElementById("oaimodel").selectedOptions) { - selected_models.push(item.value); - } - if (selected_models == ['']) { - - selected_models = []; - } else if (selected_models.length == 1) { - selected_models = selected_models[0]; + //get an object of all the input settings from the user + data = {} + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + data[element.id.split("|")[1].replace("_value", "")] = element.value; } + data = {...data, ...selected_model_data} - message = {'model': model, 'path': path, 'use_gpu': document.getElementById("use_gpu").checked, - 'key': document.getElementById('modelkey').value, 'gpu_layers': gpu_layers.join(), - 'disk_layers': disk_layers, 'url': document.getElementById("modelurl").value, - 'online_model': selected_models, - 'use_8_bit': document.getElementById('use_8_bit').checked}; - socket.emit("load_model", message); + socket.emit("load_model", data); closePopups(); } diff --git a/templates/popups.html b/templates/popups.html index 12c4c27a..59f07e70 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -46,35 +46,11 @@
Usage (VRAM)
-
- -
+
+ + diff --git a/templates/templates.html b/templates/templates.html index 4f16ff66..49cd3e5b 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -154,3 +154,22 @@ + +
+ + help_icon + + + + + + + + + + + + + + +
\ No newline at end of file From 546ba84723c84dec3a6f8cc70e41408fd66efa67 Mon Sep 17 00:00:00 2001 From: somebody Date: Wed, 10 May 2023 19:10:23 -0500 Subject: [PATCH 017/102] Fix memory->genre bug in context viewer bar tooltip Crazy change I know --- static/koboldai.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/static/koboldai.js b/static/koboldai.js index cfc32d21..87beb954 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -4006,7 +4006,7 @@ function update_context(data) { document.getElementById('world_info_'+entry.uid).classList.add("used_in_game"); } break; - case 'memory': + case 'genre': genre_length += entry.tokens.length; break; case 'memory': From 84e4cb0f4a216e58063cf1f61a0adb0c7b27124a Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 13:44:53 +0200 Subject: [PATCH 018/102] Update Transformers --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 1cc5a9c7..3d0ca633 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.0 + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index a33a8f96..eb2927bd 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.0 + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 4eb2c282..28fdb28c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.28.0 +transformers==4.29.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 1b40fded..7fc866f0 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers == 4.28.0 +transformers==4.29.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From e932364a1e3efe0c6973f1a19f4093115068c77d Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 14:56:12 +0200 Subject: [PATCH 019/102] RWKV support --- aiserver.py | 31 ++-- modeling/inference_models/rwkv.py | 237 ------------------------------ 2 files changed, 11 insertions(+), 257 deletions(-) delete mode 100644 modeling/inference_models/rwkv.py diff --git a/aiserver.py b/aiserver.py index ef49f05c..b045ea71 100644 --- a/aiserver.py +++ b/aiserver.py @@ -136,7 +136,6 @@ class MenuModelType(Enum): HUGGINGFACE = 0 ONLINE_API = 1 OTHER = 2 - RWKV = 3 class MenuItem: def __init__( @@ -222,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - MenuFolder("Untuned RWKV-4 (Experimental)", "rwkvlist", experimental=True), + MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), @@ -349,16 +348,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV-4 14B ctx4096", "rwkv-4-pile-14b:ctx4096", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 14B 
ctx1024", "rwkv-4-pile-14b", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 7B ctx4096", "rwkv-4-pile-7b:ctx4096", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 7B ctx1024", "rwkv-4-pile-7b", "??GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 3B ctx4096", "rwkv-4-pile-3b:ctx4096", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 3B ctx1024", "rwkv-4-pile-3b", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 1.5B ctx4096", "rwkv-4-pile-1b5:ctx4096", "9GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 1.5B ctx1024", "rwkv-4-pile-1b5", "9GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 340M", "rwkv-4-pile-430m", "?GB", model_type=MenuModelType.RWKV), - MenuModel("RWKV-4 169M ctx1024", "rwkv-4-pile-169m", "?GB", model_type=MenuModelType.RWKV), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ @@ -1567,8 +1566,6 @@ def get_model_info(model, directory=""): print(":(") pass key = True - elif "rwkv" in model.lower(): - pass elif model == 'ReadOnly': pass #elif model == 'customhuggingface': @@ -1946,12 +1943,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal model.load(initial_load=initial_load) # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import RWKVInferenceModel - model = RWKVInferenceModel(koboldai_vars.model) - model.load() elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: # HF Torch logger.init("Transformers", status='Starting') diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py deleted file mode 100644 index 006bb8fd..00000000 --- a/modeling/inference_models/rwkv.py +++ /dev/null @@ -1,237 +0,0 @@ -from __future__ import annotations -import os - - -import time -from typing import Dict, List, Optional, Union -import numpy as np -import requests -from tokenizers import Tokenizer -from tqdm import tqdm -from huggingface_hub import hf_hub_url - -import torch -from torch.nn import functional as F - -# Must be defined before import -os.environ["RWKV_JIT_ON"] = "1" -# TODO: Include compiled kernel -os.environ["RWKV_CUDA_ON"] = "1" -from rwkv.model import RWKV - -import utils -from logger import logger - -from modeling import warpers -from modeling.warpers import Warper -from modeling.stoppers import Stoppers -from modeling.post_token_hooks import PostTokenHooks -from modeling.tokenizer import GenericTokenizer -from modeling.inference_model import ( - GenerationResult, - GenerationSettings, - InferenceModel, - ModelCapabilities, -) - -TOKENIZER_URL = ( - "https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/20B_tokenizer.json" -) -TOKENIZER_PATH = "models/rwkv/20b_tokenizer.json" - -REPO_OWNER = "BlinkDL" -MODEL_FILES = { - "rwkv-4-pile-14b": "RWKV-4-Pile-14B-20230213-8019.pth", - # NOTE: 
Still in progress(?) - "rwkv-4-pile-14b:ctx4096": "RWKV-4-Pile-14B-20230228-ctx4096-test663.pth", - "rwkv-4-pile-7b": "RWKV-4-Pile-7B-20221115-8047.pth", - "rwkv-4-pile-7b:ctx4096": "RWKV-4-Pile-7B-20230109-ctx4096.pth", - "rwkv-4-pile-3b": "RWKV-4-Pile-3B-20221008-8023.pth", - "rwkv-4-pile-3b:ctx4096": "RWKV-4-Pile-3B-20221110-ctx4096.pth", - "rwkv-4-pile-1b5": "RWKV-4-Pile-1B5-20220903-8040.pth", - "rwkv-4-pile-1b5:ctx4096": "RWKV-4-Pile-1B5-20220929-ctx4096.pth", - "rwkv-4-pile-430m": "RWKV-4-Pile-430M-20220808-8066.pth", - "rwkv-4-pile-169m": "RWKV-4-Pile-169M-20220807-8023.pth", -} - - -class RWKVInferenceModel(InferenceModel): - def __init__( - self, - model_name: str, - ) -> None: - super().__init__() - self.model_name = model_name - - self.post_token_hooks = [ - PostTokenHooks.stream_tokens, - ] - - self.stopper_hooks = [ - Stoppers.core_stopper, - Stoppers.dynamic_wi_scanner, - Stoppers.singleline_stopper, - Stoppers.chat_mode_stopper, - Stoppers.stop_sequence_stopper, - ] - - self.capabilties = ModelCapabilities( - embedding_manipulation=False, - post_token_hooks=True, - stopper_hooks=True, - post_token_probs=True, - ) - self._old_stopping_criteria = None - - def _ensure_directory_structure(self) -> None: - for path in ["models/rwkv", "models/rwkv/models"]: - try: - os.mkdir(path) - except FileExistsError: - pass - - def _get_tokenizer(self) -> GenericTokenizer: - if not os.path.exists(TOKENIZER_PATH): - logger.info("RWKV tokenizer not found, downloading...") - - r = requests.get(TOKENIZER_URL) - with open(TOKENIZER_PATH, "wb") as file: - file.write(r.content) - - return GenericTokenizer(Tokenizer.from_file(TOKENIZER_PATH)) - - def _download_model(self, model_path: str, model_class: str) -> None: - logger.info(f"{self.model_name} not found, downloading...") - - url = hf_hub_url( - repo_id=f"{REPO_OWNER}/{model_class}", - filename=MODEL_FILES[self.model_name], - ) - - # TODO: Use aria2 - # https://stackoverflow.com/a/57030446 - with requests.get(url, stream=True) as r: - r.raise_for_status() - bar = tqdm( - desc="Downloading RWKV Model", - unit="B", - unit_scale=True, - total=int(r.headers["Content-Length"]), - ) - with open(model_path, "wb") as file: - for chunk in r.iter_content(chunk_size=8192): - if not chunk: - continue - file.write(chunk) - bar.update(len(chunk)) - - def _load(self, save_model: bool, initial_load: bool) -> None: - self._ensure_directory_structure() - self.tokenizer = self._get_tokenizer() - - # Parse model name - model_class, _, special = self.model_name.partition(":") - special = special or None - - model_dir = os.path.join("models", "rwkv", "models", model_class) - if not os.path.exists(model_dir): - os.mkdir(model_dir) - - # Download model if we need to - model_path = os.path.join(model_dir, MODEL_FILES[self.model_name]) - if not os.path.exists(model_path): - self._download_model(model_path, model_class) - - # Now we load! 
- - # TODO: Breakmodel to strat - self.model = RWKV(model=model_path, strategy="cuda:0 fp16") - - def _apply_warpers( - self, scores: torch.Tensor, input_ids: torch.Tensor - ) -> torch.Tensor: - warpers.update_settings() - for sid in utils.koboldai_vars.sampler_order: - warper = Warper.from_id(sid) - - if not warper.value_is_valid(): - continue - - if warper == warpers.RepetitionPenalty: - # Rep pen needs more data than other samplers - scores = warper.torch(scores, input_ids=input_ids) - else: - scores = warper.torch(scores) - return scores - - def _sample_token(self, logits: torch.Tensor, input_ids: torch.Tensor) -> int: - probs = F.softmax(logits.float(), dim=-1) - - if probs.device == torch.device("cpu"): - probs = probs.numpy() - sorted_ids = np.argsort(probs) - sorted_probs = probs[sorted_ids][::-1] - - probs = self._apply_warpers(probs[None, :], input_ids) - - # TODO: is this right? - probs[probs == -torch.inf] = 0.0 - - probs = probs / np.sum(probs) - out = np.random.choice(a=len(probs), p=probs) - return int(out) - else: - sorted_ids = torch.argsort(probs) - sorted_probs = probs[sorted_ids] - sorted_probs = torch.flip(sorted_probs, dims=(0,)) - - probs = self._apply_warpers(probs[None, :], input_ids) - - # TODO: is this right? - probs[probs == -torch.inf] = 0.0 - - out = torch.multinomial(probs, num_samples=1)[0] - return int(out) - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - if seed is not None: - torch.manual_seed(seed) - - aux_device = utils.get_auxilary_device() - context = torch.tensor(prompt_tokens)[None, :].to(aux_device) - out = [] - - start_time = time.time() - with torch.no_grad(): - logits, state = self.model.forward(prompt_tokens, None) - last_token = prompt_tokens[-1] - - for _ in range(max_new): - - logits, state = self.model.forward([last_token], state) - last_token = self._sample_token(logits, context) - out.append(last_token) - add = torch.tensor([[last_token]]).to(aux_device) - context = torch.cat((context, add), dim=-1) - self._post_token_gen(context) - - logger.debug( - "torch_raw_generate: run generator {}s".format(time.time() - start_time) - ) - - return GenerationResult( - self, - out_batches=torch.tensor([out]), - prompt=prompt_tokens, - is_whole_generation=False, - output_includes_prompt=True, - ) From 77dd5aa7259f65262f6077957b493c74d98eaa24 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 09:09:09 -0400 Subject: [PATCH 020/102] Minor update --- aiserver.py | 7 +++++-- modeling/inference_models/horde.py | 2 +- static/koboldai.js | 4 +++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index e7227c81..ac90d6f4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6489,7 +6489,7 @@ def UI_2_select_model(data): if valid: logger.debug("Valid Loaders: {}".format(valid_loaders)) emit("selected_model_info", valid_loaders) - if not valid: + if not valid and 'path' in data: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] @@ -6501,7 +6501,9 @@ def UI_2_select_model(data): break output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - + elif not 
valid: + logger.error("Nothing can load the model: {}".format(valid_loaders)) + return @@ -6530,6 +6532,7 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) + model_loaders[data['plugin']].set_input_parameters(**data) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 56e88205..f02cf265 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -67,7 +67,7 @@ class model_loader(InferenceModel): "unit": "text", "label": "Model", "id": "model", - "default": "", + "default": model_name, "check": {"value": "", 'check': "!="}, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", diff --git a/static/koboldai.js b/static/koboldai.js index 0656253f..1907add8 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1882,7 +1882,9 @@ function load_model() { for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { data[element.id.split("|")[1].replace("_value", "")] = element.value; } - data = {...data, ...selected_model_data} + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; socket.emit("load_model", data); closePopups(); From edd9c7d782c9c59f9052f41e9f21498d2cdcaef2 Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 15:13:59 +0200 Subject: [PATCH 021/102] Warning polish --- koboldai_settings.py | 3 ++- modeling/inference_model.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..7bc88422 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1129,7 +1129,7 @@ class story_settings(settings): class user_settings(settings): local_only_variables = ['importjs'] - no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision"] + no_save_variables = ['importnum', 'importjs', 'loadselect', 'spselect', 'svowname', 'saveow', 'laststory', 'sid', "revision", "model_selected"] settings_name = "user" def __init__(self, socketio): self._socketio = socketio @@ -1185,6 +1185,7 @@ class user_settings(settings): self.horde_api_key = "0000000000" self.horde_worker_name = "My Awesome Instance" self.horde_url = "https://horde.koboldai.net" + self.model_selected = "" def __setattr__(self, name, value): new_variable = name not in self.__dict__ diff --git a/modeling/inference_model.py b/modeling/inference_model.py index b253c5bf..e2329cf9 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -218,7 +218,7 @@ class InferenceModel: try: return GenericTokenizer(try_get_tokenizer()) except Exception as e: - logger.warning(f"Tokenizer falling back due to {e}") + logger.warning(f"Tokenizer falling back due to {e} (This can be normal behavior for some architectures that lack a slow tokenizer such as NeoX)") # If we error on each attempt, raise the last one if i == len(suppliers) - 1: raise From 4605d10c370b994cfbd1d27891ccae6ade8b9c6b Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 12:08:35 -0400 Subject: [PATCH 022/102] Next iteration. 
Model Loading is broken completely now :) --- aiserver.py | 180 +++--------------- modeling/inference_model.py | 6 +- modeling/inference_models/api.py | 4 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 1 + modeling/inference_models/horde.py | 8 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 56 ++++-- modeling/inference_models/parents/hf_torch.py | 2 +- modeling/inference_models/readonly.py | 77 ++++++++ static/koboldai.js | 13 +- 11 files changed, 170 insertions(+), 187 deletions(-) create mode 100644 modeling/inference_models/readonly.py diff --git a/aiserver.py b/aiserver.py index ac90d6f4..f9e60641 100644 --- a/aiserver.py +++ b/aiserver.py @@ -645,10 +645,14 @@ def new_socketio_on(*a, **k): socketio.on = new_socketio_on def emit(*args, **kwargs): - try: - return _emit(*args, **kwargs) - except AttributeError: - return socketio.emit(*args, **kwargs) + if has_request_context(): + try: + return _emit(*args, **kwargs) + except AttributeError: + return socketio.emit(*args, **kwargs) + else: #We're trying to send data outside of the http context. This won't work. Try the relay + if koboldai_settings.queue is not None: + koboldai_settings.queue.put([args[0], args[1], kwargs]) utils.emit = emit #replacement for tpool.execute to maintain request contexts @@ -1780,10 +1784,6 @@ def get_cluster_models(msg): emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - -def reset_model_settings(): - koboldai_vars.reset_for_model_load() - def unload_model(): global model @@ -1816,7 +1816,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False, url=None, use_8_bit=False): +def load_model(plugin, initial_load=False): global model global tokenizer global model_config @@ -1827,79 +1827,18 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if initial_load: use_breakmodel_args = True - reset_model_settings() koboldai_vars.reset_model() - koboldai_vars.cluster_requested_models = [online_model] if isinstance(online_model, str) else online_model - if koboldai_vars.cluster_requested_models == [""]: - koboldai_vars.cluster_requested_models = [] - koboldai_vars.noai = False - if not use_breakmodel_args: - set_aibusy(True) - if koboldai_vars.model != 'ReadOnly': - emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) - #Have to add a sleep so the server will send the emit for some reason - time.sleep(0.1) + set_aibusy(True) + if koboldai_vars.model != 'ReadOnly': + emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(koboldai_vars.model)}, broadcast=True) + #Have to add a sleep so the server will send the emit for some reason + time.sleep(0.1) - if gpu_layers is not None: - args.breakmodel_gpulayers = gpu_layers - elif use_breakmodel_args: - gpu_layers = args.breakmodel_gpulayers - if breakmodel_args_default_to_cpu and gpu_layers is None: - gpu_layers = args.breakmodel_gpulayers = [] - if disk_layers is not None: - args.breakmodel_disklayers = int(disk_layers) - elif use_breakmodel_args: - disk_layers = args.breakmodel_disklayers - if 
breakmodel_args_default_to_cpu and disk_layers is None: - disk_layers = args.breakmodel_disklayers = 0 + if 'model' in globals(): + model.unload() - unload_model() - - if online_model == "": - koboldai_vars.configname = getmodelname() - #Let's set the GooseAI or OpenAI server URLs if that's applicable - else: - koboldai_vars.online_model = online_model - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = f"GooseAI_{online_model.replace('/', '_')}" - elif koboldai_vars.model == "CLUSTER" and isinstance(online_model, list): - if len(online_model) != 1: - koboldai_vars.configname = koboldai_vars.model - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model[0].replace('/', '_')}" - else: - koboldai_vars.configname = f"{koboldai_vars.model}_{online_model.replace('/', '_')}" - - if path.exists(get_config_filename()): - changed=False - with open(get_config_filename(), "r") as file: - # Check if API key exists - js = json.load(file) - if 'online_model' in js: - if js['online_model'] != online_model: - changed=True - js['online_model'] = online_model - else: - changed=True - js['online_model'] = online_model - - if changed: - with open("settings/{}.v2_settings".format(koboldai_vars.model), "w") as file: - file.write(json.dumps(js, indent=3)) - - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - args.configname = "GooseAI" + "/" + online_model - elif koboldai_vars.model != "CLUSTER": - args.configname = koboldai_vars.model + "/" + online_model - koboldai_vars.oaiurl = koboldai_vars.oaiengines + "/{0}/completions".format(online_model) # If transformers model was selected & GPU available, ask to use CPU or GPU if(not koboldai_vars.use_colab_tpu and koboldai_vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): @@ -1937,84 +1876,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal else: koboldai_vars.default_preset = koboldai_settings.default_preset - - # Ask for API key if InferKit was selected - if koboldai_vars.model == "InferKit": - koboldai_vars.apikey = koboldai_vars.oaiapikey - # Swap OAI Server if GooseAI was selected - if koboldai_vars.model == "GooseAI": - koboldai_vars.oaiengines = "https://api.goose.ai/v1/engines" - koboldai_vars.model = "OAI" - koboldai_vars.configname = "GooseAI" - - # Ask for API key if OpenAI was selected - if koboldai_vars.model == "OAI" and not koboldai_vars.configname: - koboldai_vars.configname = "OAI" - - if koboldai_vars.model == "ReadOnly": - koboldai_vars.noai = True - - # TODO: InferKit - if koboldai_vars.model == "ReadOnly" or koboldai_vars.noai: - pass - elif koboldai_vars.model in ["Colab", "API", "CLUSTER", "OAI"]: - koboldai_vars.colaburl = url or koboldai_vars.colaburl - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = False - - if koboldai_vars.model == "Colab": - from modeling.inference_models.basic_api import model_loader - model = model_loader() - elif koboldai_vars.model == "API": - from modeling.inference_models.api import model_loader - model = model_loader(koboldai_vars.colaburl.replace("/request", "")) - elif koboldai_vars.model == "CLUSTER": - from modeling.inference_models.horde import model_loader - model = 
model_loader() - elif koboldai_vars.model == "OAI": - from modeling.inference_models.openai import model_loader - model = model_loader() - - model.load(initial_load=initial_load) - # TODO: This check sucks, make a model object or somethign - elif "rwkv" in koboldai_vars.model: - if koboldai_vars.use_colab_tpu: - raise RuntimeError("RWKV is not supported on the TPU.") - from modeling.inference_models.rwkv import model_loader - model = model_loader(koboldai_vars.model) - model.load() - elif not koboldai_vars.use_colab_tpu and not koboldai_vars.noai: - # HF Torch - logger.init("Transformers", status='Starting') - for m in ("GPTJModel", "XGLMModel"): - try: - globals()[m] = getattr(__import__("transformers"), m) - except: - pass - - from modeling.inference_models.generic_hf_torch import model_loader - model = model_loader( - koboldai_vars.model, - lazy_load=koboldai_vars.lazy_load, - low_mem=args.lowmem - ) - - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) - logger.info(f"Pipeline created: {koboldai_vars.model}") - else: - # TPU - from modeling.inference_models.hf_mtj import model_loader - model = model_loader( - koboldai_vars.model - ) - model.load( - save_model=not (args.colab or args.cacheonly) or args.savemodel, - initial_load=initial_load, - ) + model = model_loaders[plugin] + model.load(initial_load=initial_load) # TODO: Convert everywhere to use model.tokenizer if model: @@ -6532,7 +6396,8 @@ def UI_2_select_model(data): def UI_2_load_model(data): logger.info("loading Model") logger.info(data) - model_loaders[data['plugin']].set_input_parameters(**data) + model_loaders[data['plugin']].set_input_parameters(data) + load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) #==================================================================# @@ -8155,7 +8020,8 @@ def send_one_time_messages(data, wait_time=0): # Test #==================================================================# def model_info(): - if model_config is not None: + global model_config + if 'model_config' in globals() and model_config is not None: if isinstance(model_config, dict): if 'model_type' in model_config: model_type = str(model_config['model_type']) @@ -11045,7 +10911,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, **{'initial_load':True}) + socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 27ad46db..343eb39a 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -169,6 +169,7 @@ class InferenceModel: ] self.tokenizer = None self.capabilties = ModelCapabilities() + self.model_name = "Not Defined" def is_valid(self, model_name, model_path, menu_path, vram): return True @@ -176,7 +177,7 @@ class InferenceModel: def requested_parameters(self, model_name, model_path, menu_path, vram): return {} - def define_input_parameters(self): + def set_input_parameters(self, parameters): return def load(self, save_model: bool = False, initial_load: bool = False) -> None: @@ -186,6 +187,9 @@ class InferenceModel: self._load(save_model=save_model, initial_load=initial_load) self._post_load() + def 
unload(self): + return + def _pre_load(self) -> None: """Pre load hook. Called before `_load()`.""" diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 41088bc7..5bddd714 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -46,8 +46,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, base_url=""): - self.base_url = base_url.rstrip("/") + def set_input_parameters(self, parameters): + self.base_url = parameters['base_url'].rstrip("/") def _load(self, save_model: bool, initial_load: bool) -> None: tokenizer_id = requests.get(f"{self.base_url}/api/v1/model").json()["result"] diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index d7fc0863..5666ba8e 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -45,8 +45,8 @@ class model_loader(InferenceModel): }) return requested_parameters - def set_input_parameters(self, colaburl=""): - self.colaburl = colaburl + def set_input_parameters(self, parameters): + self.colaburl = parameters['colaburl'] def _initialize_model(self): return diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index 366fbbb7..b542c712 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,6 +30,7 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True + self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index f02cf265..057669d7 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -78,10 +78,10 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, url="", key="", model=""): - self.key = key.strip() - self.model = model - self.url = url + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + self.url = parameters['url'] def get_cluster_models(self): # Get list of models from public cluster diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index 01c0c037..efbb01d3 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -59,9 +59,9 @@ class model_loader(InferenceModel): }]) return requested_parameters - def set_input_parameters(self, key="", model=""): - self.key = key.strip() - self.model = model + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] def get_oai_models(self): if self.key == "": diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 54781296..3099feaf 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -34,12 +34,12 @@ class HFInferenceModel(InferenceModel): requested_parameters = [] if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = model_config["n_layer"] if isinstance(model_config, dict) else model_config.num_layers if hasattr(model_config, "num_layers") else model_config.n_layer if hasattr(model_config, "n_layer") else model_config.num_hidden_layers if hasattr(model_config, 'num_hidden_layers') else None + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None if layer_count is not None and layer_count >= 0: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: @@ -61,11 +61,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{} Layers".format(i), + "id": "{}_Layers".format(i), "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": 
layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": break_values[i], "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), @@ -77,11 +77,11 @@ class HFInferenceModel(InferenceModel): "uitype": "slider", "unit": "int", "label": "CPU Layers", - "id": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": layer_count - sum(break_values), "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", @@ -98,7 +98,7 @@ class HFInferenceModel(InferenceModel): "min": 0, "max": layer_count, "step": 1, - "check": {"sum": ["{} Layers".format(i) for i in range(gpu_count)]+['CPU Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), "default": disk_blocks, "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", @@ -122,10 +122,40 @@ class HFInferenceModel(InferenceModel): return requested_parameters - def set_input_parameters(self, layers=[], disk_layers=0, use_gpu=False): + def set_input_parameters(self, parameters): + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = disk_layers - self.use_gpu = use_gpu + self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_name = parameters['id'] + self.path = parameters['path'] if 'path' in parameters else None + + def unload(self): + if hasattr(self, 'model'): + self.model = None + if hasattr(self, 'tokenizer'): + self.tokenizer = None + if hasattr(self, 'model_config'): + self.model_config = None + with torch.no_grad(): + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="torch.distributed.reduce_op is deprecated") + for tensor in gc.get_objects(): + try: + if torch.is_tensor(tensor): + tensor.set_(torch.tensor((), device=tensor.device, dtype=tensor.dtype)) + except: + pass + gc.collect() + try: + with torch.no_grad(): + torch.cuda.empty_cache() + except: + pass def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults @@ -187,7 +217,7 @@ class HFInferenceModel(InferenceModel): return model_path - basename = utils.koboldai_vars.model.replace("/", "_") + basename = self.model_name.replace("/", "_") if legacy: ret = basename else: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d8afafb1..4de13d7b 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -398,7 +398,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not self.lazy_load: + if not utils.koboldai_vars.lazy_load: return if utils.args.breakmodel_disklayers is not None: diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py new file mode 100644 index 00000000..c642c05a --- /dev/null +++ b/modeling/inference_models/readonly.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, + ModelCapabilities, +) + + +class BasicAPIException(Exception): + """To be used for errors when using the Basic API as an interface.""" + + +class model_loader(InferenceModel): + def __init__(self) -> None: + super().__init__() + + # Do not allow API to be served over the API + self.capabilties = ModelCapabilities(api_host=False) + self.tokenizer = self._tokenizer() + self.model = None + self.model_name = "Read Only" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "ReadOnly" + + def get_requested_parameters(self, model_name, model_path, menu_path): + requested_parameters = [] + return requested_parameters + + def set_input_parameters(self, parameters): + return + + def unload(self): 
+ utils.koboldai_vars.noai = False + + def _initialize_model(self): + return + + class _tokenizer(): + def __init__(self): + self._koboldai_header = [] + def decode(self, _input): + return "" + def encode(self, input_text): + return [] + + def _load(self, save_model: bool = False, initial_load: bool = False) -> None: + self.tokenizer = self.tokenizer + self.model = None + utils.koboldai_vars.noai = True + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ): + return GenerationResult( + model=self, + out_batches=np.array([]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 1907add8..7f004ff2 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -14,8 +14,8 @@ socket.on('load_popup', function(data){load_popup(data);}); socket.on('popup_items', function(data){popup_items(data);}); socket.on('popup_breadcrumbs', function(data){popup_breadcrumbs(data);}); socket.on('popup_edit_file', function(data){popup_edit_file(data);}); -socket.on('show_model_menu', function(data){show_model_menu(data);}); -socket.on('open_model_load_menu', function(data){new_show_model_menu(data);}); +//socket.on('show_model_menu', function(data){show_model_menu(data);}); +socket.on('open_model_load_menu', function(data){show_model_menu(data);}); socket.on('selected_model_info', function(data){selected_model_info(data);}); socket.on('oai_engines', function(data){oai_engines(data);}); socket.on('buildload', function(data){buildload(data);}); @@ -1502,13 +1502,18 @@ function getModelParameterCount(modelName) { return base * multiplier; } -function new_show_model_menu(data) { +function show_model_menu(data) { //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } - document.getElementById("modelplugin").classList.add("hidden"); + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + model_plugin.classList.add("hidden"); var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From 20b54eb9ff829526161c2822ada507b6c80bee41 Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 11 May 2023 19:06:39 +0200 Subject: [PATCH 023/102] Revert 4.29 due to unforseen consequences --- aiserver.py | 2 +- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index b045ea71..1abdd31e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -221,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - MenuFolder("Official RWKV-4", "rwkvlist"), + #MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 3d0ca633..af16423e 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ 
dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index eb2927bd..ffcacfb6 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 28fdb28c..c98b7252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.29.* +transformers==4.28.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 7fc866f0..b41b7ead 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.29.* +transformers==4.28.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From e9c845dc2a1eae4927ed2a7417c6aa6969329bb9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:14:52 -0400 Subject: [PATCH 024/102] Fix for badwordIDs --- modeling/inference_models/generic_hf_torch.py | 2 +- modeling/inference_models/parents/hf.py | 1 + modeling/inference_models/parents/hf_torch.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index b542c712..d5cf6397 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -43,7 +43,7 @@ class model_loader(HFTorchInferenceModel): self.model_name = os.path.basename( os.path.normpath(utils.koboldai_vars.custmodpth) ) - utils.koboldai_vars.model = self.model_name + utils.koboldai_vars.model = self.model_name # If we specify a model and it's in the root directory, we need to move # it to the models directory (legacy folder structure to new) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 3099feaf..1941a12e 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -17,6 +17,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None + self.badwordsids = koboldai_settings.badwordsids_default def is_valid(self, model_name, model_path, menu_path): try: diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 4de13d7b..7cc16ad5 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -245,7 +245,7 @@ class HFTorchInferenceModel(HFInferenceModel): len(prompt_tokens) + max_new, utils.koboldai_vars.max_length ), repetition_penalty=1.0, - bad_words_ids=utils.koboldai_vars.badwordsids + bad_words_ids=self.badwordsids + additional_bad_words_ids, use_cache=True, num_return_sequences=batch_count, From a9c785d0f0020847e342f18f9910f1ed9c4871dd Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 14:20:14 -0400 Subject: [PATCH 025/102] Fix for Horde --- modeling/inference_models/horde.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 057669d7..bd457197 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -116,9 +116,9 @@ class 
model_loader(InferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: self.tokenizer = self._get_tokenizer( - utils.koboldai_vars.cluster_requested_models[0] - if len(utils.koboldai_vars.cluster_requested_models) > 0 - else "gpt2", + self.model + #if len(self.model) > 0 + #else "gpt2", ) def _raw_generate( @@ -166,14 +166,14 @@ class model_loader(InferenceModel): client_agent = "KoboldAI:2.0.0:koboldai.org" cluster_headers = { - "apikey": utils.koboldai_vars.horde_api_key, + "apikey": self.key, "Client-Agent": client_agent, } try: # Create request req = requests.post( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/async", + f"{self.url}/api/v2/generate/text/async", json=cluster_metadata, headers=cluster_headers, ) @@ -211,7 +211,7 @@ class model_loader(InferenceModel): while not finished: try: req = requests.get( - f"{utils.koboldai_vars.horde_url}/api/v2/generate/text/status/{request_id}", + f"{self.url}/api/v2/generate/text/status/{request_id}", headers=cluster_agent_headers, ) except requests.exceptions.ConnectionError: From c16336f6467fe11a8644b551d5700986d2ef4bf6 Mon Sep 17 00:00:00 2001 From: somebody Date: Thu, 11 May 2023 17:10:19 -0500 Subject: [PATCH 026/102] Add traceback to debug log on fallback --- modeling/inference_models/hf_torch.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 990fabfc..14ddd7af 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -332,10 +332,13 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback.format_exc()) + try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: logger.warning(f"Fell back to GPTNeoForCausalLM due to {e}") + logger.debug(traceback.format_exc()) return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs) def get_hidden_size(self) -> int: From 3065c1b40e758993565ea212ccf9f3b0db5c7f0e Mon Sep 17 00:00:00 2001 From: somebody Date: Thu, 11 May 2023 17:10:43 -0500 Subject: [PATCH 027/102] Ignore missing keys in get_original_key --- modeling/inference_models/hf_torch.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 14ddd7af..3f7c3967 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -465,19 +465,25 @@ class HFTorchInferenceModel(HFInferenceModel): device_map: Dict[str, Union[str, int]] = {} @functools.lru_cache(maxsize=None) - def get_original_key(key): - return max( - ( - original_key - for original_key in utils.module_names - if original_key.endswith(key) - ), - key=len, - ) + def get_original_key(key) -> Optional[str]: + key_candidates = [ + original_key + for original_key in utils.module_names + if original_key.endswith(key) + ] + + if not key_candidates: + logger.debug(f"!!! 
No key candidates for {key}") + return None + + return max(key_candidates, key=len) for key, value in model_dict.items(): original_key = get_original_key(key) + if not original_key: + continue + if isinstance(value, lazy_loader.LazyTensor) and not any( original_key.startswith(n) for n in utils.layers_module_names ): From 69d942c00cfd16708f82826fcc0d50355e322c0f Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:22:30 -0400 Subject: [PATCH 028/102] Kind of working breakmodel --- aiserver.py | 256 +----------------- koboldai_settings.py | 3 +- modeling/inference_models/generic_hf_torch.py | 7 +- modeling/inference_models/gooseai.py | 31 +++ modeling/inference_models/hf_mtj.py | 2 +- modeling/inference_models/openai.py | 168 +----------- modeling/inference_models/parents/hf.py | 35 ++- modeling/inference_models/parents/hf_torch.py | 27 +- .../parents/openai_gooseai.py | 189 +++++++++++++ static/koboldai.js | 6 + 10 files changed, 281 insertions(+), 443 deletions(-) create mode 100644 modeling/inference_models/gooseai.py create mode 100644 modeling/inference_models/parents/openai_gooseai.py diff --git a/aiserver.py b/aiserver.py index f9e60641..158a6699 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1473,7 +1473,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - koboldai_vars.nobreakmodel = True + model_loaders['generic_hf_torch'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,6 +1484,9 @@ def general_startup(override_args=None): if args.localtunnel: koboldai_vars.host = True; + if args.lowmem: + model_loaders['generic_hf_torch'].low_mem = True + if args.host != "Disabled": # This means --host option was submitted without an argument # Enable all LAN IPs (0.0.0.0/0) @@ -1516,6 +1519,9 @@ def general_startup(override_args=None): koboldai_vars.trust_remote_code = True if args.cpu: koboldai_vars.use_colab_tpu = False + koboldai_vars.hascuda = False + koboldai_vars.usegpu = False + model_loaders['generic_hf_torch'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1545,245 +1551,6 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) -#==================================================================# -# Load Model -#==================================================================# - -@socketio.on("get_model_info") -def get_model_info(model, directory=""): - logger.info("Selected: {}, {}".format(model, directory)) - # if the model is in the api list - disk_blocks = 0 - key = False - breakmodel = False - gpu = False - layer_count = None - key_value = "" - break_values = [] - url = False - default_url = None - models_on_url = False - multi_online_models = False - show_online_model_select=False - gpu_count = torch.cuda.device_count() - gpu_names = [] - send_horde_models = False - show_custom_model_box = False - for i in range(gpu_count): - gpu_names.append(torch.cuda.get_device_name(i)) - if model in ['Colab', 'API']: - url = True - elif model == 'CLUSTER': - models_on_url = True - show_online_model_select=True - url = True - key = True - default_url = koboldai_vars.horde_url - multi_online_models = True - key_value = koboldai_vars.horde_api_key - url = koboldai_vars.horde_url - if key_value: - send_horde_models = True - elif model in [x.name for x in model_menu['apilist']]: - show_online_model_select=True - if 
path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - # Check if API key exists - try: - js = json.load(file) - - if("apikey" in js and js["apikey"] != ""): - # API key exists, grab it and close the file - key_value = js["apikey"] - elif 'oaiapikey' in js and js['oaiapikey'] != "": - key_value = js["oaiapikey"] - if model in ('GooseAI', 'OAI'): - get_oai_models({'model': model, 'key': key_value}) - except json.decoder.JSONDecodeError: - print(":(") - pass - key = True - elif "rwkv" in model.lower(): - pass - elif model == 'ReadOnly': - pass - #elif model == 'customhuggingface': - # show_custom_model_box = True - elif args.cpu: - pass - else: - layer_count = get_layer_count(model, directory=directory) - if layer_count is None: - breakmodel = False - gpu = True - else: - breakmodel = True - if model in ["NeoCustom", "GPT2Custom", "customhuggingface"]: - filename = "settings/{}.breakmodel".format(os.path.basename(os.path.normpath(directory))) - else: - filename = "settings/{}.breakmodel".format(model.replace("/", "_")) - if path.exists(filename): - with open(filename, "r") as file: - data = [x for x in file.read().split("\n")[:2] if x != ''] - if len(data) < 2: - data.append("0") - break_values, disk_blocks = data - break_values = break_values.split(",") - else: - break_values = [layer_count] - break_values = [int(x) for x in break_values if x != ''] - break_values += [0] * (gpu_count - len(break_values)) - emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, - 'disk_break_value': disk_blocks, 'accelerate': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, - 'show_custom_model_box': show_custom_model_box}, broadcast=True, room="UI_1") - emit('selected_model_info', {'key_value': key_value, 'key':key, - 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'multi_online_models': multi_online_models, 'default_url': default_url, - 'disk_break_value': disk_blocks, 'disk_break': True, - 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url, 'show_online_model_select': show_online_model_select, - 'bit_8_available': koboldai_vars.bit_8_available if koboldai_vars.experimental_features else False, - 'show_custom_model_box': show_custom_model_box}) - if send_horde_models: - get_cluster_models({'key': key_value, 'url': default_url}) - elif key_value != "" and model in [x.name for x in model_menu['apilist']] and model != 'CLUSTER': - get_oai_models(key_value) - - - -def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): - if(model == "GPT2Custom"): - with open(os.path.join(directory, "config.json"), "r") as f: - model_config = json.load(f) - # Get the model_type from the config or assume a model type if it isn't present - else: - if(directory): - model = directory - from transformers import AutoConfig - if(os.path.isdir(model.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(model.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - elif(is_model_downloaded(model)): - model_config = AutoConfig.from_pretrained("models/{}".format(model.replace('/', '_')), 
revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(directory)): - model_config = AutoConfig.from_pretrained(directory, revision=koboldai_vars.revision, cache_dir="cache") - elif(os.path.isdir(koboldai_vars.custmodpth.replace('/', '_'))): - model_config = AutoConfig.from_pretrained(koboldai_vars.custmodpth.replace('/', '_'), revision=koboldai_vars.revision, cache_dir="cache") - else: - model_config = AutoConfig.from_pretrained(model, revision=koboldai_vars.revision, cache_dir="cache") - try: - if (model_config.model_type != 'gpt2' or model_config.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel: - return utils.num_layers(model_config) - else: - return None - except: - return None - else: - return None - -@socketio.on('OAI_Key_Update') -def get_oai_models(data): - key = data['key'] - model = data['model'] - koboldai_vars.oaiapikey = key - if model == 'OAI': - url = "https://api.openai.com/v1/engines" - elif model == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - changed=False - - #Save the key - if not path.exists("settings"): - # If the client settings file doesn't exist, create it - # Write API key to file - os.makedirs('settings', exist_ok=True) - if path.exists("settings/{}.v2_settings".format(model)): - with open("settings/{}.v2_settings".format(model), "r") as file: - js = json.load(file) - if 'online_model' in js: - online_model = js['online_model'] - if "apikey" in js: - if js['apikey'] != key: - changed=True - else: - js = {} - changed=True - - if changed: - with open("settings/{}.v2_settings".format(model), "w") as file: - js["apikey"] = key - file.write(json.dumps(js, indent=3)) - - logger.init_ok("OAI Engines", status="OK") - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - -@socketio.on("get_cluster_models") -def get_cluster_models(msg): - koboldai_vars.horde_api_key = msg['key'] or koboldai_vars.horde_api_key - url = msg['url'] or koboldai_vars.horde_url - koboldai_vars.horde_url = url - # Get list of models from public cluster - print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - try: - req = requests.get(f"{url}/api/v2/status/models?type=text") - except: - logger.init_err("KAI Horde Models", status="Failed") - logger.error("Provided KoboldAI Horde URL unreachable") - emit('from_server', {'cmd': 'errmsg', 'data': "Provided KoboldAI Horde URL unreachable"}) - return - if not req.ok: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("KAI Horde Models", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}, room="UI_1") - return - - engines 
= req.json() - logger.debug(engines) - try: - engines = [[en["name"], en["name"]] for en in engines] - except: - logger.error(engines) - raise - logger.debug(engines) - - online_model = "" - savesettings() - - logger.init_ok("KAI Horde Models", status="OK") - - emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True, room="UI_1") - emit('oai_engines', {'data': engines, 'online_model': online_model}, broadcast=False, room="UI_2") - def unload_model(): global model @@ -1845,7 +1612,6 @@ def load_model(plugin, initial_load=False): # loadmodelsettings() # loadsettings() logger.init("GPU support", status="Searching") - koboldai_vars.hascuda = torch.cuda.is_available() and not args.cpu koboldai_vars.bmsupported = ((koboldai_vars.model_type != 'gpt2') or koboldai_vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not koboldai_vars.nobreakmodel if(args.breakmodel is not None and args.breakmodel): logger.warning("--breakmodel is no longer supported. Breakmodel mode is now automatically enabled when --breakmodel_gpulayers is used (see --help for details).") @@ -1861,12 +1627,7 @@ def load_model(plugin, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if args.cpu: - koboldai_vars.usegpu = False - gpu_layers = None - disk_layers = None - koboldai_vars.breakmodel = False - elif koboldai_vars.hascuda: + if koboldai_vars.hascuda: if(koboldai_vars.bmsupported): koboldai_vars.usegpu = False koboldai_vars.breakmodel = True @@ -1879,6 +1640,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] model.load(initial_load=initial_load) + logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer if model: diff --git a/koboldai_settings.py b/koboldai_settings.py index d8416df2..e9562ffc 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -710,7 +710,6 @@ class model_settings(settings): self.modeldim = -1 # Embedding dimension of your model (e.g. it's 4096 for GPT-J-6B and 2560 for GPT-Neo-2.7B) self.sampler_order = [6, 0, 1, 2, 3, 4, 5] self.newlinemode = "n" - self.lazy_load = True # Whether or not to use torch_lazy_loader.py for transformers models in order to reduce CPU memory usage self.presets = [] # Holder for presets self.selected_preset = "" self.uid_presets = [] @@ -1236,7 +1235,7 @@ class system_settings(settings): self.corescript = "default.lua" # Filename of corescript to load self.gpu_device = 0 # Which PyTorch device to use when using pure GPU generation self.savedir = os.getcwd()+"\\stories" - self.hascuda = False # Whether torch has detected CUDA on the system + self.hascuda = torch.cuda.is_available() # Whether torch has detected CUDA on the system self.usegpu = False # Whether to launch pipeline with GPU support self.splist = [] self.spselect = "" # Temporary storage for soft prompt filename to load diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index d5cf6397..c228e2ee 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -30,7 +30,6 @@ class model_loader(HFTorchInferenceModel): def _load(self, save_model: bool, initial_load: bool) -> None: utils.koboldai_vars.allowsp = True - self.lazy_load = utils.koboldai_vars.lazy_load # Make model path the same as the model name to make this consistent # with the other loading method if it isn't a known model type. 
This @@ -69,12 +68,14 @@ class model_loader(HFTorchInferenceModel): # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors + logger.debug("lazy_load: {} hascuda: {} breakmodel: {} nobreakmode: {}".format(self.lazy_load, utils.koboldai_vars.hascuda, self.breakmodel, self.nobreakmodel)) if ( self.lazy_load and utils.koboldai_vars.hascuda - and utils.koboldai_vars.breakmodel - and not utils.koboldai_vars.nobreakmodel + and self.breakmodel + and not self.nobreakmodel ): + logger.debug("loading breakmodel") self.breakmodel_device_config(self.model_config) if self.lazy_load: diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py new file mode 100644 index 00000000..08d8ea06 --- /dev/null +++ b/modeling/inference_models/gooseai.py @@ -0,0 +1,31 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + GenerationResult, + GenerationSettings, + InferenceModel, +) + +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(openai_gooseai_model_loader): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index c99e9a05..759feb65 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -27,7 +27,7 @@ class model_loader(HFInferenceModel): #model_name: str, ) -> None: super().__init__() - + self.hf_torch = False self.model_config = None self.capabilties = ModelCapabilities( embedding_manipulation=False, diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index efbb01d3..cad2a7f2 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,6 +11,8 @@ from modeling.inference_model import ( InferenceModel, ) +from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader + class OpenAIAPIError(Exception): @@ -18,172 +20,12 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_loader(openai_gooseai_model_loader): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): super().__init__() - self.key = "" + self.url = "https://api.openai.com/v1/engines" def is_valid(self, model_name, model_path, menu_path): - return model_name == "OAI" or model_name == "GooseAI" - - def get_requested_parameters(self, model_name, model_path, menu_path): - self.source = model_name - requested_parameters = [] - requested_parameters.extend([{ - "uitype": "text", - "unit": "text", - "label": "Key", - "id": "key", - "default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": True, - "extra_classes": "" - }, - { - "uitype": "dropdown", - "unit": "text", - "label": "Model", - "id": "model", - 
"default": "", - "check": {"value": "", 'check': "!="}, - "tooltip": "Which model to use when running OpenAI/GooseAI.", - "menu_path": "", - "refresh_model_inputs": False, - "extra_classes": "", - 'children': self.get_oai_models(), - - }]) - return requested_parameters - - def set_input_parameters(self, parameters): - self.key = parameters['key'].strip() - self.model = parameters['model'] - - def get_oai_models(self): - if self.key == "": - return [] - if self.source == 'OAI': - url = "https://api.openai.com/v1/engines" - elif self.source == 'GooseAI': - url = "https://api.goose.ai/v1/engines" - else: - return - - # Get list of models from OAI - logger.init("OAI Engines", status="Retrieving") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+self.key - } - ) - if(req.status_code == 200): - r = req.json() - engines = r["data"] - try: - engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] - except: - logger.error(engines) - raise - - online_model = "" - - - logger.init_ok("OAI Engines", status="OK") - return engines - else: - # Something went wrong, print the message and quit since we can't initialize an engine - logger.init_err("OAI Engines", status="Failed") - logger.error(req.json()) - emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) - return [] - - - def _load(self, save_model: bool, initial_load: bool) -> None: - self.tokenizer = self._get_tokenizer("gpt2") - - def _raw_generate( - self, - prompt_tokens: Union[List[int], torch.Tensor], - max_new: int, - gen_settings: GenerationSettings, - single_line: bool = False, - batch_count: int = 1, - seed: Optional[int] = None, - **kwargs, - ) -> GenerationResult: - - if seed is not None: - logger.warning( - "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." - ) - - decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) - - # Store context in memory to use it for comparison with generated content - utils.koboldai_vars.lastctx = decoded_prompt - - # Build request JSON data - # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround - # as the koboldai_vars.model will always be OAI - if "GooseAI" in utils.koboldai_vars.configname: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_a": gen_settings.top_a, - "top_p": gen_settings.top_p, - "top_k": gen_settings.top_k, - "tfs": gen_settings.tfs, - "typical_p": gen_settings.typical, - "repetition_penalty": gen_settings.rep_pen, - "repetition_penalty_slope": gen_settings.rep_pen_slope, - "repetition_penalty_range": gen_settings.rep_pen_range, - "n": batch_count, - # TODO: Implement streaming - "stream": False, - } - else: - reqdata = { - "prompt": decoded_prompt, - "max_tokens": max_new, - "temperature": gen_settings.temp, - "top_p": gen_settings.top_p, - "frequency_penalty": gen_settings.rep_pen, - "n": batch_count, - "stream": False, - } - - req = requests.post( - utils.koboldai_vars.oaiurl, - json=reqdata, - headers={ - "Authorization": "Bearer " + utils.koboldai_vars.oaiapikey, - "Content-Type": "application/json", - }, - ) - - j = req.json() - - if not req.ok: - # Send error message to web client - if "error" in j: - error_type = j["error"]["type"] - error_message = j["error"]["message"] - else: - error_type = "Unknown" - error_message = "Unknown" - raise OpenAIAPIError(error_type, error_message) - - outputs = [out["text"] for out in j["choices"]] - return GenerationResult( - model=self, - out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), - prompt=prompt_tokens, - is_whole_generation=True, - single_line=single_line, - ) + return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 1941a12e..c7a781d7 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -22,18 +22,19 @@ class HFInferenceModel(InferenceModel): def is_valid(self, model_name, model_path, menu_path): try: if model_path is not None and os.path.exists(model_path): - model_config = AutoConfig.from_pretrained(model_path) + self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") return True except: return False def get_requested_parameters(self, model_name, model_path, menu_path): requested_parameters = [] - + if not self.hf_torch: + return [] if model_path is not None and os.path.exists(model_path): self.model_config = AutoConfig.from_pretrained(model_path) elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): @@ -124,14 +125,20 @@ class HFInferenceModel(InferenceModel): return requested_parameters def set_input_parameters(self, parameters): - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' 
in parameters else None - self.layers = layers - self.disk_layers = parameters['disk_layers'] if 'disk_layers' in parameters else None - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + if self.hf_torch: + import breakmodel + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers + self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.model_type = self.get_model_type() + self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] self.path = parameters['path'] if 'path' in parameters else None @@ -157,6 +164,10 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass + if self.hf_torch: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 7cc16ad5..84c60a6c 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -53,15 +53,12 @@ LOG_SAMPLER_NO_EFFECT = False class HFTorchInferenceModel(HFInferenceModel): - def __init__( - self, - #model_name: str, - #lazy_load: bool, - #low_mem: bool, - ) -> None: + def __init__(self) -> None: super().__init__() - #self.lazy_load = lazy_load - #self.low_mem = low_mem + self.hf_torch = True + self.lazy_load = True + self.low_mem = False + self.nobreakmodel = False self.post_token_hooks = [ PostTokenHooks.stream_tokens, @@ -398,7 +395,7 @@ class HFTorchInferenceModel(HFInferenceModel): Embedding._koboldai_patch_causallm_model = self.model def _get_lazy_load_callback(self, n_layers: int, convert_to_float16: bool = True): - if not utils.koboldai_vars.lazy_load: + if not self.lazy_load: return if utils.args.breakmodel_disklayers is not None: @@ -819,14 +816,14 @@ class HFTorchInferenceModel(HFInferenceModel): elif ( utils.args.breakmodel_gpulayers is not None or utils.args.breakmodel_disklayers is not None + or breakmodel.gpu_blocks != [] ): try: - if not utils.args.breakmodel_gpulayers: - breakmodel.gpu_blocks = [] - else: - breakmodel.gpu_blocks = list( - map(int, utils.args.breakmodel_gpulayers.split(",")) - ) + if breakmodel.gpu_blocks == []: + if utils.args.breakmodel_gpulayers: + breakmodel.gpu_blocks = list( + map(int, utils.args.breakmodel_gpulayers.split(",")) + ) assert len(breakmodel.gpu_blocks) <= torch.cuda.device_count() s = n_layers for i in range(len(breakmodel.gpu_blocks)): diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py new file mode 100644 index 00000000..621ccbad --- /dev/null +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -0,0 +1,189 @@ +import torch +import requests +import numpy as np +from typing import List, Optional, Union + +import utils +from logger import logger +from modeling.inference_model import ( + 
GenerationResult, + GenerationSettings, + InferenceModel, +) + + + +class OpenAIAPIError(Exception): + def __init__(self, error_type: str, error_message) -> None: + super().__init__(f"{error_type}: {error_message}") + + +class model_loader(InferenceModel): + """InferenceModel for interfacing with OpenAI's generation API.""" + + def __init__(self): + super().__init__() + self.key = "" + self.url = "https://api.goose.ai/v1/engines" + #if self.source == 'OAI': + # url = "https://api.openai.com/v1/engines" + #elif self.source == 'GooseAI': + # url = "https://api.goose.ai/v1/engines" + + def is_valid(self, model_name, model_path, menu_path): + return model_name == "OAI" or model_name == "GooseAI" + + def get_requested_parameters(self, model_name, model_path, menu_path): + self.source = model_name + requested_parameters = [] + requested_parameters.extend([{ + "uitype": "text", + "unit": "text", + "label": "Key", + "id": "key", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "User Key to use when connecting to OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }, + { + "uitype": "dropdown", + "unit": "text", + "label": "Model", + "id": "model", + "default": "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Which model to use when running OpenAI/GooseAI.", + "menu_path": "", + "refresh_model_inputs": False, + "extra_classes": "", + 'children': self.get_oai_models(), + + }]) + return requested_parameters + + def set_input_parameters(self, parameters): + self.key = parameters['key'].strip() + self.model = parameters['model'] + + def get_oai_models(self): + if self.key == "": + return [] + + + # Get list of models from OAI + logger.init("OAI Engines", status="Retrieving") + req = requests.get( + self.url, + headers = { + 'Authorization': 'Bearer '+self.key + } + ) + if(req.status_code == 200): + r = req.json() + engines = r["data"] + try: + engines = [{"value": en["id"], "text": "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")} for en in engines] + except: + logger.error(engines) + raise + + online_model = "" + + + logger.init_ok("OAI Engines", status="OK") + return engines + else: + # Something went wrong, print the message and quit since we can't initialize an engine + logger.init_err("OAI Engines", status="Failed") + logger.error(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) + return [] + + + def _load(self, save_model: bool, initial_load: bool) -> None: + self.tokenizer = self._get_tokenizer("gpt2") + + def _raw_generate( + self, + prompt_tokens: Union[List[int], torch.Tensor], + max_new: int, + gen_settings: GenerationSettings, + single_line: bool = False, + batch_count: int = 1, + seed: Optional[int] = None, + **kwargs, + ) -> GenerationResult: + + if seed is not None: + logger.warning( + "Seed is unsupported on the OpenAIAPIInferenceModel. Seed will be ignored." + ) + + decoded_prompt = utils.decodenewlines(self.tokenizer.decode(prompt_tokens)) + + # Store context in memory to use it for comparison with generated content + utils.koboldai_vars.lastctx = decoded_prompt + + # Build request JSON data + # GooseAI is a subntype of OAI. 
So to check if it's this type, we check the configname as a workaround + # as the koboldai_vars.model will always be OAI + if "GooseAI" in utils.koboldai_vars.configname: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_a": gen_settings.top_a, + "top_p": gen_settings.top_p, + "top_k": gen_settings.top_k, + "tfs": gen_settings.tfs, + "typical_p": gen_settings.typical, + "repetition_penalty": gen_settings.rep_pen, + "repetition_penalty_slope": gen_settings.rep_pen_slope, + "repetition_penalty_range": gen_settings.rep_pen_range, + "n": batch_count, + # TODO: Implement streaming + "stream": False, + } + else: + reqdata = { + "prompt": decoded_prompt, + "max_tokens": max_new, + "temperature": gen_settings.temp, + "top_p": gen_settings.top_p, + "frequency_penalty": gen_settings.rep_pen, + "n": batch_count, + "stream": False, + } + + req = requests.post( + self.url, + json=reqdata, + headers={ + "Authorization": "Bearer " + self.key, + "Content-Type": "application/json", + }, + ) + + j = req.json() + + if not req.ok: + # Send error message to web client + if "error" in j: + error_type = j["error"]["type"] + error_message = j["error"]["message"] + else: + error_type = "Unknown" + error_message = "Unknown" + raise OpenAIAPIError(error_type, error_message) + + outputs = [out["text"] for out in j["choices"]] + return GenerationResult( + model=self, + out_batches=np.array([self.tokenizer.encode(x) for x in outputs]), + prompt=prompt_tokens, + is_whole_generation=True, + single_line=single_line, + ) diff --git a/static/koboldai.js b/static/koboldai.js index 7f004ff2..ab7f7832 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1652,6 +1652,12 @@ function selected_model_info(data) { while (loadmodelsettings.firstChild) { loadmodelsettings.removeChild(loadmodelsettings.firstChild); } + //Clear out plugin selector + var model_plugin = document.getElementById('modelplugin'); + while (model_plugin.firstChild) { + model_plugin.removeChild(model_plugin.firstChild); + } + var accept = document.getElementById("btn_loadmodelaccept"); accept.disabled = false; From a6f0e97ba0ecf17b558e7577834ed9cff964be00 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 20:40:05 -0400 Subject: [PATCH 029/102] Working(?) 
breakmodel --- modeling/inference_models/parents/hf.py | 3 +- modeling/inference_models/parents/hf_torch.py | 52 ++++++++++--------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index c7a781d7..67fd8b15 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -18,6 +18,7 @@ class HFInferenceModel(InferenceModel): self.model = None self.tokenizer = None self.badwordsids = koboldai_settings.badwordsids_default + self.usegpu = False def is_valid(self, model_name, model_path, menu_path): try: @@ -136,7 +137,7 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers - self.use_gpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel self.model_name = parameters['id'] diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index 84c60a6c..d942a572 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -289,6 +289,7 @@ class HFTorchInferenceModel(HFInferenceModel): raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") + logger.debug(traceback_string) try: return GPT2LMHeadModel.from_pretrained(location, **tf_kwargs) except Exception as e: @@ -437,10 +438,10 @@ class HFTorchInferenceModel(HFInferenceModel): ): device_map[key] = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else breakmodel.primary_device ) else: @@ -456,12 +457,12 @@ class HFTorchInferenceModel(HFInferenceModel): ) device = ( utils.koboldai_vars.gpu_device - if utils.koboldai_vars.hascuda and utils.koboldai_vars.usegpu + if utils.koboldai_vars.hascuda and self.usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not utils.koboldai_vars.hascuda - or not utils.koboldai_vars.breakmodel + or not self.breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right( @@ -566,15 +567,15 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -612,14 +613,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu 
- and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -675,16 +676,16 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) and model_dict[key].dtype is torch.float32 ): model_dict[key] = model_dict[key].to(torch.float16) if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel and model_dict[key].dtype is torch.float16 ): model_dict[key] = model_dict[key].to(torch.float32) @@ -723,14 +724,14 @@ class HFTorchInferenceModel(HFInferenceModel): and breakmodel.primary_device != "cpu" and utils.koboldai_vars.hascuda and ( - utils.koboldai_vars.breakmodel - or utils.koboldai_vars.usegpu + self.breakmodel + or self.usegpu ) ): dtype = torch.float16 if breakmodel.primary_device == "cpu" or ( - not utils.koboldai_vars.usegpu - and not utils.koboldai_vars.breakmodel + not self.usegpu + and not self.breakmodel ): dtype = torch.float32 if ( @@ -764,7 +765,7 @@ class HFTorchInferenceModel(HFInferenceModel): if always_use or ( utils.koboldai_vars.hascuda and self.low_mem - and (utils.koboldai_vars.usegpu or utils.koboldai_vars.breakmodel) + and (self.usegpu or self.breakmodel) ): original_dtype = torch.get_default_dtype() torch.set_default_dtype(torch.float16) @@ -956,8 +957,9 @@ class HFTorchInferenceModel(HFInferenceModel): -1, utils.num_layers(config), ): - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = True + logger.debug("All layers on same GPU. Breakmodel disabled") + self.breakmodel = False + self.usegpu = True utils.koboldai_vars.gpu_device = len(breakmodel.gpu_blocks) - 1 return @@ -966,6 +968,6 @@ class HFTorchInferenceModel(HFInferenceModel): import breakmodel breakmodel.primary_device = "cpu" - utils.koboldai_vars.breakmodel = False - utils.koboldai_vars.usegpu = False + self.breakmodel = False + self.usegpu = False return From aaa91338996a652960bfa8b9461c2f0de8d82bee Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 11 May 2023 21:22:33 -0400 Subject: [PATCH 030/102] Disk Cache working UI valid marker broken for disk cache --- aiserver.py | 4 +--- modeling/inference_models/parents/hf.py | 6 +++--- modeling/inference_models/parents/hf_torch.py | 8 ++++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 158a6699..a306449e 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1639,7 +1639,7 @@ def load_model(plugin, initial_load=False): model = model_loaders[plugin] - model.load(initial_load=initial_load) + model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6156,8 +6156,6 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - logger.info("loading Model") - logger.info(data) model_loaders[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 67fd8b15..03955d88 100644 --- 
a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -53,12 +53,12 @@ class HFInferenceModel(InferenceModel): break_values = break_values.split(",") else: break_values = [layer_count] - disk_blocks = None + disk_blocks = 0 break_values = [int(x) for x in break_values if x != '' and x is not None] gpu_count = torch.cuda.device_count() break_values += [0] * (gpu_count - len(break_values)) if disk_blocks is not None: - break_values += [disk_blocks] + break_values += [int(disk_blocks)] for i in range(gpu_count): requested_parameters.append({ "uitype": "slider", @@ -134,7 +134,7 @@ class HFInferenceModel(InferenceModel): layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None self.layers = layers - self.disk_layers = int(parameters['disk_layers']) if 'disk_layers' in parameters and parameters['disk_layers'].isnumeric() else 0 + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None diff --git a/modeling/inference_models/parents/hf_torch.py b/modeling/inference_models/parents/hf_torch.py index d942a572..aae3ada3 100644 --- a/modeling/inference_models/parents/hf_torch.py +++ b/modeling/inference_models/parents/hf_torch.py @@ -780,6 +780,7 @@ class HFTorchInferenceModel(HFInferenceModel): device_count = torch.cuda.device_count() if device_count < 2: primary = None + logger.debug("n_layers: {}".format(n_layers)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -835,10 +836,7 @@ class HFTorchInferenceModel(HFInferenceModel): s -= breakmodel.gpu_blocks[i] assert sum(breakmodel.gpu_blocks) <= n_layers n_layers -= sum(breakmodel.gpu_blocks) - if utils.args.breakmodel_disklayers is not None: - assert utils.args.breakmodel_disklayers <= n_layers - breakmodel.disk_blocks = utils.args.breakmodel_disklayers - n_layers -= utils.args.breakmodel_disklayers + n_layers -= breakmodel.disk_blocks except: logger.warning( "--breakmodel_gpulayers is malformatted. Please use the --help option to see correct usage of --breakmodel_gpulayers. Defaulting to all layers on device 0." 
@@ -949,6 +947,8 @@ class HFTorchInferenceModel(HFInferenceModel): logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) + with open("settings/{}.breakmodel".format(self.model_name.replace("/", "_")), "w") as file: + file.write("{}\n{}".format(",".join(map(str, breakmodel.gpu_blocks)), breakmodel.disk_blocks)) # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: From 67df9b917f6a84445520e89a04080e8553356b15 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 12 May 2023 09:08:07 +0200 Subject: [PATCH 031/102] Reintroduce 4.29 Transformers --- aiserver.py | 2 +- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 1abdd31e..b045ea71 100644 --- a/aiserver.py +++ b/aiserver.py @@ -221,7 +221,7 @@ model_menu = { MenuFolder("Untuned Fairseq Dense", "fsdlist"), MenuFolder("Untuned Bloom", "bloomlist"), MenuFolder("Untuned XGLM", "xglmlist"), - #MenuFolder("Official RWKV-4", "rwkvlist"), + MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), diff --git a/environments/huggingface.yml b/environments/huggingface.yml index af16423e..3d0ca633 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.* + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index ffcacfb6..eb2927bd 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.28.* + - transformers==4.29.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index c98b7252..28fdb28c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.28.* +transformers==4.29.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index b41b7ead..7fc866f0 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.28.* +transformers==4.29.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From 205c64f1ea161ec2d0b3929efb73db429fa0f798 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 13 May 2023 20:26:55 +0200 Subject: [PATCH 032/102] More universal pytorch folder detection --- modeling/inference_models/hf_torch.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 3f7c3967..cc7af713 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -581,10 +581,9 @@ class HFTorchInferenceModel(HFInferenceModel): last_storage_key = storage_key if isinstance(f, zipfile.ZipExtFile): f.close() - try: - f = z.open(f"archive/data/{storage_key}") - except: - f = z.open(f"{zipfolder}/data/{storage_key}") + ziproot = z.namelist()[0].split("/")[0] + f = z.open(f"{ziproot}/data/{storage_key}") + current_offset = 0 if current_offset 
!= model_dict[key].seek_offset: f.read(model_dict[key].seek_offset - current_offset) From 56443bc7eaf4404c153368582baff107a3137bcb Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 21:44:01 +0200 Subject: [PATCH 033/102] Unban torch._tensor._rebuild_tensor_v2 --- modeling/lazy_loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index 3dee5bae..e7acc784 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -196,6 +196,8 @@ class RestrictedUnpickler(pickle.Unpickler): return collections.OrderedDict elif module == "torch._utils" and name == "_rebuild_tensor_v2": return torch._utils._rebuild_tensor_v2 + elif module == "torch._tensor" and name == "_rebuild_tensor_v2": + return torch._tensor._rebuild_tensor_v2 elif module == "torch" and name in ( "DoubleStorage", "FloatStorage", From c5100b4eab5b37e0d575869283a6f837f5398f00 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 22:21:22 +0200 Subject: [PATCH 034/102] Unban Tensor --- modeling/lazy_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index e7acc784..a948821e 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -209,6 +209,7 @@ class RestrictedUnpickler(pickle.Unpickler): "ByteStorage", "BoolStorage", "BFloat16Storage", + "Tensor", ): return getattr(torch, name) elif module == "numpy.core.multiarray" and name == "scalar": @@ -221,7 +222,7 @@ class RestrictedUnpickler(pickle.Unpickler): # Forbid everything else. qualified_name = name if module == "__builtin__" else f"{module}.{name}" raise pickle.UnpicklingError( - f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code" + f"`{qualified_name}` is forbidden; the model you are loading probably contains malicious code. 
If you think this is incorrect ask the developer to unban the ability for {module} to execute {name}" ) def load(self, *args, **kwargs): From 59c96b5b7aea2eaf4ad8bab70794c8f3d41edccf Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 15 May 2023 22:38:12 +0200 Subject: [PATCH 035/102] Unban fix --- modeling/lazy_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index a948821e..5a27d549 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -196,8 +196,8 @@ class RestrictedUnpickler(pickle.Unpickler): return collections.OrderedDict elif module == "torch._utils" and name == "_rebuild_tensor_v2": return torch._utils._rebuild_tensor_v2 - elif module == "torch._tensor" and name == "_rebuild_tensor_v2": - return torch._tensor._rebuild_tensor_v2 + elif module == "torch._tensor" and name == "_rebuild_from_type_v2": + return torch._tensor._rebuild_from_type_v2 elif module == "torch" and name in ( "DoubleStorage", "FloatStorage", From b2501e469381eb42530fdf74d7d7322e5dd1f6f7 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 16 May 2023 22:15:59 +0200 Subject: [PATCH 036/102] 4.29 was still to buggy --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- requirements_mtj.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 3d0ca633..af16423e 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/environments/rocm.yml b/environments/rocm.yml index eb2927bd..ffcacfb6 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.29.* + - transformers==4.28.* - huggingface_hub==0.12.1 - safetensors==0.3.1 - accelerate==0.18.0 diff --git a/requirements.txt b/requirements.txt index 28fdb28c..c98b7252 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.29.* +transformers==4.28.* huggingface_hub==0.12.1 Flask==2.2.3 Flask-SocketIO==5.3.2 diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 7fc866f0..b41b7ead 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -5,7 +5,7 @@ requests dm-haiku==0.0.9 jax==0.3.25 jaxlib==0.3.25 -transformers==4.29.* +transformers==4.28.* chex == 0.1.5 huggingface_hub==0.12.1 progressbar2 From f027d8b6e56393c12b8cd1611a3c0b7cc90802c9 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 17 May 2023 21:15:31 -0400 Subject: [PATCH 037/102] Better working valid detection and named model backends for UI --- aiserver.py | 53 +++-- modeling/inference_models/api.py | 3 +- modeling/inference_models/basic_api.py | 4 +- modeling/inference_models/generic_hf_torch.py | 3 +- modeling/inference_models/gooseai.py | 5 +- modeling/inference_models/hf_mtj.py | 4 +- modeling/inference_models/horde.py | 3 +- modeling/inference_models/openai.py | 6 +- modeling/inference_models/parents/hf.py | 24 +- .../parents/openai_gooseai.py | 2 +- modeling/inference_models/readonly.py | 3 +- modeling/inference_models/rwkv.py | 5 +- static/koboldai.js | 206 +++++++++++------- templates/templates.html | 5 +- 14 files changed, 191 insertions(+), 135 deletions(-) diff --git a/aiserver.py b/aiserver.py index 92dde7f4..314fb512 100644 --- 
a/aiserver.py +++ b/aiserver.py @@ -622,12 +622,12 @@ from modeling.patches import patch_transformers #Load all of the model importers import importlib -model_loader_code = {} -model_loaders = {} +model_backend_code = {} +model_backends = {} for module in os.listdir("./modeling/inference_models"): if os.path.isfile(os.path.join("./modeling/inference_models",module)) and module[-3:] == '.py': - model_loader_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) - model_loaders[module[:-3]] = model_loader_code[module[:-3]].model_loader() + model_backend_code[module[:-3]] = importlib.import_module('modeling.inference_models.{}'.format(module[:-3])) + model_backends[model_backend_code[module[:-3]].model_backend_name] = model_backend_code[module[:-3]].model_backend() old_socketio_on = socketio.on @@ -1354,6 +1354,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") + parser.add_argument("--model_backend", help="Specify the model backend you want to use") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,6 +1448,12 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. 
Please enter one through the --model_backend or remove the --model from the run command") + exit() + #if + koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1472,7 +1479,7 @@ def general_startup(override_args=None): koboldai_vars.quiet = True if args.nobreakmodel: - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True if args.remote: koboldai_vars.host = True; @@ -1484,7 +1491,7 @@ def general_startup(override_args=None): koboldai_vars.host = True; if args.lowmem: - model_loaders['generic_hf_torch'].low_mem = True + model_backends['Huggingface'].low_mem = True if args.host != "Disabled": # This means --host option was submitted without an argument @@ -1520,7 +1527,7 @@ def general_startup(override_args=None): koboldai_vars.use_colab_tpu = False koboldai_vars.hascuda = False koboldai_vars.usegpu = False - model_loaders['generic_hf_torch'].nobreakmodel = True + model_backends['Huggingface'].nobreakmodel = True koboldai_vars.smandelete = koboldai_vars.host == args.override_delete koboldai_vars.smanrename = koboldai_vars.host == args.override_rename @@ -1582,7 +1589,7 @@ def unload_model(): koboldai_vars.badwordsids = koboldai_settings.badwordsids_default -def load_model(plugin, initial_load=False): +def load_model(model_backend, initial_load=False): global model global tokenizer global model_config @@ -1637,7 +1644,7 @@ def load_model(plugin, initial_load=False): koboldai_vars.default_preset = koboldai_settings.default_preset - model = model_loaders[plugin] + model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) logger.debug("Model Type: {}".format(koboldai_vars.model_type)) @@ -6103,33 +6110,23 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - logger.debug("Asking for model info on potential model: {}".format(data)) - valid = False if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_loader in model_loaders: - logger.debug("Testing Loader {} for model {}: {}".format(model_loader, data["name"], model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]))) - if model_loaders[model_loader].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): - valid_loaders[model_loader] = model_loaders[model_loader].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - valid = True - if valid: - logger.debug("Valid Loaders: {}".format(valid_loaders)) - emit("selected_model_info", valid_loaders) - if not valid and 'path' in data: + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) output = [] for path in paths: valid=False - for model_loader in model_loaders: - if model_loaders[model_loader].is_valid(path[1], path[0], "Custom"): + for model_backend in model_backends: + if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): valid=True break output.append({'label': path[1], 'name': 
path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) - emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) - elif not valid: - logger.error("Nothing can load the model: {}".format(valid_loaders)) - + emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return @@ -6156,7 +6153,7 @@ def UI_2_select_model(data): @socketio.on('load_model') @logger.catch def UI_2_load_model(data): - model_loaders[data['plugin']].set_input_parameters(data) + model_backends[data['plugin']].set_input_parameters(data) load_model(data['plugin']) #load_model(use_gpu=data['use_gpu'], gpu_layers=data['gpu_layers'], disk_layers=data['disk_layers'], online_model=data['online_model'], url=koboldai_vars.colaburl, use_8_bit=data['use_8_bit']) @@ -10671,7 +10668,7 @@ for schema in config_endpoint_schemas: def startup(): if koboldai_vars.model == "" or koboldai_vars.model is None: koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('readonly',), **{'initial_load':True}) + socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) print("", end="", flush=True) diff --git a/modeling/inference_models/api.py b/modeling/inference_models/api.py index 5bddd714..409158f5 100644 --- a/modeling/inference_models/api.py +++ b/modeling/inference_models/api.py @@ -17,12 +17,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "KoboldAI API" class APIException(Exception): """To be used for errors when using the Kobold API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() #self.base_url = "" diff --git a/modeling/inference_models/basic_api.py b/modeling/inference_models/basic_api.py index 5666ba8e..cca9652b 100644 --- a/modeling/inference_models/basic_api.py +++ b/modeling/inference_models/basic_api.py @@ -15,11 +15,13 @@ from modeling.inference_model import ( ) +model_backend_name = "KoboldAI Old Colab Method" + class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/generic_hf_torch.py b/modeling/inference_models/generic_hf_torch.py index c228e2ee..f7a00f45 100644 --- a/modeling/inference_models/generic_hf_torch.py +++ b/modeling/inference_models/generic_hf_torch.py @@ -22,8 +22,9 @@ except ModuleNotFoundError as e: from modeling.inference_models.parents.hf_torch import HFTorchInferenceModel +model_backend_name = "Huggingface" -class model_loader(HFTorchInferenceModel): +class model_backend(HFTorchInferenceModel): def _initialize_model(self): return diff --git a/modeling/inference_models/gooseai.py b/modeling/inference_models/gooseai.py index 08d8ea06..9d6e8771 100644 --- a/modeling/inference_models/gooseai.py +++ b/modeling/inference_models/gooseai.py @@ -11,16 +11,17 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "GooseAI" class 
OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/hf_mtj.py b/modeling/inference_models/hf_mtj.py index 4e82d348..6351eca2 100644 --- a/modeling/inference_models/hf_mtj.py +++ b/modeling/inference_models/hf_mtj.py @@ -19,10 +19,10 @@ from modeling.inference_model import ( from modeling.inference_models.parents.hf import HFInferenceModel from modeling.tokenizer import GenericTokenizer +model_backend_name = "Huggingface MTJ" - -class model_loader(HFInferenceModel): +class model_backend(HFInferenceModel): def __init__( self, #model_name: str, diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index bd457197..6c880bbe 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -16,12 +16,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Horde" class HordeException(Exception): """To be used for errors on server side of the Horde.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() self.url = "https://horde.koboldai.net" diff --git a/modeling/inference_models/openai.py b/modeling/inference_models/openai.py index cad2a7f2..19a7d1e6 100644 --- a/modeling/inference_models/openai.py +++ b/modeling/inference_models/openai.py @@ -11,16 +11,16 @@ from modeling.inference_model import ( InferenceModel, ) -from modeling.inference_models.parents.openai_gooseai import model_loader as openai_gooseai_model_loader - +from modeling.inference_models.parents.openai_gooseai import model_backend as openai_gooseai_model_backend +model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") -class model_loader(openai_gooseai_model_loader): +class model_backend(openai_gooseai_model_backend): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index ba291c3f..69549bd5 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -1,7 +1,7 @@ import os from typing import Optional from transformers import AutoConfig - +import warnings import utils import koboldai_settings from logger import logger @@ -43,7 +43,7 @@ class HFInferenceModel(InferenceModel): else: self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0: + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: if os.path.exists("settings/{}.breakmodel".format(model_name.replace("/", "_"))): with open("settings/{}.breakmodel".format(model_name.replace("/", "_")), "r") as file: data = [x for x in file.read().split("\n")[:2] if x != ''] @@ 
-128,15 +128,17 @@ class HFInferenceModel(InferenceModel): def set_input_parameters(self, parameters): if self.hf_torch: import breakmodel - gpu_count = torch.cuda.device_count() - layers = [] - for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None - self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 - breakmodel.gpu_blocks = layers - breakmodel.disk_blocks = self.disk_layers + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + gpu_count = torch.cuda.device_count() + layers = [] + for i in range(gpu_count): + layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.layers = layers + self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + breakmodel.gpu_blocks = layers + breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel diff --git a/modeling/inference_models/parents/openai_gooseai.py b/modeling/inference_models/parents/openai_gooseai.py index 621ccbad..871ea5ce 100644 --- a/modeling/inference_models/parents/openai_gooseai.py +++ b/modeling/inference_models/parents/openai_gooseai.py @@ -18,7 +18,7 @@ class OpenAIAPIError(Exception): super().__init__(f"{error_type}: {error_message}") -class model_loader(InferenceModel): +class model_backend(InferenceModel): """InferenceModel for interfacing with OpenAI's generation API.""" def __init__(self): diff --git a/modeling/inference_models/readonly.py b/modeling/inference_models/readonly.py index c642c05a..92531af4 100644 --- a/modeling/inference_models/readonly.py +++ b/modeling/inference_models/readonly.py @@ -14,12 +14,13 @@ from modeling.inference_model import ( ModelCapabilities, ) +model_backend_name = "Read Only" class BasicAPIException(Exception): """To be used for errors when using the Basic API as an interface.""" -class model_loader(InferenceModel): +class model_backend(InferenceModel): def __init__(self) -> None: super().__init__() diff --git a/modeling/inference_models/rwkv.py b/modeling/inference_models/rwkv.py index d14d8c81..fa6497b7 100644 --- a/modeling/inference_models/rwkv.py +++ b/modeling/inference_models/rwkv.py @@ -55,7 +55,10 @@ MODEL_FILES = { } -class model_loader(InferenceModel): +model_backend_name = "RWKV" + + +class model_backend(InferenceModel): def __init__( self, #model_name: str, diff --git a/static/koboldai.js b/static/koboldai.js index de3ab324..905403c1 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1645,8 +1645,85 @@ function show_model_menu(data) { } +function model_settings_checker() { + //get check value: + 
missing_element = false; + if (this.check_data != null) { + if ('sum' in this.check_data) { + check_value = 0 + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); + } else { + missing_element = true; + } + } + } else { + check_value = this.value + } + if (this.check_data['check'] == "=") { + valid = (check_value == this.check_data['value']); + } else if (this.check_data['check'] == "!=") { + valid = (check_value != this.check_data['value']); + } else if (this.check_data['check'] == ">=") { + valid = (check_value >= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value <= this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value > this.check_data['value']); + } else if (this.check_data['check'] == "<=") { + valid = (check_value < this.check_data['value']); + } + if (valid || missing_element) { + //if we are supposed to refresh when this value changes we'll resubmit + if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { + console.log("resubmit"); + } + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } + } + } else { + this.closest(".setting_container_model").classList.remove('input_error'); + this.closest(".setting_container_model").removeAttribute("tooltip"); + } + } else { + if ('sum' in this.check_data) { + for (const temp of this.check_data['sum']) { + if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } else { + this.closest(".setting_container_model").classList.add('input_error'); + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } + } + } + var accept = document.getElementById("btn_loadmodelaccept"); + ok_to_load = true; + for (const item of document.getElementsByClassName("input_error")) { + if (item.classList.contains("input_error") && !item.closest(".model_plugin_settings_area").classList.contains("hidden")) { + ok_to_load = false; + break; + } + } + + if (ok_to_load) { + accept.classList.remove("disabled"); + accept.disabled = false; + } else { + accept.classList.add("disabled"); + accept.disabled = true; + } +} -function selected_model_info(data) { +function selected_model_info(sent_data) { + const data = sent_data['model_backends']; //clear out the loadmodelsettings var loadmodelsettings = document.getElementById('loadmodelsettings') while (loadmodelsettings.firstChild) { @@ -1667,7 +1744,10 @@ function selected_model_info(data) { for (const area of document.getElementsByClassName("model_plugin_settings_area")) { area.classList.add("hidden"); } - document.getElementById(this.value + 
"_settings_area").classList.remove("hidden"); + if (document.getElementById(this.value + "_settings_area")) { + document.getElementById(this.value + "_settings_area").classList.remove("hidden"); + } + model_settings_checker() } //create the content for (const [loader, items] of Object.entries(data)) { @@ -1679,7 +1759,11 @@ function selected_model_info(data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); + if (loader == sent_data['preselected']) { + modelplugin.value = sent_data['preselected']; + } + //create the user input for each requested input for (item of items) { let new_setting = document.getElementById('blank_model_settings').cloneNode(true); new_setting.id = loader; @@ -1687,73 +1771,7 @@ function selected_model_info(data) { new_setting.querySelector('#blank_model_settings_label').innerText = item['label']; new_setting.querySelector('#blank_model_settings_tooltip').setAttribute("tooltip", item['tooltip']); - onchange_event = function () { - //get check value: - if ('sum' in this.check_data) { - check_value = 0 - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - check_value += parseInt(document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").value); - } - } - } else { - check_value = this.value - } - if (this.check_data['check'] == "=") { - valid = (check_value == this.check_data['value']); - } else if (this.check_data['check'] == "!=") { - valid = (check_value != this.check_data['value']); - } else if (this.check_data['check'] == ">=") { - valid = (check_value >= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value <= this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value > this.check_data['value']); - } else if (this.check_data['check'] == "<=") { - valid = (check_value < this.check_data['value']); - } - if (valid) { - //if we are supposed to refresh when this value changes we'll resubmit - if (this.getAttribute("refresh_model_inputs") == "true") { - console.log("resubmit"); - } - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.remove('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); - } - } - } else { - this.closest(".setting_container_model").classList.remove('input_error'); - this.closest(".setting_container_model").removeAttribute("tooltip"); - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length) - accept.disabled = true; - } else { - if ('sum' in this.check_data) { - for (const temp of this.check_data['sum']) { - if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - } else { - this.closest(".setting_container_model").classList.add('input_error'); - 
this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); - } - } - var accept = document.getElementById("btn_loadmodelaccept"); - if (document.getElementsByClassName("input_error").length > 0) { - accept.classList.add("disabled"); - accept.disabled = true; - } else { - accept.classList.remove("disabled"); - accept.disabled = false; - } - - } + onchange_event = model_settings_checker; if (item['uitype'] == "slider") { var slider_number = new_setting.querySelector('#blank_model_settings_value_slider_number'); slider_number.value = item['default']; @@ -1764,6 +1782,7 @@ function selected_model_info(data) { slider.value = item['default']; slider.min = item['min']; slider.max = item['max']; + slider.setAttribute("data_type", item['unit']); slider.id = loader + "|" + item['id'] + "_value"; if ('check' in item) { slider.check_data = item['check']; @@ -1777,25 +1796,37 @@ function selected_model_info(data) { slider.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); new_setting.querySelector('#blank_model_settings_min_label').innerText = item['min']; new_setting.querySelector('#blank_model_settings_max_label').innerText = item['max']; + slider.noresubmit = true; slider.onchange(); + slider.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_slider').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_slider').remove(); } if (item['uitype'] == "toggle") { - var toggle = new_setting.querySelector('#blank_model_settings_toggle'); + toggle = document.createElement("input"); + toggle.type='checkbox'; + toggle.classList.add("setting_item_input"); + toggle.classList.add("blank_model_settings_input"); + toggle.classList.add("model_settings_input"); toggle.id = loader + "|" + item['id'] + "_value"; toggle.checked = item['default']; - toggle.onchange = onchange_event; + toggle.onclick = onchange_event; + toggle.setAttribute("data_type", item['unit']); toggle.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { toggle.check_data = item['check']; } else { toggle.check_data = null; } - toggle.onchange(); + new_setting.querySelector('#blank_model_settings_toggle').append(toggle); + setTimeout(function() { + $('#'+loader + "\\|" + item['id'] + "_value").bootstrapToggle({size: "mini", onstyle: "success", toggle: "toggle"}); + }, 200); + toggle.noresubmit = true; + toggle.onclick(); + toggle.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_checkbox_container').classList.add("hidden"); - new_setting.querySelector('#blank_model_settings_toggle').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_toggle').remove(); } if (item['uitype'] == "dropdown") { var select_element = new_setting.querySelector('#blank_model_settings_dropdown'); @@ -1807,6 +1838,7 @@ function selected_model_info(data) { select_element.append(new_option); } select_element.value = item['default']; + select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1814,14 +1846,17 @@ function selected_model_info(data) { } else { select_element.check_data = null; } + select_element.noresubmit = true; select_element.onchange(); + select_element.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_dropdown').classList.add("hidden"); + 
new_setting.querySelector('#blank_model_settings_dropdown').remove(); } if (item['uitype'] == "password") { var password_item = new_setting.querySelector('#blank_model_settings_password'); password_item.id = loader + "|" + item['id'] + "_value"; password_item.value = item['default']; + password_item.setAttribute("data_type", item['unit']); password_item.onchange = onchange_event; password_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { @@ -1829,24 +1864,29 @@ function selected_model_info(data) { } else { password_item.check_data = null; } + password_item.noresubmit = true; password_item.onchange(); + password_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_password').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_password').remove(); } if (item['uitype'] == "text") { var text_item = new_setting.querySelector('#blank_model_settings_text'); text_item.id = loader + "|" + item['id'] + "_value"; text_item.value = item['default']; text_item.onchange = onchange_event; + text_item.setAttribute("data_type", item['unit']); text_item.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); if ('check' in item) { text_item.check_data = item['check']; } else { text_item.check_data = null; } + text_item.noresubmit = true; text_item.onchange(); + text_item.noresubmit = false; } else { - new_setting.querySelector('#blank_model_settings_text').classList.add("hidden"); + new_setting.querySelector('#blank_model_settings_text').remove(); } model_area.append(new_setting); @@ -1891,7 +1931,15 @@ function load_model() { //get an object of all the input settings from the user data = {} for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - data[element.id.split("|")[1].replace("_value", "")] = element.value; + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/templates/templates.html b/templates/templates.html index 49cd3e5b..49fa99f6 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -162,9 +162,8 @@ - - - + + From 182ecff20273b4921f4cefa04f7a845d22fc58ac Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 16:01:17 -0400 Subject: [PATCH 038/102] Added in model backend to the command line arguments --- aiserver.py | 70 ++++++++++++++++--------- modeling/inference_model.py | 2 + modeling/inference_models/horde.py | 1 - modeling/inference_models/parents/hf.py | 8 ++- 4 files changed, 54 insertions(+), 27 deletions(-) diff --git a/aiserver.py b/aiserver.py index 314fb512..235732ec 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1355,6 +1355,7 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input 
to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") parser.add_argument("--sh_apikey", help="Specify the API key to use for txt2img from the Stable Horde. Get a key from https://horde.koboldai.net/register") @@ -1447,14 +1448,6 @@ def general_startup(override_args=None): args.max_summary_length = int(args.max_summary_length) - if args.model: - # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - exit() - #if - - koboldai_vars.model = args.model; koboldai_vars.revision = args.revision koboldai_settings.multi_story = args.multi_story @@ -1556,6 +1549,37 @@ def general_startup(override_args=None): socketio.start_background_task(socket_io_relay, koboldai_settings.queue, socketio) + if args.model: + # At this point we have to try to load the model through the selected backend + if not args.model_backend: + logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") + logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) + exit() + if args.model_backend not in model_backends: + logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) + exit() + #OK, we've been given a model to load and a backend to load it through. Now we need to get a list of parameters and make sure we get what we need to actually load it + parameters = model_backends[args.model_backend].get_requested_parameters(args.model, args.path, "") + ok_to_load = True + arg_parameters = json.loads(args.model_parameters.replace("'", "\"")) if args.model_parameters != "" else {} + for parameter in parameters: + if parameter['default'] == "" or parameter['id'] not in arg_parameters: + ok_to_load = False + elif parameter['id'] not in arg_parameters: + arg_parameters[parameter] = parameter['default'] + if not ok_to_load: + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) + exit() + arg_parameters['id'] = args.model + arg_parameters['model_path'] = args.path + arg_parameters['menu_path'] = "" + model_backends[args.model_backend].set_input_parameters(arg_parameters) + koboldai_vars.model = args.model + return args.model_backend + else: + return "Read Only" + def unload_model(): @@ -1633,13 +1657,13 @@ def load_model(model_backend, initial_load=False): else: logger.init_warn("GPU support", status="Not Found") - if koboldai_vars.hascuda: - if(koboldai_vars.bmsupported): - koboldai_vars.usegpu = False - koboldai_vars.breakmodel = True - else: - koboldai_vars.breakmodel = False - koboldai_vars.usegpu = use_gpu + #if koboldai_vars.hascuda: + # if(koboldai_vars.bmsupported): + # koboldai_vars.usegpu = False + # koboldai_vars.breakmodel = True + # else: + # koboldai_vars.breakmodel = False + # koboldai_vars.usegpu = use_gpu else: koboldai_vars.default_preset = koboldai_settings.default_preset @@ -10665,10 +10689,8 @@ for schema in config_endpoint_schemas: #==================================================================# # Final startup commands to launch Flask app #==================================================================# -def startup(): - if koboldai_vars.model == "" or koboldai_vars.model is None: - koboldai_vars.model = "ReadOnly" - socketio.start_background_task(load_model, *('Read Only',), **{'initial_load':True}) +def startup(command_line_backend): + socketio.start_background_task(load_model, *(command_line_backend,), **{'initial_load':True}) print("", end="", flush=True) @@ -10677,7 +10699,7 @@ def run(): global app global tpu_mtj_backend - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") if koboldai_vars.host: @@ -10725,7 +10747,7 @@ def run(): cloudflare = _run_cloudflared(port) koboldai_vars.cloudflare_link = cloudflare - startup() + startup(command_line_backend) if(args.localtunnel or args.ngrok or args.remote): with open('cloudflare.log', 'w') as cloudflarelog: @@ -10745,7 +10767,7 @@ def run(): else: socketio.run(app, port=port) else: - startup() + startup(command_line_backend) if args.unblock: if not args.no_ui: try: @@ -10773,13 +10795,13 @@ def run(): if __name__ == "__main__": run() else: - general_startup() + command_line_backend = general_startup() # Start flask & SocketIO logger.init("Flask", status="Starting") Session(app) logger.init_ok("Flask", status="OK") patch_transformers() - startup() + startup(command_line_backend) koboldai_settings.port = args.port if "port" in args and args.port is not None else 5000 print("{0}\nServer started in WSGI mode!{1}".format(colors.GREEN, colors.END), flush=True) diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 4a29a027..c3fff46f 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -178,6 +178,8 @@ class InferenceModel: return {} def set_input_parameters(self, parameters): + for parameter in parameters: + setattr(self, parameter, parameters[parameter]) return def load(self, save_model: bool = False, initial_load: bool = False) -> None: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 6c880bbe..5d8552fb 100644 --- a/modeling/inference_models/horde.py +++ 
b/modeling/inference_models/horde.py @@ -86,7 +86,6 @@ class model_backend(InferenceModel): def get_cluster_models(self): # Get list of models from public cluster - logger.info("Retrieving engine list...") try: req = requests.get(f"{self.url}/api/v2/status/models?type=text") except: diff --git a/modeling/inference_models/parents/hf.py b/modeling/inference_models/parents/hf.py index 69549bd5..70143b69 100644 --- a/modeling/inference_models/parents/hf.py +++ b/modeling/inference_models/parents/hf.py @@ -133,10 +133,14 @@ class HFInferenceModel(InferenceModel): gpu_count = torch.cuda.device_count() layers = [] for i in range(gpu_count): - layers.append(int(parameters["{}_Layers".format(i)]) if parameters["{}_Layers".format(i)].isnumeric() else None) + layers.append(int(parameters["{}_Layers".format(i)]) if isinstance(parameters["{}_Layers".format(i)], str) and parameters["{}_Layers".format(i)].isnumeric() else None) self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + if isinstance(self.cpu_layers, str): + self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers - self.disk_layers = int(parameters['Disk_Layers']) if 'Disk_Layers' in parameters and parameters['Disk_Layers'].isnumeric() else 0 + self.disk_layers = parameters['Disk_Layers'] if 'Disk_Layers' in parameters else 0 + if isinstance(self.disk_layers, str): + self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = layers breakmodel.disk_blocks = self.disk_layers self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None From 4040538d3438acd56e4a9121708a79b6d0d5da83 Mon Sep 17 00:00:00 2001 From: ebolam Date: Thu, 18 May 2023 18:34:00 -0400 Subject: [PATCH 039/102] Model Backends now defined in the menu --- aiserver.py | 38 ++++++++++++++++-------------- modeling/inference_models/horde.py | 3 ++- static/koboldai.css | 11 +++++++-- static/koboldai.js | 16 +++++++++++-- templates/popups.html | 2 +- 5 files changed, 46 insertions(+), 24 deletions(-) diff --git a/aiserver.py b/aiserver.py index 235732ec..aeebdbc1 100644 --- a/aiserver.py +++ b/aiserver.py @@ -178,11 +178,13 @@ class MenuModel(MenuItem): vram_requirements: str = "", model_type: MenuModelType = MenuModelType.HUGGINGFACE, experimental: bool = False, + model_backend: str = "Huggingface", ) -> None: super().__init__(label, name, experimental) self.model_type = model_type self.vram_requirements = vram_requirements self.is_downloaded = is_model_downloaded(self.name) + self.model_backend = model_backend def to_ui1(self) -> list: return [ @@ -245,7 +247,7 @@ model_menu = { MenuFolder("Official RWKV-4", "rwkvlist"), MenuFolder("Untuned GPT2", "gpt2list"), MenuFolder("Online Services", "apilist"), - MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER), + MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'adventurelist': [ MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), @@ -369,25 +371,24 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), - 
MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ - MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API), - MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API), - MenuModel("InferKit API (requires API key)", "InferKit", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API), - MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API), - MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API), + MenuModel("GooseAI API (requires API key)", "GooseAI", model_type=MenuModelType.ONLINE_API, model_backend="GooseAI"), + MenuModel("OpenAI API (requires API key)", "OAI", model_type=MenuModelType.ONLINE_API, model_backend="OpenAI"), + MenuModel("KoboldAI API", "API", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI API"), + MenuModel("Basic Model API", "Colab", model_type=MenuModelType.ONLINE_API, model_backend="KoboldAI Old Colab Method"), + MenuModel("KoboldAI Horde", "CLUSTER", model_type=MenuModelType.ONLINE_API, model_backend="Horde"), MenuFolder("Return to Main Menu", "mainmenu"), ] } @@ -1670,6 +1671,7 @@ def load_model(model_backend, initial_load=False): model = model_backends[model_backend] model.load(initial_load=initial_load, save_model=not (args.colab or args.cacheonly) or args.savemodel) + koboldai_vars.model = model.model_name if "model_name" in vars(model) else model.id #Should have model_name, but it could be set to id depending on how it's setup logger.debug("Model Type: {}".format(koboldai_vars.model_type)) # TODO: Convert everywhere to use model.tokenizer @@ -6136,7 +6138,7 @@ def UI_2_select_model(data): #Get load methods if 'path' not in data or data['path'] == "": valid_loaders = {} - for model_backend in model_backends: + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) else: diff --git a/modeling/inference_models/horde.py b/modeling/inference_models/horde.py index 5d8552fb..8e05fbbd 100644 --- a/modeling/inference_models/horde.py +++ b/modeling/inference_models/horde.py @@ -70,6 +70,7 @@ class model_backend(InferenceModel): "id": "model", "default": model_name, "check": {"value": "", 'check': "!="}, + 
'multiple': True, "tooltip": "Which model to use when running OpenAI/GooseAI.", "menu_path": "", "refresh_model_inputs": False, @@ -102,7 +103,7 @@ class model_backend(InferenceModel): engines = req.json() try: - engines = [{"text": en["name"], "value": en["name"]} for en in engines] + engines = [{"text": "all", "value": "all"}] + [{"text": en["name"], "value": en["name"]} for en in engines] except: logger.error(engines) raise diff --git a/static/koboldai.css b/static/koboldai.css index f3dde4b7..b70c6877 100644 --- a/static/koboldai.css +++ b/static/koboldai.css @@ -352,7 +352,7 @@ border-top-right-radius: var(--tabs_rounding); grid-template-areas: "label value" "item item" "minlabel maxlabel"; - grid-template-rows: 20px 23px 20px; + grid-template-rows: 20px auto 20px; grid-template-columns: auto 30px; row-gap: 0.2em; background-color: var(--setting_background); @@ -2124,6 +2124,13 @@ body { cursor: pointer; background-color: #688f1f; } + +.loadmodelsettings { + overflow-y: auto; + max-height: 50%; +} + + /*----------------------------- Model Load Popup ------------------------------------------*/ #specspan, .popup_list_area .model_item .model { @@ -3539,7 +3546,7 @@ h2 .material-icons-outlined { } -.horde_trigger[model_model="ReadOnly"], +.horde_trigger[model_model="Read Only"], .horde_trigger[model_model="CLUSTER"] { display: none; } diff --git a/static/koboldai.js b/static/koboldai.js index 905403c1..399e52cf 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1695,12 +1695,20 @@ function model_settings_checker() { for (const temp of this.check_data['sum']) { if (document.getElementById(this.id.split("|")[0] +"|" + temp + "_value")) { document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").classList.add('input_error'); - document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + document.getElementById(this.id.split("|")[0] +"|" + temp + "_value").closest(".setting_container_model").removeAttribute("tooltip"); + } } } } else { this.closest(".setting_container_model").classList.add('input_error'); - this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + if (this.check_data['check_message']) { + this.closest(".setting_container_model").setAttribute("tooltip", this.check_data['check_message']); + } else { + this.closest(".setting_container_model").removeAttribute("tooltip"); + } } } } @@ -1841,6 +1849,10 @@ function selected_model_info(sent_data) { select_element.setAttribute("data_type", item['unit']); select_element.onchange = onchange_event; select_element.setAttribute("refresh_model_inputs", item['refresh_model_inputs']); + if (('multiple' in item) && (item['multiple'])) { + select_element.multiple = true; + select_element.size = 10; + } if ('check' in item) { select_element.check_data = item['check']; } else { diff --git a/templates/popups.html b/templates/popups.html index 59f07e70..9c6b4a9e 100644 --- a/templates/popups.html +++ b/templates/popups.html @@ -48,7 +48,7 @@
- + - + + + {% include 'popups.html' %} + + From a1036465af02cefda32af06d4d3a04b0161aa118 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 12:46:02 -0400 Subject: [PATCH 045/102] Add warning about command line changes and new modular backend --- data/one_time_messages.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/data/one_time_messages.json b/data/one_time_messages.json index 3062827b..7485fd15 100644 --- a/data/one_time_messages.json +++ b/data/one_time_messages.json @@ -8,5 +8,10 @@ "id": 2, "title": "Changes since last version", "message": "
New Features
\n
Phrase Biasing
\nThere is now a Phrase Biasing implementation under Settings -> Biasing. You can now encourage or discourage the AI to generate words or phrases (without needing to use a userscript)\n
Context viewer
\nThe Context Viewer allows you to see what is sent to the AI. Given that only so much text can be read by the AI at a time, it's useful to know exactly what it's looking at.\n
Story Commentary
\nStory Commentary allows custom characters to speak their mind on your story. This can be configured under Settings -> Story Commentary. Characters can be added as World Info entries with a type of \"Commentator\".\n
New Chat UI (Experimental)
\nThis new interface for Chat Mode is more visually \"chat like\" than the old text-based mode. To activate it, ensure both Chat Mode (Home -> Game Mode) and Experimental UI (Interface -> Experimental UI) are enabled, then change the Chat Style (Interface -> Chat Style) to \"Messages\". Similarly to the story commentators, characters can be defined in the World Info menu; if a character's name matches a chat character defined there, the image on the character entry will be used as an icon.\n
Tweaks
\nTweaks allow small UI changes to be mixed and matched to create a more personalized interface.\n
Attention Bias (Experimental)
\nAttention Bias aims to make some parts of the context be weighted more heavily than others in self-attention. This is very experimental, and only works on OPT-based models for now.\n
Genre
\nThe genre menu (Author's Note -> Genre) prepends genre information to the context. You can either choose from preset genres or write your own. Works better on models trained with genre/tag information, including most new models in the model picker.\n
World Info generation
\nWorld Info entries can now have their text generated automatically from a title and type. Powered by whatever model you have active, so effectiveness will vary with model.\n
Drag and drop import
\nImportable files can now be dragged into the UI to load them.\n
NovelAI lorebook/card support
\nNovelAI lorebooks and cards can now be imported as World Info. If a card is uploaded, the PNG will be used as the World Info image.\n
Finder (Ctrl+K)
\nAllows jumping to various UI elements and performing actions quickly. The mode can be changed by clicking the mode icon or with hotkeys on an empty search box (Search: '#', World Info: '>', Inference Scratchpad: '!', Image Prompting: '?').\n
Club import wizard
\nPrompts imported from aetherroom.club with placeholders will now show a setup prompt where you can input the value of these placeholders.\n
Context menu
\nA context menu has been added and is available in several areas. Give it a try by right-clicking on the main text area.\n
Substitutions
\nSubstitutions allow phrases to be replaced if you or the AI input them into the story. The default Substitutions are disabled and can be enabled with the pencil icon to the right of the entry.\n
Inference scratchpad
\nThe Inference Scratchpad is a way of prompting the AI in isolation, outside of your story; the AI will not see anything in your story, and nothing the AI responds with will be added to the story. This can be useful in scenarios where you wish to use the AI in a more generic way. For example, you could prompt it with something like \"List of fantasy names:\" to receive such a list.\n
Error notifications
\nErrors are generally less opaque to the user. Client sided errors and many server errors will show a notification detailing the error.\n
Ctrl+Click to jump to World Info entry
\nHolding Ctrl while clicking on a mention of a World Info tag will bring you to the entry.\n
Model picker indicators
\nThe model picker now has indicators showing if a model is downloaded, may achieve poor quality, or may not load on your system.\n
More shortcuts
\nPress Ctrl+? to view them.\n
Image Generation
\nYou can now generate flavor images based on the game text at each action. In the settings menu in the home tab, you can click generate image to create an image based on the current text. The image will appear below the generate button. Hovering on the image will show the prompt used to generate it. You can click on the text of previous actions to see the image associated with that action, and can right click on the image and hit retry to generate a new image based on that action.\nSettings for how/where the image is generated are in the left flyout menu under interface, image.\n
Text to speech (Experimental)
\nText to speech is now available. To enable it go to the settings menu, enable experimental ui, then enable generate audio. Audio will be generated for your actions. Play buttons will appear next to the submit button, and right clicking an action will give you a new speak option to start reading from that point.\n
UI Mode
\nIn response to feedback, we've added different UI mode levels from simple to power user. Advanced hides some of the less used options, while Power User shows everything. Simple is very much a work in progress, but it intends to simplify the majority of settings to 3 sliders. Feel free to play with it but don't expect good results yet.\n
Presets
\nPresets are now here. Community presets are pre-loaded in KoboldAI and can be selected from the settings tab in the settings menu, or from the home screen. In addition, you can save your own presets and share them with others (or send them to us for future inclusion). Presets are saved in the presets folder.\n
Alt Text Gen
\nWith this setting on, the system will insert World Info text one sentence before the word that triggers it in the AI text. This should make the AI pay more attention to it and make it more likely to influence the output.\n
Alt Multi Gen
\nIf set, multiple generations will be produced sequentially rather than all at once. This reduces the amount of VRAM required and lets you generate multiple story options with more demanding models, at the potential expense of speed.\n
Beep on Complete
\nThere is now an option in the settings menu, interface tab, called Beep on Complete. If set, the browser will beep when generation is complete. Useful for slow systems.\n
Privacy Screen (Experimental)
\nBy hitting Ctrl+L the screen will be blurred for all users until the password is entered and unlock is clicked (the password is set in the settings menu, interface tab).\n
Change Game Text Size
\nGame text can be adjusted to any size\n
No double spaces
\nWhen set, double spaces will be replaced by single spaces.\n
Themes
\nWe now have a theming engine. Themes can come in 3 flavors. Palette themes use a more basic theming system entirely in the UI. Select the colors from the Palette section and things will change. Advanced themes can have various variables set manually (click the advanced theme button to see). These allow you to go a level deeper than the palette system. Finally, we have custom themes. These are custom CSS code that can do almost anything. All themes can be saved and shared. Saved themes are stored in the themes folder\n
Auto Memory (Experimental)
\nThe start of auto-memory is in place and we are looking for feedback. It currently generates the summary but does not put it in memory (though you can copy-paste it). To see it, turn on experimental ui, go to the story menu, memory tab and click generate under auto-memory. \n
General Notes
\nIf you want a place to write stuff down that saves with the story but doesn't affect it, that's what the notes tab is for. It is found under the story menu, Notes tab\n
W++ (or SBF)
\nIn world info entries you can turn on w++ mode. This will allow you to enter data in the W++ format without having to actually write it.\n
World Info Titles
\nWorld info entries now have titles on them to make it easier to find the one you want. Soon the world info entries will be collapsed to just the title to make navigation easier\n
Download/Upload world info folders
\nWorld info folders can now be downloaded and/or uploaded. This lets you share world info more easily.\n
Game Text in AI Context
\nGame text that will be in the AI's context is now bold in the game screen. This will let you easily see where the AI will stop remembering your game (anything not bolded is \"forgotten\")\n
World info context
\nText that triggers a world info entry will now be italicized. Hovering over that text will show a tooltip with the World Info text that will be added to the context.\n
Updated help text
\nHelp text has been expanded throughout the UI.\n
Context Bar
At the bottom of the story menu is a bar that shows how much of the AI's context is in use, and by what. Different colors correspond to different data types (actions, memory, world info, etc.).\n
\n
\n
Improvements
\n
Author's Note
\nThe author's note is now inserted between sentences a configurable distance from the end of the story. This should improve the coherence of generated text while keeping the author's note relevant." + }, + "3": { + "id": 3, + "title": "Changes since last version", + "message": "
New Features
\n
Modular Model Backends
Model loading is now accomplished via separate model backend files. This will allow KoboldAI to more easily add new model backends (for example 4-bit, GGML, or whatever developers want to add) without significant code rework.
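Since the modular backend system is the headline change of this series, a minimal sketch of what one of these separate backend files might look like may help. It is inferred from the discovery loop added to aiserver.py (a module-level `model_backend_name`, a `model_backend` class, and the optional `disable` flag) and from the `is_valid`/`get_requested_parameters` calls visible in these patches; the `set_input_parameters` hook and the import path are assumptions rather than a documented API, so treat this as a sketch, not the definitive interface.

```python
# modeling/inference_models/example/class.py -- illustrative sketch only
from modeling.inference_model import InferenceModel

model_backend_name = "Example"  # name listed in the model-backend dropdown


class model_backend(InferenceModel):
    # aiserver.py skips any backend whose instance sets disable = True
    disable = False

    def is_valid(self, model_name, model_path, menu_path):
        # Claim only the models this backend actually knows how to load
        # ("example/" is a made-up prefix for illustration)
        return model_name.startswith("example/")

    def get_requested_parameters(self, model_name, model_path, menu_path, parameters={}):
        # Describe the inputs the load UI (or the --model_parameters JSON)
        # should collect: dicts with id, default, check, tooltip, ...
        return []

    def set_input_parameters(self, parameters):
        # Assumed hook: record the values the user chose before loading
        self.model_name = parameters.get("id", "Example")
```

Dropped into modeling/inference_models/<name>/class.py, a file shaped like this would be picked up automatically by the importlib discovery loop shown later in this series and offered in the load menu.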
Rework of command line arguments
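As a concrete illustration of the command-line rework described in the next paragraph, a hypothetical invocation of the new flags might look like the following; the model name and layer split are invented for the example, but the JSON keys match the `0_Layers`/`CPU_Layers`/`Disk_Layers` ids exposed by the Huggingface backend:

```bash
python aiserver.py --model RWKV/rwkv-4-430m-pile \
    --model_backend Huggingface \
    --model_parameters '{"0_Layers": 24, "CPU_Layers": 0, "Disk_Layers": 0}'
```

If --model_parameters is omitted, the backend's defaults are used where they exist, and aiserver.py prints the required parameter ids when something mandatory is missing.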
--breakmodel command line arguments have been deleted and if you use those you will have to pass through --model_backend and --model_parameters." } } \ No newline at end of file From 9df1f03b12ffa2513b15472a96338483178fe760 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 14:28:36 -0400 Subject: [PATCH 046/102] Fix for custom huggingface model menu entry --- aiserver.py | 36 ++++---- modeling/inference_models/hf.py | 154 ++++++++++++++++++-------------- static/application.js | 23 ++++- static/koboldai.js | 23 ++++- 4 files changed, 139 insertions(+), 97 deletions(-) diff --git a/aiserver.py b/aiserver.py index b4aad4e7..fe6d7606 100644 --- a/aiserver.py +++ b/aiserver.py @@ -233,7 +233,7 @@ model_menu = { "mainmenu": [ MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuFolder("Load custom model from Hugging Face", "customhuggingface"), + MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), @@ -6135,7 +6135,7 @@ def UI_2_select_model(data): valid_loaders = {} for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders, "preselected": "Huggingface"}) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6149,24 +6149,20 @@ def UI_2_select_model(data): output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return - - - #We've selected a menu - if data['model'] in model_menu: - sendModelSelection(menu=data['model']) - #We've selected a custom line - elif data['menu'] in ("NeoCustom", "GPT2Custom"): - get_model_info(data['menu'], directory=data['display_name']) - #We've selected a custom menu folder - elif data['model'] in ("NeoCustom", "GPT2Custom") and 'path' in data: - sendModelSelection(menu=data['model'], folder=data['path']) - #We've selected a custom menu - elif data['model'] in ("NeoCustom", "GPT2Custom", "customhuggingface"): - sendModelSelection(menu=data['model'], folder="./models") - else: - #We now have some model we want to potentially load. 
- #First we need to send the client the model parameters (layers, etc) - get_model_info(data['model']) + + + + +#==================================================================# +# Event triggered when user changes a model parameter and it's set to resubmit +#==================================================================# +@socketio.on('resubmit_model_info') +@logger.catch +def UI_2_resubmit_model_info(data): + valid_loaders = {} + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"], parameters=data) + emit("selected_model_info", {"model_backends": valid_loaders}) #==================================================================# # Event triggered when user loads a model diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 6f848fa9..eff3d1ce 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -33,95 +33,111 @@ class HFInferenceModel(InferenceModel): except: return False - def get_requested_parameters(self, model_name, model_path, menu_path): + def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = [] if not self.hf_torch: return [] - if model_path is not None and os.path.exists(model_path): - self.model_config = AutoConfig.from_pretrained(model_path) - elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): - self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") - else: - self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") - layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None - if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: - if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): - with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: - temp = json.load(f) - break_values = temp['layers'] if 'layers' in temp else [layer_count] - disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + if model_name == 'customhuggingface': + requested_parameters.append({ + "uitype": "text", + "unit": "text", + "label": "Huggingface Model Name", + "id": "custom_model_name", + "default": parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else "", + "check": {"value": "", 'check': "!="}, + "tooltip": "Model name from https://huggingface.co/", + "menu_path": "", + "refresh_model_inputs": True, + "extra_classes": "" + }) + + if model_name != 'customhuggingface' or "custom_model_name" in parameters: + model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name + if model_path is not None and os.path.exists(model_path): + self.model_config = 
AutoConfig.from_pretrained(model_path) + elif(os.path.exists("models/{}".format(model_name.replace('/', '_')))): + self.model_config = AutoConfig.from_pretrained("models/{}".format(model_name.replace('/', '_')), revision=utils.koboldai_vars.revision, cache_dir="cache") else: - break_values = [layer_count] - disk_blocks = 0 - - break_values = [int(x) for x in break_values if x != '' and x is not None] - gpu_count = torch.cuda.device_count() - break_values += [0] * (gpu_count - len(break_values)) - if disk_blocks is not None: - break_values += [int(disk_blocks)] - for i in range(gpu_count): + self.model_config = AutoConfig.from_pretrained(model_name, revision=utils.koboldai_vars.revision, cache_dir="cache") + layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None + if layer_count is not None and layer_count >= 0 and not self.nobreakmodel: + if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): + with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: + temp = json.load(f) + break_values = temp['layers'] if 'layers' in temp else [layer_count] + disk_blocks = temp['disk_layers'] if 'disk_layers' in temp else 0 + else: + break_values = [layer_count] + disk_blocks = 0 + + break_values = [int(x) for x in break_values if x != '' and x is not None] + gpu_count = torch.cuda.device_count() + break_values += [0] * (gpu_count - len(break_values)) + if disk_blocks is not None: + break_values += [int(disk_blocks)] + for i in range(gpu_count): + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "{} Layers".format(torch.cuda.get_device_name(i)), + "id": "{}_Layers".format(i), + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": break_values[i], + "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) requested_parameters.append({ "uitype": "slider", "unit": "int", - "label": "{} Layers".format(torch.cuda.get_device_name(i)), - "id": "{}_Layers".format(i), + "label": "CPU Layers", + "id": "CPU_Layers", "min": 0, "max": layer_count, "step": 1, "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": break_values[i], - "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)), + "default": layer_count - sum(break_values), + "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. 
Use if you must.", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "CPU Layers", - "id": "CPU_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": layer_count - sum(break_values), - "tooltip": "The number of layers to put on the CPU. This will use your system RAM. It will also do inference partially on CPU. Use if you must.", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - if disk_blocks is not None: + if disk_blocks is not None: + requested_parameters.append({ + "uitype": "slider", + "unit": "int", + "label": "Disk Layers", + "id": "Disk_Layers", + "min": 0, + "max": layer_count, + "step": 1, + "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, + "check_message": "The sum of assigned layers must equal {}".format(layer_count), + "default": disk_blocks, + "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. Use as a last resort.", + "menu_path": "Layers", + "extra_classes": "", + "refresh_model_inputs": False + }) + else: requested_parameters.append({ - "uitype": "slider", - "unit": "int", - "label": "Disk Layers", - "id": "Disk_Layers", - "min": 0, - "max": layer_count, - "step": 1, - "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)]+['CPU_Layers']+(['Disk_Layers'] if disk_blocks is not None else []), "value": layer_count, 'check': "="}, - "check_message": "The sum of assigned layers must equal {}".format(layer_count), - "default": disk_blocks, - "tooltip": "The number of layers to put on the disk. This will use your hard drive. The is VERY slow in comparison to GPU or CPU. 
Use as a last resort.", + "uitype": "toggle", + "unit": "bool", + "label": "Use GPU", + "id": "use_gpu", + "default": False, + "tooltip": "Whether or not to use the GPU", "menu_path": "Layers", "extra_classes": "", "refresh_model_inputs": False }) - else: - requested_parameters.append({ - "uitype": "toggle", - "unit": "bool", - "label": "Use GPU", - "id": "use_gpu", - "default": False, - "tooltip": "Whether or not to use the GPU", - "menu_path": "Layers", - "extra_classes": "", - "refresh_model_inputs": False - }) - + return requested_parameters @@ -153,7 +169,7 @@ class HFInferenceModel(InferenceModel): self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel - self.model_name = parameters['id'] + self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None def unload(self): diff --git a/static/application.js b/static/application.js index 99a65ed7..ca445c5f 100644 --- a/static/application.js +++ b/static/application.js @@ -4009,7 +4009,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -4099,9 +4117,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { diff --git a/static/koboldai.js b/static/koboldai.js index 99595879..dabbcda9 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1683,7 +1683,25 @@ function model_settings_checker() { if (valid || missing_element) { //if we are supposed to refresh when this value changes we'll resubmit if ((this.getAttribute("refresh_model_inputs") == "true") && !missing_element && !this.noresubmit) { - console.log("resubmit"); + //get an object of all the input settings from the user + data = {} + settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if 
(element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; + } + data = {...data, ...selected_model_data}; + + data['plugin'] = document.getElementById("modelplugin").value; + + socket.emit("resubmit_model_info", data); } if ('sum' in this.check_data) { for (const temp of this.check_data['sum']) { @@ -1773,9 +1791,6 @@ function selected_model_info(sent_data) { modelpluginoption.innerText = loader; modelpluginoption.value = loader; modelplugin.append(modelpluginoption); - if (loader == sent_data['preselected']) { - modelplugin.value = sent_data['preselected']; - } //create the user input for each requested input for (item of items) { From 756a33c63e323372716a1321e649f01873ecb533 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:28:39 -0400 Subject: [PATCH 047/102] Added try loop on model backend so it will continue with other models. --- aiserver.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index fe6d7606..02ea2229 100644 --- a/aiserver.py +++ b/aiserver.py @@ -627,8 +627,11 @@ model_backend_code = {} model_backends = {} for module in os.listdir("./modeling/inference_models"): if not os.path.isfile(os.path.join("./modeling/inference_models",module)) and module != '__pycache__': - model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) - model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + try: + model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) + model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + except: + logger.error("Model Backend {} failed to load".format(module)) old_socketio_on = socketio.on @@ -1572,7 +1575,7 @@ def general_startup(override_args=None): elif parameter['id'] not in arg_parameters: arg_parameters[parameter] = parameter['default'] if not ok_to_load: - logger.error("Your selected backend needs additional parameters to run. Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} (required parameters shown below)") + logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) logger.error("Missing: {}".format(", ".join(mising_parameters))) exit() From db30402c3bd01432f8a8a8239faee5c8e55991aa Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:30:36 -0400 Subject: [PATCH 048/102] Move RWKV to use Huggingface model backend --- aiserver.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/aiserver.py b/aiserver.py index 02ea2229..a1d548e9 100644 --- a/aiserver.py +++ b/aiserver.py @@ -371,16 +371,16 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'rwkvlist': [ - MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", "", model_backend="RWKV"), - MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", "", model_backend="RWKV"), - MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", "", model_backend="RWKV"), - MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", "", model_backend="RWKV"), - MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", "", model_backend="RWKV"), + MenuModel("RWKV Raven 14B", "RWKV/rwkv-raven-14b", ""), + MenuModel("RWKV Pile 14B", "RWKV/rwkv-4-14b-pile", ""), + MenuModel("RWKV Raven 7B", "RWKV/rwkv-raven-7b", ""), + MenuModel("RWKV Pile 7B", "RWKV/rwkv-4-7b-pile", ""), + MenuModel("RWKV Raven 3B", "RWKV/rwkv-raven-3b", ""), + MenuModel("RWKV Pile 3B", "RWKV/rwkv-4-3b-pile", ""), + MenuModel("RWKV Raven 1.5B", "RWKV/rwkv-raven-1b5", ""), + MenuModel("RWKV Pile 1.5B", "RWKV/rwkv-4-1b5-pile", ""), + MenuModel("RWKV Pile 430M", "RWKV/rwkv-4-430m-pile", ""), + MenuModel("RWKV Pile 169B", "RWKV/rwkv-4-169m-pile", ""), MenuFolder("Return to Main Menu", "mainmenu"), ], 'apilist': [ From b21884fc31c556c81a89158123dfce18ba398640 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:34:15 -0400 Subject: [PATCH 049/102] Better error reporting --- aiserver.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a1d548e9..7e8c09c8 100644 --- a/aiserver.py +++ b/aiserver.py @@ -56,6 +56,7 @@ import html import argparse import sys import gc +import traceback import lupa @@ -630,8 +631,10 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() - except: + except Exception: logger.error("Model Backend {} failed to load".format(module)) + logger.error(traceback.format_exc()) + old_socketio_on = socketio.on From 309f1c432ae79acdbeb6b52a6f65ed963ef5d36d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 17:43:13 -0400 Subject: [PATCH 050/102] Added the ability to disable model backends in the model backend code. 
--- aiserver.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 7e8c09c8..40335a9f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -631,10 +631,14 @@ for module in os.listdir("./modeling/inference_models"): try: model_backend_code[module] = importlib.import_module('modeling.inference_models.{}.class'.format(module)) model_backends[model_backend_code[module].model_backend_name] = model_backend_code[module].model_backend() + if 'disable' in vars(model_backends[model_backend_code[module].model_backend_name]): + if model_backends[model_backend_code[module].model_backend_name].disable: + del model_backends[model_backend_code[module].model_backend_name] except Exception: logger.error("Model Backend {} failed to load".format(module)) logger.error(traceback.format_exc()) - + +logger.info("We loaded the following model backends: \n{}".format("\n".join([x for x in model_backends]))) old_socketio_on = socketio.on From 6df5fe4ad07acb7b901b65ade005ec8af40126dc Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:24:06 -0400 Subject: [PATCH 051/102] partial load model from custom path in menu --- aiserver.py | 20 ++++++++++++++++---- modeling/inference_models/api/class.py | 1 + modeling/inference_models/basic_api/class.py | 1 + modeling/inference_models/gooseai/class.py | 1 + modeling/inference_models/horde/class.py | 1 + modeling/inference_models/openai/class.py | 1 + modeling/inference_models/openai_gooseai.py | 6 ++++++ 7 files changed, 27 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 40335a9f..14d268be 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6141,11 +6141,19 @@ def UI_2_select_model(data): emit("open_model_load_menu", {"items": [{**item.to_json(), **{"menu":data["name"]}} for item in model_menu[data["name"]] if item.should_show()]}) else: #Get load methods - if 'path' not in data or data['path'] == "": + if data['ismenu'] == 'false': valid_loaders = {} - for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) - emit("selected_model_info", {"model_backends": valid_loaders}) + if data['id'] in [item.name for sublist in model_menu for item in model_menu[sublist]]: + #Here if we have a model id that's in our menu, we explicitly use that backend + for model_backend in set([item.model_backend for sublist in model_menu for item in model_menu[sublist] if item.name == data['id']]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) + else: + #Here we have a model that's not in our menu structure (either a custom model or a custom path + #so we'll just go through all the possible loaders + for model_backend in model_backends: + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories paths, breadcrumbs = get_folder_path_info(data['path']) @@ -6154,8 +6162,12 @@ def UI_2_select_model(data): valid=False for model_backend in model_backends: if model_backends[model_backend].is_valid(path[1], path[0], "Custom"): + logger.debug("{} 
says valid".format(model_backend)) valid=True break + else: + logger.debug("{} says invalid".format(model_backend)) + output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/api/class.py b/modeling/inference_models/api/class.py index d9ec1147..3d54edd9 100644 --- a/modeling/inference_models/api/class.py +++ b/modeling/inference_models/api/class.py @@ -6,6 +6,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/basic_api/class.py b/modeling/inference_models/basic_api/class.py index 6f045ef5..2094d34e 100644 --- a/modeling/inference_models/basic_api/class.py +++ b/modeling/inference_models/basic_api/class.py @@ -4,6 +4,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 8d58b4b5..1073f45f 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/horde/class.py b/modeling/inference_models/horde/class.py index 387c5833..2c4c4bf5 100644 --- a/modeling/inference_models/horde/class.py +++ b/modeling/inference_models/horde/class.py @@ -5,6 +5,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 84fe6df9..492a3fdb 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index 4d885074..e4a027db 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -2,6 +2,7 @@ import torch import requests import numpy as np from typing import List, Optional, Union +import os import utils from logger import logger @@ -30,6 +31,11 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): + try: + print(self.source) + except: + print(vars(self)) + raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From a1ee6849dc1d98c287561d5bdb6aff225c0322a5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 18:28:47 -0400 Subject: [PATCH 052/102] Custom Paths from Menu structure fixed --- aiserver.py | 3 ++- modeling/inference_models/gooseai/class.py | 2 +- modeling/inference_models/openai/class.py | 2 +- modeling/inference_models/openai_gooseai.py | 5 ----- 4 
files changed, 4 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index 14d268be..d4a127f0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6152,7 +6152,8 @@ def UI_2_select_model(data): #Here we have a model that's not in our menu structure (either a custom model or a custom path #so we'll just go through all the possible loaders for model_backend in model_backends: - valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) + if model_backends[model_backend].is_valid(data["name"], data["path"] if 'path' in data else None, data["menu"]): + valid_loaders[model_backend] = model_backends[model_backend].get_requested_parameters(data["name"], data["path"] if 'path' in data else None, data["menu"]) emit("selected_model_info", {"model_backends": valid_loaders}) else: #Get directories diff --git a/modeling/inference_models/gooseai/class.py b/modeling/inference_models/gooseai/class.py index 1073f45f..934f15dd 100644 --- a/modeling/inference_models/gooseai/class.py +++ b/modeling/inference_models/gooseai/class.py @@ -19,7 +19,6 @@ model_backend_name = "GooseAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "GooseAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.goose.ai/v1/engines" + self.source = "GooseAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "GooseAI" \ No newline at end of file diff --git a/modeling/inference_models/openai/class.py b/modeling/inference_models/openai/class.py index 492a3fdb..cea644ea 100644 --- a/modeling/inference_models/openai/class.py +++ b/modeling/inference_models/openai/class.py @@ -19,7 +19,6 @@ model_backend_name = "OpenAI" class OpenAIAPIError(Exception): def __init__(self, error_type: str, error_message) -> None: super().__init__(f"{error_type}: {error_message}") - self.source = "OpenAI" class model_backend(openai_gooseai_model_backend): @@ -28,6 +27,7 @@ class model_backend(openai_gooseai_model_backend): def __init__(self): super().__init__() self.url = "https://api.openai.com/v1/engines" + self.source = "OpenAI" def is_valid(self, model_name, model_path, menu_path): return model_name == "OAI" \ No newline at end of file diff --git a/modeling/inference_models/openai_gooseai.py b/modeling/inference_models/openai_gooseai.py index e4a027db..e4b9dfb8 100644 --- a/modeling/inference_models/openai_gooseai.py +++ b/modeling/inference_models/openai_gooseai.py @@ -31,11 +31,6 @@ class model_backend(InferenceModel): return model_name == "OAI" or model_name == "GooseAI" def get_requested_parameters(self, model_name, model_path, menu_path): - try: - print(self.source) - except: - print(vars(self)) - raise if os.path.exists("settings/{}.model_backend.settings".format(self.source)) and 'colaburl' not in vars(self): with open("settings/{}.model_backend.settings".format(self.source), "r") as f: self.key = json.load(f)['key'] From 128c77e0fde7deae7fa30e65cc4166eb46ba314d Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:01:11 -0400 Subject: [PATCH 053/102] Default model backend to huggingface if not present when loading a model through the command line --- aiserver.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/aiserver.py b/aiserver.py 
index d4a127f0..a8591dc3 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1365,7 +1365,7 @@ def general_startup(override_args=None): parser.add_argument("--port", type=int, help="Specify the port on which the application will be joinable") parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") - parser.add_argument("--model_backend", help="Specify the model backend you want to use") + parser.add_argument("--model_backend", default="Huggingface", help="Specify the model backend you want to use") parser.add_argument("--model_parameters", action="store", default="", help="json of id values to use for the input to the model loading process (leave blank to get required parameters)") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") parser.add_argument("--apikey", help="Specify the API key to use for online services") @@ -1558,10 +1558,6 @@ def general_startup(override_args=None): if args.model: # At this point we have to try to load the model through the selected backend - if not args.model_backend: - logger.error("Didn't select a model backend. Please enter one through the --model_backend or remove the --model from the run command") - logger.error("Possible model backends are: {}".format(", ".join([x for x in model_backends]))) - exit() if args.model_backend not in model_backends: logger.error("Your selected model backend ({}) isn't in the model backends we know about ({})".format(args.model_backend, ", ".join([x for x in model_backends]))) exit() @@ -1576,11 +1572,11 @@ def general_startup(override_args=None): arg_parameters['use_gpu'] = True for parameter in parameters: - if parameter['default'] == "" or parameter['id'] not in arg_parameters: + if parameter['default'] == "" and parameter['id'] not in arg_parameters: mising_parameters.append(parameter['id']) ok_to_load = False elif parameter['id'] not in arg_parameters: - arg_parameters[parameter] = parameter['default'] + arg_parameters[parameter['id']] = parameter['default'] if not ok_to_load: logger.error("Your selected backend needs additional parameters to run. 
Please pass through the parameters as a json like {\"[ID]\": \"[Value]\"} using --model_parameters (required parameters shown below)") logger.error("Parameters (ID: Default Value (Help Text)): {}".format("\n".join(["{}: {} ({})".format(x['id'],x['default'],x['tooltip']) for x in parameters]))) From 19559d5eef5999c48503852d02d45c1c7fcce7ec Mon Sep 17 00:00:00 2001 From: ebolam Date: Fri, 19 May 2023 19:15:25 -0400 Subject: [PATCH 054/102] Fix for colors in the classic UI --- static/custom.css | 74 ++++++++++++++++++++++++++++++++++++++++ templates/templates.html | 1 - 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/static/custom.css b/static/custom.css index ffa6f44f..412c7f1b 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2330,4 +2330,78 @@ body.connected .popupfooter, .popupfooter.always-available { .popup .model_item .model_menu_selected { color: var(--popup_selected_color); background-color: var(--popup_selected_color_text); +} + +.settings_select { + color: var(--dropdown_text); + background: var(--dropdown_background); + margin-left: auto; + margin-right: 25px; +} + +.setting_value { + text-align: right; + grid-area: value; + font-size: calc(12px + var(--font_size_adjustment)); + padding: 2px; + padding-top: 0px; + background-color: inherit; + color: inherit; + border: none; + outline: none; +} + +.setting_value:focus { + color: var(--text_edit); +} + +.setting_container_model { + display: grid; + grid-template-areas: "label value" + "item item" + "minlabel maxlabel"; + grid-template-rows: 20px auto 20px; + grid-template-columns: auto 30px; + row-gap: 0.2em; + background-color: var(--setting_background); + color: var(--setting_text); + border-radius: var(--radius_settings_background); + padding: 2px; + margin: 2px; + width: calc(100%); +} + +.setting_container_model .setting_item{ + font-size: calc(0.93em + var(--font_size_adjustment)); + margin-left: 10px; +} + + +.setting_minlabel { + padding-top: 6px; + grid-area: minlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: left; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_maxlabel { + padding-top: 6px; + grid-area: maxlabel; + overflow: hidden; + padding: 5px; + padding-top: 0px; + text-align: right; + font-size: calc(0.8em + var(--font_size_adjustment)); +} + +.setting_label { + display: flex; + grid-area: label; + overflow: hidden; + padding: 5px; + padding-right: 0px; + padding-top: 0px; } \ No newline at end of file diff --git a/templates/templates.html b/templates/templates.html index 49fa99f6..926bf854 100644 --- a/templates/templates.html +++ b/templates/templates.html @@ -1,5 +1,4 @@ -
From 513b8575e71d164fc82747009f8fd3391f4ceb28 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 20 May 2023 11:01:49 -0400 Subject: [PATCH 055/102] Fix for missing import Fix for model name being a path which caused save issues --- aiserver.py | 2 +- modeling/inference_models/hf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index a8591dc3..38ffc3f6 100644 --- a/aiserver.py +++ b/aiserver.py @@ -6165,7 +6165,7 @@ def UI_2_select_model(data): else: logger.debug("{} says invalid".format(model_backend)) - output.append({'label': path[1], 'name': path[0], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) + output.append({'label': path[1], 'name': path[1], 'size': "", "menu": "Custom", 'path': path[0], 'isMenu': not valid}) emit("open_model_load_menu", {"items": output+[{'label': 'Return to Main Menu', 'name':'mainmenu', 'size': "", "menu": "Custom", 'isMenu': True}], 'breadcrumbs': breadcrumbs}) return diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index eff3d1ce..318423d5 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -8,6 +8,7 @@ import koboldai_settings from logger import logger from modeling.inference_model import InferenceModel import torch +import gc class HFInferenceModel(InferenceModel): From 925cad2e2fa6c65b8ea37680d19fa69023cce9f5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 14:50:13 -0400 Subject: [PATCH 056/102] Better compatibility with hf model backend --- modeling/inference_models/hf.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 318423d5..b209d49f 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -1,4 +1,4 @@ -import os +import os, sys from typing import Optional from transformers import AutoConfig import warnings @@ -196,9 +196,10 @@ class HFInferenceModel(InferenceModel): except: pass if self.hf_torch: - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 + if 'breakmodel' in sys.modules: + breakmodel.breakmodel = True + breakmodel.gpu_blocks = [] + breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults From dc20e6dde9152fd609ae06d362b05b9a0ac29bb5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:04:33 -0400 Subject: [PATCH 057/102] Fix for unloading models --- modeling/inference_models/hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index b209d49f..53c802b1 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,6 +197,7 @@ class HFInferenceModel(InferenceModel): pass if self.hf_torch: if 'breakmodel' in sys.modules: + import breakmodel breakmodel.breakmodel = True breakmodel.gpu_blocks = [] breakmodel.disk_blocks = 0 From ca770844b0d6002f07d5b347190be0b25e6faf3d Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 15:07:59 -0400 Subject: [PATCH 058/102] Fix for breakmodel --- modeling/inference_models/hf_torch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5dd53bf8..47c37436 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -820,7 +820,7 @@ class HFTorchInferenceModel(HFInferenceModel): 
breakmodel.gpu_blocks = [0] * n_layers return - elif breakmodel.gpu_blocks != []: + elif breakmodel.gpu_blocks == []: logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 From f1a16f260f4f22384ae882042860228134bf6222 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 16:10:41 -0400 Subject: [PATCH 059/102] Potential breakmodel fix --- modeling/inference_models/hf_torch.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 47c37436..5595edc7 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -824,6 +824,20 @@ class HFTorchInferenceModel(HFInferenceModel): logger.info("Breakmodel not specified, assuming GPU 0") breakmodel.gpu_blocks = [n_layers] n_layers = 0 + + else: + s = n_layers + for i in range(len(breakmodel.gpu_blocks)): + if breakmodel.gpu_blocks[i] <= -1: + breakmodel.gpu_blocks[i] = s + break + else: + s -= breakmodel.gpu_blocks[i] + assert sum(breakmodel.gpu_blocks) <= n_layers + n_layers -= sum(breakmodel.gpu_blocks) + if breakmodel.disk_blocks is not None: + assert breakmodel.disk_blocks <= n_layers + n_layers -= breakmodel.disk_blocks logger.init_ok("Final device configuration:", status="Info") self.breakmodel_device_list(n_layers, primary=breakmodel.primary_device) From 9e53bcf67684198bbbaeb3e67281c1641419f448 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:24:57 -0400 Subject: [PATCH 060/102] Fix for breakmodel loading to CPU when set to GPU --- modeling/inference_models/generic_hf_torch/class.py | 8 +++++--- modeling/inference_models/hf.py | 6 ++++-- modeling/inference_models/hf_torch.py | 3 +++ static/custom.css | 5 +++++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 4e2c8a5b..572337e2 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -248,11 +248,12 @@ class model_backend(HFTorchInferenceModel): self.patch_embedding() + if utils.koboldai_vars.hascuda: - if utils.koboldai_vars.usegpu: + if self.usegpu: # Use just VRAM self.model = self.model.half().to(utils.koboldai_vars.gpu_device) - elif utils.koboldai_vars.breakmodel: + elif self.breakmodel: # Use both RAM and VRAM (breakmodel) if not self.lazy_load: self.breakmodel_device_config(self.model.config) @@ -267,7 +268,8 @@ class model_backend(HFTorchInferenceModel): self._move_to_devices() else: self.model = self.model.to("cpu").float() - + + self.model.kai_model = self utils.koboldai_vars.modeldim = self.get_hidden_size() diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 53c802b1..e801eab2 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -158,7 +158,7 @@ class HFInferenceModel(InferenceModel): layers.append(None) else: layers.append(parameters["{}_Layers".format(i)]) - self.cpu_layers = parameters['CPU_Layers'] if 'CPU_Layers' in parameters else None + self.cpu_layers = int(parameters['CPU_Layers']) if 'CPU_Layers' in parameters else None if isinstance(self.cpu_layers, str): self.cpu_layers = int(self.cpu_layers) if self.cpu_layers.isnumeric() else 0 self.layers = layers @@ -167,9 +167,11 @@ class HFInferenceModel(InferenceModel): self.disk_layers = int(self.disk_layers) if self.disk_layers.isnumeric() else 0 breakmodel.gpu_blocks = 
layers breakmodel.disk_blocks = self.disk_layers - self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None + self.usegpu = self.cpu_layers == 0 and breakmodel.disk_blocks == 0 and sum(self.layers)-self.layers[0] == 0 self.model_type = self.get_model_type() self.breakmodel = ((self.model_type != 'gpt2') or self.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not self.nobreakmodel + else: + self.usegpu = parameters['use_gpu'] if 'use_gpu' in parameters else None self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id'] self.path = parameters['path'] if 'path' in parameters else None diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 5595edc7..c5560360 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -126,6 +126,7 @@ class HFTorchInferenceModel(HFInferenceModel): return "Unknown" def _post_load(m_self) -> None: + if not utils.koboldai_vars.model_type: utils.koboldai_vars.model_type = m_self.get_model_type() @@ -562,6 +563,7 @@ class HFTorchInferenceModel(HFInferenceModel): ) ) # print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ", end="", flush=True) + #logger.debug(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... ") model_dict[key] = model_dict[key].materialize( f, map_location="cpu" ) @@ -847,6 +849,7 @@ class HFTorchInferenceModel(HFInferenceModel): # If all layers are on the same device, use the old GPU generation mode while len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0: breakmodel.gpu_blocks.pop() + self.breakmodel = True if len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in ( -1, utils.num_layers(config), diff --git a/static/custom.css b/static/custom.css index 412c7f1b..968d73e4 100644 --- a/static/custom.css +++ b/static/custom.css @@ -2404,4 +2404,9 @@ body.connected .popupfooter, .popupfooter.always-available { padding: 5px; padding-right: 0px; padding-top: 0px; +} + +.input_error { + border: 5px solid red !important; + box-sizing: border-box !important; } \ No newline at end of file From 4c25d6fbbbfad67176056a6f5af1826c2c2eb24c Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:34:01 -0400 Subject: [PATCH 061/102] Fix for loading model multiple times loosing the gpu/cpu splits --- modeling/inference_models/hf.py | 6 ------ modeling/inference_models/hf_torch.py | 3 +++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e801eab2..b50ebf56 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel): torch.cuda.empty_cache() except: pass - if self.hf_torch: - if 'breakmodel' in sys.modules: - import breakmodel - breakmodel.breakmodel = True - breakmodel.gpu_blocks = [] - breakmodel.disk_blocks = 0 def _post_load(self) -> None: # These are model specific tokenizer overrides if a model has bad defaults diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index c5560360..681d3ab1 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel): if device_count < 2: primary = None logger.debug("n_layers: {}".format(n_layers)) + logger.debug("gpu blocks: 
{}".format(breakmodel.gpu_blocks)) gpu_blocks = breakmodel.gpu_blocks + ( device_count - len(breakmodel.gpu_blocks) ) * [0] @@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel): n_layers = utils.num_layers(config) + logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks)) + if utils.args.cpu: breakmodel.gpu_blocks = [0] * n_layers return From 48226191922a48024a75a531668d3638b1f71155 Mon Sep 17 00:00:00 2001 From: ebolam Date: Mon, 22 May 2023 20:47:14 -0400 Subject: [PATCH 062/102] Fix for model backends that have no inputs not being able to load in the UI --- static/koboldai.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/static/koboldai.js b/static/koboldai.js index dabbcda9..c4b2e160 100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1933,6 +1933,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker() + } function update_gpu_layers() { From 5561cc1f220c0cf9d957bcbd3e535ad88502ab82 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:33:19 -0400 Subject: [PATCH 063/102] Fix for GPU generation --- modeling/inference_models/hf_torch.py | 13 ++++++++- static/application.js | 42 +++++++++++++++------------ static/koboldai.js | 40 +++++++++++++------------ 3 files changed, 58 insertions(+), 37 deletions(-) diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 681d3ab1..2f575e73 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -125,6 +125,17 @@ class HFTorchInferenceModel(HFInferenceModel): else: return "Unknown" + def get_auxilary_device(self): + """Get device auxilary tensors like inputs should be stored on.""" + + # NOTE: TPU isn't a torch device, so TPU stuff gets sent to CPU. 
+ if utils.koboldai_vars.hascuda and self.usegpu: + return utils.koboldai_vars.gpu_device + elif utils.koboldai_vars.hascuda and self.breakmodel: + import breakmodel + return breakmodel.primary_device + return "cpu" + def _post_load(m_self) -> None: if not utils.koboldai_vars.model_type: @@ -226,7 +237,7 @@ class HFTorchInferenceModel(HFInferenceModel): else: gen_in = prompt_tokens - device = utils.get_auxilary_device() + device = self.get_auxilary_device() gen_in = gen_in.to(device) additional_bad_words_ids = [self.tokenizer.encode("\n")] if single_line else [] diff --git a/static/application.js b/static/application.js index ca445c5f..ca81f729 100644 --- a/static/application.js +++ b/static/application.js @@ -4012,16 +4012,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -4259,6 +4261,8 @@ function selected_model_info(sent_data) { document.getElementById(document.getElementById("modelplugin").value + "_settings_area").classList.remove("hidden"); } + model_settings_checker(); + } function getModelParameterCount(modelName) { @@ -4371,16 +4375,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; diff --git a/static/koboldai.js b/static/koboldai.js index c4b2e160..f0a1f6f8 
100644 --- a/static/koboldai.js +++ b/static/koboldai.js @@ -1686,16 +1686,18 @@ function model_settings_checker() { //get an object of all the input settings from the user data = {} settings_area = document.getElementById(document.getElementById("modelplugin").value + "_settings_area"); - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; @@ -1965,16 +1967,18 @@ function load_model() { //get an object of all the input settings from the user data = {} - for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { - var element_data = element.value; - if (element.getAttribute("data_type") == "int") { - element_data = parseInt(element_data); - } else if (element.getAttribute("data_type") == "float") { - element_data = parseFloat(element_data); - } else if (element.getAttribute("data_type") == "bool") { - element_data = (element_data == 'on'); + if (settings_area) { + for (const element of settings_area.querySelectorAll(".model_settings_input:not(.hidden)")) { + var element_data = element.value; + if (element.getAttribute("data_type") == "int") { + element_data = parseInt(element_data); + } else if (element.getAttribute("data_type") == "float") { + element_data = parseFloat(element_data); + } else if (element.getAttribute("data_type") == "bool") { + element_data = (element_data == 'on'); + } + data[element.id.split("|")[1].replace("_value", "")] = element_data; } - data[element.id.split("|")[1].replace("_value", "")] = element_data; } data = {...data, ...selected_model_data}; From 7a8e4c39da3c1d30ddf3489945799b2695d9be86 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 23 May 2023 08:35:15 -0400 Subject: [PATCH 064/102] Fix for attention bias --- aiserver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 38ffc3f6..6276e514 100644 --- a/aiserver.py +++ b/aiserver.py @@ -3693,7 +3693,8 @@ def calcsubmit(txt): bias += [1] * (i - top_index) bias[i] = b["multiplier"] - device = utils.get_auxilary_device() + + device = model.get_auxilary_device() attention_bias.attention_bias = torch.Tensor(bias).to(device) logger.info(f"Bias by {koboldai_vars.memory_attn_bias} -- {attention_bias.attention_bias}") logger.debug("Submit: experimental_features time {}s".format(time.time()-start_time)) From d6c37bbac0fdbbc6a5eba4671bdd85f695efd822 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Tue, 23 May 2023 22:59:36 +0800 Subject: [PATCH 065/102] Updated embedded Kobold Lite to v32 --- 
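Patches 063 and 064 move device selection onto the model backend: generation inputs and the experimental attention-bias tensor both ask the loaded model where auxiliary tensors should live, instead of calling a utils-level helper. The sketch below restates that selection order with a simplified backend object; the attribute names on backend and the build_attention_bias helper are illustrative assumptions, not the project's actual layout:

    import torch

    def get_auxilary_device(backend):
        # Selection order from patch 063: full-GPU mode first, then
        # breakmodel's primary device, otherwise fall back to the CPU.
        if backend.hascuda and backend.usegpu:
            return backend.gpu_device       # e.g. "cuda:0"
        if backend.hascuda and backend.breakmodel:
            return backend.primary_device   # breakmodel's primary GPU
        return "cpu"

    def build_attention_bias(backend, bias_values):
        # Illustrative only: place the bias tensor on the same device the
        # backend reports for inputs, as patch 064 does for attention_bias.
        device = get_auxilary_device(backend)
        return torch.tensor(bias_values, dtype=torch.float32).to(device)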
static/klite.html | 422 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 318 insertions(+), 104 deletions(-) diff --git a/static/klite.html b/static/klite.html index 0893ebbe..8f3e55d1 100644 --- a/static/klite.html +++ b/static/klite.html @@ -3,7 +3,7 @@