Merge branch 'united' of https://github.com/henk717/KoboldAI into peft

aiserver.py (14 changed lines)
@@ -1346,7 +1346,8 @@ def general_startup(override_args=None):
     parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
     parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
     parser.add_argument("--peft", type=str, help="Specify the path or HuggingFace ID of a Peft to load it. Not supported on TPU. (Experimental)")
+    parser.add_argument("--trust_remote_code", action='store_true', default=False, help="Allow Huggingface Models to Execute Code (Insecure!)")

     parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles")
     parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen")
     parser.add_argument('-q', '--quiesce', action='count', default=0, help="The default logging level is ERROR or higher. This value decreases the amount of logging seen in your screen")
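The new flag follows the same store_true pattern as --multi_story: absent, it stays False; present, it flips to True without consuming a value. A minimal standalone sketch of the parse behavior (not KoboldAI's actual parser object):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--trust_remote_code", action='store_true', default=False,
                    help="Allow Huggingface Models to Execute Code (Insecure!)")

# No flag on the command line: the attribute defaults to False.
assert parser.parse_args([]).trust_remote_code is False
# Flag present: store_true sets it to True, no value argument needed.
assert parser.parse_args(["--trust_remote_code"]).trust_remote_code is True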
@@ -1470,8 +1471,13 @@ def general_startup(override_args=None):
     allowed_ips = sorted(allowed_ips, key=lambda ip: int(''.join([i.zfill(3) for i in ip.split('.')])))
     print(f"Allowed IPs: {allowed_ips}")

+    if args.trust_remote_code:
+        logger.warning("EXECUTION OF UNSAFE REMOTE CODE IS ENABLED!!!")
+        logger.warning("You are not protected from Model Viruses in this mode!")
+        logger.warning("Exit the program now to abort execution!")
+        logger.warning("Only use this mode with models that you trust and verified!")
+        time.sleep(25)
+        koboldai_vars.trust_remote_code = True
     if args.cpu:
         koboldai_vars.use_colab_tpu = False

@@ -8287,7 +8293,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
     use_userscripts: bool = fields.Boolean(load_default=False, metadata={"description": "Whether or not to use the userscripts from the KoboldAI GUI when generating text."})
     soft_prompt: Optional[str] = fields.String(metadata={"description": "Soft prompt to use when generating. If set to the empty string or any other string containing no non-whitespace characters, uses no soft prompt."}, validate=[soft_prompt_validator, validate.Regexp(r"^[^/\\]*$")])
     max_length: int = fields.Integer(validate=validate.Range(min=1, max=512), metadata={"description": "Number of tokens to generate."})
-    max_context_length: int = fields.Integer(validate=validate.Range(min=512, max=2048), metadata={"description": "Maximum number of tokens to send to the model."})
+    max_context_length: int = fields.Integer(validate=validate.Range(min=1), metadata={"description": "Maximum number of tokens to send to the model."})
     n: int = fields.Integer(validate=validate.Range(min=1, max=5), metadata={"description": "Number of outputs to generate."})
     disable_output_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all output formatting options default to `false` instead of the value in the KoboldAI GUI."})
     frmttriminc: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes some characters from the end of the output such that the output doesn't end in the middle of a sentence. If the output is less than one sentence long, does nothing.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
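Relaxing the validator from Range(min=512, max=2048) to Range(min=1) means API clients may now request any positive context length; only non-positive values are rejected at schema load time. A rough sketch of the difference, assuming a plain marshmallow Schema (the real field lives on GenerationInputSchema):

from marshmallow import Schema, fields, validate, ValidationError

class OldSchema(Schema):
    max_context_length = fields.Integer(validate=validate.Range(min=512, max=2048))

class NewSchema(Schema):
    max_context_length = fields.Integer(validate=validate.Range(min=1))

try:
    OldSchema().load({"max_context_length": 4096})  # rejected: above the old max of 2048
except ValidationError as e:
    print(e.messages)

print(NewSchema().load({"max_context_length": 4096}))  # accepted under the new rule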
@@ -1,6 +1,7 @@
 name: koboldai
 channels:
   - pytorch
+  - nvidia
   - conda-forge
   - defaults
 dependencies:
@@ -9,9 +10,9 @@ dependencies:
   - flask-socketio=5.3.2
   - flask-session=0.4.0
   - python-socketio=5.7.2
-  - pytorch=1.11.*
+  - pytorch=2.0.*
   - python=3.8.*
-  - cudatoolkit=11.1
+  - pytorch-cuda=11.8
  - eventlet=0.33.3
   - dnspython=2.2.1
   - markdown
@@ -24,8 +24,8 @@ dependencies:
   - Pillow
   - psutil
   - pip:
-    - --extra-index-url https://download.pytorch.org/whl/rocm5.2
-    - torch==1.13.1+rocm5.2
+    - --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
+    - torch==2.0.*
   - flask-cloudflared==0.0.10
   - flask-ngrok
   - flask-cors
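Both environment files move to the PyTorch 2.0 line: CUDA 11.8 via the pytorch-cuda conda metapackage, ROCm via the 5.4.2 wheel index. A quick way to confirm which build actually landed in an environment, a sketch using only stock torch attributes:

import torch

print(torch.__version__)          # e.g. "2.0.1+cu118" or "2.0.1+rocm5.4.2"
print(torch.version.cuda)         # CUDA toolkit version, or None on ROCm/CPU builds
print(torch.version.hip)          # ROCm/HIP version, or None on CUDA/CPU builds
print(torch.cuda.is_available())  # True if a usable GPU backend was detected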
@@ -1207,12 +1207,12 @@ class system_settings(settings):
     local_only_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                             'lua_koboldcore', 'regex_sl', 'acregex_ai', 'acregex_ui', 'comregex_ai', 'comregex_ui',
                             'sp', '_horde_pid', 'inference_config', 'image_pipeline',
-                            'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui']
+                            'summarizer', 'summary_tokenizer', 'tts_model', 'rng_states', 'comregex_ai', 'comregex_ui', 'trust_remote_code']
     no_save_variables = ['lua_state', 'lua_logname', 'lua_koboldbridge', 'lua_kobold',
                          'lua_koboldcore', 'sp', 'sp_length', '_horde_pid', 'horde_share', 'aibusy',
                          'serverstarted', 'inference_config', 'image_pipeline', 'summarizer',
                          'summary_tokenizer', 'use_colab_tpu', 'noai', 'disable_set_aibusy', 'cloudflare_link', 'tts_model',
-                         'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch']
+                         'generating_image', 'bit_8_available', 'host', 'hascuda', 'usegpu', 'rng_states', 'comregex_ai', 'comregex_ui', 'git_repository', 'git_branch', 'trust_remote_code']
     settings_name = "system"
     def __init__(self, socketio, koboldai_var):
         self._socketio = socketio
@@ -1298,6 +1298,7 @@ class system_settings(settings):
         self.seen_messages = []
         self.git_repository = ""
         self.git_branch = ""
+        self.trust_remote_code = False


     @dataclass
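Adding 'trust_remote_code' to both lists keeps the flag out of saved state and out of the variables synced with clients, so it can only be enabled from the command line at launch. How system_settings actually consumes these lists is not shown in this diff; purely as an illustration of the pattern, a save step might filter like this (the to_save helper is hypothetical):

class SettingsSketch:
    no_save_variables = ['lua_state', 'trust_remote_code']  # abridged

    def to_save(self) -> dict:
        # Persist everything except attributes named in no_save_variables,
        # so trust_remote_code always resets to False on the next launch.
        return {k: v for k, v in self.__dict__.items()
                if k not in self.no_save_variables}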
maps/llama.json (new file, 35 lines)
@@ -0,0 +1,35 @@
+{
+    "mtj_compat": "llama",
+    "mtj_pe": "neox_rotary",
+    "mtj_config_map": {
+        "norm": ["norm", "layernorm-nobias"],
+        "pe_rotary_dims": ["pe_rotary_dims", 128],
+        "d_model": "hidden_size",
+        "n_heads": "num_attention_heads",
+        "n_vocab": "vocab_size",
+        "layers": "num_hidden_layers",
+        "seq": "max_position_embeddings",
+        "tokenizer_class": ["tokenizer_class", "LlamaTokenizer"],
+        "tokenizer": ["tokenizer", "llama"]
+    },
+    "static_weights": {
+        "model.embed_tokens.weight": {"mtj": {"module": "embedding_shard/~/linear", "param": "w", "transforms": ["no_transpose", "vocab_pad"]}},
+        "model.norm.weight": {"mtj": {"module": "projection_shard/~/replicated_layer_norm", "param": "scale"}},
+        "lm_head.weight": {"mtj": {"module": "projection_shard/~/linear", "param": "w", "transforms": ["vocab_pad"]}}
+    },
+    "layer_weights": {
+        "transformer.h.{layer}.attn.attention.bias": {},
+        "transformer.h.{layer}.attn.attention.masked_bias": {},
+        "model.layers.{layer}.self_attn.rotary_emb.inv_freq": {},
+        "model.layers.{layer}.self_attn.q_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear", "param": "w"}},
+        "model.layers.{layer}.self_attn.v_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_1", "param": "w"}},
+        "model.layers.{layer}.self_attn.k_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_2", "param": "w"}},
+        "model.layers.{layer}.self_attn.o_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_3", "param": "w"}},
+        "model.layers.{layer}.mlp.gate_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_4", "param": "w"}},
+        "model.layers.{layer}.mlp.down_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_5", "param": "w"}},
+        "model.layers.{layer}.mlp.up_proj.weight": {"mtj": {"module": "layer_{layer}/~/linear_6", "param": "w"}},
+        "model.layers.{layer}.input_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm", "param": "scale"}},
+        "model.layers.{layer}.post_attention_layernorm.weight": {"mtj": {"module": "layer_{layer}/~/replicated_layer_norm_1", "param": "scale"}}
+    }
+}
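The {layer} placeholders in layer_weights are string templates: a loader can expand them per layer index to pair each HF checkpoint tensor name with its MTJ module path. A small sketch of that expansion after parsing the map with json.load (the expand helper below is illustrative, not the TPU loader's real API):

import json

with open("maps/llama.json") as f:
    llama_map = json.load(f)

def expand(layer: int) -> dict:
    # Substitute the layer index into both the HF tensor name (the key)
    # and the MTJ module path nested inside the value.
    expanded = {}
    for name, target in llama_map["layer_weights"].items():
        rendered = json.loads(json.dumps(target).replace("{layer}", str(layer)))
        expanded[name.format(layer=layer)] = rendered
    return expanded

print(expand(0)["model.layers.0.self_attn.q_proj.weight"])
# -> {'mtj': {'module': 'layer_0/~/linear', 'param': 'w'}}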
@@ -296,6 +296,7 @@ class HFTorchInferenceModel(HFInferenceModel):
     def _get_model(self, location: str, tf_kwargs: Dict):
         tf_kwargs["revision"] = utils.koboldai_vars.revision
         tf_kwargs["cache_dir"] = "cache"
+        tf_kwargs["trust_remote_code"] = utils.koboldai_vars.trust_remote_code

         # If we have model hints for legacy model, use them rather than fall back.
         try:
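Because tf_kwargs is forwarded to Hugging Face's from_pretrained, the stored flag ends up controlling whether custom modeling code bundled with a checkpoint may run. A sketch of the effect in isolation, with a placeholder model ID:

from transformers import AutoModelForCausalLM

# trust_remote_code=False (the transformers default) refuses to import
# modeling code shipped inside the checkpoint repo; True executes it,
# which is why the startup warning and 25-second grace period exist.
model = AutoModelForCausalLM.from_pretrained(
    "some-org/some-custom-model",  # hypothetical model ID
    revision="main",
    cache_dir="cache",
    trust_remote_code=False,
)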
@@ -4,7 +4,7 @@ Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
 requests
-torch >= 1.9, < 1.13
+torch == 2.0.*
 flask-cloudflared==0.0.10
 flask-ngrok
 flask-cors
@@ -9,7 +9,7 @@ transformers == 4.28.0
 chex == 0.1.5
 huggingface_hub==0.12.1
 progressbar2
-git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
+git+https://github.com/Zurnaz/mesh-transformer-jax.git@llama_tpu
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
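The MTJ requirements swap the mesh-transformer-jax source to the Zurnaz fork's llama_tpu branch, installed straight from git, while the main requirements move torch to the 2.0 line. A sketch for spot-checking a few pins after install, assuming the distribution names match the requirement names (packages installed from git may register under a different name and report as missing):

from importlib.metadata import version, PackageNotFoundError

for pkg in ("torch", "transformers", "Flask"):
    try:
        print(pkg, version(pkg))
    except PackageNotFoundError:
        print(pkg, "not installed")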