Merge branch 'united' into scribe

db0 committed 2023-08-28 17:45:49 +02:00
13 changed files with 28 additions and 46 deletions

View File

@@ -248,7 +248,7 @@ model_menu = {
 MenuPath("Load a model from its directory", "NeoCustom"),
 MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
 MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
-MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
+MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
 MenuFolder("Instruct Models", "instructlist"),
 MenuFolder("Novel Models", "novellist"),
 MenuFolder("Chat Models", "chatlist"),

View File

@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 cmd /k "%*"
 :drivemap
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 cmd /k "%*"
 :drivemap_B
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 cmd /k "%*"

View File

@@ -32,10 +32,11 @@ dependencies:
 - flask-ngrok
 - flask-cors
 - lupa==1.10
-- transformers==4.31.0
-- huggingface_hub==0.15.1
-- safetensors==0.3.1
-- accelerate==0.20.3
+- transformers==4.32.*
+- huggingface_hub==0.16.4
+- optimum==1.12.0
+- safetensors==0.3.3
+- accelerate==0.21.0
 - git+https://github.com/VE-FORBRYDERNE/mkultra
 - flask-session
 - ansi2html
@@ -50,7 +51,7 @@ dependencies:
 - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
 - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
 - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-- https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
 - einops
 - peft==0.3.0
 - scipy

View File

@@ -30,10 +30,11 @@ dependencies:
 - flask-ngrok
 - flask-cors
 - lupa==1.10
-- transformers==4.31.0
-- huggingface_hub==0.15.1
-- safetensors==0.3.1
-- accelerate==0.20.3
+- transformers==4.32.*
+- huggingface_hub==0.16.4
+- optimum==1.12.0
+- safetensors==0.3.3
+- accelerate==0.21.0
 - git+https://github.com/VE-FORBRYDERNE/mkultra
 - ansi2html
 - flask_compress

View File

@@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -32,8 +30,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +40,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum

View File

@@ -44,8 +44,6 @@ echo 3 > loader.settings
 subst B: /D >nul
 mkdir miniconda3
 subst B: miniconda3
-SET TEMP=B:\
-SET TMP=B:\
 copy umamba.exe B:\umamba.exe
 copy loader.settings B:\loader.settings
 copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit
 :subfolder
 echo 2 > loader.settings
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y

View File

@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
 def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
     requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+    if not utils.koboldai_vars.hascuda:
+        logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")
     dependency_exists = importlib.util.find_spec("bitsandbytes")
     if dependency_exists:
         if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
         temp = json.load(f)
 else:
     temp = {}
-if not hasattr(self.model_config, 'quantization_config'):
+if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
     requested_parameters.append({
         "uitype": "dropdown",
         "unit": "text",

View File

@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
 except:
     autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
 if autogptq_failed:
-    model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+    model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
 # Patch in embeddings function
 def get_input_embeddings(self):
     return self.model.get_input_embeddings()
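
The hunk above only changes the keyword used on the AutoGPTQ fallback path. A minimal sketch of the overall try-then-retry pattern, assuming auto-gptq 0.4.x (keyword availability differs between releases) and using an invented helper name load_gptq:

import gc

import torch
from auto_gptq import AutoGPTQForCausalLM


def load_gptq(location, basename, device_map):
    """Try the default kernels first, then retry without fused attention."""
    try:
        return AutoGPTQForCausalLM.from_quantized(
            location,
            model_basename=basename,
            use_safetensors=True,
            device_map=device_map,
        )
    except Exception:
        # Free the VRAM held by the failed attempt before retrying.
        gc.collect()
        torch.cuda.empty_cache()
    # Retry with fused attention disabled, the fallback the new hunk uses
    # in place of disable_exllama=True.
    return AutoGPTQForCausalLM.from_quantized(
        location,
        model_basename=basename,
        use_safetensors=True,
        device_map=device_map,
        inject_fused_attention=False,
    )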

View File

@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
 if self.model_type == "llama":
     # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
     self.tokenizer.add_bos_token = False
+    self.tokenizer.legacy = False
     # HF transformers no longer supports decode_with_prefix_space
     # We work around this by wrapping decode, encode, and __call__
     # with versions that work around the 'prefix space' misfeature
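
The comment above describes the prefix-space workaround only in prose. As a hedged illustration of the general wrapping pattern — not the actual hf.py implementation; it assumes token_ids is a plain list of ints — wrapping decode might look roughly like this:

def wrap_decode(tokenizer):
    """Wrap tokenizer.decode so a word-initial space is not silently dropped."""
    original_decode = tokenizer.decode

    def decode(token_ids, *args, **kwargs):
        text = original_decode(token_ids, *args, **kwargs)
        if token_ids:
            first_piece = tokenizer.convert_ids_to_tokens([token_ids[0]])[0]
            # SentencePiece marks a word-initial space with "▁"; re-add the
            # space that decode() strips when called on a mid-text span.
            if first_piece.startswith("▁") and not text.startswith(" "):
                text = " " + text
        return text

    tokenizer.decode = decode
    return tokenizer

The same idea extends to encode and __call__, which the comment says are wrapped as well.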

View File

@@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -28,8 +26,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -38,8 +34,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 python aiserver.py %*
 cmd /k

View File

@@ -1,5 +1,7 @@
-transformers==4.31.*
-huggingface_hub==0.15.1
+transformers==4.32.*
+huggingface_hub==0.16.4
+optimum==1.12.0
+safetensors==0.3.3
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
-accelerate==0.20.3
+accelerate==0.21.0
 flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -37,10 +39,13 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors==0.3.1
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
 peft==0.3.0
 scipy
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml

View File

@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 GOTO GIT
 :drivemap
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 GOTO GIT
 :drivemap_B
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 GOTO GIT