diff --git a/AI-Horde-Worker b/AI-Horde-Worker
index 960723f3..755696b9 160000
--- a/AI-Horde-Worker
+++ b/AI-Horde-Worker
@@ -1 +1 @@
-Subproject commit 960723f39a2b51a1e24d59d3c46121e7d59618ca
+Subproject commit 755696b9d4464e4167bfea5fd426686420015038
diff --git a/aiserver.py b/aiserver.py
index e1d74172..ba3be3d4 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -248,7 +248,7 @@ model_menu = {
         MenuPath("Load a model from its directory", "NeoCustom"),
         MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
         MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
-        MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
+        MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
         MenuFolder("Instruct Models", "instructlist"),
         MenuFolder("Novel Models", "novellist"),
         MenuFolder("Chat Models", "chatlist"),
diff --git a/commandline.bat b/commandline.bat
index 94e61608..2575c849 100644
--- a/commandline.bat
+++ b/commandline.bat
@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 cmd /k "%*"
 
 :drivemap
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 cmd /k "%*"
 
 :drivemap_B
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 cmd /k "%*"
\ No newline at end of file
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 0ceb43ec..e053654a 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -32,10 +32,11 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.31.0
-    - huggingface_hub==0.15.1
-    - safetensors==0.3.1
-    - accelerate==0.20.3
+    - transformers==4.32.*
+    - huggingface_hub==0.16.4
+    - optimum==1.12.0
+    - safetensors==0.3.3
+    - accelerate==0.21.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - flask-session
     - ansi2html
@@ -50,7 +51,7 @@ dependencies:
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - einops
     - peft==0.3.0
     - scipy
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 9bf2813e..bd468a6a 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -30,10 +30,11 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.31.0
-    - huggingface_hub==0.15.1
-    - safetensors==0.3.1
-    - accelerate==0.20.3
+    - transformers==4.32.*
+    - huggingface_hub==0.16.4
+    - optimum==1.12.0
+    - safetensors==0.3.3
+    - accelerate==0.21.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - ansi2html
     - flask_compress
diff --git a/install_git_transformers.bat b/install_git_transformers.bat
index 7154e2a1..09f80547 100644
--- a/install_git_transformers.bat
+++ b/install_git_transformers.bat
@@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B
 
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -32,8 +30,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +40,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
diff --git a/install_requirements.bat b/install_requirements.bat
index 9756a18f..496917c0 100644
--- a/install_requirements.bat
+++ b/install_requirements.bat
@@ -44,8 +44,6 @@ echo 3 > loader.settings
 subst B: /D >nul
 mkdir miniconda3
 subst B: miniconda3
-SET TEMP=B:\
-SET TMP=B:\
 copy umamba.exe B:\umamba.exe
 copy loader.settings B:\loader.settings
 copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit
 
 :subfolder
 echo 2 > loader.settings
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y
diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index 9b1049cf..f95bb24a 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
         requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if not utils.koboldai_vars.hascuda:
+            logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the RAM requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software, check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")
+
         dependency_exists = importlib.util.find_spec("bitsandbytes")
         if dependency_exists:
             if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
                     temp = json.load(f)
             else:
                 temp = {}
-            if not hasattr(self.model_config, 'quantization_config'):
+            if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
                 requested_parameters.append({
                     "uitype": "dropdown",
                     "unit": "text",
diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py
index 3d044b6f..3094dc33 100644
--- a/modeling/inference_models/gptq_hf_torch/class.py
+++ b/modeling/inference_models/gptq_hf_torch/class.py
@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
             except:
                 autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
             if autogptq_failed:
-                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
         # Patch in embeddings function
         def get_input_embeddings(self):
             return self.model.get_input_embeddings()
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index e50d87ff..7b005c9e 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
         if self.model_type == "llama":
             # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
-
+            self.tokenizer.legacy = False
             # HF transformers no longer supports decode_with_prefix_space
             # We work around this by wrapping decode, encode, and __call__
             # with versions that work around the 'prefix space' misfeature
diff --git a/play.bat b/play.bat
index c9e82b83..ebf9e8b6 100644
--- a/play.bat
+++ b/play.bat
@@ -18,8 +16,6 @@
 
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -28,8 +26,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -38,8 +34,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 python aiserver.py %*
 cmd /k
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index dff40042..b7abbed1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
-transformers==4.31.*
-huggingface_hub==0.15.1
+transformers==4.32.*
+huggingface_hub==0.16.4
+optimum==1.12.0
+safetensors==0.3.3
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
-accelerate==0.20.3
+accelerate==0.21.0
 flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -37,10 +39,13 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors==0.3.1
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
 peft==0.3.0
 scipy
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml
diff --git a/update-koboldai.bat b/update-koboldai.bat
index f2e642ee..b59f4d8f 100644
--- a/update-koboldai.bat
+++ b/update-koboldai.bat
@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 GOTO GIT
 
 :drivemap
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 GOTO GIT
 
 :drivemap_B
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 GOTO GIT
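
Note on the gptq_hf_torch change above: when AutoGPTQ's first load attempt fails, the retry now passes inject_fused_attention=False instead of disable_exllama=True. The sketch below shows that fallback loading pattern in isolation, assuming auto_gptq 0.4.1 is installed; the checkpoint directory and weight file name are hypothetical placeholders, not values from the repository.

    # Sketch of the fallback load path (assumes auto_gptq 0.4.1).
    from pathlib import Path

    from auto_gptq import AutoGPTQForCausalLM

    location = "models/example-4bit-gptq"  # hypothetical checkpoint directory
    gptq_file = "model.safetensors"        # hypothetical quantized weight file

    model = AutoGPTQForCausalLM.from_quantized(
        location,
        model_basename=Path(gptq_file).stem,  # basename without the file extension
        use_safetensors=gptq_file.endswith(".safetensors"),
        device_map="auto",             # let accelerate place the weights across devices
        inject_fused_attention=False,  # skip fused attention, which not all architectures support
    )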
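
Note on the generic_hf_torch change: the 4-bit/8-bit dropdown is now only offered when a CUDA device is present, since bitsandbytes quantization needs a GPU, and CPU-only users get a warning pointing to https://koboldai.org/cpp instead. Condensed into a standalone function, the gate looks roughly like this; torch.cuda.is_available() stands in for the utils.koboldai_vars.hascuda flag the backend actually consults, and the function name is illustrative.

    # Sketch of the quantization-dropdown gate; names are illustrative.
    import importlib.util

    import torch

    def offer_quantization_dropdown(model_config) -> bool:
        # bitsandbytes must be importable, the checkpoint must not already
        # ship a quantization_config, and a CUDA device must be present.
        has_bnb = importlib.util.find_spec("bitsandbytes") is not None
        already_quantized = hasattr(model_config, "quantization_config")
        return has_bnb and not already_quantized and torch.cuda.is_available()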
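
Note on the hf.py change: transformers 4.31 introduced a legacy flag on the Llama tokenizer, and setting it to False opts into the corrected SentencePiece handling that the new 4.32 pin expects. A standalone illustration, with the model id serving only as an example:

    from transformers import AutoTokenizer

    # Example checkpoint id; any Llama tokenizer behaves the same way.
    tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b", legacy=False)
    tokenizer.add_bos_token = False  # hf.py also disables automatic BOS insertion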