Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Merge branch 'united' into scribe
Submodule AI-Horde-Worker updated: 960723f39a...755696b9d4
@@ -248,7 +248,7 @@ model_menu = {
         MenuPath("Load a model from its directory", "NeoCustom"),
         MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
         MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
-        MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
+        MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
         MenuFolder("Instruct Models", "instructlist"),
         MenuFolder("Novel Models", "novellist"),
         MenuFolder("Chat Models", "chatlist"),
@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 cmd /k "%*"
 
 :drivemap
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 cmd /k "%*"
 
 :drivemap_B
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 cmd /k "%*"
@@ -32,10 +32,11 @@ dependencies:
   - flask-ngrok
   - flask-cors
   - lupa==1.10
-  - transformers==4.31.0
-  - huggingface_hub==0.15.1
-  - safetensors==0.3.1
-  - accelerate==0.20.3
+  - transformers==4.32.*
+  - huggingface_hub==0.16.4
+  - optimum==1.12.0
+  - safetensors==0.3.3
+  - accelerate==0.21.0
   - git+https://github.com/VE-FORBRYDERNE/mkultra
   - flask-session
   - ansi2html
@@ -50,7 +51,7 @@ dependencies:
   - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
   - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
   - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-  - https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+  - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
   - einops
   - peft==0.3.0
   - scipy
@@ -30,10 +30,11 @@ dependencies:
   - flask-ngrok
   - flask-cors
   - lupa==1.10
-  - transformers==4.31.0
-  - huggingface_hub==0.15.1
-  - safetensors==0.3.1
-  - accelerate==0.20.3
+  - transformers==4.32.*
+  - huggingface_hub==0.16.4
+  - optimum==1.12.0
+  - safetensors==0.3.3
+  - accelerate==0.21.0
   - git+https://github.com/VE-FORBRYDERNE/mkultra
   - ansi2html
   - flask_compress
@@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B
 
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -32,8 +30,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +40,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +44,6 @@ echo 3 > loader.settings
 subst B: /D >nul
 mkdir miniconda3
 subst B: miniconda3
-SET TEMP=B:\
-SET TMP=B:\
 copy umamba.exe B:\umamba.exe
 copy loader.settings B:\loader.settings
 copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit
 
 :subfolder
 echo 2 > loader.settings
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y
@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
         requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if not utils.koboldai_vars.hascuda:
+            logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")
+
         dependency_exists = importlib.util.find_spec("bitsandbytes")
         if dependency_exists:
             if model_name != 'customhuggingface' or "custom_model_name" in parameters:
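
The hunk above gates the quantization options on two runtime capability checks. Below is a minimal standalone sketch of that detection pattern, assuming only that torch is installed; quantization_options_available is an illustrative name, not a KoboldAI function:

    # Probe for optional acceleration support before offering quantization options.
    import importlib.util

    import torch

    def quantization_options_available() -> bool:
        # find_spec() returns None when the module is absent; it checks
        # importability without actually importing (and initializing) the package.
        has_bitsandbytes = importlib.util.find_spec("bitsandbytes") is not None
        # Without a CUDA device the bitsandbytes 4-bit/8-bit paths are
        # unusable, so the dropdown should not be offered at all.
        return has_bitsandbytes and torch.cuda.is_available()

    if __name__ == "__main__":
        print("offer quantization dropdown:", quantization_options_available())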
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
                 temp = json.load(f)
             else:
                 temp = {}
-            if not hasattr(self.model_config, 'quantization_config'):
+            if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
                 requested_parameters.append({
                     "uitype": "dropdown",
                     "unit": "text",
@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
         except:
             autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
         if autogptq_failed:
-            model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+            model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
         # Patch in embeddings function
         def get_input_embeddings(self):
            return self.model.get_input_embeddings()
@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
         if self.model_type == "llama":
             # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
-
+            self.tokenizer.legacy = False
             # HF transformers no longer supports decode_with_prefix_space
             # We work around this by wrapping decode, encode, and __call__
             # with versions that work around the 'prefix space' misfeature
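
The comments above describe wrapping the tokenizer to emulate the removed decode_with_prefix_space behaviour. One way to sketch the decode half of that idea: decode a throwaway marker token in front of the real ids, then slice the marker's text back off, so a SentencePiece tokenizer cannot strip the leading space of the first real token. Illustrative only, and only the decode wrapper; the actual workaround also covers encode and __call__:

    # Assumes a SentencePiece-style tokenizer from transformers.
    def wrap_decode(tokenizer):
        original_decode = tokenizer.decode

        def decode_keeping_prefix_space(token_ids, **kwargs):
            # A marker token in front keeps the first real token from being
            # treated as sentence-initial (which would drop its leading space).
            marker = tokenizer.encode("x", add_special_tokens=False)[:1]
            text = original_decode(marker + list(token_ids), **kwargs)
            prefix = original_decode(marker, **kwargs)
            return text[len(prefix):]

        tokenizer.decode = decode_keeping_prefix_space
        return tokenizer

    # Usage (downloads a tokenizer; any Llama-family repo would do):
    # from transformers import AutoTokenizer
    # tok = wrap_decode(AutoTokenizer.from_pretrained("huggyllama/llama-7b"))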
play.bat (6 changed lines)
@@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B
 
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -28,8 +26,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -38,8 +34,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -1,5 +1,7 @@
-transformers==4.31.*
-huggingface_hub==0.15.1
+transformers==4.32.*
+huggingface_hub==0.16.4
+optimum==1.12.0
+safetensors==0.3.3
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
-accelerate==0.20.3
+accelerate==0.21.0
 flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -37,10 +39,13 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors==0.3.1
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
 peft==0.3.0
 scipy
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml
@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 GOTO GIT
 
 :drivemap
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 GOTO GIT
 
 :drivemap_B
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 GOTO GIT
 