Merge branch 'united' into scribe

db0 committed 2023-08-28 17:45:49 +02:00
13 changed files with 28 additions and 46 deletions

View File

@@ -248,7 +248,7 @@ model_menu = {
         MenuPath("Load a model from its directory", "NeoCustom"),
         MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
         MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
-        MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
+        MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
         MenuFolder("Instruct Models", "instructlist"),
         MenuFolder("Novel Models", "novellist"),
         MenuFolder("Chat Models", "chatlist"),

View File

@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 cmd /k "%*"
 :drivemap
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 cmd /k "%*"
 :drivemap_B
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 cmd /k "%*"

View File

@@ -32,10 +32,11 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.31.0
-    - huggingface_hub==0.15.1
-    - safetensors==0.3.1
-    - accelerate==0.20.3
+    - transformers==4.32.*
+    - huggingface_hub==0.16.4
+    - optimum==1.12.0
+    - safetensors==0.3.3
+    - accelerate==0.21.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - flask-session
     - ansi2html
@@ -50,7 +51,7 @@ dependencies:
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - einops
     - peft==0.3.0
     - scipy
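The pins above move transformers from an exact 4.31.0 to the wildcard 4.32.*, and the Windows AutoGPTQ wheel is pointed back at the upstream PanQiWei release. A small sketch of what a PEP 440 wildcard pin accepts, using the packaging library (an assumption; it ships alongside pip):

from packaging.specifiers import SpecifierSet
from packaging.version import Version

# "==4.32.*" accepts any 4.32.x release but rejects neighbours,
# so patch releases install without an environment rebuild.
spec = SpecifierSet("==4.32.*")
for candidate in ("4.31.0", "4.32.0", "4.32.1", "4.33.0"):
    print(candidate, "->", Version(candidate) in spec)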

View File

@@ -30,10 +30,11 @@ dependencies:
     - flask-ngrok
     - flask-cors
     - lupa==1.10
-    - transformers==4.31.0
-    - huggingface_hub==0.15.1
-    - safetensors==0.3.1
-    - accelerate==0.20.3
+    - transformers==4.32.*
+    - huggingface_hub==0.16.4
+    - optimum==1.12.0
+    - safetensors==0.3.3
+    - accelerate==0.21.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - ansi2html
     - flask_compress

View File

@@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -32,8 +30,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +40,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum

View File

@@ -44,8 +44,6 @@ echo 3 > loader.settings
 subst B: /D >nul
 mkdir miniconda3
 subst B: miniconda3
-SET TEMP=B:\
-SET TMP=B:\
 copy umamba.exe B:\umamba.exe
 copy loader.settings B:\loader.settings
 copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit
 :subfolder
 echo 2 > loader.settings
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y

View File

@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
         requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if not utils.koboldai_vars.hascuda:
+            logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")
         dependency_exists = importlib.util.find_spec("bitsandbytes")
         if dependency_exists:
             if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
                     temp = json.load(f)
                 else:
                     temp = {}
-                if not hasattr(self.model_config, 'quantization_config'):
+                if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
                     requested_parameters.append({
                         "uitype": "dropdown",
                         "unit": "text",

View File

@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
                 except:
                     autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
             if autogptq_failed:
-                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+                model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
             # Patch in embeddings function
             def get_input_embeddings(self):
                 return self.model.get_input_embeddings()
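This is the last rung of a retry ladder: earlier attempts load with faster options, and the final fallback now disables fused attention rather than only the ExLlama kernels. A hedged sketch of the pattern, assuming auto_gptq 0.4.x (both keyword arguments exist there); the attempt list is illustrative, not the repo's exact sequence:

import gc
from pathlib import Path

import torch
from auto_gptq import AutoGPTQForCausalLM

def load_gptq(location: str, gptq_file: str, device_map):
    common = dict(
        model_basename=Path(gptq_file).stem,
        use_safetensors=gptq_file.endswith(".safetensors"),
        device_map=device_map,
    )
    attempts = [
        {},                                 # fast path: fused attention on
        {"inject_fused_attention": False},  # the commit's new last resort
    ]
    for extra in attempts:
        try:
            return AutoGPTQForCausalLM.from_quantized(location, **common, **extra)
        except Exception:
            gc.collect()
            torch.cuda.empty_cache()  # free VRAM before the next attempt
    raise RuntimeError("all GPTQ load attempts failed")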

View File

@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
         if self.model_type == "llama":
             # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
+            self.tokenizer.legacy = False
             # HF transformers no longer supports decode_with_prefix_space
             # We work around this by wrapping decode, encode, and __call__
             # with versions that work around the 'prefix space' misfeature
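The added line opts Llama tokenizers out of the legacy SentencePiece behavior; transformers introduced the legacy flag in 4.31, and this merge moves the pin to 4.32.*, where it is supported. A short sketch, assuming a locally available Llama checkpoint (the path is a placeholder):

from transformers import LlamaTokenizer

# legacy=False selects the corrected handling of tokens that follow
# special tokens; add_bos_token=False stops the tokenizer prepending
# <s> on every encode, since the caller manages BOS itself.
tok = LlamaTokenizer.from_pretrained("path/to/llama-model", legacy=False)  # placeholder path
tok.add_bos_token = False
print(tok.encode("Hello world"))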

View File

@@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -28,8 +26,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -38,8 +34,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 python aiserver.py %*
 cmd /k

View File

@@ -1,5 +1,7 @@
-transformers==4.31.*
-huggingface_hub==0.15.1
+transformers==4.32.*
+huggingface_hub==0.16.4
+optimum==1.12.0
+safetensors==0.3.3
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
-accelerate==0.20.3
+accelerate==0.21.0
 flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -37,10 +39,13 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors==0.3.1
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
 peft==0.3.0
 scipy
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml
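Each AutoGPTQ wheel above carries a PEP 508 environment marker, so pip resolves exactly one wheel per platform/Python combination. A sketch of how such a marker evaluates, again via the packaging library:

from packaging.markers import Marker

marker = Marker("sys_platform == 'win32' and python_version == '3.8'")
print(marker.evaluate())  # evaluated against the current interpreter
# Override parts of the environment to test another target:
print(marker.evaluate({"sys_platform": "win32", "python_version": "3.8"}))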

View File

@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 GOTO GIT
 :drivemap
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 GOTO GIT
 :drivemap_B
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 GOTO GIT