Merge branch 'united' into scribe

db0 committed 2023-08-28 17:45:49 +02:00
13 changed files with 28 additions and 46 deletions

View File

@@ -248,7 +248,7 @@ model_menu = {
 MenuPath("Load a model from its directory", "NeoCustom"),
 MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
 MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
-MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
+MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
 MenuFolder("Instruct Models", "instructlist"),
 MenuFolder("Novel Models", "novellist"),
 MenuFolder("Chat Models", "chatlist"),

View File

@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 cmd /k "%*"
 :drivemap
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 cmd /k "%*"
 :drivemap_B
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 cmd /k "%*"

View File

@@ -32,10 +32,11 @@ dependencies:
 - flask-ngrok
 - flask-cors
 - lupa==1.10
-- transformers==4.31.0
-- huggingface_hub==0.15.1
-- safetensors==0.3.1
-- accelerate==0.20.3
+- transformers==4.32.*
+- huggingface_hub==0.16.4
+- optimum==1.12.0
+- safetensors==0.3.3
+- accelerate==0.21.0
 - git+https://github.com/VE-FORBRYDERNE/mkultra
 - flask-session
 - ansi2html
@@ -50,7 +51,7 @@ dependencies:
 - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
 - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
 - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-- https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
 - einops
 - peft==0.3.0
 - scipy

View File

@@ -30,10 +30,11 @@ dependencies:
 - flask-ngrok
 - flask-cors
 - lupa==1.10
-- transformers==4.31.0
-- huggingface_hub==0.15.1
-- safetensors==0.3.1
-- accelerate==0.20.3
+- transformers==4.32.*
+- huggingface_hub==0.16.4
+- optimum==1.12.0
+- safetensors==0.3.3
+- accelerate==0.21.0
 - git+https://github.com/VE-FORBRYDERNE/mkultra
 - ansi2html
 - flask_compress

View File

@@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -32,8 +30,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum
@@ -44,8 +40,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 pip install git+https://github.com/huggingface/transformers
 pip install git+https://github.com/huggingface/optimum

View File

@@ -44,8 +44,6 @@ echo 3 > loader.settings
 subst B: /D >nul
 mkdir miniconda3
 subst B: miniconda3
-SET TEMP=B:\
-SET TMP=B:\
 copy umamba.exe B:\umamba.exe
 copy loader.settings B:\loader.settings
 copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit
 :subfolder
 echo 2 > loader.settings
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 umamba.exe create -r miniconda3\ -n base
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y

View File

@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):
 def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
     requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+    if not utils.koboldai_vars.hascuda:
+        logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")
     dependency_exists = importlib.util.find_spec("bitsandbytes")
     if dependency_exists:
         if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel):
         temp = json.load(f)
 else:
     temp = {}
-if not hasattr(self.model_config, 'quantization_config'):
+if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
     requested_parameters.append({
         "uitype": "dropdown",
         "unit": "text",

View File

@@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel):
 except:
     autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
 if autogptq_failed:
-    model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)
+    model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
 # Patch in embeddings function
 def get_input_embeddings(self):
     return self.model.get_input_embeddings()
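
The hunk above only changes the keyword used on the AutoGPTQ fallback path. A minimal sketch of the overall try-then-retry pattern, assuming auto-gptq 0.4.x (keyword availability differs between releases) and using an invented helper name load_gptq:

import gc

import torch
from auto_gptq import AutoGPTQForCausalLM


def load_gptq(location, basename, device_map):
    """Try the default kernels first, then retry without fused attention."""
    try:
        return AutoGPTQForCausalLM.from_quantized(
            location,
            model_basename=basename,
            use_safetensors=True,
            device_map=device_map,
        )
    except Exception:
        # Free the VRAM held by the failed attempt before retrying.
        gc.collect()
        torch.cuda.empty_cache()
    # Retry with fused attention disabled, the fallback the new hunk uses
    # in place of disable_exllama=True.
    return AutoGPTQForCausalLM.from_quantized(
        location,
        model_basename=basename,
        use_safetensors=True,
        device_map=device_map,
        inject_fused_attention=False,
    )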

View File

@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
 if self.model_type == "llama":
     # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
     self.tokenizer.add_bos_token = False
+    self.tokenizer.legacy = False
     # HF transformers no longer supports decode_with_prefix_space
     # We work around this by wrapping decode, encode, and __call__
     # with versions that work around the 'prefix space' misfeature
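
The comment above describes the prefix-space workaround only in prose. As a hedged illustration of the general wrapping pattern — not the actual hf.py implementation; it assumes token_ids is a plain list of ints — wrapping decode might look roughly like this:

def wrap_decode(tokenizer):
    """Wrap tokenizer.decode so a word-initial space is not silently dropped."""
    original_decode = tokenizer.decode

    def decode(token_ids, *args, **kwargs):
        text = original_decode(token_ids, *args, **kwargs)
        if token_ids:
            first_piece = tokenizer.convert_ids_to_tokens([token_ids[0]])[0]
            # SentencePiece marks a word-initial space with "▁"; re-add the
            # space that decode() strips when called on a mid-text span.
            if first_piece.startswith("▁") and not text.startswith(" "):
                text = " " + text
        return text

    tokenizer.decode = decode
    return tokenizer

The same idea extends to encode and __call__, which the comment says are wrapped as well.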

View File

@@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B
 :subfolder
 ECHO Runtime launching in subfolder mode
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -28,8 +26,6 @@ cmd /k
 ECHO Runtime launching in K: drive mode
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 python aiserver.py %*
 cmd /k
@@ -38,8 +34,6 @@ cmd /k
 ECHO Runtime launching in B: drive mode
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 python aiserver.py %*
 cmd /k

View File

@@ -1,5 +1,7 @@
-transformers==4.31.*
-huggingface_hub==0.15.1
+transformers==4.32.*
+huggingface_hub==0.16.4
+optimum==1.12.0
+safetensors==0.3.3
 Flask==2.2.3
 Flask-SocketIO==5.3.2
 python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
 bleach==4.1.0
 sentencepiece
 protobuf
-accelerate==0.20.3
+accelerate==0.21.0
 flask-session==0.4.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -37,10 +39,13 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors==0.3.1
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
 peft==0.3.0
 scipy
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
+https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
 windows-curses; sys_platform == 'win32'
 pynvml

View File

@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
 IF %M%==3 GOTO drivemap_B
 :subfolder
-SET TEMP=%~DP0MINICONDA3
-SET TMP=%~DP0MINICONDA3
 call miniconda3\condabin\activate
 GOTO GIT
 :drivemap
 subst /D K: >nul
 subst K: miniconda3 >nul
-SET TEMP=K:\
-SET TMP=K:\
 call K:\python\condabin\activate
 GOTO GIT
 :drivemap_B
 subst /D B: >nul
 subst B: miniconda3 >nul
-SET TEMP=B:\
-SET TMP=B:\
 call B:\python\condabin\activate
 GOTO GIT