This commit is contained in:
Henk
2023-11-08 18:47:02 +01:00
parent f43896750a
commit 9e275de5d9
6 changed files with 31 additions and 31 deletions

View File

@@ -273,6 +273,7 @@ model_menu = {
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
], ],
'instructlist': [ 'instructlist': [
MenuModel("Tiefighter 13B", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"), MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"),
MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"), MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"),
MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"), MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"),
@@ -283,6 +284,7 @@ model_menu = {
], ],
'adventurelist': [ 'adventurelist': [
MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"), MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"),
MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"), MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"),
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"), MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"),
MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"), MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"),
@@ -298,6 +300,7 @@ model_menu = {
MenuFolder("Return to Main Menu", "mainmenu"), MenuFolder("Return to Main Menu", "mainmenu"),
], ],
'novellist': [ 'novellist': [
MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"), MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"),
MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"), MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"),
MenuModel("Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"), MenuModel("Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"),

View File

@@ -35,11 +35,11 @@ dependencies:
- flask-cors - flask-cors
- Werkzeug==2.3.7 - Werkzeug==2.3.7
- lupa==1.10 - lupa==1.10
- transformers[sentencepiece]==4.34.0 - transformers[sentencepiece]==4.35.0
- huggingface_hub==0.16.4 - huggingface_hub==0.16.4
- optimum[onnxruntime]==1.13.2 - optimum[onnxruntime]==1.14.0
- safetensors==0.3.3 - safetensors==0.4.0
- accelerate==0.21.0 - accelerate==0.24.1
- git+https://github.com/VE-FORBRYDERNE/mkultra - git+https://github.com/VE-FORBRYDERNE/mkultra
- flask-session - flask-session
- ansi2html - ansi2html
@@ -53,10 +53,9 @@ dependencies:
- git+https://github.com/0cc4m/hf_bleeding_edge/ - git+https://github.com/0cc4m/hf_bleeding_edge/
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - auto_gptq==0.5.0
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- einops - einops
- peft==0.3.0 - peft==0.6.0
- scipy - scipy
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'

View File

@@ -35,11 +35,11 @@ dependencies:
- flask-cors - flask-cors
- Werkzeug==2.3.7 - Werkzeug==2.3.7
- lupa==1.10 - lupa==1.10
- transformers[sentencepiece]==4.34.0 - transformers[sentencepiece]==4.35.0
- huggingface_hub==0.16.4 - huggingface_hub==0.16.4
- optimum[onnxruntime,openvino,nncf,neural-compressor]==1.13.2 - optimum[onnxruntime,openvino,nncf,neural-compressor]==1.14.0
- safetensors==0.3.3 - safetensors==0.4.0
- accelerate==0.21.0 - accelerate==0.24.1
- git+https://github.com/VE-FORBRYDERNE/mkultra - git+https://github.com/VE-FORBRYDERNE/mkultra
- flask-session - flask-session
- ansi2html - ansi2html
@@ -51,10 +51,9 @@ dependencies:
- git+https://github.com/0cc4m/hf_bleeding_edge/ - git+https://github.com/0cc4m/hf_bleeding_edge/
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - auto_gptq==0.5.0
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- einops - einops
- peft==0.3.0 - peft==0.6.0
- scipy - scipy
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'

View File

@@ -31,11 +31,11 @@ dependencies:
- flask-cors - flask-cors
- Werkzeug==2.3.7 - Werkzeug==2.3.7
- lupa==1.10 - lupa==1.10
- transformers[sentencepiece]==4.34.0 - transformers[sentencepiece]==4.35.0
- huggingface_hub==0.16.4 - huggingface_hub==0.16.4
- optimum[onnxruntime]==1.13.2 - optimum[onnxruntime]==1.14.0
- safetensors==0.3.3 - safetensors==0.4.0
- accelerate==0.21.0 - accelerate==0.24.1
- git+https://github.com/VE-FORBRYDERNE/mkultra - git+https://github.com/VE-FORBRYDERNE/mkultra
- ansi2html - ansi2html
- flask_compress - flask_compress
@@ -45,8 +45,8 @@ dependencies:
- diffusers - diffusers
- git+https://github.com/0cc4m/hf_bleeding_edge/ - git+https://github.com/0cc4m/hf_bleeding_edge/
- einops - einops
- peft==0.3.0 - peft==0.6.0
- windows-curses; sys_platform == 'win32' - windows-curses; sys_platform == 'win32'
- pynvml - pynvml
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp38-cp38-linux_x86_64.whl - auto_gptq==0.5.0
- omegaconf - omegaconf

View File

@@ -13,6 +13,7 @@ import transformers
from transformers import ( from transformers import (
GPT2Tokenizer, GPT2Tokenizer,
AutoTokenizer, AutoTokenizer,
LlamaTokenizer,
) )
from modeling.stoppers import Stoppers from modeling.stoppers import Stoppers
from modeling.tokenizer import GenericTokenizer from modeling.tokenizer import GenericTokenizer
@@ -251,9 +252,10 @@ class InferenceModel:
location, use_fast=False, **std_kwargs location, use_fast=False, **std_kwargs
), ),
lambda: AutoTokenizer.from_pretrained(location, **std_kwargs), lambda: AutoTokenizer.from_pretrained(location, **std_kwargs),
# Fallback to GPT2Tokenizer # Attempt more basic GPT2 Tokenizer
lambda: GPT2Tokenizer.from_pretrained(location, **std_kwargs), lambda: GPT2Tokenizer.from_pretrained(location, **std_kwargs),
lambda: GPT2Tokenizer.from_pretrained("gpt2", **std_kwargs), # Fallback to generic LLaMA Tokenizer
lambda: LlamaTokenizer.from_pretrained("KoboldAI/llama2-tokenizer", use_fast=False, **std_kwargs),
] ]
for i, try_get_tokenizer in enumerate(suppliers): for i, try_get_tokenizer in enumerate(suppliers):

View File

@@ -1,7 +1,7 @@
transformers[sentencepiece]==4.34.0 transformers[sentencepiece]==4.35.0
huggingface_hub==0.16.4 huggingface_hub==0.16.4
optimum[onnxruntime]==1.13.2 optimum[onnxruntime]==1.14.0
safetensors==0.3.3 safetensors==0.4.0
Flask==2.3.3 Flask==2.3.3
Flask-SocketIO==5.3.2 Flask-SocketIO==5.3.2
Werkzeug==2.3.7 Werkzeug==2.3.7
@@ -17,7 +17,7 @@ lupa==1.10
markdown markdown
bleach==4.1.0 bleach==4.1.0
protobuf protobuf
accelerate==0.21.0 accelerate==0.24.1
flask-session==0.5.0 flask-session==0.5.0
marshmallow>=3.13 marshmallow>=3.13
apispec-webframeworks apispec-webframeworks
@@ -40,12 +40,9 @@ pytest-metadata==2.0.4
requests-mock==1.10.0 requests-mock==1.10.0
git+https://github.com/0cc4m/hf_bleeding_edge/ git+https://github.com/0cc4m/hf_bleeding_edge/
einops einops
peft==0.3.0 peft==0.6.0
scipy scipy
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10' auto-gptq==0.5.0
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
windows-curses; sys_platform == 'win32' windows-curses; sys_platform == 'win32'
pynvml pynvml
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10' https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'