HF 4.35
This commit is contained in:
parent
f43896750a
commit
9e275de5d9
|
@ -273,6 +273,7 @@ model_menu = {
|
|||
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
|
||||
],
|
||||
'instructlist': [
|
||||
MenuModel("Tiefighter 13B", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
|
||||
MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"),
|
||||
MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"),
|
||||
MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"),
|
||||
|
@ -283,6 +284,7 @@ model_menu = {
|
|||
],
|
||||
'adventurelist': [
|
||||
MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"),
|
||||
MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
|
||||
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"),
|
||||
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"),
|
||||
MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"),
|
||||
|
@ -298,6 +300,7 @@ model_menu = {
|
|||
MenuFolder("Return to Main Menu", "mainmenu"),
|
||||
],
|
||||
'novellist': [
|
||||
MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
|
||||
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"),
|
||||
MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"),
|
||||
MenuModel("Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"),
|
||||
|
|
|
@ -35,11 +35,11 @@ dependencies:
|
|||
- flask-cors
|
||||
- Werkzeug==2.3.7
|
||||
- lupa==1.10
|
||||
- transformers[sentencepiece]==4.34.0
|
||||
- transformers[sentencepiece]==4.35.0
|
||||
- huggingface_hub==0.16.4
|
||||
- optimum[onnxruntime]==1.13.2
|
||||
- safetensors==0.3.3
|
||||
- accelerate==0.21.0
|
||||
- optimum[onnxruntime]==1.14.0
|
||||
- safetensors==0.4.0
|
||||
- accelerate==0.24.1
|
||||
- git+https://github.com/VE-FORBRYDERNE/mkultra
|
||||
- flask-session
|
||||
- ansi2html
|
||||
|
@ -53,10 +53,9 @@ dependencies:
|
|||
- git+https://github.com/0cc4m/hf_bleeding_edge/
|
||||
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
- auto_gptq==0.5.0
|
||||
- einops
|
||||
- peft==0.3.0
|
||||
- peft==0.6.0
|
||||
- scipy
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
|
|
|
@ -35,11 +35,11 @@ dependencies:
|
|||
- flask-cors
|
||||
- Werkzeug==2.3.7
|
||||
- lupa==1.10
|
||||
- transformers[sentencepiece]==4.34.0
|
||||
- transformers[sentencepiece]==4.35.0
|
||||
- huggingface_hub==0.16.4
|
||||
- optimum[onnxruntime,openvino,nncf,neural-compressor]==1.13.2
|
||||
- safetensors==0.3.3
|
||||
- accelerate==0.21.0
|
||||
- optimum[onnxruntime,openvino,nncf,neural-compressor]==1.14.0
|
||||
- safetensors==0.4.0
|
||||
- accelerate==0.24.1
|
||||
- git+https://github.com/VE-FORBRYDERNE/mkultra
|
||||
- flask-session
|
||||
- ansi2html
|
||||
|
@ -51,10 +51,9 @@ dependencies:
|
|||
- git+https://github.com/0cc4m/hf_bleeding_edge/
|
||||
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
- auto_gptq==0.5.0
|
||||
- einops
|
||||
- peft==0.3.0
|
||||
- peft==0.6.0
|
||||
- scipy
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
|
||||
- https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
|
||||
|
|
|
@ -31,11 +31,11 @@ dependencies:
|
|||
- flask-cors
|
||||
- Werkzeug==2.3.7
|
||||
- lupa==1.10
|
||||
- transformers[sentencepiece]==4.34.0
|
||||
- transformers[sentencepiece]==4.35.0
|
||||
- huggingface_hub==0.16.4
|
||||
- optimum[onnxruntime]==1.13.2
|
||||
- safetensors==0.3.3
|
||||
- accelerate==0.21.0
|
||||
- optimum[onnxruntime]==1.14.0
|
||||
- safetensors==0.4.0
|
||||
- accelerate==0.24.1
|
||||
- git+https://github.com/VE-FORBRYDERNE/mkultra
|
||||
- ansi2html
|
||||
- flask_compress
|
||||
|
@ -45,8 +45,8 @@ dependencies:
|
|||
- diffusers
|
||||
- git+https://github.com/0cc4m/hf_bleeding_edge/
|
||||
- einops
|
||||
- peft==0.3.0
|
||||
- peft==0.6.0
|
||||
- windows-curses; sys_platform == 'win32'
|
||||
- pynvml
|
||||
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp38-cp38-linux_x86_64.whl
|
||||
- auto_gptq==0.5.0
|
||||
- omegaconf
|
|
@ -13,6 +13,7 @@ import transformers
|
|||
from transformers import (
|
||||
GPT2Tokenizer,
|
||||
AutoTokenizer,
|
||||
LlamaTokenizer,
|
||||
)
|
||||
from modeling.stoppers import Stoppers
|
||||
from modeling.tokenizer import GenericTokenizer
|
||||
|
@ -251,9 +252,10 @@ class InferenceModel:
|
|||
location, use_fast=False, **std_kwargs
|
||||
),
|
||||
lambda: AutoTokenizer.from_pretrained(location, **std_kwargs),
|
||||
# Fallback to GPT2Tokenizer
|
||||
# Attempt more basic GPT2 Tokenizer
|
||||
lambda: GPT2Tokenizer.from_pretrained(location, **std_kwargs),
|
||||
lambda: GPT2Tokenizer.from_pretrained("gpt2", **std_kwargs),
|
||||
# Fallback to generic LLaMA Tokenizer
|
||||
lambda: LlamaTokenizer.from_pretrained("KoboldAI/llama2-tokenizer", use_fast=False, **std_kwargs),
|
||||
]
|
||||
|
||||
for i, try_get_tokenizer in enumerate(suppliers):
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
transformers[sentencepiece]==4.34.0
|
||||
transformers[sentencepiece]==4.35.0
|
||||
huggingface_hub==0.16.4
|
||||
optimum[onnxruntime]==1.13.2
|
||||
safetensors==0.3.3
|
||||
optimum[onnxruntime]==1.14.0
|
||||
safetensors==0.4.0
|
||||
Flask==2.3.3
|
||||
Flask-SocketIO==5.3.2
|
||||
Werkzeug==2.3.7
|
||||
|
@ -17,7 +17,7 @@ lupa==1.10
|
|||
markdown
|
||||
bleach==4.1.0
|
||||
protobuf
|
||||
accelerate==0.21.0
|
||||
accelerate==0.24.1
|
||||
flask-session==0.5.0
|
||||
marshmallow>=3.13
|
||||
apispec-webframeworks
|
||||
|
@ -40,12 +40,9 @@ pytest-metadata==2.0.4
|
|||
requests-mock==1.10.0
|
||||
git+https://github.com/0cc4m/hf_bleeding_edge/
|
||||
einops
|
||||
peft==0.3.0
|
||||
peft==0.6.0
|
||||
scipy
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
|
||||
auto-gptq==0.5.0
|
||||
windows-curses; sys_platform == 'win32'
|
||||
pynvml
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
|
||||
|
|
Loading…
Reference in New Issue