From 9e275de5d96c5ad94b86f344af55360633d33816 Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 8 Nov 2023 18:47:02 +0100 Subject: [PATCH] HF 4.35 --- aiserver.py | 3 +++ environments/huggingface.yml | 13 ++++++------- environments/ipex.yml | 13 ++++++------- environments/rocm.yml | 12 ++++++------ modeling/inference_model.py | 6 ++++-- requirements.txt | 15 ++++++--------- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/aiserver.py b/aiserver.py index 9f3e0805..a9207ba2 100644 --- a/aiserver.py +++ b/aiserver.py @@ -273,6 +273,7 @@ model_menu = { MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], 'instructlist': [ + MenuModel("Tiefighter 13B", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"), MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"), MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"), MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"), @@ -283,6 +284,7 @@ model_menu = { ], 'adventurelist': [ MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"), + MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"), MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"), MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"), MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"), @@ -298,6 +300,7 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'novellist': [ + MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"), MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"), MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"), MenuModel("Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"), diff --git a/environments/huggingface.yml b/environments/huggingface.yml index deead8e7..9ebfe28a 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -35,11 +35,11 @@ dependencies: - flask-cors - Werkzeug==2.3.7 - lupa==1.10 - - transformers[sentencepiece]==4.34.0 + - transformers[sentencepiece]==4.35.0 - huggingface_hub==0.16.4 - - optimum[onnxruntime]==1.13.2 - - safetensors==0.3.3 - - accelerate==0.21.0 + - optimum[onnxruntime]==1.14.0 + - safetensors==0.4.0 + - accelerate==0.24.1 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session - ansi2html @@ -53,10 +53,9 @@ dependencies: - git+https://github.com/0cc4m/hf_bleeding_edge/ - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' + - auto_gptq==0.5.0 - einops - - peft==0.3.0 + - peft==0.6.0 - scipy - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32' diff --git a/environments/ipex.yml b/environments/ipex.yml index c9794e48..0b08855c 100644 --- a/environments/ipex.yml +++ b/environments/ipex.yml @@ -35,11 +35,11 @@ dependencies: - flask-cors - Werkzeug==2.3.7 - lupa==1.10 - - transformers[sentencepiece]==4.34.0 + - transformers[sentencepiece]==4.35.0 - huggingface_hub==0.16.4 - - optimum[onnxruntime,openvino,nncf,neural-compressor]==1.13.2 - - safetensors==0.3.3 - - accelerate==0.21.0 + - optimum[onnxruntime,openvino,nncf,neural-compressor]==1.14.0 + - safetensors==0.4.0 + - accelerate==0.24.1 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session - ansi2html @@ -51,10 +51,9 @@ dependencies: - git+https://github.com/0cc4m/hf_bleeding_edge/ - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' + - auto_gptq==0.5.0 - einops - - peft==0.3.0 + - peft==0.6.0 - scipy - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32' diff --git a/environments/rocm.yml b/environments/rocm.yml index 2a6043ea..bb44adca 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -31,11 +31,11 @@ dependencies: - flask-cors - Werkzeug==2.3.7 - lupa==1.10 - - transformers[sentencepiece]==4.34.0 + - transformers[sentencepiece]==4.35.0 - huggingface_hub==0.16.4 - - optimum[onnxruntime]==1.13.2 - - safetensors==0.3.3 - - accelerate==0.21.0 + - optimum[onnxruntime]==1.14.0 + - safetensors==0.4.0 + - accelerate==0.24.1 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html - flask_compress @@ -45,8 +45,8 @@ dependencies: - diffusers - git+https://github.com/0cc4m/hf_bleeding_edge/ - einops - - peft==0.3.0 + - peft==0.6.0 - windows-curses; sys_platform == 'win32' - pynvml - - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp38-cp38-linux_x86_64.whl + - auto_gptq==0.5.0 - omegaconf \ No newline at end of file diff --git a/modeling/inference_model.py b/modeling/inference_model.py index 2bcb21a7..1af06675 100644 --- a/modeling/inference_model.py +++ b/modeling/inference_model.py @@ -13,6 +13,7 @@ import transformers from transformers import ( GPT2Tokenizer, AutoTokenizer, + LlamaTokenizer, ) from modeling.stoppers import Stoppers from modeling.tokenizer import GenericTokenizer @@ -251,9 +252,10 @@ class InferenceModel: location, use_fast=False, **std_kwargs ), lambda: AutoTokenizer.from_pretrained(location, **std_kwargs), - # Fallback to GPT2Tokenizer + # Attempt more basic GPT2 Tokenizer lambda: GPT2Tokenizer.from_pretrained(location, **std_kwargs), - lambda: GPT2Tokenizer.from_pretrained("gpt2", **std_kwargs), + # Fallback to generic LLaMA Tokenizer + lambda: LlamaTokenizer.from_pretrained("KoboldAI/llama2-tokenizer", use_fast=False, **std_kwargs), ] for i, try_get_tokenizer in enumerate(suppliers): diff --git a/requirements.txt b/requirements.txt index f668dd88..6776effb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -transformers[sentencepiece]==4.34.0 +transformers[sentencepiece]==4.35.0 huggingface_hub==0.16.4 -optimum[onnxruntime]==1.13.2 -safetensors==0.3.3 +optimum[onnxruntime]==1.14.0 +safetensors==0.4.0 Flask==2.3.3 Flask-SocketIO==5.3.2 Werkzeug==2.3.7 @@ -17,7 +17,7 @@ lupa==1.10 markdown bleach==4.1.0 protobuf -accelerate==0.21.0 +accelerate==0.24.1 flask-session==0.5.0 marshmallow>=3.13 apispec-webframeworks @@ -40,12 +40,9 @@ pytest-metadata==2.0.4 requests-mock==1.10.0 git+https://github.com/0cc4m/hf_bleeding_edge/ einops -peft==0.3.0 +peft==0.6.0 scipy -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8' +auto-gptq==0.5.0 windows-curses; sys_platform == 'win32' pynvml https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'