From 9e275de5d96c5ad94b86f344af55360633d33816 Mon Sep 17 00:00:00 2001
From: Henk <henk@henk.tech>
Date: Wed, 8 Nov 2023 18:47:02 +0100
Subject: [PATCH] HF 4.35: bump Transformers stack, add Tiefighter 13B, fall back to LLaMA tokenizer

---
 aiserver.py                  |  3 +++
 environments/huggingface.yml | 13 ++++++-------
 environments/ipex.yml        | 13 ++++++-------
 environments/rocm.yml        | 12 ++++++------
 modeling/inference_model.py  |  6 ++++--
 requirements.txt             | 15 ++++++---------
 6 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/aiserver.py b/aiserver.py
index 9f3e0805..a9207ba2 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -273,6 +273,7 @@ model_menu = {
         MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
     ],
     'instructlist': [
+        MenuModel("Tiefighter 13B", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),   
         MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"),        
         MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"),
         MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"),
@@ -283,6 +284,7 @@ model_menu = {
         ],
     'adventurelist': [
         MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"),
+        MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
         MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"),
         MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"),
         MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"),
@@ -298,6 +300,7 @@ model_menu = {
         MenuFolder("Return to Main Menu", "mainmenu"),
         ],
     'novellist': [
+        MenuModel("Tiefighter 13B (Instruct Hybrid)", "KoboldAI/LLaMA2-13B-Tiefighter", "12GB*"),
         MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"),
         MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"),
         MenuModel("Janeway FSD 13B", "KoboldAI/fairseq-dense-13B-Janeway", "32GB"),
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index deead8e7..9ebfe28a 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -35,11 +35,11 @@ dependencies:
     - flask-cors
     - Werkzeug==2.3.7
     - lupa==1.10
-    - transformers[sentencepiece]==4.34.0
+    - transformers[sentencepiece]==4.35.0
     - huggingface_hub==0.16.4
-    - optimum[onnxruntime]==1.13.2
-    - safetensors==0.3.3
-    - accelerate==0.21.0
+    - optimum[onnxruntime]==1.14.0
+    - safetensors==0.4.0
+    - accelerate==0.24.1
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - flask-session
     - ansi2html
@@ -53,10 +53,9 @@ dependencies:
     - git+https://github.com/0cc4m/hf_bleeding_edge/
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - auto_gptq==0.5.0
     - einops
-    - peft==0.3.0
+    - peft==0.6.0
     - scipy
     - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
diff --git a/environments/ipex.yml b/environments/ipex.yml
index c9794e48..0b08855c 100644
--- a/environments/ipex.yml
+++ b/environments/ipex.yml
@@ -35,11 +35,11 @@ dependencies:
     - flask-cors
     - Werkzeug==2.3.7
     - lupa==1.10
-    - transformers[sentencepiece]==4.34.0
+    - transformers[sentencepiece]==4.35.0
     - huggingface_hub==0.16.4
-    - optimum[onnxruntime,openvino,nncf,neural-compressor]==1.13.2
-    - safetensors==0.3.3
-    - accelerate==0.21.0
+    - optimum[onnxruntime,openvino,nncf,neural-compressor]==1.14.0
+    - safetensors==0.4.0
+    - accelerate==0.24.1
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - flask-session
     - ansi2html
@@ -51,10 +51,9 @@ dependencies:
     - git+https://github.com/0cc4m/hf_bleeding_edge/
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - auto_gptq==0.5.0
     - einops
-    - peft==0.3.0
+    - peft==0.6.0
     - scipy
     - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/exllama/releases/download/0.0.7/exllama-0.0.7-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 2a6043ea..bb44adca 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -31,11 +31,11 @@ dependencies:
     - flask-cors
     - Werkzeug==2.3.7
     - lupa==1.10
-    - transformers[sentencepiece]==4.34.0
+    - transformers[sentencepiece]==4.35.0
     - huggingface_hub==0.16.4
-    - optimum[onnxruntime]==1.13.2
-    - safetensors==0.3.3
-    - accelerate==0.21.0
+    - optimum[onnxruntime]==1.14.0
+    - safetensors==0.4.0
+    - accelerate==0.24.1
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - ansi2html
     - flask_compress
@@ -45,8 +45,8 @@ dependencies:
     - diffusers
     - git+https://github.com/0cc4m/hf_bleeding_edge/
     - einops
-    - peft==0.3.0
+    - peft==0.6.0
     - windows-curses; sys_platform == 'win32'
     - pynvml
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp38-cp38-linux_x86_64.whl
+    - auto_gptq==0.5.0
     - omegaconf
\ No newline at end of file
diff --git a/modeling/inference_model.py b/modeling/inference_model.py
index 2bcb21a7..1af06675 100644
--- a/modeling/inference_model.py
+++ b/modeling/inference_model.py
@@ -13,6 +13,7 @@ import transformers
 from transformers import (
     GPT2Tokenizer,
     AutoTokenizer,
+    LlamaTokenizer,
 )
 from modeling.stoppers import Stoppers
 from modeling.tokenizer import GenericTokenizer
@@ -251,9 +252,10 @@ class InferenceModel:
                 location, use_fast=False, **std_kwargs
             ),
             lambda: AutoTokenizer.from_pretrained(location, **std_kwargs),
-            # Fallback to GPT2Tokenizer
+            # Try the plain GPT2Tokenizer loaded from the same location
             lambda: GPT2Tokenizer.from_pretrained(location, **std_kwargs),
-            lambda: GPT2Tokenizer.from_pretrained("gpt2", **std_kwargs),
+            # Last resort: fall back to the generic LLaMA tokenizer (KoboldAI/llama2-tokenizer)
+            lambda: LlamaTokenizer.from_pretrained("KoboldAI/llama2-tokenizer", use_fast=False, **std_kwargs),
         ]
 
         for i, try_get_tokenizer in enumerate(suppliers):
diff --git a/requirements.txt b/requirements.txt
index f668dd88..6776effb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
-transformers[sentencepiece]==4.34.0
+transformers[sentencepiece]==4.35.0
 huggingface_hub==0.16.4
-optimum[onnxruntime]==1.13.2
-safetensors==0.3.3
+optimum[onnxruntime]==1.14.0
+safetensors==0.4.0
 Flask==2.3.3
 Flask-SocketIO==5.3.2
 Werkzeug==2.3.7
@@ -17,7 +17,7 @@ lupa==1.10
 markdown
 bleach==4.1.0
 protobuf
-accelerate==0.21.0
+accelerate==0.24.1
 flask-session==0.5.0
 marshmallow>=3.13
 apispec-webframeworks
@@ -40,12 +40,9 @@ pytest-metadata==2.0.4
 requests-mock==1.10.0
 git+https://github.com/0cc4m/hf_bleeding_edge/
 einops
-peft==0.3.0
+peft==0.6.0
 scipy
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
+auto-gptq==0.5.0
 windows-curses; sys_platform == 'win32'
 pynvml
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'