Automatic installation of the quant_cuda module during install_requirements

Kepler (K40+) and Maxwell support
2025-06-05 21:59:24 +02:00 · 2023-04-10 22:37:16 +02:00
parent 7efd314428
commit b628aec719
3 changed files with 8 additions and 1 deletion
--- a/install_requirements.bat
+++ b/install_requirements.bat
@@ -49,6 +49,8 @@ umamba.exe install --no-shortcuts -r B:\python\ -n base -f "%~dp0\environments\h
 REM Clean up micromamba's caches and the package directory after the environment install.
 umamba.exe -r B:\ clean -a -y
 rd B:\Python\pkgs /S /Q
 REM Activate the environment and install the quant_cuda wheel while the B: substitution
 REM still exists; the activate script is addressed through B:\, so it must run before
 REM the drive mapping is removed.
 call B:\python\condabin\activate
 REM Use /c rather than /k so the nested shell terminates when pip finishes and the
 REM remaining steps (unmap, pause, exit) actually run without manual intervention.
 cmd /c "pip install https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-win_amd64.whl"
 REM Remove the B: drive substitution only once nothing else needs it.
 subst B: /d
 pause
 exit
@@ -60,5 +62,7 @@ umamba.exe create -r miniconda3\ -n base
 REM Install the packages listed in environments\huggingface.yml into the base environment
 REM rooted at miniconda3, then clean caches and remove the package directory.
 umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
 umamba.exe clean -a -y
 rd miniconda3\Python\pkgs /S /Q
 REM Activate the freshly built environment so the pip below is resolved from it.
 call miniconda3\condabin\activate
 REM Use /c rather than /k so the nested shell terminates when pip finishes; with /k the
 REM script would sit at an interactive prompt and never reach pause/exit on its own.
 cmd /c "pip install https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-win_amd64.whl"
 pause
 exit
--- a/install_requirements.sh
+++ b/install_requirements.sh
@@ -5,6 +5,9 @@ wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -
 # Create the "koboldai" environment from environments/huggingface.yml under the "runtime" root prefix
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
 # Weird micromamba bug causes it to fail the first time, running it twice just to be safe, the second time is much faster
 bin/micromamba create -f environments/huggingface.yml -r runtime -n koboldai -y
 # Install the prebuilt quant_cuda module (used for 4-bit) with the pip inside the koboldai environment
 # NOTE(review): the wheel is tagged cp38 / linux_x86_64, so it presumably needs the environment's Python to be 3.8 - confirm against huggingface.yml
 bin/micromamba run -r runtime -n koboldai pip install https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/2023-04-10/quant_cuda-0.0.0-cp38-cp38-linux_x86_64.whl
 exit
 fi
 if [[ $1 = "rocm" ]]; then
--- a/repos/gptq
+++ b/repos/gptq