4-bit as Default

Author: Henk
Date: 2023-07-23 23:08:11 +02:00
parent 89637ae9d7
commit 0f913275a9
2 changed files with 1 addition and 3 deletions

@@ -52,5 +52,3 @@ dependencies:
     - einops
     - peft==0.3.0
     - scipy
-    - --find-links=https://0cc4m.github.io/exllama/exllama-whl-links.html
-    - exllama==0.0.6

@@ -61,7 +61,7 @@ class model_backend(HFTorchInferenceModel):
            "default": temp['quantization'] if 'quantization' in temp else 'none',
            "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
            "menu_path": "Layers",
-           "children": [{'text': 'None', 'value':'none'},{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}],
+           "children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}],
            "extra_classes": "",
            "refresh_model_inputs": False
        })
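
For context, the tooltip above indicates that this menu value selects BnB's (bitsandbytes) 4-bit or 8-bit mode when the model is loaded. The snippet below is a minimal, hypothetical sketch of how a value of '4bit', '8bit', or '16-bit' could be translated into a transformers BitsAndBytesConfig; the function name, the model_name argument, and the float16 fallback for '16-bit' are illustrative assumptions, not the repository's actual loading code.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_with_quantization(model_name, quantization="4bit"):
    # Map the menu value onto a bitsandbytes quantization config (sketch only).
    if quantization == "4bit":
        config = BitsAndBytesConfig(load_in_4bit=True,
                                    bnb_4bit_compute_dtype=torch.float16)
        return AutoModelForCausalLM.from_pretrained(model_name,
                                                    quantization_config=config)
    if quantization == "8bit":
        config = BitsAndBytesConfig(load_in_8bit=True)
        return AutoModelForCausalLM.from_pretrained(model_name,
                                                    quantization_config=config)
    # '16-bit': no bitsandbytes quantization; load in half precision instead
    # (assumed behaviour, not taken from the repository).
    return AutoModelForCausalLM.from_pretrained(model_name,
                                                torch_dtype=torch.float16)

With this change the 'None' entry is dropped in favour of an explicit '16-bit' entry, so every menu choice names a precision and 4-bit becomes the first option listed.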