From e90903946dbdc76768753128fa6efd42b9ae0cec Mon Sep 17 00:00:00 2001
From: Henk
Date: Sun, 13 Aug 2023 17:36:17 +0200
Subject: [PATCH] AutoGPTQ updates

---
 environments/huggingface.yml                 |  4 +--
 .../inference_models/gptq_hf_torch/class.py  | 27 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 8eecd344..00d27984 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -49,8 +49,8 @@ dependencies:
     - git+https://github.com/0cc4m/hf_bleeding_edge/
    - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
    - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - einops
     - peft==0.3.0
     - scipy
diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py
index 804ca682..b48f1d56 100644
--- a/modeling/inference_models/gptq_hf_torch/class.py
+++ b/modeling/inference_models/gptq_hf_torch/class.py
@@ -153,6 +153,32 @@ class model_backend(HFTorchInferenceModel):
         gptq_model, _, _, _, _ = load_model_gptq_settings(model_path)
         return bool(gptq_model)
 
+    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if model_name != 'customhuggingface' or "custom_model_name" in parameters:
+            if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+                with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                    temp = json.load(f)
+            else:
+                temp = {}
+            requested_parameters.append({
+                "uitype": "dropdown",
+                "unit": "text",
+                "label": "Implementation",
+                "id": "implementation",
+                "default": temp['implementation'] if 'implementation' in temp else 'occam',
+                "tooltip": "Which GPTQ provider to use?",
+                "menu_path": "Layers",
+                "children": [{'text': 'Occam GPTQ', 'value': 'occam'}, {'text': 'AutoGPTQ', 'value': 'AutoGPTQ'}],
+                "extra_classes": "",
+                "refresh_model_inputs": False
+            })
+        return requested_parameters
+
+    def set_input_parameters(self, parameters):
+        super().set_input_parameters(parameters)
+        self.implementation = parameters['implementation'] if 'implementation' in parameters else "occam"
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         try:
             from hf_bleeding_edge import AutoModelForCausalLM
@@ -169,7 +195,6 @@ class model_backend(HFTorchInferenceModel):
         self.init_model_config()
 
         self.lazy_load = True
-        self.implementation = "occam"
 
         gpulayers = self.breakmodel_config.gpu_blocks
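
Reviewer note, not part of the patch: the hard-coded self.implementation = "occam" in
_load() is replaced by a user-facing dropdown whose default round-trips through the
per-model settings file. Below is a minimal, self-contained sketch of that default
resolution, mirroring the path format and fallback logic in get_requested_parameters()
above; the model name is a hypothetical placeholder, and how the settings file actually
gets written is outside this patch.

# Sketch only (not part of the patch): how the "Implementation" dropdown
# default resolves from the saved backend settings file.
import json
import os

model_name = "SomeUser/some-4bit-model"  # hypothetical model id
settings_path = "settings/{}.generic_hf_torch.model_backend.settings".format(
    model_name.replace("/", "_")
)

# Simulate a settings file saved by a previous session (persistence itself
# happens elsewhere in the backend, not in this patch).
os.makedirs("settings", exist_ok=True)
with open(settings_path, "w") as f:
    json.dump({"implementation": "AutoGPTQ"}, f)

# Same default resolution the patch uses when building the dropdown:
# use the saved value if present, otherwise fall back to 'occam'.
if os.path.exists(settings_path):
    with open(settings_path, "r") as f:
        temp = json.load(f)
else:
    temp = {}

default_implementation = temp['implementation'] if 'implementation' in temp else 'occam'
print(default_implementation)  # -> AutoGPTQ; prints 'occam' when no file exists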