From e90903946dbdc76768753128fa6efd42b9ae0cec Mon Sep 17 00:00:00 2001
From: Henk
Date: Sun, 13 Aug 2023 17:36:17 +0200
Subject: [PATCH] AutoGPTQ updates

---
 environments/huggingface.yml                 |  4 +--
 .../inference_models/gptq_hf_torch/class.py  | 27 ++++++++++++++++++-
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index 8eecd344..00d27984 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -49,8 +49,8 @@ dependencies:
     - git+https://github.com/0cc4m/hf_bleeding_edge/
    - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
    - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - einops
     - peft==0.3.0
     - scipy
diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py
index 804ca682..b48f1d56 100644
--- a/modeling/inference_models/gptq_hf_torch/class.py
+++ b/modeling/inference_models/gptq_hf_torch/class.py
@@ -153,6 +153,32 @@ class model_backend(HFTorchInferenceModel):
         gptq_model, _, _, _, _ = load_model_gptq_settings(model_path)
         return bool(gptq_model)
 
+    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if model_name != 'customhuggingface' or "custom_model_name" in parameters:
+            if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+                with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                    temp = json.load(f)
+            else:
+                temp = {}
+            requested_parameters.append({
+                "uitype": "dropdown",
+                "unit": "text",
+                "label": "Implementation",
+                "id": "implementation",
+                "default": temp['implementation'] if 'implementation' in temp else 'occam',
+                "tooltip": "Which GPTQ provider to use?",
+                "menu_path": "Layers",
+                "children": [{'text': 'Occam GPTQ', 'value': 'occam'}, {'text': 'AutoGPTQ', 'value': 'AutoGPTQ'}],
+                "extra_classes": "",
+                "refresh_model_inputs": False
+            })
+        return requested_parameters
+
+    def set_input_parameters(self, parameters):
+        super().set_input_parameters(parameters)
+        self.implementation = parameters['implementation'] if 'implementation' in parameters else "occam"
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         try:
             from hf_bleeding_edge import AutoModelForCausalLM
@@ -169,7 +195,6 @@ class model_backend(HFTorchInferenceModel):
         self.init_model_config()
 
         self.lazy_load = True
-        self.implementation = "occam"
 
         gpulayers = self.breakmodel_config.gpu_blocks
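
Reviewer note, not part of the patch: the hard-coded self.implementation = "occam" in
_load() is replaced by a user-facing dropdown whose default round-trips through the
per-model settings file. Below is a minimal, self-contained sketch of that default
resolution, mirroring the path format and fallback logic in get_requested_parameters()
above; the model name is a hypothetical placeholder, and how the settings file actually
gets written is outside this patch.

# Sketch only (not part of the patch): how the "Implementation" dropdown
# default resolves from the saved backend settings file.
import json
import os

model_name = "SomeUser/some-4bit-model"  # hypothetical model id
settings_path = "settings/{}.generic_hf_torch.model_backend.settings".format(
    model_name.replace("/", "_")
)

# Simulate a settings file saved by a previous session (persistence itself
# happens elsewhere in the backend, not in this patch).
os.makedirs("settings", exist_ok=True)
with open(settings_path, "w") as f:
    json.dump({"implementation": "AutoGPTQ"}, f)

# Same default resolution the patch uses when building the dropdown:
# use the saved value if present, otherwise fall back to 'occam'.
if os.path.exists(settings_path):
    with open(settings_path, "r") as f:
        temp = json.load(f)
else:
    temp = {}

default_implementation = temp['implementation'] if 'implementation' in temp else 'occam'
print(default_implementation)  # -> AutoGPTQ; prints 'occam' when no file exists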