AutoGPTQ updates
@@ -49,8 +49,8 @@ dependencies:
     - git+https://github.com/0cc4m/hf_bleeding_edge/
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
     - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
-    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
+    - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
     - einops
     - peft==0.3.0
     - scipy
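The dependency change above bumps the pinned AutoGPTQ wheels from 0.4.0 to 0.4.1 (CUDA 11.8, Python 3.8) on both Linux and Windows. A minimal sketch, assuming the wheel registers the distribution name auto_gptq (as in the wheel filename), for confirming which version ended up in the rebuilt environment:

# Minimal sketch: report the installed AutoGPTQ version after rebuilding the environment.
# Assumes the distribution name is "auto_gptq", matching the wheel filename.
from importlib.metadata import version, PackageNotFoundError

try:
    print("auto_gptq:", version("auto_gptq"))  # expected to report 0.4.1+cu118 after this update
except PackageNotFoundError:
    print("auto_gptq is not installed in this environment")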
@@ -153,6 +153,32 @@ class model_backend(HFTorchInferenceModel):
         gptq_model, _, _, _, _ = load_model_gptq_settings(model_path)
         return bool(gptq_model)

+    def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
+        if model_name != 'customhuggingface' or "custom_model_name" in parameters:
+            if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+                with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                    temp = json.load(f)
+            else:
+                temp = {}
+            requested_parameters.append({
+                "uitype": "dropdown",
+                "unit": "text",
+                "label": "Implementation",
+                "id": "implementation",
+                "default": temp['implementation'] if 'implementation' in temp else 'occam',
+                "tooltip": "Which GPTQ provider to use?",
+                "menu_path": "Layers",
+                "children": [{'text': 'Occam GPTQ', 'value': 'occam'}, {'text': 'AutoGPTQ', 'value': 'AutoGPTQ'}],
+                "extra_classes": "",
+                "refresh_model_inputs": False
+            })
+        return requested_parameters
+
+    def set_input_parameters(self, parameters):
+        super().set_input_parameters(parameters)
+        self.implementation = parameters['implementation'] if 'implementation' in parameters else "occam"
+
     def _load(self, save_model: bool, initial_load: bool) -> None:
         try:
             from hf_bleeding_edge import AutoModelForCausalLM
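The hunk above adds an "Implementation" dropdown (Occam GPTQ vs. AutoGPTQ) to the backend's requested parameters and stores the selection on self.implementation in set_input_parameters, defaulting to "occam". A minimal sketch, separate from the repository code, of the default-selection logic the dropdown uses; the model name below is a hypothetical placeholder:

# Illustrative sketch (not repository code): the dropdown's default comes from a
# previously saved per-model settings file when one exists, otherwise 'occam'.
import json
import os

model_name = "some-org/some-model"  # hypothetical placeholder
settings_path = "settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))

if os.path.exists(settings_path):
    with open(settings_path, "r") as f:
        temp = json.load(f)
else:
    temp = {}

default_implementation = temp['implementation'] if 'implementation' in temp else 'occam'
print(default_implementation)  # what the "Implementation" dropdown pre-selects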
@@ -169,7 +195,6 @@ class model_backend(HFTorchInferenceModel):
         self.init_model_config()

         self.lazy_load = True
-        self.implementation = "occam"

         gpulayers = self.breakmodel_config.gpu_blocks

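The last hunk drops the hard-coded self.implementation = "occam" from _load, since the value is now supplied by the new dropdown. For reference, a minimal sketch of loading a quantized checkpoint through the AutoGPTQ 0.4.x API that the "AutoGPTQ" option relies on; the checkpoint path and device are placeholders, and this is not the backend's actual loading code:

# Illustrative sketch (not repository code): loading a GPTQ-quantized checkpoint with auto_gptq 0.4.x.
from auto_gptq import AutoGPTQForCausalLM

model = AutoGPTQForCausalLM.from_quantized(
    "/path/to/quantized-model",  # placeholder checkpoint directory
    device="cuda:0",             # placeholder device
    use_safetensors=True,
)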