From d5dd24a3303c6f85360eff5a771d8d2c102bf779 Mon Sep 17 00:00:00 2001
From: ebolam
Date: Thu, 12 Oct 2023 21:04:00 -0400
Subject: [PATCH] Added setting saving for exllama and exllamav2

---
 modeling/inference_models/exllama/class.py   | 32 +++++++++++++++++---
 modeling/inference_models/exllamav2/class.py | 31 ++++++++++++++++---
 2 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index 569f6d61..b52c2d65 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -375,6 +375,12 @@ class model_backend(InferenceModel):
         return tokenizer
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        saved_data = {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
+        if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+            with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                temp = json.load(f)
+                for key in temp:
+                    saved_data[key] = temp[key]
         requested_parameters = []
         gpu_count = torch.cuda.device_count()
         layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -401,7 +407,7 @@ class model_backend(InferenceModel):
                 "step": 1,
                 "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)], "value": layer_count, 'check': "="},
                 "check_message": "The sum of assigned layers must equal {}".format(layer_count),
-                "default": [layer_count if i == 0 else 0],
+                "default": saved_data['layers'][i] if len(saved_data['layers']) > i else layer_count if i==0 else 0,
                 "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
                 "menu_path": "Layers",
                 "extra_classes": "",
@@ -416,7 +422,7 @@
                 "min": 2048,
                 "max": 16384,
                 "step": 512,
-                "default": 2048,
+                "default": saved_data['max_ctx'],
                 "tooltip": "The maximum context size the model supports",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -431,7 +437,7 @@
                 "min": 1,
                 "max": 8,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['compress_emb'],
                 "tooltip": "If the model requires compressed embeddings, set them here",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -446,7 +452,7 @@
                 "min": 1,
                 "max": 32,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['ntk_alpha'],
                 "tooltip": "NTK alpha value",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -491,3 +497,21 @@ class model_backend(InferenceModel):
 
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
         self.path = parameters['path'] if 'path' in parameters else None
+
+    def _save_settings(self):
+        with open(
+            "settings/{}.exllama.model_backend.settings".format(
+                self.model_name.replace("/", "_")
+            ),
+            "w",
+        ) as f:
+            json.dump(
+                {
+                    "layers": self.layers if "layers" in vars(self) else [],
+                    "max_ctx": self.model_config.max_seq_len,
+                    "compress_emb": self.model_config.compress_pos_emb,
+                    "ntk_alpha": self.model_config.alpha_value
+                },
+                f,
+                indent="",
+            )
\ No newline at end of file
diff --git a/modeling/inference_models/exllamav2/class.py b/modeling/inference_models/exllamav2/class.py
index dd97e83f..7757202f 100644
--- a/modeling/inference_models/exllamav2/class.py
+++ b/modeling/inference_models/exllamav2/class.py
@@ -351,6 +351,12 @@ class model_backend(InferenceModel):
         return tokenizer
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        saved_data = {'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
+        if os.path.exists("settings/{}.exllamav2.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+            with open("settings/{}.exllamav2.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                temp = json.load(f)
+                for key in temp:
+                    saved_data[key] = temp[key]
         requested_parameters = []
         gpu_count = torch.cuda.device_count()
         layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -363,7 +369,7 @@
                 "min": 2048,
                 "max": 16384,
                 "step": 512,
-                "default": 2048,
+                "default": saved_data['max_ctx'],
                 "tooltip": "The maximum context size the model supports",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -378,7 +384,7 @@
                 "min": 1,
                 "max": 8,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['compress_emb'],
                 "tooltip": "If the model requires compressed embeddings, set them here",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -393,7 +399,7 @@
                 "min": 1,
                 "max": 32,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['ntk_alpha'],
                 "tooltip": "NTK alpha value",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -420,4 +426,21 @@
         self.model_config.sdp_thd = 0
 
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
-        self.path = parameters['path'] if 'path' in parameters else None
\ No newline at end of file
+        self.path = parameters['path'] if 'path' in parameters else None
+
+    def _save_settings(self):
+        with open(
+            "settings/{}.exllamav2.model_backend.settings".format(
+                self.model_name.replace("/", "_")
+            ),
+            "w",
+        ) as f:
+            json.dump(
+                {
+                    "max_ctx": self.model_config.max_seq_len,
+                    "compress_emb": self.model_config.compress_pos_emb,
+                    "ntk_alpha": self.model_config.alpha_value
+                },
+                f,
+                indent="",
+            )
\ No newline at end of file
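
Both backends follow the same pattern: get_requested_parameters overlays any previously saved values on top of hard-coded defaults, and _save_settings dumps the live config back to settings/<model>.<backend>.model_backend.settings. Below is a minimal standalone sketch of that round trip; the file name and the values passed in are illustrative stand-ins only, since the real code derives the path from the model name and pulls the values from self.model_config and self.layers.

    import json
    import os

    # Defaults mirror the hard-coded values in get_requested_parameters.
    DEFAULTS = {"layers": [], "max_ctx": 2048, "compress_emb": 1, "ntk_alpha": 1}

    # Illustrative path only; the patch builds it from the model name and backend id.
    SETTINGS_PATH = "settings/example_model.exllama.model_backend.settings"

    def save_settings(path, layers, max_ctx, compress_emb, ntk_alpha):
        # Counterpart of _save_settings: write the current values out as JSON.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, "w") as f:
            json.dump(
                {"layers": layers, "max_ctx": max_ctx,
                 "compress_emb": compress_emb, "ntk_alpha": ntk_alpha},
                f,
                indent="",
            )

    def load_settings(path):
        # Counterpart of the load block: start from the defaults and overlay
        # whatever keys the saved file happens to contain.
        saved = dict(DEFAULTS)
        if os.path.exists(path):
            with open(path, "r") as f:
                for key, value in json.load(f).items():
                    saved[key] = value
        return saved

    if __name__ == "__main__":
        save_settings(SETTINGS_PATH, layers=[32, 0], max_ctx=4096, compress_emb=1, ntk_alpha=1)
        print(load_settings(SETTINGS_PATH))

Because missing keys simply fall back to the defaults, an older settings file written before a given key existed still loads cleanly and only overrides the values it actually contains.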