Added setting saving for exllama and exllamav2

Author: ebolam
Date: 2023-10-12 21:04:00 -04:00
parent 0688ba4fcd
commit d5dd24a330
2 changed files with 55 additions and 8 deletions
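
Both backends follow the same pattern: get_requested_parameters() starts from hard-coded defaults and overlays any values it finds in a per-model JSON file under settings/, while a new _save_settings() writes the current values back to that file. A minimal sketch of the load-and-merge half, assuming a hypothetical model name (the filename scheme mirrors the exllama code below):

import json, os

model_name = "TheBloke/some-model-GPTQ"  # hypothetical model id
settings_path = "settings/{}.exllama.model_backend.settings".format(
    model_name.replace("/", "_")  # slashes flattened so the file sits directly in settings/
)

# Hard-coded defaults, overridden key by key if a saved file exists.
saved_data = {"layers": [], "max_ctx": 2048, "compress_emb": 1, "ntk_alpha": 1}
if os.path.exists(settings_path):
    with open(settings_path, "r") as f:
        for key, value in json.load(f).items():
            saved_data[key] = value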

Changed file 1 of 2: exllama model backend

@@ -375,6 +375,12 @@ class model_backend(InferenceModel):
return tokenizer
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
saved_data = {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
for key in temp:
saved_data[key] = temp[key]
requested_parameters = []
gpu_count = torch.cuda.device_count()
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -401,7 +407,7 @@ class model_backend(InferenceModel):
"step": 1,
"check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)], "value": layer_count, 'check': "="},
"check_message": "The sum of assigned layers must equal {}".format(layer_count),
"default": [layer_count if i == 0 else 0],
"default": saved_data['layers'][i] if len(saved_data['layers']) > i else layer_count if i==0 else 0,
"tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
"menu_path": "Layers",
"extra_classes": "",
@@ -416,7 +422,7 @@ class model_backend(InferenceModel):
"min": 2048,
"max": 16384,
"step": 512,
"default": 2048,
"default": saved_data['max_ctx'],
"tooltip": "The maximum context size the model supports",
"menu_path": "Configuration",
"extra_classes": "",
@@ -431,7 +437,7 @@ class model_backend(InferenceModel):
"min": 1,
"max": 8,
"step": 0.25,
"default": 1,
"default": saved_data['compress_emb'],
"tooltip": "If the model requires compressed embeddings, set them here",
"menu_path": "Configuration",
"extra_classes": "",
@@ -446,7 +452,7 @@ class model_backend(InferenceModel):
"min": 1,
"max": 32,
"step": 0.25,
"default": 1,
"default": saved_data['ntk_alpha'],
"tooltip": "NTK alpha value",
"menu_path": "Configuration",
"extra_classes": "",
@@ -491,3 +497,21 @@ class model_backend(InferenceModel):
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
self.path = parameters['path'] if 'path' in parameters else None
def _save_settings(self):
with open(
"settings/{}.exllama.model_backend.settings".format(
self.model_name.replace("/", "_")
),
"w",
) as f:
json.dump(
{
"layers": self.layers if "layers" in vars(self) else [],
"max_ctx": self.model_config.max_seq_len,
"compress_emb": self.model_config.compress_pos_emb,
"ntk_alpha": self.model_config.alpha_value
},
f,
indent="",
)
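
For reference, _save_settings serializes one flat object, and because indent is the empty string each value lands on its own line without indentation. A small sketch of the resulting file contents, with hypothetical values:

import json

# The same shape _save_settings writes for the exllama backend (values hypothetical).
settings = {"layers": [30, 10], "max_ctx": 4096, "compress_emb": 1, "ntk_alpha": 1}
print(json.dumps(settings, indent=""))
# {
# "layers": [
# 30,
# 10
# ],
# "max_ctx": 4096,
# "compress_emb": 1,
# "ntk_alpha": 1
# }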

Changed file 2 of 2: exllamav2 model backend

@@ -351,6 +351,12 @@ class model_backend(InferenceModel):
return tokenizer
def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
saved_data = {'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
for key in temp:
saved_data[key] = temp[key]
requested_parameters = []
gpu_count = torch.cuda.device_count()
layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -363,7 +369,7 @@ class model_backend(InferenceModel):
"min": 2048,
"max": 16384,
"step": 512,
"default": 2048,
"default": saved_data['max_ctx'],
"tooltip": "The maximum context size the model supports",
"menu_path": "Configuration",
"extra_classes": "",
@@ -378,7 +384,7 @@ class model_backend(InferenceModel):
"min": 1,
"max": 8,
"step": 0.25,
"default": 1,
"default": saved_data['compress_emb'],
"tooltip": "If the model requires compressed embeddings, set them here",
"menu_path": "Configuration",
"extra_classes": "",
@@ -393,7 +399,7 @@ class model_backend(InferenceModel):
"min": 1,
"max": 32,
"step": 0.25,
"default": 1,
"default": saved_data['ntk_alpha'],
"tooltip": "NTK alpha value",
"menu_path": "Configuration",
"extra_classes": "",
@@ -420,4 +426,21 @@ class model_backend(InferenceModel):
self.model_config.sdp_thd = 0
self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
self.path = parameters['path'] if 'path' in parameters else None
def _save_settings(self):
with open(
"settings/{}.exllamav2.model_backend.settings".format(
self.model_name.replace("/", "_")
),
"w",
) as f:
json.dump(
{
"max_ctx": self.model_config.max_seq_len,
"compress_emb": self.model_config.compress_pos_emb,
"ntk_alpha": self.model_config.alpha_value
},
f,
indent="",
)
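
The exllamav2 variant stores the same configuration values minus the per-GPU layer split, under its own filename. A short round-trip sketch under the same assumptions (hypothetical model name and values; max_seq_len, compress_pos_emb and alpha_value are the attributes read from self.model_config above):

import json, os

model_name = "TheBloke/some-model-GPTQ"  # hypothetical model id
path = "settings/{}.exllamav2.model_backend.settings".format(model_name.replace("/", "_"))

# What _save_settings would write (values hypothetical).
os.makedirs("settings", exist_ok=True)
with open(path, "w") as f:
    json.dump({"max_ctx": 4096, "compress_emb": 1, "ntk_alpha": 2.6}, f, indent="")

# What get_requested_parameters would then pick up on the next model-selection pass.
saved_data = {"max_ctx": 2048, "compress_emb": 1, "ntk_alpha": 1}
if os.path.exists(path):
    with open(path, "r") as f:
        saved_data.update(json.load(f))
assert saved_data["max_ctx"] == 4096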