Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Added setting saving for exllama and exllamav2
exllama backend:

@@ -375,6 +375,12 @@ class model_backend(InferenceModel):
         return tokenizer
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        saved_data = {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
+        if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+            with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                temp = json.load(f)
+                for key in temp:
+                    saved_data[key] = temp[key]
         requested_parameters = []
         gpu_count = torch.cuda.device_count()
         layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
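The block added above seeds the UI defaults from a per-model JSON file before building requested_parameters. A minimal standalone sketch of that load-and-merge pattern; the helper name and the model id below are illustrative, not part of the commit:

    import json
    import os

    def load_backend_settings(model_name, defaults):
        # Same path scheme as the diff: slashes in the model id become underscores.
        path = "settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))
        if os.path.exists(path):
            with open(path, "r") as f:
                saved = json.load(f)
            # Saved keys override the shipped defaults, mirroring the
            # key-by-key copy loop in the diff.
            defaults.update(saved)
        return defaults

    settings = load_backend_settings(
        "some-org/some-model",  # hypothetical model id
        {'layers': [], 'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1},
    )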
@@ -401,7 +407,7 @@ class model_backend(InferenceModel):
                 "step": 1,
                 "check": {"sum": ["{}_Layers".format(i) for i in range(gpu_count)], "value": layer_count, 'check': "="},
                 "check_message": "The sum of assigned layers must equal {}".format(layer_count),
-                "default": [layer_count if i == 0 else 0],
+                "default": saved_data['layers'][i] if len(saved_data['layers']) > i else layer_count if i==0 else 0,
                 "tooltip": "The number of layers to put on {}.".format(torch.cuda.get_device_name(i)),
                 "menu_path": "Layers",
                 "extra_classes": "",
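The "check" field above encodes a constraint the UI enforces: the per-GPU layer sliders (one {}_Layers entry per device) must sum to the model's total layer count. A hedged sketch of that validation in isolation, with an illustrative function name:

    def layers_valid(assigned_layers, layer_count):
        # assigned_layers: one entry per GPU, e.g. [20, 12] across two devices
        return sum(assigned_layers) == layer_count

    assert layers_valid([32], 32)          # one GPU holds all 32 layers
    assert not layers_valid([20, 10], 32)  # two layers left unassigned

The new default line feeds into this: it restores each GPU's saved split when one exists, and otherwise falls back to putting every layer on the first GPU.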
@@ -416,7 +422,7 @@ class model_backend(InferenceModel):
                 "min": 2048,
                 "max": 16384,
                 "step": 512,
-                "default": 2048,
+                "default": saved_data['max_ctx'],
                 "tooltip": "The maximum context size the model supports",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -431,7 +437,7 @@ class model_backend(InferenceModel):
                 "min": 1,
                 "max": 8,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['compress_emb'],
                 "tooltip": "If the model requires compressed embeddings, set them here",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -446,7 +452,7 @@ class model_backend(InferenceModel):
                 "min": 1,
                 "max": 32,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['ntk_alpha'],
                 "tooltip": "NTK alpha value",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -491,3 +497,21 @@ class model_backend(InferenceModel):
 
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
         self.path = parameters['path'] if 'path' in parameters else None
+
+    def _save_settings(self):
+        with open(
+            "settings/{}.exllama.model_backend.settings".format(
+                self.model_name.replace("/", "_")
+            ),
+            "w",
+        ) as f:
+            json.dump(
+                {
+                    "layers": self.layers if "layers" in vars(self) else [],
+                    "max_ctx": self.model_config.max_seq_len,
+                    "compress_emb": self.model_config.compress_pos_emb,
+                    "ntk_alpha": self.model_config.alpha_value
+                },
+                f,
+                indent="",
+            )
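Note that json.dump is called with indent="", which puts each entry on its own line but adds no leading indentation (lists are expanded the same way). Assuming a hypothetical two-GPU split, the written settings file would look like:

    {
    "layers": [
    20,
    12
    ],
    "max_ctx": 4096,
    "compress_emb": 1,
    "ntk_alpha": 1
    }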
exllamav2 backend:

@@ -351,6 +351,12 @@ class model_backend(InferenceModel):
         return tokenizer
 
     def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
+        saved_data = {'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}
+        if os.path.exists("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
+            with open("settings/{}.exllama.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
+                temp = json.load(f)
+                for key in temp:
+                    saved_data[key] = temp[key]
         requested_parameters = []
         gpu_count = torch.cuda.device_count()
         layer_count = self.model_config["n_layer"] if isinstance(self.model_config, dict) else self.model_config.num_layers if hasattr(self.model_config, "num_layers") else self.model_config.n_layer if hasattr(self.model_config, "n_layer") else self.model_config.num_hidden_layers if hasattr(self.model_config, 'num_hidden_layers') else None
@@ -363,7 +369,7 @@ class model_backend(InferenceModel):
                 "min": 2048,
                 "max": 16384,
                 "step": 512,
-                "default": 2048,
+                "default": saved_data['max_ctx'],
                 "tooltip": "The maximum context size the model supports",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -378,7 +384,7 @@ class model_backend(InferenceModel):
                 "min": 1,
                 "max": 8,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['compress_emb'],
                 "tooltip": "If the model requires compressed embeddings, set them here",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -393,7 +399,7 @@ class model_backend(InferenceModel):
                 "min": 1,
                 "max": 32,
                 "step": 0.25,
-                "default": 1,
+                "default": saved_data['ntk_alpha'],
                 "tooltip": "NTK alpha value",
                 "menu_path": "Configuration",
                 "extra_classes": "",
@@ -420,4 +426,21 @@ class model_backend(InferenceModel):
         self.model_config.sdp_thd = 0
 
         self.model_name = parameters['custom_model_name'] if 'custom_model_name' in parameters else parameters['id']
         self.path = parameters['path'] if 'path' in parameters else None
+
+    def _save_settings(self):
+        with open(
+            "settings/{}.exllamav2.model_backend.settings".format(
+                self.model_name.replace("/", "_")
+            ),
+            "w",
+        ) as f:
+            json.dump(
+                {
+                    "max_ctx": self.model_config.max_seq_len,
+                    "compress_emb": self.model_config.compress_pos_emb,
+                    "ntk_alpha": self.model_config.alpha_value
+                },
+                f,
+                indent="",
+            )
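Taken together, the two halves form a save/load round trip: _save_settings persists the active config values, and get_requested_parameters feeds them back as UI defaults on the next load. One detail worth flagging: the v2 loader above checks the .exllama. settings path while _save_settings here writes to .exllamav2., so as committed the saved v2 values would apparently not be read back. A minimal round-trip sketch under the assumption that both sides use the same path; the path, function names, and values are illustrative, and the file is written to the current directory so the sketch runs without a settings/ folder:

    import json

    PATH = "example.exllamav2.model_backend.settings"  # hypothetical path

    def save(config):
        # Persist the three values the v2 backend saves.
        with open(PATH, "w") as f:
            json.dump({"max_ctx": config["max_seq_len"],
                       "compress_emb": config["compress_pos_emb"],
                       "ntk_alpha": config["alpha_value"]}, f, indent="")

    def load(defaults):
        # Overlay saved values on the shipped defaults, as the loader does.
        try:
            with open(PATH, "r") as f:
                defaults.update(json.load(f))
        except FileNotFoundError:
            pass  # first run: keep the shipped defaults
        return defaults

    save({"max_seq_len": 8192, "compress_pos_emb": 1, "alpha_value": 2})
    print(load({'max_ctx': 2048, 'compress_emb': 1, 'ntk_alpha': 1}))
    # {'max_ctx': 8192, 'compress_emb': 1, 'ntk_alpha': 2}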