diff --git a/modeling/inference_models/exllama/class.py b/modeling/inference_models/exllama/class.py
index 21eba58e..aa37a7aa 100644
--- a/modeling/inference_models/exllama/class.py
+++ b/modeling/inference_models/exllama/class.py
@@ -50,7 +50,7 @@ def load_model_gptq_settings(path):
     gptq_model = False
     gptq_file = False
 
-    gptq_legacy_files = glob.glob(os.path.join(path, "4bit*.safetensors"))
+    gptq_legacy_files = glob.glob(os.path.join(path, "*4bit*.safetensors"))
     if "gptq_bits" in js:
         gptq_model = True
         gptq_file = os.path.join(path, "model.safetensors")
@@ -58,7 +58,7 @@ def load_model_gptq_settings(path):
         gptq_model = True
         gptq_file = gptq_legacy_files[0]
         fname = Path(gptq_file).parts[-1]
-        g = re.findall("^(?:4bit)(?:-)(\\d+)(?:g-?)", fname)
+        g = re.findall("(?:4bit)(?:-)(\\d+)(?:g-?)", fname)
 
     return gptq_model, gptq_file
 
@@ -113,11 +113,6 @@ class model_backend(InferenceModel):
         if not config and os.path.exists("models/{}".format(model_name.replace('/', '_'))):
             config = ExLlamaConfig(os.path.join("models/{}".format(model_name.replace('/', '_')), "config.json"))
 
-        if config and "superhot" in model_name.lower():
-            # Set compress_pos_emb factor
-            config.max_seq_len = 8192
-            config.compress_pos_emb = 4.0
-
         return config
 
     def _load(self, save_model: bool, initial_load: bool) -> None:
@@ -366,6 +361,51 @@ class model_backend(InferenceModel):
             "refresh_model_inputs": False
         })
 
+        requested_parameters.append({
+            "uitype": "slider",
+            "unit": "int",
+            "label": "Maximum Context",
+            "id": "max_ctx",
+            "min": 2048,
+            "max": 16384,
+            "step": 512,
+            "default": 2048,
+            "tooltip": "The maximum context size the model supports",
+            "menu_path": "Configuration",
+            "extra_classes": "",
+            "refresh_model_inputs": False
+        })
+
+        requested_parameters.append({
+            "uitype": "slider",
+            "unit": "float",
+            "label": "Embedding Compression",
+            "id": "compress_emb",
+            "min": 1,
+            "max": 8,
+            "step": 0.25,
+            "default": 1,
+            "tooltip": "If the model requires compressed embeddings, set them here",
+            "menu_path": "Configuration",
+            "extra_classes": "",
+            "refresh_model_inputs": False
+        })
+
+        requested_parameters.append({
+            "uitype": "slider",
+            "unit": "float",
+            "label": "NTK alpha",
+            "id": "ntk_alpha",
+            "min": 1,
+            "max": 32,
+            "step": 0.25,
+            "default": 1,
+            "tooltip": "NTK alpha value",
+            "menu_path": "Configuration",
+            "extra_classes": "",
+            "refresh_model_inputs": False
+        })
+
         return requested_parameters
 
     def set_input_parameters(self, parameters):
@@ -387,6 +427,10 @@ class model_backend(InferenceModel):
             self.model_config.device_map.lm_head = "cuda:0"
             self.model_config.device_map.norm = "cuda:0"
 
+        self.model_config.max_seq_len = parameters["max_ctx"]
+        self.model_config.compress_pos_emb = parameters["compress_emb"]
+        self.model_config.alpha_value = parameters["ntk_alpha"]
+
         # Disable half2 for HIP
         self.model_config.rmsnorm_no_half2 = bool(torch.version.hip)
         self.model_config.rope_no_half2 = bool(torch.version.hip)
diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py
index 16d3db91..157ebdbe 100644
--- a/modeling/inference_models/gptq_hf_torch/class.py
+++ b/modeling/inference_models/gptq_hf_torch/class.py
@@ -56,7 +56,7 @@ def load_model_gptq_settings(path):
     gptq_file = False
     gptq_version = -1
 
-    gptq_legacy_files = glob.glob(os.path.join(path, "4bit*.pt")) + glob.glob(os.path.join(path, "4bit*.safetensors"))
+    gptq_legacy_files = glob.glob(os.path.join(path, "*4bit*.pt")) + glob.glob(os.path.join(path, "*4bit*.safetensors"))
     if "gptq_bits" in js:
         gptq_model = True
         gptq_bits = js["gptq_bits"]
@@ -70,7 +70,7 @@ def load_model_gptq_settings(path):
         gptq_bits = 4
         gptq_file = gptq_legacy_files[0]
         fname = Path(gptq_file).parts[-1]
-        g = re.findall("^(?:4bit)(?:-)(\\d+)(?:g-?)", fname)
+        g = re.findall("(?:4bit)(?:-)(\\d+)(?:g-?)", fname)
         gptq_groupsize = int(g[0]) if g else -1
         gptq_version = -1
 
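Note on the detection changes in both files: the old glob patterns (`4bit*.pt`, `4bit*.safetensors`) only matched checkpoints whose filenames *start* with `4bit`, and the `^`-anchored regex had the same restriction, so a legacy file such as `llama-7b-4bit-128g.safetensors` was never picked up. The relaxed patterns accept `4bit` anywhere in the name. A minimal standalone sketch of the new behavior (the helper name `find_legacy_gptq` is hypothetical, not part of the patch):

```python
import glob
import os
import re
from pathlib import Path

def find_legacy_gptq(path):
    # Hypothetical helper mirroring the patched detection logic.
    # "*4bit*" also matches names with a prefix before "4bit",
    # e.g. "llama-7b-4bit-128g.safetensors"; the old "4bit*"
    # pattern required the filename to start with "4bit".
    files = glob.glob(os.path.join(path, "*4bit*.pt")) \
          + glob.glob(os.path.join(path, "*4bit*.safetensors"))
    if not files:
        return None, -1
    fname = Path(files[0]).parts[-1]
    # With the "^" anchor dropped, the group-size pattern matches
    # anywhere in the filename, consistent with the new glob.
    g = re.findall("(?:4bit)(?:-)(\\d+)(?:g-?)", fname)
    groupsize = int(g[0]) if g else -1
    return files[0], groupsize

# For a directory containing "llama-7b-4bit-128g.safetensors", this
# returns that file's path together with groupsize 128.
```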
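The three new sliders replace the hard-coded SuperHOT special case removed above: instead of forcing `max_seq_len = 8192` and `compress_pos_emb = 4.0` whenever `"superhot"` appears in the model name, the context length, embedding-compression factor, and NTK alpha are now user-settable per model and written into the ExLlama config in `set_input_parameters`. A sketch of that flow, with a `SimpleNamespace` standing in for an `ExLlamaConfig` and illustrative slider values (the ones shown reproduce the removed SuperHOT settings):

```python
from types import SimpleNamespace

# Stand-in for an ExLlamaConfig; only the three fields written by
# set_input_parameters are modeled here.
config = SimpleNamespace(max_seq_len=2048, compress_pos_emb=1.0, alpha_value=1.0)

# Illustrative slider values; a SuperHOT-style model would use the
# settings the removed special case used to hard-code.
parameters = {"max_ctx": 8192, "compress_emb": 4.0, "ntk_alpha": 1.0}

config.max_seq_len = parameters["max_ctx"]            # "Maximum Context" slider
config.compress_pos_emb = parameters["compress_emb"]  # "Embedding Compression" slider
config.alpha_value = parameters["ntk_alpha"]          # "NTK alpha" slider
```

Embedding compression (linear RoPE scaling) and NTK alpha are alternative context-extension techniques, so in practice only one of the two is usually raised above its default of 1.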