Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00.
Commit: "Fix multigpu"
This commit is contained in:
@@ -295,7 +295,11 @@ class model_backend(InferenceModel):
|
||||
)
|
||||
|
||||
def _get_model(self, location: str, tf_kwargs: Dict):
    """Load and return the ExLlama model stored at *location*.

    Lazily builds an ``ExLlamaConfig`` from the model directory's
    ``config.json`` on first call, resolves the quantized (GPTQ) weight
    file path into the config, and constructs the model.

    Args:
        location: Directory containing ``config.json`` and the model weights.
        tf_kwargs: Transformers-style kwargs; not used by this backend but
            kept for interface compatibility with sibling backends.

    Returns:
        The constructed ``ExLlama`` model instance.
    """
    if not self.model_config:
        # Bug fix: the constructed config was previously discarded
        # (bare expression statement). It must be stored on self so the
        # attribute writes below and ExLlama(...) receive a real config.
        self.model_config = ExLlamaConfig(os.path.join(location, "config.json"))

    # Resolve the on-disk path of the quantized weights into the config.
    _, self.model_config.model_path = load_model_gptq_settings(location)
    # self.model_config.gpu_peer_fix = True
    return ExLlama(self.model_config)
|
||||
|
||||
def _get_tokenizer(self, location: str):
|
||||
@@ -351,6 +355,7 @@ class model_backend(InferenceModel):
|
||||
layers.append(parameters["{}_Layers".format(i)])
|
||||
|
||||
self.layers = layers
|
||||
self.model_config.device_map.layers = []
|
||||
for i, l in enumerate(layers):
|
||||
if l > 0:
|
||||
self.model_config.device_map.layers.extend([f"cuda:{i}"] * l)
|
||||
|
Reference in New Issue
Block a user