Fix for loading the model multiple times losing the GPU/CPU splits

This commit is contained in:
ebolam
2023-05-22 20:34:01 -04:00
parent 9e53bcf676
commit 4c25d6fbbb
2 changed files with 3 additions and 6 deletions

View File

@@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel):
torch.cuda.empty_cache()
except:
pass
if self.hf_torch:
if 'breakmodel' in sys.modules:
import breakmodel
breakmodel.breakmodel = True
breakmodel.gpu_blocks = []
breakmodel.disk_blocks = 0
def _post_load(self) -> None:
# These are model specific tokenizer overrides if a model has bad defaults