Fix for loading the model multiple times losing the GPU/CPU splits

This commit is contained in:
ebolam
2023-05-22 20:34:01 -04:00
parent 9e53bcf676
commit 4c25d6fbbb
2 changed files with 3 additions and 6 deletions

View File

@@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel):
torch.cuda.empty_cache()
except:
pass
if self.hf_torch:
if 'breakmodel' in sys.modules:
import breakmodel
breakmodel.breakmodel = True
breakmodel.gpu_blocks = []
breakmodel.disk_blocks = 0
def _post_load(self) -> None:
# These are model specific tokenizer overrides if a model has bad defaults