From 4c25d6fbbbfad67176056a6f5af1826c2c2eb24c Mon Sep 17 00:00:00 2001
From: ebolam
Date: Mon, 22 May 2023 20:34:01 -0400
Subject: [PATCH] Fix for loading model multiple times losing the gpu/cpu splits

---
 modeling/inference_models/hf.py       | 6 ------
 modeling/inference_models/hf_torch.py | 3 +++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index e801eab2..b50ebf56 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -197,12 +197,6 @@ class HFInferenceModel(InferenceModel):
             torch.cuda.empty_cache()
         except:
             pass
-        if self.hf_torch:
-            if 'breakmodel' in sys.modules:
-                import breakmodel
-                breakmodel.breakmodel = True
-                breakmodel.gpu_blocks = []
-                breakmodel.disk_blocks = 0
 
     def _post_load(self) -> None:
         # These are model specific tokenizer overrides if a model has bad defaults
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index c5560360..681d3ab1 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -788,6 +788,7 @@ class HFTorchInferenceModel(HFInferenceModel):
         if device_count < 2:
             primary = None
         logger.debug("n_layers: {}".format(n_layers))
+        logger.debug("gpu blocks: {}".format(breakmodel.gpu_blocks))
         gpu_blocks = breakmodel.gpu_blocks + (
             device_count - len(breakmodel.gpu_blocks)
         ) * [0]
@@ -818,6 +819,8 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         n_layers = utils.num_layers(config)
 
+        logger.debug("gpu blocks before modification: {}".format(breakmodel.gpu_blocks))
+
         if utils.args.cpu:
             breakmodel.gpu_blocks = [0] * n_layers
             return