From fd6f66a98da4655773ae02e39e2299da36818c7e Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 8 Jul 2023 13:57:05 -0500
Subject: [PATCH 1/3] Patch _rebuild_from_type_v2 to not try converting
 LazyTensors to Tensors

---
 modeling/lazy_loader.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py
index da2ec989..2af0ae51 100644
--- a/modeling/lazy_loader.py
+++ b/modeling/lazy_loader.py
@@ -57,6 +57,7 @@ import _codecs
 import os
 from typing import Any, Callable, Dict, Optional, Tuple, Type
 
+from torch import Tensor
 from torch.nn import Module
 from torch.storage import UntypedStorage
 
@@ -237,6 +238,29 @@ class SafetensorsLazyTensor(LazyTensor):
             self.checkpoint_file, tensor_key=self.key, device=self.location
         )
 
+def _patched_rebuild_from_type_v2(func, new_type, args, state):
+    """A patched version of torch._tensor._rebuild_from_type_v2 that
+    does not attempt to convert `LazyTensor`s to `torch.Tensor`s."""
+
+    ret = func(*args)
+
+    # BEGIN PATCH
+    transformation_ok = isinstance(ret, LazyTensor) and new_type == Tensor
+    if type(ret) is not new_type and not transformation_ok:
+        # END PATCH
+        ret = ret.as_subclass(new_type)
+
+    # Tensor does define __setstate__ even though it doesn't define
+    # __getstate__. So only use __setstate__ if it is NOT the one defined
+    # on Tensor
+    if (
+        getattr(ret.__class__, "__setstate__", Tensor.__setstate__)
+        is not Tensor.__setstate__
+    ):
+        ret.__setstate__(state)
+    else:
+        ret = torch._utils._set_obj_state(ret, state)
+    return ret
 
 class RestrictedUnpickler(pickle.Unpickler):
     def original_persistent_load(self, saved_id):
@@ -253,7 +277,7 @@ class RestrictedUnpickler(pickle.Unpickler):
         elif module == "torch._utils" and name == "_rebuild_tensor_v2":
             return torch._utils._rebuild_tensor_v2
         elif module == "torch._tensor" and name == "_rebuild_from_type_v2":
-            return torch._tensor._rebuild_from_type_v2
+            return _patched_rebuild_from_type_v2
         elif module == "torch" and name in (
             "DoubleStorage",
             "FloatStorage",

From c2ee30af32c68e3915585feea3d9d19c8ae70e4c Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 8 Jul 2023 14:04:46 -0500
Subject: [PATCH 2/3] Add --panic to raise when loading fails

---
 aiserver.py                                         | 1 +
 modeling/inference_models/generic_hf_torch/class.py | 2 ++
 modeling/inference_models/hf_torch.py               | 5 +++++
 3 files changed, 8 insertions(+)

diff --git a/aiserver.py b/aiserver.py
index 49223b3a..74b8bca8 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1400,6 +1400,7 @@ def general_startup(override_args=None):
     parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles")
     parser.add_argument('-v', '--verbosity', action='count', default=0, help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen")
     parser.add_argument('-q', '--quiesce', action='count', default=0, help="The default logging level is ERROR or higher. This value decreases the amount of logging seen in your screen")
+    parser.add_argument("--panic", action='store_true', help="Disables falling back when loading fails.")
 
     #args: argparse.Namespace = None
     if "pytest" in sys.modules and override_args is None:
diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py
index ad17b85b..8f024ea1 100644
--- a/modeling/inference_models/generic_hf_torch/class.py
+++ b/modeling/inference_models/generic_hf_torch/class.py
@@ -90,6 +90,8 @@ class model_backend(HFTorchInferenceModel):
                 utils.module_names = list(metamodel.state_dict().keys())
                 utils.named_buffers = list(metamodel.named_buffers(recurse=True))
             except Exception as e:
+                if utils.args.panic:
+                    raise e
                 logger.warning(f"Gave up on lazy loading due to {e}")
                 self.lazy_load = False
 
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 84d3447e..2249a87a 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -363,6 +363,8 @@ class HFTorchInferenceModel(HFInferenceModel):
                 return GPTNeoForCausalLM.from_pretrained(location, **tf_kwargs)
             except Exception as e:
                 logger.warning(f"{self.model_name} is a no-go; {e} - Falling back to auto.")
+                if utils.args.panic:
+                    raise e
 
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
@@ -414,6 +416,9 @@ class HFTorchInferenceModel(HFInferenceModel):
                 logger.error("Invalid load key! Aborting.")
                 raise
 
+            if utils.args.panic:
+                raise e
+
             logger.warning(f"Fell back to GPT2LMHeadModel due to {e}")
             logger.debug(traceback.format_exc())
 

From 3928d86339b492466594a22c521a0d68f08024b3 Mon Sep 17 00:00:00 2001
From: somebody
Date: Sat, 8 Jul 2023 14:36:45 -0500
Subject: [PATCH 3/3] Fall back to unpatched HF

---
 modeling/inference_models/hf_torch.py | 31 ++++++++++----
 modeling/lazy_loader.py               |  4 ++
 modeling/patches.py                   | 60 +++++++++++++++------
 3 files changed, 61 insertions(+), 34 deletions(-)

diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index 2249a87a..fb9fe39e 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -364,7 +364,7 @@ class HFTorchInferenceModel(HFInferenceModel):
             except Exception as e:
                 logger.warning(f"{self.model_name} is a no-go; {e} - Falling back to auto.")
                 if utils.args.panic:
-                    raise e
+                    raise
 
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
@@ -383,11 +383,28 @@ class HFTorchInferenceModel(HFInferenceModel):
                 metamodel
             )
 
-        with lazy_loader.use_lazy_load(
-            enable=self.lazy_load,
-            # DO NOT DEMATERIALIZE MODULES / INIT WEIGHTS EMPTY!!! IT WILL EXPLODE!!!!!!!
-            dematerialized_modules=False,
-        ):
+        try:
+            # Try to load with the lazyloader first...
+            with lazy_loader.use_lazy_load(
+                enable=self.lazy_load,
+                # DO NOT DEMATERIALIZE MODULES / INIT WEIGHTS EMPTY!!! IT WILL EXPLODE!!!!!!!
+                dematerialized_modules=False,
+            ):
+                model = AutoModelForCausalLM.from_pretrained(
+                    location,
+                    offload_folder="accelerate-disk-cache",
+                    torch_dtype=self._get_target_dtype(),
+                    **tf_kwargs,
+                )
+        except Exception as e:
+            # ...but fall back to stock HF if lazyloader fails.
+            if utils.args.panic:
+                raise
+            logger.error("Lazyloader failed, falling back to stock HF load. You may run out of RAM here. Details:")
Details:") + logger.error(e) + logger.error(traceback.format_exc()) + logger.info("Falling back to stock HF load...") + model = AutoModelForCausalLM.from_pretrained( location, offload_folder="accelerate-disk-cache", @@ -417,7 +434,7 @@ class HFTorchInferenceModel(HFInferenceModel): raise if utils.args.panic: - raise e + raise logger.warning(f"Fell back to GPT2LMHeadModel due to {e}") logger.debug(traceback.format_exc()) diff --git a/modeling/lazy_loader.py b/modeling/lazy_loader.py index 2af0ae51..69e0d948 100644 --- a/modeling/lazy_loader.py +++ b/modeling/lazy_loader.py @@ -60,6 +60,7 @@ from typing import Any, Callable, Dict, Optional, Tuple, Type from torch import Tensor from torch.nn import Module from torch.storage import UntypedStorage +from modeling.patches import LazyloadPatches # Safetensors is a dependency for the local version, TPU/Colab doesn't # support it yet. @@ -510,6 +511,8 @@ def use_lazy_load( begin_time = time.time() try: + LazyloadPatches.__enter__() + old_rebuild_tensor = torch._utils._rebuild_tensor torch._utils._rebuild_tensor = _rebuild_tensor @@ -577,6 +580,7 @@ def use_lazy_load( yield True finally: + LazyloadPatches.__exit__(None, None, None) torch._utils._rebuild_tensor = old_rebuild_tensor torch.load = old_torch_load diff --git a/modeling/patches.py b/modeling/patches.py index 827e997a..a72d533a 100644 --- a/modeling/patches.py +++ b/modeling/patches.py @@ -10,7 +10,9 @@ from transformers import ( PreTrainedModel, modeling_utils, ) -from modeling.lazy_loader import LazyTensor + +import torch +import modeling import utils @@ -126,27 +128,16 @@ def patch_transformers_generation() -> None: transformers.generation.logits_process.NoBadWordsLogitsProcessor.__init__ = new_init -def patch_transformers_for_lazyload() -> None: - """ - Most of the code is modified code from the Accelerate and Transformers - projects, made by HuggingFace. The license for these projects are as follows: - --- - Copyright The HuggingFace Team. All rights reserved. +class LazyloadPatches: + old_load_state_dict = transformers.modeling_utils._load_state_dict_into_meta_model - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at + def __enter__() -> None: + transformers.modeling_utils._load_state_dict_into_meta_model = ( + LazyloadPatches._load_state_dict_into_meta_model + ) - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - """ - import torch - from accelerate.utils import set_module_tensor_to_device, offload_weight + def __exit__(exc_type, exc_value, exc_traceback) -> None: + transformers.modeling_utils._load_state_dict_into_meta_model = LazyloadPatches.old_load_state_dict def _load_state_dict_into_meta_model( model, @@ -167,6 +158,26 @@ def patch_transformers_for_lazyload() -> None: is_safetensors=False, keep_in_fp32_modules=None, ): + """ + This is modified code from the Accelerate and Transformers projects, + made by HuggingFace. The license for these projects are as follows: + --- + Copyright The HuggingFace Team. All rights reserved. 
+
+        Licensed under the Apache License, Version 2.0 (the "License");
+        you may not use this file except in compliance with the License.
+        You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+        Unless required by applicable law or agreed to in writing, software
+        distributed under the License is distributed on an "AS IS" BASIS,
+        WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        See the License for the specific language governing permissions and
+        limitations under the License.
+        """
+        from accelerate.utils import offload_weight, set_module_tensor_to_device
+
         is_quantized = is_quantized or load_in_8bit
 
         if is_quantized:
@@ -201,7 +212,7 @@ def patch_transformers_for_lazyload() -> None:
             ),
         ):
 
-            if isinstance(param, LazyTensor):
+            if isinstance(param, modeling.lazy_loader.LazyTensor):
                 # Should always be true
                 param = param.materialize(map_location="cpu")
                 utils.bar.update(1)
@@ -296,15 +307,10 @@ def patch_transformers_for_lazyload() -> None:
 
         return error_msgs, offload_index, state_dict_index
 
-    transformers.modeling_utils._load_state_dict_into_meta_model = (
-        _load_state_dict_into_meta_model
-    )
-
 
 def patch_transformers(use_tpu: bool) -> None:
     patch_transformers_download()
     patch_transformers_loader()
     if not use_tpu:
-        patch_transformers_generation()
-        patch_transformers_for_lazyload()
+        patch_transformers_generation()
\ No newline at end of file
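
A note on the pattern introduced in PATCH 3: LazyloadPatches is never used in a `with` statement. use_lazy_load drives it by calling __enter__() and __exit__(None, None, None) explicitly inside its own try/finally, which is why the methods are declared without `self` and why the original _load_state_dict_into_meta_model is captured at class-definition time. Below is a minimal, self-contained sketch of that swap-and-restore pattern; the `target` namespace and its `load` attribute are illustrative stand-ins only, whereas the real patch swaps transformers.modeling_utils._load_state_dict_into_meta_model.

import types

# Stand-in for transformers.modeling_utils; hypothetical, for illustration only.
target = types.SimpleNamespace(load=lambda: "stock loader")


class Patches:
    # Capture the original callable at class-definition time, as
    # LazyloadPatches.old_load_state_dict does.
    old_load = target.load

    # Declared without `self`, mirroring the patch's explicit-call style.
    def __enter__() -> None:
        target.load = lambda: "patched loader"

    def __exit__(exc_type, exc_value, exc_traceback) -> None:
        target.load = Patches.old_load


# How use_lazy_load drives it: explicit calls wrapped in try/finally, so the
# original loader is restored even if loading fails partway through.
Patches.__enter__()
try:
    assert target.load() == "patched loader"
finally:
    Patches.__exit__(None, None, None)

assert target.load() == "stock loader"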