Mirror of https://github.com/KoboldAI/KoboldAI-Client.git
Commit message: Seems to work
@@ -278,56 +278,22 @@ class HFTorchInferenceModel(HFInferenceModel):
         # Try to determine model type from either AutoModel or falling back to legacy
         try:
-            # with accelerate.init_empty_weights():
-            #     model = AutoModelForCausalLM.from_config(self.model_config)
-
-            # print("[HUGE SKELETON] MAKING DEVICE MAP")
-            # device_map = infer_auto_device_map(
-            #     model,
-            #     no_split_module_classes=model._no_split_modules,
-            #     max_memory={0: "10GiB", 1: "7GiB", "cpu": "20GiB"},
-            #     dtype=torch.float16,
-            # )
-
-            # # TODO: ??
-            # print("[HUGE SKELETON] TYING WEIGHTS")
-            # model.tie_weights()
-
             print("[HUGE SKELETON] LOADING FROM PRETRAINED")
-            # model = load_checkpoint_and_dispatch(
-            #     model,
-            #     location + "/pytorch_model.bin",
-            #     device_map=device_map,
-            #     no_split_module_classes=model._no_split_modules,
-            #     dtype=torch.float16,
-            # )
             with lazy_loader.use_lazy_load(
                 enable=True,
+                # DO NOT DEMATERIALIZE MODULES / INIT WEIGHTS EMPTY!!! IT WILL EXPLODE!!!!!!!
                 # dematerialized_modules=True,
                 dematerialized_modules=False,
             ):
                 model = AutoModelForCausalLM.from_pretrained(
                     location,
                     device_map="auto",
-                    max_memory={0: "10GiB", 1: "7GiB", "cpu": "20GiB"},
+                    # max_memory={0: "10GiB", 1: "7GiB", "cpu": "20GiB"},
                     offload_folder="accelerate-disk-cache",
                     torch_dtype=torch.float16,
                     **tf_kwargs,
                 )
-
-            for name, value in list(model.named_parameters()) + list(
-                model.named_buffers()
-            ):
-                if value.device != torch.device("meta"):
-                    continue
-                print(name, value, value.nelement())
-                # try:
-                #     value.cpu()
-                # except NotImplementedError:
-                #     # Can't be copied out of meta tensor, no data
-                #     print("Bad news at", name)
-                #     # setattr(model, name, torch.zeros(value.size()))

             return model
         except Exception as e:
             traceback_string = traceback.format_exc().lower()
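For reference, the load path kept by this hunk is the stock transformers/accelerate big-model integration rather than a hand-built device map. A minimal standalone sketch of that pattern follows; the checkpoint name is a placeholder, not something from this repo, and this is an illustration, not KoboldAI code.

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "facebook/opt-1.3b",                      # placeholder checkpoint
        device_map="auto",                        # let accelerate place layers on GPU/CPU
        torch_dtype=torch.float16,                # load weights in half precision
        offload_folder="accelerate-disk-cache",   # spill overflow weights to disk
    )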
@@ -148,29 +148,6 @@ def patch_transformers_for_lazyload() -> None:
     limitations under the License.
     """
     import torch
-    import accelerate
-
-    # _old_set_module_tensor_to_device = (
-    #     accelerate.utils.modeling.set_module_tensor_to_device
-    # )
-
-    # def _set_module_tensor_to_device(
-    #     module: torch.nn.Module,
-    #     tensor_name: str,
-    #     device: Union[int, str, torch.device],
-    #     value: Optional[torch.Tensor] = None,
-    #     dtype: Optional[Union[str, torch.dtype]] = None,
-    # ):
-    #     if isinstance(value, LazyTensor):
-    #         value = value.materialize()
-    #     print("HEY!", dtype)
-    #     return _old_set_module_tensor_to_device(
-    #         module, tensor_name, device, value, dtype
-    #     )
-
-    # accelerate.utils.modeling.set_module_tensor_to_device = _set_module_tensor_to_device
-
-    from accelerate.utils.modeling import named_module_tensors
     from accelerate.utils import set_module_tensor_to_device, offload_weight
 
     def _load_state_dict_into_meta_model(
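The import kept here, set_module_tensor_to_device, is the accelerate helper for writing a concrete value into a module whose parameters live on the meta device. A small illustrative sketch of that API, using a toy Linear layer that is not part of this repo:

    import torch
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device

    with init_empty_weights():
        layer = torch.nn.Linear(4, 4)  # parameters are created on the meta device

    # A meta parameter has no data, so a real value must be supplied when moving it.
    set_module_tensor_to_device(layer, "weight", "cpu", value=torch.zeros(4, 4))
    set_module_tensor_to_device(layer, "bias", "cpu", value=torch.zeros(4))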
@@ -225,10 +202,7 @@ def patch_transformers_for_lazyload() -> None:
         for old_key, new_key in zip(old_keys, new_keys):
             state_dict[new_key] = state_dict.pop(old_key)
 
         # BEGIN PATCH
-        # TODO: Based on config
-        # dtype = torch.float16
-
         for param_name, param in sorted(
             state_dict.items(),
             # State dict must be ordered in this manner to make the caching in
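The removed "TODO: Based on config" hinted at choosing the dtype from the checkpoint's config instead of hard-coding float16. A hypothetical sketch of that idea, with a placeholder checkpoint name:

    import torch
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("facebook/opt-1.3b")  # placeholder checkpoint
    dtype = config.torch_dtype or torch.float32               # torch_dtype may be None in the config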
@@ -243,7 +217,7 @@ def patch_transformers_for_lazyload() -> None:
             if isinstance(param, LazyTensor):
                 # Should always be true
                 param = param.materialize()
             # END PATCH
 
             # First part of the test is always true as load_state_dict_keys always contains state_dict keys.
             if (
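The loop above assumes every incoming state-dict entry is a LazyTensor that can be turned into a real tensor on demand. KoboldAI's actual LazyTensor (in its lazy_loader module) materializes weights from checkpoint storage; the toy stand-in below only shows the shape of that pattern and is not the project's implementation.

    import torch

    class ToyLazyTensor:
        """Defers tensor construction until materialize() is called."""

        def __init__(self, shape, dtype=torch.float16):
            self.shape = shape
            self.dtype = dtype

        def materialize(self) -> torch.Tensor:
            # A real implementation would read the tensor bytes from disk here.
            return torch.zeros(self.shape, dtype=self.dtype)

    param = ToyLazyTensor((4, 4))
    tensor = param.materialize()  # concrete torch.Tensor, ready to copy into the model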
@@ -338,63 +312,6 @@ def patch_transformers_for_lazyload() -> None:
         _load_state_dict_into_meta_model
     )
 
-    # # Patch AlignDevicesHook to hack around OPT lm_head
-    # HACK_ZERO_ON_FAIL_TENSORS = ["lm_head.weight"]
-
-    # def _init_hook(self, module):
-    #     if not self.offload and self.execution_device is not None:
-    #         # BEGIN PATCH
-    #         for name, tensor in named_module_tensors(
-    #             module, recurse=self.place_submodules
-    #         ):
-    #             try:
-    #                 set_module_tensor_to_device(module, name, self.execution_device)
-    #             except ValueError:
-    #                 # ValueError: weight is on the meta device, we need a `value` to put in on 0.
-    #                 # bleuuuuuuuuuuuuuuuhhh
-    #                 if name in HACK_ZERO_ON_FAIL_TENSORS:
-    #                     logger.warning(
-    #                         f"Couldn't find value for weight {name}, zeroing."
-    #                     )
-    #                     set_module_tensor_to_device(
-    #                         module,
-    #                         name,
-    #                         self.execution_device,
-    #                         value=torch.zeros(tensor.shape),
-    #                     )
-    #         # END PATCH
-    #     elif self.offload:
-    #         self.original_devices = {
-    #             name: param.device
-    #             for name, param in named_module_tensors(
-    #                 module, recurse=self.place_submodules
-    #             )
-    #         }
-
-    #         if self.weights_map is None:
-    #             self.weights_map = {
-    #                 name: param.to("cpu")
-    #                 for name, param in named_module_tensors(
-    #                     module,
-    #                     include_buffers=self.offload_buffers,
-    #                     recurse=self.place_submodules,
-    #                 )
-    #             }
-
-    #         for name, _ in named_module_tensors(
-    #             module,
-    #             include_buffers=self.offload_buffers,
-    #             recurse=self.place_submodules,
-    #         ):
-    #             set_module_tensor_to_device(module, name, "meta")
-
-    #         if not self.offload_buffers and self.execution_device is not None:
-    #             for name, _ in module.named_buffers(recurse=self.place_submodules):
-    #                 set_module_tensor_to_device(module, name, self.execution_device)
-    #         return module
-
-    # accelerate.hooks.AlignDevicesHook.init_hook = _init_hook
-
 
 def patch_transformers() -> None:
     patch_transformers_download()
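The surviving context shows the module swapping transformers' private loader for its own copy. In this repo the whole function is copied and edited in place (note the Apache license docstring and the BEGIN/END PATCH markers above); a lighter-weight variant of the same monkey-patching idea, sketched here only for illustration and assuming a transformers version that exposes _load_state_dict_into_meta_model at module level, would wrap the original instead:

    import transformers.modeling_utils as modeling_utils

    _original = modeling_utils._load_state_dict_into_meta_model

    def _patched_load_state_dict_into_meta_model(*args, **kwargs):
        # ...materialize lazy tensors here before delegating to the original...
        return _original(*args, **kwargs)

    modeling_utils._load_state_dict_into_meta_model = _patched_load_state_dict_into_meta_model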