diff --git a/aiserver.py b/aiserver.py
index 87385bad..af48156d 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1652,18 +1652,14 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
 
             device_map = {}
 
-            for _key, spec in lazy_load_spec.get("layer_weights", {}).items():
-                for layer in range(n_layers):
-                    key = _key.format(layer=layer)
-                    if key not in model_dict:
-                        continue
+            for key, value in model_dict.items():
+                if isinstance(value, torch_lazy_loader.LazyTensor) and not any(key.startswith(n) or key.startswith(n.split(".", 1)[1]) for n in vars.layer_param_names):
+                    device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
+                else:
+                    layer = int(next(n for n in vars.layer_param_names if key.startswith(n) or key.startswith(n.split(".", 1)[1])).rsplit(".", 1)[1])
                     device = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu" if not vars.hascuda or not vars.breakmodel or layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
                     device_map[key] = device
 
-            for key, value in model_dict.items():
-                if isinstance(value, torch_lazy_loader.LazyTensor) and key not in device_map:
-                    device_map[key] = vars.gpu_device if vars.hascuda and vars.usegpu else "cpu"
-
             if utils.num_shards is None or utils.current_shard == 0:
                 if utils.num_shards is not None:
                     num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
@@ -1717,15 +1713,6 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
         lazy_load_callback.nested = False
         return lazy_load_callback
 
-    lazy_load_config_path = os.path.join("maps", vars.model_type + ".json")
-    if(vars.lazy_load and "model_config" in globals() and os.path.isfile(lazy_load_config_path)):
-        with open(lazy_load_config_path) as f:
-            lazy_load_spec = json.load(f)
-
-    else:
-        vars.lazy_load = False
-
-
 
     def get_hidden_size_from_model(model):
         try:
@@ -1800,6 +1787,13 @@ def load_model(use_gpu=True, gpu_layers=None, initial_load=False, online_model="
                 import shutil
                 shutil.move(vars.model.replace('/', '_'), "models/{}".format(vars.model.replace('/', '_')))
             print("\n", flush=True)
+            if(vars.lazy_load): # If we're using lazy loader, we need to figure out what the model's hidden layers are called
+                with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True):
+                    try:
+                        metamodel = AutoModelForCausalLM.from_config(model_config)
+                    except Exception as e:
+                        metamodel = GPTNeoForCausalLM.from_config(model_config)
+                    vars.layer_param_names = utils.get_layer_param_names(metamodel)
             with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
                 if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
                     lowmem = {}
diff --git a/utils.py b/utils.py
index 69c3ad77..bbb42c52 100644
--- a/utils.py
+++ b/utils.py
@@ -7,11 +7,19 @@ import tempfile
 import requests
 import requests.adapters
 import time
+from transformers import __version__ as transformers_version
+import packaging.version
 from tqdm.auto import tqdm
 import os
 import itertools
 from typing import Optional
 
+HAS_ACCELERATE = packaging.version.parse(transformers_version) >= packaging.version.parse("4.20.0.dev0")
+try:
+    import accelerate
+except ImportError:
+    HAS_ACCELERATE = False
+
 vars = None
 num_shards: Optional[int] = None
 current_shard = 0
@@ -300,3 +308,15 @@ def get_sharded_checkpoint_num_tensors(pretrained_model_name_or_path, filename,
     import torch
     shard_paths, _ = transformers.modeling_utils.get_checkpoint_shard_files(pretrained_model_name_or_path, filename, cache_dir=cache_dir, force_download=force_download, proxies=proxies, resume_download=resume_download, local_files_only=local_files_only, use_auth_token=use_auth_token, user_agent=user_agent, revision=revision, mirror=mirror)
     return list(itertools.chain(*(torch.load(p, map_location="cpu").keys() for p in shard_paths)))
+
+def get_layer_param_names(model):
+    names = []
+    def recurse(module, head=""):
+        for c in module.named_children():
+            name = head + c[0]
+            if c[0].isnumeric() and any(c[1].__class__.__name__.endswith(suffix) for suffix in ("Block", "Layer")):
+                names.append(name)
+            else:
+                recurse(c[1], head=name + ".")
+    recurse(model)
+    return names
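
For reference, below is a minimal sketch (not part of the patch) of what the new utils.get_layer_param_names helper is expected to return: it walks named_children() recursively and records the dotted path of every numerically named child whose class name ends in "Block" or "Layer", which is how the rewritten lazy_load_callback above decides which checkpoint keys belong to which transformer layer. ToyBlock and ToyModel are invented stand-ins for a GPT-Neo-style module tree, and the import assumes the script runs from the repository root.

# Sketch only: illustrates the helper added to utils.py in this patch.
# ToyBlock/ToyModel are hypothetical stand-ins for a GPT-Neo-style model.
import torch.nn as nn
from utils import get_layer_param_names  # helper introduced by this diff

class ToyBlock(nn.Module):  # class name ends with "Block", so it is recorded as a layer
    def __init__(self):
        super().__init__()
        self.attn = nn.Linear(4, 4)

class ToyModel(nn.Module):
    def __init__(self, n_layers=3):
        super().__init__()
        self.wte = nn.Embedding(10, 4)  # non-numeric child: recursed into, not recorded
        self.h = nn.ModuleList(ToyBlock() for _ in range(n_layers))  # children named "0", "1", "2"

print(get_layer_param_names(ToyModel()))
# Expected output: ['h.0', 'h.1', 'h.2'] -- one dotted module path per block,
# which lazy_load_callback then matches against checkpoint key prefixes.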