mirror of
https://github.com/KoboldAI/KoboldAI-Client.git
synced 2025-04-03 05:01:02 +02:00
Lazy loader needs to cache named buffers of layers in the disk cache
This commit is contained in:
parent
ab5ab79003
commit
1620ac4148
@ -1718,7 +1718,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
lazy_load_callback.nested = True
|
lazy_load_callback.nested = True
|
||||||
|
|
||||||
device_map: Dict[str, Union[str, int]] = {}
|
device_map: Dict[str, Union[str, int]] = {}
|
||||||
offload_map: Dict[str, str] = {}
|
|
||||||
|
|
||||||
@functools.lru_cache(maxsize=None)
|
@functools.lru_cache(maxsize=None)
|
||||||
def get_original_key(key):
|
def get_original_key(key):
|
||||||
@ -1801,6 +1800,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
finally:
|
finally:
|
||||||
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
|
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
|
||||||
if utils.offload_index:
|
if utils.offload_index:
|
||||||
|
for name, tensor in vars.named_buffers:
|
||||||
|
if name not in utils.offload_index:
|
||||||
|
accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
|
||||||
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
|
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
|
||||||
utils.bar.close()
|
utils.bar.close()
|
||||||
utils.bar = None
|
utils.bar = None
|
||||||
@ -1895,6 +1897,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
|
|||||||
metamodel = GPTNeoForCausalLM.from_config(model_config)
|
metamodel = GPTNeoForCausalLM.from_config(model_config)
|
||||||
vars.layers_module_names = utils.get_layers_module_names(metamodel)
|
vars.layers_module_names = utils.get_layers_module_names(metamodel)
|
||||||
vars.module_names = list(metamodel.state_dict().keys())
|
vars.module_names = list(metamodel.state_dict().keys())
|
||||||
|
vars.named_buffers = list(metamodel.named_buffers(recurse=True))
|
||||||
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
|
with maybe_use_float16(), torch_lazy_loader.use_lazy_torch_load(enable=vars.lazy_load, callback=get_lazy_load_callback(utils.num_layers(model_config)) if vars.lazy_load else None, dematerialized_modules=True):
|
||||||
if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
|
if(vars.lazy_load): # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
|
||||||
lowmem = {}
|
lowmem = {}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user