Remove unused file open

somebody
2023-07-03 17:51:54 -05:00
parent 7f869a54d8
commit 32917fd651


@@ -1171,125 +1171,118 @@ def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badword
         if callback.nested:
             return
         callback.nested = True
-        with zipfile.ZipFile(f, "r") as z:
         try:
-            last_storage_key = None
-            zipfolder = os.path.basename(os.path.normpath(f)).split('.')[0]
-            f = None
-            current_offset = 0
             if utils.current_shard == 0:
                 print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")

             if utils.num_shards is None or utils.current_shard == 0:
                 if utils.num_shards is not None:
                     num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
                 else:
                     num_tensors = len(model_dict)

                 if socketio is None:
                     utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
                 else:
                     utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=utils.UIProgressBarFile(socketio.emit))
                 koboldai_vars.status_message = "Loading model"
                 koboldai_vars.loaded_layers = 0
                 koboldai_vars.total_layers = num_tensors

             if utils.num_shards is not None:
                 utils.current_shard += 1

             for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
                 model_spec_key = max((k for k in model_spec.keys() if key.endswith(k)), key=len, default=None)

                 # Some model weights are used by transformers but not by MTJ.
                 # We have to materialize these weights anyways because
                 # transformers will throw a tantrum otherwise. To attain
                 # the least possible memory usage, we create them as meta
                 # tensors, which don't take up any actual CPU or TPU memory.
                 if model_spec_key is None:
                     model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
                     utils.bar.update(1)
                     koboldai_vars.loaded_layers += 1
                     continue

                 spec = model_spec[model_spec_key]
                 transforms = set(spec.get("transforms", ()))

                 if not isinstance(model_dict[key], lazy_loader.LazyTensor):
                     error = f"Duplicate key {repr(key)}"
                     print("\n\nERROR: " + error, file=sys.stderr)
                     raise RuntimeError(error)

                 tensor = model_dict[key].materialize(map_location="cpu")
                 model_dict[key] = tensor.to("meta")

                 # MTJ requires certain mathematical operations to be performed
                 # on tensors in order for them to be in the correct format
                 if "remove_first_two_rows" in transforms:
                     tensor = tensor[2:]
                 if "divide_by_shards" in transforms:
                     tensor /= params["cores_per_replica"]
                 if "vocab_pad" in transforms:
                     tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
                 # We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
                 #if "no_transpose" not in transforms and tensor.ndim == 2:
                 #    tensor = tensor.T
                 tensor.unsqueeze_(0)

                 # Shard the tensor so that parts of the tensor can be used
                 # on different TPU cores
                 tensor = reshard_reverse(
                     tensor,
                     params["cores_per_replica"],
                     network.state["params"][spec["module"]][spec["param"]].shape,
                 )
                 tensor = tensor.detach()
                 # numpy does not support bfloat16
                 if tensor.dtype is torch.bfloat16:
                     tensor = tensor.to(torch.float32)
                 tensor = jnp.array(tensor)
                 if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
                     tensor = tensor.bfloat16()
                 network.state["params"][spec["module"]][spec["param"]] = move_xmap(
                     tensor,
                     np.empty(params["cores_per_replica"]),
                 )

                 koboldai_vars.loaded_layers += 1
                 try:
                     time.sleep(0.01)
                 except:
                     pass
                 utils.bar.update(1)

             if utils.num_shards is not None and utils.current_shard < utils.num_shards:
                 return

             # Check for tensors that MTJ needs that were not provided in the
             # HF model
             for mk, mv in network.state["params"].items():
                 for pk, pv in mv.items():
                     if isinstance(pv, PlaceholderTensor):
                         # The transformers GPT-J models apparently do not
                         # have embedding bias, whereas MTJ GPT-J models do,
                         # so we have to supplement an embedding bias tensor
                         # by creating a tensor with the necessary shape, filled
                         # with zeros.
                         if mk == "causal_transformer_shard/~/embedding_shard/~/linear" and pk == "b":
                             mv[pk] = move_xmap(jnp.zeros(mv[pk].shape, dtype=jnp.bfloat16), np.empty(params["cores_per_replica"]))
                         else:
                             error = f"{mk} {pk} could not be found in the model checkpoint"
                             print("\n\nERROR: " + error, file=sys.stderr)
                             raise RuntimeError(error)
         finally:
             if utils.num_shards is None or utils.current_shard >= utils.num_shards:
                 utils.bar.close()
                 utils.bar = None
                 koboldai_vars.status_message = ""
             callback.nested = False
-            if isinstance(f, zipfile.ZipExtFile):
-                f.close()
     callback.nested = False
     if os.path.isdir(koboldai_vars.model.replace('/', '_')):
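Aside (not part of the commit): the hunk's comments rely on PyTorch "meta" tensors to register weights that transformers expects but MTJ never reads, without spending memory on them. Below is a minimal, self-contained sketch of that trick; the shapes and variable names are made up for illustration, assuming a reasonably recent PyTorch.

import torch

# A tensor on the "meta" device carries only shape and dtype metadata and owns
# no storage, so it costs essentially no CPU or TPU memory.
placeholder = torch.empty(4096, 4096, dtype=torch.float16, device="meta")
print(placeholder.shape, placeholder.dtype, placeholder.device)  # metadata is intact

# The loader frees an already-materialized tensor the same way: keep the dict
# entry, drop the storage (rebinding releases the last reference to the data).
materialized = torch.empty(4096, 4096, dtype=torch.float16)  # ~32 MiB on CPU
materialized = materialized.to("meta")

# placeholder.cpu()  # would raise: there is no data to copy out of a meta tensor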