Mirror of https://github.com/KoboldAI/KoboldAI-Client.git, synced 2025-06-05 21:59:24 +02:00
Remove unused file open
@@ -1171,125 +1171,118 @@ def load_model(path: str, model_type: str, badwordsids=koboldai_settings.badword
if callback.nested:
return
callback.nested = True
with zipfile.ZipFile(f, "r") as z:
try:
last_storage_key = None
zipfolder = os.path.basename(os.path.normpath(f)).split('.')[0]
f = None
current_offset = 0
if utils.current_shard == 0:
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")

if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(model_dict)

if socketio is None:
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
else:
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=utils.UIProgressBarFile(socketio.emit))
koboldai_vars.status_message = "Loading model"
koboldai_vars.loaded_layers = 0
koboldai_vars.total_layers = num_tensors
try:
if utils.current_shard == 0:
print("\n\n\nThis model has ", f"{hk.data_structures.tree_size(network.state['params']):,d}".replace(",", " "), " parameters.\n")

if utils.num_shards is None or utils.current_shard == 0:
if utils.num_shards is not None:
utils.current_shard += 1
num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs))
else:
num_tensors = len(model_dict)

for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
model_spec_key = max((k for k in model_spec.keys() if key.endswith(k)), key=len, default=None)
if socketio is None:
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors")
else:
utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=utils.UIProgressBarFile(socketio.emit))
koboldai_vars.status_message = "Loading model"
koboldai_vars.loaded_layers = 0
koboldai_vars.total_layers = num_tensors

# Some model weights are used by transformers but not by MTJ.
# We have to materialize these weights anyways because
# transformers will throw a tantrum otherwise. To attain
# the least possible memory usage, we create them as meta
# tensors, which don't take up any actual CPU or TPU memory.
if model_spec_key is None:
model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
utils.bar.update(1)
koboldai_vars.loaded_layers += 1
continue
if utils.num_shards is not None:
utils.current_shard += 1

spec = model_spec[model_spec_key]
transforms = set(spec.get("transforms", ()))
for key in sorted(model_dict.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)):
model_spec_key = max((k for k in model_spec.keys() if key.endswith(k)), key=len, default=None)

if not isinstance(model_dict[key], lazy_loader.LazyTensor):
error = f"Duplicate key {repr(key)}"
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)

tensor = model_dict[key].materialize(map_location="cpu")
model_dict[key] = tensor.to("meta")

# MTJ requires certain mathematical operations to be performed
# on tensors in order for them to be in the correct format
if "remove_first_two_rows" in transforms:
tensor = tensor[2:]
if "divide_by_shards" in transforms:
tensor /= params["cores_per_replica"]
if "vocab_pad" in transforms:
tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
# We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
#if "no_transpose" not in transforms and tensor.ndim == 2:
# tensor = tensor.T
tensor.unsqueeze_(0)


# Shard the tensor so that parts of the tensor can be used
# on different TPU cores
tensor = reshard_reverse(
tensor,
params["cores_per_replica"],
network.state["params"][spec["module"]][spec["param"]].shape,
)
tensor = tensor.detach()
# numpy does not support bfloat16
if tensor.dtype is torch.bfloat16:
tensor = tensor.to(torch.float32)
tensor = jnp.array(tensor)
if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
tensor = tensor.bfloat16()
network.state["params"][spec["module"]][spec["param"]] = move_xmap(
tensor,
np.empty(params["cores_per_replica"]),
)

koboldai_vars.loaded_layers += 1
try:
time.sleep(0.01)
except:
pass
# Some model weights are used by transformers but not by MTJ.
# We have to materialize these weights anyways because
# transformers will throw a tantrum otherwise. To attain
# the least possible memory usage, we create them as meta
# tensors, which don't take up any actual CPU or TPU memory.
if model_spec_key is None:
model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
utils.bar.update(1)
koboldai_vars.loaded_layers += 1
continue

if utils.num_shards is not None and utils.current_shard < utils.num_shards:
return
spec = model_spec[model_spec_key]
transforms = set(spec.get("transforms", ()))

# Check for tensors that MTJ needs that were not provided in the
# HF model
for mk, mv in network.state["params"].items():
for pk, pv in mv.items():
if isinstance(pv, PlaceholderTensor):
# The transformers GPT-J models apparently do not
# have embedding bias, whereas MTJ GPT-J models do,
# so we have to supplement an embedding bias tensor
# by creating a tensor with the necessary shape, filled
# with zeros.
if mk == "causal_transformer_shard/~/embedding_shard/~/linear" and pk == "b":
mv[pk] = move_xmap(jnp.zeros(mv[pk].shape, dtype=jnp.bfloat16), np.empty(params["cores_per_replica"]))
if not isinstance(model_dict[key], lazy_loader.LazyTensor):
error = f"Duplicate key {repr(key)}"
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)

else:
error = f"{mk} {pk} could not be found in the model checkpoint"
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
koboldai_vars.status_message = ""
callback.nested = False
if isinstance(f, zipfile.ZipExtFile):
f.close()
tensor = model_dict[key].materialize(map_location="cpu")
model_dict[key] = tensor.to("meta")

# MTJ requires certain mathematical operations to be performed
# on tensors in order for them to be in the correct format
if "remove_first_two_rows" in transforms:
tensor = tensor[2:]
if "divide_by_shards" in transforms:
tensor /= params["cores_per_replica"]
if "vocab_pad" in transforms:
tensor = torch.nn.functional.pad(tensor, (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],))
# We don't need to transpose linear module weights anymore because MTJ will do it for us if `transposed_linear` is set to True in the config
#if "no_transpose" not in transforms and tensor.ndim == 2:
# tensor = tensor.T
tensor.unsqueeze_(0)


# Shard the tensor so that parts of the tensor can be used
# on different TPU cores
tensor = reshard_reverse(
tensor,
params["cores_per_replica"],
network.state["params"][spec["module"]][spec["param"]].shape,
)
tensor = tensor.detach()
# numpy does not support bfloat16
if tensor.dtype is torch.bfloat16:
tensor = tensor.to(torch.float32)
tensor = jnp.array(tensor)
if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
tensor = tensor.bfloat16()
network.state["params"][spec["module"]][spec["param"]] = move_xmap(
tensor,
np.empty(params["cores_per_replica"]),
)

koboldai_vars.loaded_layers += 1
try:
time.sleep(0.01)
except:
pass
utils.bar.update(1)

if utils.num_shards is not None and utils.current_shard < utils.num_shards:
return

# Check for tensors that MTJ needs that were not provided in the
# HF model
for mk, mv in network.state["params"].items():
for pk, pv in mv.items():
if isinstance(pv, PlaceholderTensor):
# The transformers GPT-J models apparently do not
# have embedding bias, whereas MTJ GPT-J models do,
# so we have to supplement an embedding bias tensor
# by creating a tensor with the necessary shape, filled
# with zeros.
if mk == "causal_transformer_shard/~/embedding_shard/~/linear" and pk == "b":
mv[pk] = move_xmap(jnp.zeros(mv[pk].shape, dtype=jnp.bfloat16), np.empty(params["cores_per_replica"]))

else:
error = f"{mk} {pk} could not be found in the model checkpoint"
print("\n\nERROR: " + error, file=sys.stderr)
raise RuntimeError(error)
finally:
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
utils.bar.close()
utils.bar = None
koboldai_vars.status_message = ""
callback.nested = False
callback.nested = False

if os.path.isdir(koboldai_vars.model.replace('/', '_')):
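
The "meta tensors" mentioned in the comments above are a stock PyTorch feature: a tensor allocated on the "meta" device carries only shape and dtype metadata, so weights that transformers insists on materializing but MTJ never reads cost no real CPU or TPU memory. A minimal sketch of the idea (illustrative only, not part of this commit):

import torch

# Same pattern as model_dict[key] = torch.empty(..., device="meta") in the loader:
# the tensor has a shape and dtype but no backing storage.
placeholder = torch.empty((4096, 4096), dtype=torch.float16, device="meta")
print(placeholder.shape, placeholder.dtype, placeholder.device)
# Reading its values (e.g. placeholder.tolist()) would fail, which is fine here,
# because MTJ never uses these weights.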
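
The float32 detour around jnp.array exists because, as the "numpy does not support bfloat16" comment notes, numpy has no bfloat16 dtype. A small sketch of that conversion path (assumes torch and jax are installed; illustrative, not the loader's exact code):

import torch
import jax.numpy as jnp

t = torch.ones(2, 2, dtype=torch.bfloat16)
# Calling t.numpy() directly would fail: numpy cannot represent bfloat16.
t32 = t.to(torch.float32)            # widen to a dtype numpy understands
j = jnp.array(t32.numpy())           # hand the float32 copy to JAX
j = j.astype(jnp.bfloat16)           # narrow back to bfloat16 on the JAX side
print(j.dtype)                       # bfloat16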
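
The "vocab_pad" transform builds its padding tuple as (0,) * (tensor.ndim * 2 - 1) + (params["n_vocab_padding"],). Because torch.nn.functional.pad consumes that tuple from the last dimension backwards, the single non-zero entry pads only the end of dimension 0, i.e. it appends extra vocabulary rows. A quick check of that behaviour (illustrative values only):

import torch
import torch.nn.functional as F

w = torch.zeros(10, 4)                  # pretend vocab of 10 tokens, embedding dim 4
pad = (0,) * (w.ndim * 2 - 1) + (6,)    # (0, 0, 0, 6): pad dim 0 on the right by 6
print(F.pad(w, pad).shape)              # torch.Size([16, 4])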