Merge branch 'united' into neox

2025-06-05 21:59:24 +02:00 · 2022-03-18 11:19:03 -04:00
parent 95c4251db9 f581fe89cb
commit 85a4959efa
9 changed files with 36 additions and 37 deletions
--- a/aiserver.py
+++ b/aiserver.py
@@ -148,7 +148,7 @@ class vars:
    genamt      = 80     # Amount of text for each action to generate
    ikgen       = 200    # Number of characters for InferKit to generate
    rep_pen     = 1.1    # Default generator repetition_penalty
-    rep_pen_slope = 1.0  # Default generator repetition penalty slope
+    rep_pen_slope = 0.7  # Default generator repetition penalty slope
    rep_pen_range = 1024 # Default generator repetition penalty range
    temp        = 0.5    # Default generator temperature
    top_p       = 0.9    # Default generator top_p
--- a/environments/base.yml
+++ b/environments/base.yml
@@ -1,20 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - pytorch
-  - python=3.8.*
-  - cudatoolkit=11.1
-  - eventlet
-  - markdown
-  - bleach
-  - pip
-  - git
-  - pip:
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -6,7 +6,7 @@ channels:
 dependencies:
  - colorama
  - flask-socketio
-  - pytorch
+  - pytorch=1.11.*
  - python=3.8.*
  - cudatoolkit=11.1
  - eventlet
@@ -20,4 +20,4 @@ dependencies:
    - flask-cloudflared
    - flask-ngrok
    - lupa==1.10
-    - git+https://github.com/huggingface/transformers
+    - transformers>=4.17
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -15,9 +15,9 @@ dependencies:
  - protobuf
  - pip:
    - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
+    - torch==1.11.*
    - torchvision==0.11.1
    - flask-cloudflared
    - flask-ngrok
    - lupa==1.10
-    - git+https://github.com/huggingface/transformers
+    - transformers>=4.17
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-git+https://github.com/huggingface/transformers
+transformers>=4.17
 Flask
 Flask-SocketIO
 requests
-torch
+torch==1.11
 flask-cloudflared
 flask-ngrok
 eventlet
@@ -10,4 +10,4 @@ lupa==1.10
 markdown
 bleach
 sentencepiece
-protobuf
+protobuf
--- a/requirements_mtj.txt
+++ b/requirements_mtj.txt
@@ -1,3 +1,4 @@
+torch >= 1.9, <= 1.11
 numpy
 tqdm
 requests
@@ -5,7 +6,7 @@ optax >= 0.0.5, <= 0.0.9
 dm-haiku == 0.0.5
 ray[default]
 jax == 0.2.21
-transformers
+transformers >= 4.17
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
 flask
--- a/torch_lazy_loader.py
+++ b/torch_lazy_loader.py
@@ -57,11 +57,26 @@ from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 _EXTRA_STATE_KEY_SUFFIX = '_extra_state'


+STORAGE_TYPE_MAP = {
+    torch.float64: torch.DoubleStorage,
+    torch.float32: torch.FloatStorage,
+    torch.float16: torch.HalfStorage,
+    torch.int64: torch.LongStorage,
+    torch.int32: torch.IntStorage,
+    torch.int16: torch.ShortStorage,
+    torch.int8: torch.CharStorage,
+    torch.uint8: torch.ByteStorage,
+    torch.bool: torch.BoolStorage,
+    torch.bfloat16: torch.BFloat16Storage,
+}
+
+
 class LazyTensor:
-    def __init__(self, storage_type: Type[torch._StorageBase], key: str, location: str, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
+    def __init__(self, storage_type: Type[torch._StorageBase], key: str, location: str, dtype: Optional[torch.dtype] = None, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
        self.storage_type = storage_type
        self.key = key
        self.location = location
+        self.dtype = dtype
        self.seek_offset = seek_offset
        self.shape = shape
        self.stride = stride
@@ -69,14 +84,14 @@ class LazyTensor:
        self.backward_hooks = backward_hooks

    def __view(self, f: Callable):
-        return f"{type(self).__name__}(storage_type={f(self.storage_type)}, key={f(self.key)}, location={f(self.location)}, seek_offset={f(self.seek_offset)}, shape={f(self.shape)}, stride={f(self.stride)}, requires_grad={f(self.requires_grad)}, backward_hooks={f(self.backward_hooks)})"
+        return f"{type(self).__name__}(storage_type={f(self.storage_type)}, key={f(self.key)}, location={f(self.location)}, dtype={f(self.dtype)}, seek_offset={f(self.seek_offset)}, shape={f(self.shape)}, stride={f(self.stride)}, requires_grad={f(self.requires_grad)}, backward_hooks={f(self.backward_hooks)})"

    def __repr__(self):
        return self.__view(repr)

    def materialize(self, checkpoint: Union[zipfile.ZipFile, zipfile.ZipExtFile], map_location=None) -> torch.Tensor:
        size = reduce(lambda x, y: x * y, self.shape, 1)
-        dtype = self.storage_type(0).dtype
+        dtype = self.dtype
        nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
        if isinstance(checkpoint, zipfile.ZipFile):
            f = checkpoint.open(f"archive/data/{self.key}", "r")
@@ -84,7 +99,7 @@ class LazyTensor:
        else:
            f = checkpoint
        try:
-            storage = self.storage_type.from_buffer(f.read(nbytes), "little")
+            storage = STORAGE_TYPE_MAP[dtype].from_buffer(f.read(nbytes), "little")
        finally:
            if isinstance(checkpoint, zipfile.ZipFile):
                f.close()
@@ -120,7 +135,10 @@ class _LazyUnpickler(pickle.Unpickler):
 def _rebuild_tensor(lazy_storage: LazyTensor, storage_offset, shape, stride):
    lazy_storage.shape = shape
    lazy_storage.stride = stride
-    dtype = lazy_storage.storage_type(0).dtype
+    dtype = lazy_storage.storage_type.dtype
+    if not isinstance(dtype, torch.dtype):
+        dtype = lazy_storage.storage_type(0).dtype
+    lazy_storage.dtype = dtype
    lazy_storage.seek_offset = storage_offset if dtype is torch.bool else storage_offset * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
    return lazy_storage

@@ -177,7 +195,7 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss
            missing_keys.append(key)

    extra_state_key = prefix + _EXTRA_STATE_KEY_SUFFIX
-    if getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state:
+    if hasattr(Module, "set_extra_state") and getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state:  # if getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state:
        if extra_state_key in state_dict:
            self.set_extra_state(state_dict[extra_state_key])
        elif strict:
--- a/tpu_mtj_backend.py
+++ b/tpu_mtj_backend.py
@@ -1106,7 +1106,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                    # the least possible memory usage, we create them as meta
                    # tensors, which don't take up any actual CPU or TPU memory.
                    if key not in model_spec:
-                        model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].storage_type(0).dtype, device="meta")
+                        model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
                        continue

                    storage_key = model_dict[key].key
@@ -1133,7 +1133,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                        tensor /= params["cores_per_replica"]
                    if "vocab_pad" in transforms:
                        tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
-                    if "no_transpose" not in transforms:
+                    if "no_transpose" not in transforms and tensor.ndim == 2:
                        tensor = tensor.T
                    tensor.unsqueeze_(0)
                    if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:
--- a/umamba.exe
+++ b/umamba.exe