Merge branch 'united' into neox
commit 85a4959efa

@@ -148,7 +148,7 @@ class vars:
     genamt = 80 # Amount of text for each action to generate
     ikgen = 200 # Number of characters for InferKit to generate
     rep_pen = 1.1 # Default generator repetition_penalty
-    rep_pen_slope = 1.0 # Default generator repetition penalty slope
+    rep_pen_slope = 0.7 # Default generator repetition penalty slope
     rep_pen_range = 1024 # Default generator repetition penalty range
     temp = 0.5 # Default generator temperature
     top_p = 0.9 # Default generator top_p

@@ -1,20 +0,0 @@
-name: koboldai
-channels:
-  - pytorch
-  - conda-forge
-  - defaults
-dependencies:
-  - colorama
-  - flask-socketio
-  - pytorch
-  - python=3.8.*
-  - cudatoolkit=11.1
-  - eventlet
-  - markdown
-  - bleach
-  - pip
-  - git
-  - pip:
-    - flask-cloudflared
-    - flask-ngrok
-    - lupa==1.10

@@ -6,7 +6,7 @@ channels:
 dependencies:
   - colorama
   - flask-socketio
-  - pytorch
+  - pytorch=1.11.*
   - python=3.8.*
   - cudatoolkit=11.1
   - eventlet

@@ -20,4 +20,4 @@ dependencies:
     - flask-cloudflared
     - flask-ngrok
     - lupa==1.10
-    - git+https://github.com/huggingface/transformers
+    - transformers>=4.17

@@ -15,9 +15,9 @@ dependencies:
   - protobuf
   - pip:
     - --find-links https://download.pytorch.org/whl/rocm4.2/torch_stable.html
-    - torch
+    - torch==1.11.*
     - torchvision==0.11.1
     - flask-cloudflared
     - flask-ngrok
     - lupa==1.10
-    - git+https://github.com/huggingface/transformers
+    - transformers>=4.17

@@ -1,8 +1,8 @@
-git+https://github.com/huggingface/transformers
+transformers>=4.17
 Flask
 Flask-SocketIO
 requests
-torch
+torch==1.11
 flask-cloudflared
 flask-ngrok
 eventlet

@@ -1,3 +1,4 @@
+torch >= 1.9, <= 1.11
 numpy
 tqdm
 requests

@@ -5,7 +6,7 @@ optax >= 0.0.5, <= 0.0.9
 dm-haiku == 0.0.5
 ray[default]
 jax == 0.2.21
-transformers
+transformers >= 4.17
 progressbar2
 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck
 flask

@@ -57,11 +57,26 @@ from typing import Any, Callable, Dict, Optional, Tuple, Type, Union
 _EXTRA_STATE_KEY_SUFFIX = '_extra_state'
 
 
+STORAGE_TYPE_MAP = {
+    torch.float64: torch.DoubleStorage,
+    torch.float32: torch.FloatStorage,
+    torch.float16: torch.HalfStorage,
+    torch.int64: torch.LongStorage,
+    torch.int32: torch.IntStorage,
+    torch.int16: torch.ShortStorage,
+    torch.int8: torch.CharStorage,
+    torch.uint8: torch.ByteStorage,
+    torch.bool: torch.BoolStorage,
+    torch.bfloat16: torch.BFloat16Storage,
+}
+
+
 class LazyTensor:
-    def __init__(self, storage_type: Type[torch._StorageBase], key: str, location: str, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
+    def __init__(self, storage_type: Type[torch._StorageBase], key: str, location: str, dtype: Optional[torch.dtype] = None, seek_offset: Optional[int] = None, shape: Optional[Tuple[int, ...]] = None, stride: Optional[Tuple[int, ...]] = None, requires_grad=False, backward_hooks: Any = None):
         self.storage_type = storage_type
         self.key = key
         self.location = location
+        self.dtype = dtype
         self.seek_offset = seek_offset
         self.shape = shape
         self.stride = stride

@@ -69,14 +84,14 @@ class LazyTensor:
         self.backward_hooks = backward_hooks
 
     def __view(self, f: Callable):
-        return f"{type(self).__name__}(storage_type={f(self.storage_type)}, key={f(self.key)}, location={f(self.location)}, seek_offset={f(self.seek_offset)}, shape={f(self.shape)}, stride={f(self.stride)}, requires_grad={f(self.requires_grad)}, backward_hooks={f(self.backward_hooks)})"
+        return f"{type(self).__name__}(storage_type={f(self.storage_type)}, key={f(self.key)}, location={f(self.location)}, dtype={f(self.dtype)}, seek_offset={f(self.seek_offset)}, shape={f(self.shape)}, stride={f(self.stride)}, requires_grad={f(self.requires_grad)}, backward_hooks={f(self.backward_hooks)})"
 
     def __repr__(self):
         return self.__view(repr)
 
     def materialize(self, checkpoint: Union[zipfile.ZipFile, zipfile.ZipExtFile], map_location=None) -> torch.Tensor:
         size = reduce(lambda x, y: x * y, self.shape, 1)
-        dtype = self.storage_type(0).dtype
+        dtype = self.dtype
         nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
         if isinstance(checkpoint, zipfile.ZipFile):
             f = checkpoint.open(f"archive/data/{self.key}", "r")

@@ -84,7 +99,7 @@ class LazyTensor:
         else:
             f = checkpoint
         try:
-            storage = self.storage_type.from_buffer(f.read(nbytes), "little")
+            storage = STORAGE_TYPE_MAP[dtype].from_buffer(f.read(nbytes), "little")
         finally:
             if isinstance(checkpoint, zipfile.ZipFile):
                 f.close()

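For reference, a minimal standalone sketch (not part of this commit) of the size arithmetic that the updated materialize() relies on, now driven by the new dtype field rather than the storage class; the helper name storage_nbytes is invented for illustration.

import torch

# Illustrative only: bool storages are serialized one byte per element; every
# other dtype occupies numel * (bits >> 3) bytes, which is the count handed to
# from_buffer() via STORAGE_TYPE_MAP[dtype] in the hunk above.
def storage_nbytes(dtype: torch.dtype, numel: int) -> int:
    if dtype is torch.bool:
        return numel
    info = torch.finfo(dtype) if dtype.is_floating_point else torch.iinfo(dtype)
    return numel * (info.bits >> 3)

# Example: a float16 tensor of shape (2, 3) is read back as 6 * 2 = 12 raw bytes.
assert storage_nbytes(torch.float16, 2 * 3) == 12
assert storage_nbytes(torch.int64, 10) == 80
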
@@ -120,7 +135,10 @@ class _LazyUnpickler(pickle.Unpickler):
 def _rebuild_tensor(lazy_storage: LazyTensor, storage_offset, shape, stride):
     lazy_storage.shape = shape
     lazy_storage.stride = stride
+    dtype = lazy_storage.storage_type.dtype
+    if not isinstance(dtype, torch.dtype):
         dtype = lazy_storage.storage_type(0).dtype
+    lazy_storage.dtype = dtype
     lazy_storage.seek_offset = storage_offset if dtype is torch.bool else storage_offset * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3)
     return lazy_storage
 

@@ -177,7 +195,7 @@ def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, miss
             missing_keys.append(key)
 
     extra_state_key = prefix + _EXTRA_STATE_KEY_SUFFIX
-    if getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state:
+    if hasattr(Module, "set_extra_state") and getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state: # if getattr(self.__class__, "set_extra_state", Module.set_extra_state) is not Module.set_extra_state:
         if extra_state_key in state_dict:
             self.set_extra_state(state_dict[extra_state_key])
         elif strict:

@@ -1106,7 +1106,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
             # the least possible memory usage, we create them as meta
             # tensors, which don't take up any actual CPU or TPU memory.
             if key not in model_spec:
-                model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].storage_type(0).dtype, device="meta")
+                model_dict[key] = torch.empty(model_dict[key].shape, dtype=model_dict[key].dtype, device="meta")
                 continue
 
             storage_key = model_dict[key].key

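As a quick illustration (not part of the diff) of why the meta-device placeholder above costs nothing, assuming any PyTorch build that supports the meta device:

import torch

# A meta tensor records only shape and dtype; no CPU or TPU memory is allocated,
# which is what load_model() relies on for keys that are absent from model_spec.
placeholder = torch.empty((4096, 4096), dtype=torch.float16, device="meta")
print(placeholder.shape, placeholder.dtype, placeholder.device)
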
@@ -1133,7 +1133,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 tensor /= params["cores_per_replica"]
             if "vocab_pad" in transforms:
                 tensor = torch.nn.functional.pad(tensor, (0, 0, 0, params["n_vocab_padding"]))
-            if "no_transpose" not in transforms:
+            if "no_transpose" not in transforms and tensor.ndim == 2:
                 tensor = tensor.T
             tensor.unsqueeze_(0)
             if tensor.dtype is torch.float16 or tensor.dtype is torch.float32:

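A small sketch of the effect of the new tensor.ndim == 2 guard, under the assumption that it exists to skip transposing 1-D tensors such as biases; the example tensors are made up:

import torch

weight = torch.zeros(8, 4)   # 2-D weight matrix
bias = torch.zeros(8)        # 1-D bias vector

if weight.ndim == 2:
    weight = weight.T        # transposed to shape (4, 8)
if bias.ndim == 2:
    bias = bias.T            # guard not taken; shape stays (8,)

print(weight.shape, bias.shape)
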
BIN umamba.exe (binary file not shown)