Maybe works now...?

somebody
2023-05-31 14:31:08 -05:00
parent d0d215bb37
commit 24b0b32829
3 changed files with 239 additions and 547 deletions


@@ -71,6 +71,9 @@ class model_backend(HFTorchInferenceModel):
        )

        if self.lazy_load:
            # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
            tf_kwargs.pop("low_cpu_mem_usage", None)

            # If we're using lazy loader, we need to figure out what the model's hidden layers are called
            with lazy_loader.use_lazy_load(dematerialized_modules=True):
                try:
@@ -83,145 +86,92 @@ class model_backend(HFTorchInferenceModel):
                    self.lazy_load = False

        # Download model from Huggingface if it does not exist, otherwise load locally
        with self._maybe_use_float16(), lazy_loader.use_lazy_load(
            enable=self.lazy_load,
            callback=self._get_lazy_load_callback(utils.num_layers(self.model_config))
            if self.lazy_load
            else None,
            dematerialized_modules=True,
        ):
            if self.lazy_load:
                # torch_lazy_loader.py and low_cpu_mem_usage can't be used at the same time
                tf_kwargs.pop("low_cpu_mem_usage", None)

            if self.get_local_model_path():
                # Model is stored locally, load it.
                self.model = self._get_model(self.get_local_model_path(), tf_kwargs)
                self.tokenizer = self._get_tokenizer(self.get_local_model_path())
            else:
                # Model not stored locally, we need to download it.

                # _rebuild_tensor patch for casting dtype and supporting LazyTensors
                old_rebuild_tensor = torch._utils._rebuild_tensor

                def new_rebuild_tensor(
                    storage: Union[lazy_loader.LazyTensor, torch.Storage],
                    storage_offset,
                    shape,
                    stride,
                ):
                    if not isinstance(storage, lazy_loader.LazyTensor):
                        dtype = storage.dtype
                    else:
                        dtype = storage.storage_type.dtype
                        if not isinstance(dtype, torch.dtype):
                            dtype = storage.storage_type(0).dtype

                    if dtype is torch.float32 and len(shape) >= 2:
                        utils.koboldai_vars.fp32_model = True

                    return old_rebuild_tensor(storage, storage_offset, shape, stride)

                torch._utils._rebuild_tensor = new_rebuild_tensor
                self.model = self._get_model(self.model_name, tf_kwargs)
                self.tokenizer = self._get_tokenizer(self.model_name)
                torch._utils._rebuild_tensor = old_rebuild_tensor

                if save_model:
                    self.tokenizer.save_pretrained(
                        self.get_local_model_path(ignore_existance=True)
                    )

                    if utils.koboldai_vars.fp32_model:
                        # Use save_pretrained to convert fp32 models to fp16,
                        # unless we are using disk cache because save_pretrained
                        # is not supported in that case
                        self.model = self.model.half()
                        self.model.save_pretrained(
                            self.get_local_model_path(ignore_existance=True),
                            max_shard_size="500MiB",
                        )

                    else:
                        # For fp16 models, we can just copy the model files directly
                        import transformers.configuration_utils
                        import transformers.modeling_utils
                        import transformers.file_utils
                        import huggingface_hub
                        # Save the config.json
                        shutil.move(
                            os.path.realpath(
                                huggingface_hub.hf_hub_download(
                                    self.model_name,
                                    transformers.configuration_utils.CONFIG_NAME,
                                    revision=utils.koboldai_vars.revision,
                                    cache_dir="cache",
                                    local_files_only=True,
                                    legacy_cache_layout=False,
                                )
                            ),
                            os.path.join(
                                self.get_local_model_path(ignore_existance=True),
                                transformers.configuration_utils.CONFIG_NAME,
                            ),
                        )

                        if utils.num_shards is None:
                            # Save the pytorch_model.bin or model.safetensors of an unsharded model
                            any_success = False
                            possible_checkpoint_names = [
                                transformers.modeling_utils.WEIGHTS_NAME,
                                "model.safetensors",
                            ]

                            for possible_checkpoint_name in possible_checkpoint_names:
                                try:
                                    shutil.move(
                                        os.path.realpath(
                                            huggingface_hub.hf_hub_download(
                                                self.model_name,
                                                possible_checkpoint_name,
                                                revision=utils.koboldai_vars.revision,
                                                cache_dir="cache",
                                                local_files_only=True,
                                                legacy_cache_layout=False,
                                            )
                                        ),
                                        os.path.join(
                                            self.get_local_model_path(
                                                ignore_existance=True
                                            ),
                                            possible_checkpoint_name,
                                        ),
                                    )
                                    any_success = True
                                except Exception:
                                    pass

                            if not any_success:
                                raise RuntimeError(
                                    f"Couldn't find any of {possible_checkpoint_names} in cache for {self.model_name} @ '{utils.koboldai_vars.revision}'"
                                )
                        else:
                            # Handle saving sharded models

                            with open(utils.from_pretrained_index_filename) as f:
                                map_data = json.load(f)
                            filenames = set(map_data["weight_map"].values())
                            # Save the pytorch_model.bin.index.json of a sharded model
                            shutil.move(
                                os.path.realpath(utils.from_pretrained_index_filename),
                                os.path.join(
                                    self.get_local_model_path(ignore_existance=True),
                                    transformers.modeling_utils.WEIGHTS_INDEX_NAME,
                                ),
                            )
                            # Then save the pytorch_model-#####-of-#####.bin files
                            for filename in filenames:
                                shutil.move(
                                    os.path.realpath(
                                        huggingface_hub.hf_hub_download(
                                            self.model_name,
                                            filename,
                                            revision=utils.koboldai_vars.revision,
                                            cache_dir="cache",
                                            local_files_only=True,
                                            legacy_cache_layout=False,
                                        )
                                    ),
                                    os.path.join(
                                        self.get_local_model_path(ignore_existance=True),
                                        filename,
                                    ),
                                )
                        shutil.rmtree("cache/")

@@ -232,13 +182,53 @@ class model_backend(HFTorchInferenceModel):
        self.patch_embedding()

        self.model.tie_weights()
        # self.model.tie_weights()

        self.model.kai_model = self
        utils.koboldai_vars.modeldim = self.get_hidden_size()
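
The notable trick in this diff is the temporary monkey-patch of torch._utils._rebuild_tensor, which lets the backend notice fp32 weight matrices while the checkpoint is still being deserialized. Below is a minimal standalone sketch of the same pattern, outside of KoboldAI; the checkpoint path "model.bin" and the found_fp32_matrix flag are made-up names for illustration, and the LazyTensor handling from the real code is omitted.

    import torch

    # Keep a reference so the patch can be undone afterwards.
    _old_rebuild_tensor = torch._utils._rebuild_tensor
    found_fp32_matrix = False

    def _probing_rebuild_tensor(storage, storage_offset, shape, stride):
        global found_fp32_matrix
        # Standard torch.load() checkpoints rebuild each tensor through this
        # function (via _rebuild_tensor_v2), so every weight's dtype can be
        # inspected as it is materialized.
        if storage.dtype is torch.float32 and len(shape) >= 2:
            found_fp32_matrix = True
        return _old_rebuild_tensor(storage, storage_offset, shape, stride)

    torch._utils._rebuild_tensor = _probing_rebuild_tensor
    try:
        state_dict = torch.load("model.bin", map_location="cpu")
    finally:
        # Always restore the original hook, as the backend does after loading.
        torch._utils._rebuild_tensor = _old_rebuild_tensor

    print("checkpoint contains fp32 weight matrices:", found_fp32_matrix)

The backend uses the same detect-then-restore structure: if any fp32 matrix is seen, it later calls model.half() and save_pretrained() to persist an fp16 copy instead of moving the downloaded files directly.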