Merge pull request #157 from VE-FORBRYDERNE/sp-fix

Bug fixes and new soft prompt implementation
henk717 2022-06-21 22:20:36 +02:00 committed by GitHub
commit 37eb47d0d3
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 26 deletions

aiserver.py

@@ -633,8 +633,9 @@ def move_model_to_devices(model):
         generator = model.generate
         return
 
+    import breakmodel
+
     if(utils.HAS_ACCELERATE):
-        import breakmodel
         disk_blocks = breakmodel.disk_blocks
         gpu_blocks = breakmodel.gpu_blocks
         ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks)
@@ -1246,18 +1247,20 @@ def get_oai_models(key):
 
 
 # Function to patch transformers to use our soft prompt
-def patch_causallm(cls):
-    if(getattr(cls, "_koboldai_patch_causallm_patched", False)):
-        return
-    old_forward = cls.forward
-    def new_causallm_forward(self, *args, **kwargs):
-        input_ids = kwargs.get('input_ids').to(self.device)
+def patch_causallm(model):
+    from torch.nn import Embedding
+    if(getattr(Embedding, "_koboldai_patch_causallm_model", None)):
+        Embedding._koboldai_patch_causallm_model = model
+        return model
+    old_embedding_call = Embedding.__call__
+    def new_embedding_call(self, input_ids, *args, **kwargs):
+        if(Embedding._koboldai_patch_causallm_model.get_input_embeddings() is not self):
+            return old_embedding_call(self, input_ids, *args, **kwargs)
         assert input_ids is not None
-        kwargs['input_ids'] = None
         if(vars.sp is not None):
-            shifted_input_ids = input_ids - self.config.vocab_size
-        input_ids.clamp_(max=self.config.vocab_size-1)
-        inputs_embeds = self.get_input_embeddings()(input_ids)
+            shifted_input_ids = input_ids - model.config.vocab_size
+        input_ids.clamp_(max=model.config.vocab_size-1)
+        inputs_embeds = old_embedding_call(self, input_ids, *args, **kwargs)
         if(vars.sp is not None):
             vars.sp = vars.sp.to(inputs_embeds.dtype).to(inputs_embeds.device)
             inputs_embeds = torch.where(
@@ -1265,13 +1268,10 @@ def patch_causallm(cls):
                 vars.sp[shifted_input_ids.clamp(min=0)],
                 inputs_embeds,
             )
-        if(hasattr(self, "model") and hasattr(self.model, "embed_scale")):
-            inputs_embeds *= self.model.embed_scale
-        kwargs['inputs_embeds'] = inputs_embeds
-        return old_forward(self, *args, **kwargs)
-    cls.forward = new_causallm_forward
-    cls._koboldai_patch_causallm_patched = True
-    return cls
+        return inputs_embeds
+    Embedding.__call__ = new_embedding_call
+    Embedding._koboldai_patch_causallm_model = model
+    return model
 
 
 def patch_transformers():
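The two hunks above replace the old per-model-class forward override with a hook on torch.nn.Embedding.__call__: token IDs at or beyond the real vocabulary are treated as indices into the soft-prompt tensor (vars.sp), and their rows are swapped into the normal embedding output. Below is a minimal, self-contained sketch of that mechanism; the names soft_prompt, VOCAB_SIZE and the toy Embedding are illustrative stand-ins, not KoboldAI's, and the real patch's "is this the model's own input embedding?" guard is omitted for brevity.

# Sketch: monkey-patch torch.nn.Embedding.__call__ so that out-of-vocabulary
# token IDs are looked up in a separate soft-prompt tensor instead of the
# embedding matrix. Illustrative only; not the exact KoboldAI code.
import torch
from torch.nn import Embedding

VOCAB_SIZE, EMBED_DIM, SP_TOKENS = 100, 8, 4
soft_prompt = torch.randn(SP_TOKENS, EMBED_DIM)   # stands in for vars.sp

old_embedding_call = Embedding.__call__           # inherited Module.__call__

def new_embedding_call(self, input_ids, *args, **kwargs):
    # IDs >= VOCAB_SIZE address the soft prompt; shift them into its range.
    shifted = input_ids - VOCAB_SIZE
    clamped = input_ids.clamp(max=VOCAB_SIZE - 1)      # keep the real lookup in range
    embeds = old_embedding_call(self, clamped, *args, **kwargs)
    sp = soft_prompt.to(embeds.dtype).to(embeds.device)
    # Where the original ID pointed past the vocabulary, take the soft-prompt row.
    return torch.where((shifted >= 0)[..., None], sp[shifted.clamp(min=0)], embeds)

Embedding.__call__ = new_embedding_call

emb = Embedding(VOCAB_SIZE, EMBED_DIM)
ids = torch.tensor([[1, 2, VOCAB_SIZE, VOCAB_SIZE + 3]])  # last two hit the soft prompt
print(emb(ids).shape)  # torch.Size([1, 4, 8])

Patching the class rather than each model's forward is what lets patch_causallm take the model instance instead of its class, and why the later call sites in this diff change from patch_causallm(model.__class__) to patch_causallm(model).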
@@ -1603,9 +1603,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)")
             vars.model_type = "gpt_neo"
 
-    if(vars.model_type == "opt"):
-        vars.badwordsids = vars.badwordsids_opt
-
     if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]):
         loadmodelsettings()
         loadsettings()
@@ -1866,7 +1863,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
             else:
                 model = model.to('cpu').float()
                 generator = model.generate
-            patch_causallm(model.__class__)
+            patch_causallm(model)
         # Use the Generic implementation
         else:
             lowmem = maybe_low_cpu_mem_usage()
@@ -1997,7 +1994,10 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
                         shutil.move(transformers.file_utils.get_from_cache(transformers.file_utils.hf_bucket_url(vars.model, filename, revision=vars.revision), cache_dir="cache", local_files_only=True), os.path.join("models/{}".format(vars.model.replace('/', '_')), filename))
                 shutil.rmtree("cache/")
 
-            patch_causallm(model.__class__)
+            if(vars.badwordsids is vars.badwordsids_default and vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
+                vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]")]
+
+            patch_causallm(model)
 
             if(vars.hascuda):
                 if(vars.usegpu):
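The badwordsids fallback added above derives a ban list directly from the tokenizer for model types outside the known GPT-2/Neo/J families: every vocabulary entry whose string contains one of the characters <, >, [ or ] is banned as a single-token sequence, the nested-list shape transformers expects for bad_words_ids. A rough illustration of the same list comprehension against an arbitrary Hugging Face tokenizer follows; the model name is only an example, not one used by this commit.

# Sketch: build a bad_words_ids-style list from any Hugging Face tokenizer's
# vocabulary, banning tokens that contain angle or square brackets.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")  # example model only
badwordsids = [
    [token_id]                                   # one-token sequences, as bad_words_ids expects
    for token, token_id in tokenizer.get_vocab().items()
    if any(c in str(token) for c in "<>[]")
]
print(len(badwordsids), badwordsids[:5])

In the diff this fallback only runs while vars.badwordsids still points at the default list, so a ban list supplied elsewhere is left untouched.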
@@ -2147,8 +2147,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         if vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (not vars.custmodpth or not os.path.isdir(vars.custmodpth)):
             raise FileNotFoundError(f"The specified model path {repr(vars.custmodpth)} is not the path to a valid folder")
         import tpu_mtj_backend
-        if(vars.model == "TPUMeshTransformerGPTNeoX" or vars.model_type == "opt"):
-            tpu_mtj_backend.pad_token_id = 1
+        if(vars.model == "TPUMeshTransformerGPTNeoX"):
+            tpu_mtj_backend.pad_token_id = 2
         tpu_mtj_backend.vars = vars
         tpu_mtj_backend.warper_callback = tpumtjgenerate_warper_callback
         tpu_mtj_backend.stopping_callback = tpumtjgenerate_stopping_callback
@@ -2161,6 +2161,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         tpu_mtj_backend.load_model(vars.custmodpth, hf_checkpoint=vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and vars.use_colab_tpu, **vars.modelconfig)
         vars.modeldim = int(tpu_mtj_backend.params.get("d_embed", tpu_mtj_backend.params["d_model"]))
         tokenizer = tpu_mtj_backend.tokenizer
+        if(vars.badwordsids is vars.badwordsids_default and vars.model_type not in ("gpt2", "gpt_neo", "gptj")):
+            vars.badwordsids = [[v] for k, v in tokenizer.get_vocab().items() if any(c in str(k) for c in "<>[]")]
 
     else:
         loadsettings()

tpu_mtj_backend.py

@@ -1018,7 +1018,12 @@ def read_neox_checkpoint(state, path, config, checkpoint_shards=2):
 
 
 def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpoint=False, **kwargs) -> None:
-    global thread_resources_env, seq, tokenizer, network, params
+    global thread_resources_env, seq, tokenizer, network, params, pad_token_id
 
+    if "pad_token_id" in kwargs:
+        pad_token_id = kwargs["pad_token_id"]
+    elif "eos_token_id" in kwargs:
+        pad_token_id = kwargs["eos_token_id"]
+
     if not hasattr(vars, "sampler_order") or not vars.sampler_order:
         vars.sampler_order = utils.default_sampler_order.copy()
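The lines added above let the TPU backend pick its pad token from the model config passed in through **kwargs: an explicit pad_token_id wins, otherwise eos_token_id is reused, and if neither is present the module-level pad_token_id set elsewhere (for example by aiserver.py for GPT-NeoX) stays in effect. A small sketch of that precedence; the default value used here is illustrative, not the backend's actual constant.

# Sketch of the pad-token precedence: pad_token_id, then eos_token_id,
# then whatever module-level default is already set.
def pick_pad_token_id(config_kwargs: dict, module_default: int = 3) -> int:
    if "pad_token_id" in config_kwargs:
        return config_kwargs["pad_token_id"]
    if "eos_token_id" in config_kwargs:
        return config_kwargs["eos_token_id"]
    return module_default

print(pick_pad_token_id({"pad_token_id": 1}))  # 1: explicit pad token wins
print(pick_pad_token_id({"eos_token_id": 2}))  # 2: fall back to EOS
print(pick_pad_token_id({}))                   # 3: keep the module default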
@@ -1119,6 +1124,7 @@ def load_model(path: str, driver_version="tpu_driver0.1_dev20210607", hf_checkpo
                 return old_encode(s).ids
             return encode
         tokenizer.encode = new_encode(tokenizer.encode)
+        tokenizer._koboldai_header = []
     elif not hf_checkpoint:
         if not isinstance(params["tokenizer_class"], str) or not any(params["tokenizer_class"].endswith(s) for s in ("Tokenizer", "TokenizerFast")):
             raise ValueError("`tokenizer_class` must be a string ending in 'Tokenizer' or 'TokenizerFast'")