diff --git a/aiserver.py b/aiserver.py
index 0a467aa3..e744d18e 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -1345,6 +1345,7 @@ def general_startup(override_args=None):
     parser.add_argument("--summarizer_model", action='store', default="philschmid/bart-large-cnn-samsum", help="Huggingface model to use for summarization. Defaults to sshleifer/distilbart-cnn-12-6")
     parser.add_argument("--max_summary_length", action='store', default=75, help="Maximum size for summary to send to image generation")
     parser.add_argument("--multi_story", action='store_true', default=False, help="Allow multi-story mode (experimental)")
+    parser.add_argument("--peft", type=str, help="Path or Hugging Face Hub ID of a PEFT adapter to load. Not supported on TPU. (Experimental)")
     parser.add_argument("--trust_remote_code", action='store_true', default=False, help="Allow Huggingface Models to Execute Code (Insecure!)")
     parser.add_argument('-f', action='store', help="option for compatability with colab memory profiles")
 
diff --git a/environments/huggingface.yml b/environments/huggingface.yml
index b8d640fb..1cc5a9c7 100644
--- a/environments/huggingface.yml
+++ b/environments/huggingface.yml
@@ -34,7 +34,7 @@ dependencies:
     - lupa==1.10
     - transformers==4.28.0
     - huggingface_hub==0.12.1
-    - safetensors
+    - safetensors==0.3.1
     - accelerate==0.18.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - flask-session
@@ -46,4 +46,4 @@ dependencies:
     - ftfy
     - pydub
     - diffusers
-    - peft
+    - peft==0.3.0
diff --git a/environments/rocm.yml b/environments/rocm.yml
index 6213089b..51b3e852 100644
--- a/environments/rocm.yml
+++ b/environments/rocm.yml
@@ -32,7 +32,7 @@ dependencies:
     - lupa==1.10
     - transformers==4.28.0
     - huggingface_hub==0.12.1
-    - safetensors
+    - safetensors==0.3.1
    - accelerate==0.18.0
     - git+https://github.com/VE-FORBRYDERNE/mkultra
     - ansi2html
@@ -41,4 +41,4 @@ dependencies:
     - ftfy
     - pydub
     - diffusers
-    - peft
+    - peft==0.3.0
diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py
index ca1f1cdf..d4d5320b 100644
--- a/modeling/inference_models/hf_torch.py
+++ b/modeling/inference_models/hf_torch.py
@@ -22,6 +22,7 @@ from transformers import (
     AutoModelForCausalLM,
     LogitsProcessorList,
 )
+from peft import PeftModel, PeftConfig
 
 import utils
 import modeling.lazy_loader as lazy_loader
@@ -211,6 +212,31 @@ class HFTorchInferenceModel(HFInferenceModel):
             new_sample.old_sample = transformers.GenerationMixin.sample
             use_core_manipulations.sample = new_sample
 
+        # PEFT loading. This MUST be done after all save_pretrained calls are
+        # finished on the main model.
+        if utils.args.peft:
+            peft_local_path = os.path.join("models/peft", utils.args.peft.replace("/", "_"))
+            logger.debug(f"Loading PEFT '{utils.args.peft}', possible local path is '{peft_local_path}'.")
+
+            peft_installed_locally = True
+            possible_peft_locations = [peft_local_path, utils.args.peft]
+
+            for i, location in enumerate(possible_peft_locations):
+                try:
+                    m_self.model = PeftModel.from_pretrained(m_self.model, location)
+                    logger.debug(f"Loaded PEFT at '{location}'")
+                    break
+                except ValueError:
+                    peft_installed_locally = False
+                    if i == len(possible_peft_locations) - 1:
+                        raise RuntimeError(f"Unable to load PeftModel for given name '{utils.args.peft}'. Does it exist?")
+                except RuntimeError:
+                    raise RuntimeError("Error while loading PeftModel. Are you using the correct model?")
+
+            if not peft_installed_locally:
+                logger.debug(f"PEFT not saved to models folder; saving to '{peft_local_path}'")
+                m_self.model.save_pretrained(peft_local_path)
+
         return super()._post_load()
 
     def _raw_generate(
@@ -238,8 +264,13 @@ class HFTorchInferenceModel(HFInferenceModel):
 
         with torch.no_grad():
             start_time = time.time()
+
+            # NOTE: all arguments passed to self.model.generate must be
+            # kwargs (see https://github.com/huggingface/peft/issues/232).
+            # PeftModel.generate accepts no positional arguments, but the
+            # error only surfaces when a PEFT is loaded, so it is easy to miss.
             genout = self.model.generate(
-                gen_in,
+                input_ids=gen_in,
                 do_sample=True,
                 max_length=min(
                     len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
diff --git a/models/peft/README.txt b/models/peft/README.txt
new file mode 100644
index 00000000..fc7b72c4
--- /dev/null
+++ b/models/peft/README.txt
@@ -0,0 +1,2 @@
+PEFT models will be stored in this directory when downloaded.
+Please don't be too mean to this directory.
diff --git a/requirements.txt b/requirements.txt
index 584e7377..4eb2c282 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,5 +36,5 @@ pytest==7.2.2
 pytest-html==3.2.0
 pytest-metadata==2.0.4
 requests-mock==1.10.0
-safetensors
-peft
\ No newline at end of file
+safetensors==0.3.1
+peft==0.3.0
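
For illustration, a minimal standalone sketch of the pattern the patch implements: try the locally cached copy under models/peft first, fall back to the Hugging Face Hub ID, cache the adapter after a successful Hub load, and pass everything to generate() as keyword arguments. The base model and adapter IDs below are placeholders, not part of the patch, and this approximates rather than reproduces the KoboldAI code path.

import os

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "EleutherAI/gpt-neo-125m"   # placeholder base model
PEFT_ID = "someuser/some-peft-adapter"   # placeholder adapter ID

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)

# Same fallback order as the patch: local cache first, then the Hub.
local_path = os.path.join("models/peft", PEFT_ID.replace("/", "_"))
loaded_locally = True
for location in (local_path, PEFT_ID):
    try:
        model = PeftModel.from_pretrained(model, location)
        break
    except ValueError:
        loaded_locally = False
else:
    raise RuntimeError(f"Unable to load PeftModel '{PEFT_ID}'. Does it exist?")

# Cache the adapter locally if it had to be fetched from the Hub.
if not loaded_locally:
    model.save_pretrained(local_path)

# Keyword arguments only: PeftModel's generate() rejects positional
# arguments (https://github.com/huggingface/peft/issues/232).
gen_in = tokenizer("Hello", return_tensors="pt").input_ids
genout = model.generate(input_ids=gen_in, do_sample=True, max_length=32)
print(tokenizer.decode(genout[0]))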