From 289248ef40428e1e9590ccd4308002ae24b4e9d0 Mon Sep 17 00:00:00 2001 From: vfbd Date: Sat, 23 Jul 2022 14:37:28 -0400 Subject: [PATCH 01/26] Write AutoPromptTuningLM class --- prompt_tuner.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 prompt_tuner.py diff --git a/prompt_tuner.py b/prompt_tuner.py new file mode 100644 index 00000000..99320861 --- /dev/null +++ b/prompt_tuner.py @@ -0,0 +1,86 @@ +import torch +import torch.nn.functional as F +from torch.nn import Embedding +import transformers +from mkultra.tuning import GPTPromptTuningMixin + + +class _WTEMixin: + @property + def wte(self): + return self.get_input_embeddings() + + @wte.setter + def wte(self, v): + self.set_input_embeddings(v) + + +class UniversalPromptTuningMixin: + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): + model = super().from_pretrained(pretrained_model_name_or_path, **kwargs) + + if not hasattr(model, "transformer"): + model.transformer = _WTEMixin() + elif not hasattr(model.transformer, "wte"): + assert isinstance(model.transformer, type) + model.transformer.__class__ = type("_UniversalPromptTuning" + model.transformer.__class__.__name__, (_WTEMixin, model.transformer.__class__), {}) + + model.__class__ = type("_UniversalPromptTuning" + model.__class__.__name__, (UniversalPromptTuningMixin, model.__class__), {}) + + for param in model.parameters(): + param.requires_grad = False + model.initialize_soft_prompt() + + return model + + def forward( + self, + input_ids=None, + attention_mask=None, + labels=None, + use_cache=None, + return_dict=None, + **kwargs, + ): + assert input_ids is not None + assert input_ids.ndim == 2 + + input_ids = F.pad(input_ids, (self.learned_embedding.size(0), 0, 0, 0), value=self.transformer.wte.weight.size(0) // 2) + + if labels is not None: + labels = self._extend_labels(labels) + + if attention_mask is not None: + attention_mask = self._extend_attention_mask(attention_mask) + + old_embedding_call = Embedding.__call__ + model = self + + def new_embedding_call(self, input_ids, *args, **kwargs): + inputs_embeds = old_embedding_call(self, input_ids, *args, **kwargs) + if model.transformer.wte is self: + assert inputs_embeds.ndim == 3 + inputs_embeds[:, :model.learned_embedding.size(0), :] = model.learned_embedding[None] + return inputs_embeds + + Embedding.__call__ = new_embedding_call + + try: + return super().forward( + input_ids=input_ids, + attention_mask=attention_mask, + labels=labels, + use_cache=use_cache, + return_dict=return_dict, + ) + finally: + Embedding.__call__ = old_embedding_call + +for k in dir(GPTPromptTuningMixin): + if not hasattr(UniversalPromptTuningMixin, k): + setattr(UniversalPromptTuningMixin, k, getattr(GPTPromptTuningMixin, k)) + + +class AutoPromptTuningLM(UniversalPromptTuningMixin, transformers.AutoModelForCausalLM): + pass From 168c14fd4c800a6530d3d4433a707e9b8a1d43cf Mon Sep 17 00:00:00 2001 From: vfbd Date: Sun, 24 Jul 2022 00:35:58 -0400 Subject: [PATCH 02/26] Better way to copy mkultra methods --- prompt_tuner.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 99320861..1ce3e210 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -78,8 +78,10 @@ class UniversalPromptTuningMixin: Embedding.__call__ = old_embedding_call for k in dir(GPTPromptTuningMixin): - if not hasattr(UniversalPromptTuningMixin, k): - setattr(UniversalPromptTuningMixin, k, getattr(GPTPromptTuningMixin, k)) + v = 
getattr(GPTPromptTuningMixin, k) + _v = getattr(UniversalPromptTuningMixin, k, None) + if _v is None or (_v is getattr(object, k, None) and callable(_v) and not isinstance(_v, type)): + setattr(UniversalPromptTuningMixin, k, v) class AutoPromptTuningLM(UniversalPromptTuningMixin, transformers.AutoModelForCausalLM): From 00e8928ee6a1040d89b8208554605e80532305e4 Mon Sep 17 00:00:00 2001 From: vfbd Date: Wed, 3 Aug 2022 15:57:23 -0400 Subject: [PATCH 03/26] Upload current progress --- prompt_tuner.py | 440 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 426 insertions(+), 14 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 1ce3e210..a35b12f1 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -1,24 +1,38 @@ +import abc +import os +import sys +import math +import numpy as np +import termcolor +import contextlib +import traceback +import random import torch import torch.nn.functional as F -from torch.nn import Embedding +from torch.nn import Embedding, CrossEntropyLoss import transformers -from mkultra.tuning import GPTPromptTuningMixin +from transformers import AutoTokenizer, GPT2TokenizerFast +from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM +from mkultra.soft_prompt import SoftPrompt +from typing import List, Optional, TextIO, Union +_PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel] + class _WTEMixin: @property - def wte(self): + def wte(self: Union["_WTEMixin", transformers.PreTrainedModel]): return self.get_input_embeddings() - + @wte.setter - def wte(self, v): + def wte(self: Union["_WTEMixin", transformers.PreTrainedModel], v): self.set_input_embeddings(v) class UniversalPromptTuningMixin: @classmethod - def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): - model = super().from_pretrained(pretrained_model_name_or_path, **kwargs) + def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs): + model: _PromptTuningPreTrainedModel = super().from_pretrained(pretrained_model_name_or_path, **kwargs) if not hasattr(model, "transformer"): model.transformer = _WTEMixin() @@ -35,12 +49,12 @@ class UniversalPromptTuningMixin: return model def forward( - self, - input_ids=None, - attention_mask=None, - labels=None, - use_cache=None, - return_dict=None, + self: _PromptTuningPreTrainedModel, + input_ids: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, + labels: Optional[torch.Tensor] = None, + use_cache: Optional[bool] = None, + return_dict: Optional[bool] = None, **kwargs, ): assert input_ids is not None @@ -85,4 +99,402 @@ for k in dir(GPTPromptTuningMixin): class AutoPromptTuningLM(UniversalPromptTuningMixin, transformers.AutoModelForCausalLM): - pass + def __init__(self, config): + super().__init__(config) + + +default_quiet = False + + +def get_tokenizer(model_id, revision=None) -> transformers.PreTrainedTokenizerBase: + if(os.path.isdir(model_id)): + try: + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") + except Exception as e: + pass + try: + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False) + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained(model_id, revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))): + try: + 
tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache") + except Exception as e: + pass + try: + tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False) + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + else: + try: + tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=revision, cache_dir="cache") + except Exception as e: + pass + try: + tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=revision, cache_dir="cache", use_fast=False) + except Exception as e: + try: + tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + + @contextlib.contextmanager + def _kai_no_prefix(): + add_bos_token = getattr(tokenizer, "add_bos_token", False) + add_prefix_space = getattr(tokenizer, "add_prefix_space", False) + tokenizer.add_bos_token = False + tokenizer.add_prefix_space = False + try: + yield + finally: + tokenizer.add_bos_token = add_bos_token + tokenizer.add_prefix_space = add_prefix_space + + tokenizer._kai_no_prefix = _kai_no_prefix + return tokenizer + + +class ConfigurationError(Exception): + def __init__(self, msg: str = "Unknown error", code: int = 1, quiet: Optional[bool] = None): + if quiet is None: + quiet = default_quiet + super().__init__(msg) + self.code = code + self.quiet = quiet + + +class TrainerBase(abc.ABC): + @abc.abstractmethod + def startup(self, step: int) -> None: + ... + + @abc.abstractmethod + def get_batch(self, step: int, size: int) -> np.ndarray: + ... + + @abc.abstractmethod + def get_num_sequences(self) -> int: + ... + + @abc.abstractmethod + def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt: + ... + + @abc.abstractmethod + def tokenize_dataset_callback(self, tokenizer: transformers.PreTrainedTokenizerBase, text: str) -> List[int]: + ... 
+ + class TrainerData: + def __init__(self): + self.__lazy_load_spec: Optional[dict] = None + self.model_spec: Optional[dict] = None + self.tokenizer_id: Optional[str] = None + self.newlinemode: Optional[str] = None + self.ckpt_path: Optional[str] = None + self.save_file: Optional[str] = None + self.params: Optional[dict] = None + self.stparams: Optional[dict] = None + self.gradient_accumulation_steps = -1 + self.soft_in_dim = -1 + self.prompt_method = "tokens" + self.prompt_seed = 42 + + @property + def lazy_load_spec(self): + print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr) + return self.__lazy_load_spec + + @lazy_load_spec.setter + def lazy_load_spec(self, value: Optional[dict]): + print("WARNING: `TrainerData.lazy_load_spec` is currently unused", file=sys.stderr) + self.__lazy_load_spec = value + + @property + def kaiming_size(self): # backwards compatibility + return self.soft_in_dim + + @kaiming_size.setter + def kaiming_size(self, value: int): # backwards compatibility + self.prompt_method = "kaiming" + self.soft_in_dim = value + + data: TrainerData + + def __init__(self, universe: Optional[int] = None, quiet=False): + self.quiet = quiet + self.universe = universe + self.data = self.TrainerData() + self._spmodule: Optional[str] = None + if universe is not None: + print("WARNING: The `universe` argument of `TrainerBase.__init__` is currently unused", file=sys.stderr) + + def raise_configuration_error(self, msg, **kwargs): + if "quiet" not in kwargs: + kwargs["quiet"] = self.quiet + raise ConfigurationError(msg, **kwargs) + + def get_hf_checkpoint_metadata(self) -> bool: + return True + + def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase: + return get_tokenizer(self.ckpt_path) + + def export_to_kobold(self, output_file: str, name: str, author: str, supported: str, description: str): + pass + + def export_to_mkultra(self, output_file: str, soft_prompt_name: str, soft_prompt_description: str): + pass + + def tokenize_dataset( + self, + dataset_path: Union[str, TextIO], + output_file: Union[str, TextIO], + batch_size=2048, + epochs=1, + use_ftfy=True, + shuffle_seed: Optional[Union[int, float, str, bytes, bytearray]] = 1729, + ): + dataset_path = dataset_path.replace("\\", "/") + output_file = output_file.replace("\\", "/") + if not isinstance(batch_size, int) or batch_size < 1: + self.raise_configuration_error( + "batch_size must be an integer greater than zero.", code=9 + ) + if ( + not isinstance(epochs, int) and not isinstance(epochs, float) + ) or epochs <= 0: + self.raise_configuration_error( + "epochs must be an int or float greater than zero.", code=10 + ) + if isinstance(output_file, str) and output_file.endswith("/"): + self.raise_configuration_error( + "output_file should be the path to a file, not a directory.", code=11 + ) + if isinstance(dataset_path, str) and not os.path.exists(dataset_path): + self.raise_configuration_error( + "dataset_path is not set to a valid file or directory.", code=12 + ) + + if use_ftfy: + import ftfy + + tokenizer = self.get_tokenizer() + + batch_size = min( + batch_size, + self.data.params["max_batch_size"] - self.data.soft_in_dim, + ) + assert batch_size >= 0 + print( + termcolor.colored( + "\nIf you see a warning somewhere below about token indices, ignore it. 
That warning is normal.\n", + "magenta", + ) + ) + print("Batch size:", batch_size) + print(termcolor.colored("Tokenizing your dataset...\n", "magenta")) + + if not isinstance(dataset_path, str): + files = [dataset_path] + elif os.path.isfile(dataset_path): + files = [dataset_path] + else: + files = sorted( + os.path.join(dataset_path, filename) + for filename in os.listdir(dataset_path) + ) + if shuffle_seed is not None: + random.Random(shuffle_seed).shuffle(files) + tokens = [] + eos = tokenizer.decode(self.data.params["eos_token"]) + for path in files: + if isinstance(path, str): + f = open(path) + else: + f = path + try: + text = f.read() + if use_ftfy: + text = ftfy.fix_text(text) + text = text.replace("<|endoftext|>", eos) + tokens.extend(self.tokenize_dataset_callback(tokenizer, text)) + finally: + if isinstance(path, str): + f.close() + + print("Dataset size (in tokens):", len(tokens)) + if len(tokens) < batch_size + 1: + self.raise_configuration_error( + "Your dataset is too small! The number of tokens has to be greater than the batch size. Try increasing the epochs.", + code=13, + ) + tail = len(tokens) % (batch_size + 1) + if tail: + print( + f"We're removing the last {tail} tokens from your dataset to make the length a multiple of {batch_size+1}." + ) + tokens = tokens[:-tail] + + tokens = np.array(tokens, dtype=np.uint16).reshape((-1, batch_size + 1)) + sequences_per_epoch = tokens.shape[0] + _epochs = math.ceil(epochs) + if _epochs > 1: + rng = np.random.Generator(np.random.PCG64(1729)) + tokens = np.concatenate( + ( + tokens, + *(rng.permutation(tokens, axis=0) for i in range(_epochs - 1)), + ), + axis=0, + ) + tokens = tokens[: math.ceil(epochs * sequences_per_epoch)] + print(f"Total sequences in your dataset: {tokens.shape[0]}") + + if isinstance(output_file, str): + f = open(output_file, "w") + else: + f = output_file + try: + np.save(output_file, tokens) + finally: + if isinstance(output_file, str): + f.close() + + def train(self): + if self.data.params is not None and "max_batch_size" not in self.data.params: + self.data.params["max_batch_size"] = 2048 + + if not os.path.exists(self.data.save_file): + print("We are starting a brand new soft-tuning session.\n") + self.startup(step=-1) + if self.data.soft_in_dim <= 0: + self.raise_configuration_error( + "You have not set a soft prompt size.", code=6 + ) + else: + # If we're resuming a soft-tuning session, the soft prompt tensor is + # already in the save file and we just have to decode it. 
+ try: + z = torch.load(self.data.save_file) + assert z["step"] > 0 + assert z["tensor"].ndim == 2 and "opt_state" in z + assert z["tensor"].shape[0] < self.data.params["max_batch_size"] + self.data.soft_in_dim = z["tensor"].shape[0] + step = z["step"] + opt_state = z["opt_state"] + except AssertionError: + self.raise_configuration_error("MTJSP file is corrupted.", code=14) + print(f"We're resuming a previous soft-tuning session at step {step+1}.\n") + self.startup(step=step + 1) + soft_embeddings = z["tensor"] + + REVISION = None + + tokenizer = self.get_tokenizer() + model: _PromptTuningPreTrainedModel + + if(os.path.isdir(self.data.ckpt_path)): + try: + model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") + model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))): + try: + model = AutoPromptTuningLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") + model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=REVISION, cache_dir="cache") + else: + try: + model = AutoPromptTuningLM.from_pretrained(vars.model, revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") + model = GPTNeoPromptTuningLM.from_pretrained(vars.model, revision=REVISION, cache_dir="cache") + + if step == 0: + soft_embeddings = self.get_initial_soft_embeddings(model) + else: + soft_embeddings = SoftPrompt.from_inputs_embeds(soft_embeddings) + model.set_soft_prompt(soft_embeddings) + + steps = self.get_num_sequences() // self.data.gradient_accumulation_steps + warmup_steps = max(1, round(steps * self.data.stparams["warmup"])) + + beta1: Optional[float] = self.data.stparams.get("beta1", 0.0) + if beta1 == 0.0: + beta1 = None + optimizer = transformers.Adafactor( + params=model.get_soft_params(), + scale_parameter=False, + relative_step=False, + warmup_init=False, + lr=self.data.stparams["lr"], + beta1=beta1, + decay_rate=self.data.stparams.get("decay_rate", -0.8), + weight_decay=self.data.stparams.get("weight_decay", 0.1), + ) + if step != 0: + optimizer.load_state_dict(opt_state) + scheduler = transformers.get_cosine_with_hard_restarts_schedule_with_warmup( + optimizer=optimizer, + num_warmup_steps=warmup_steps, + num_training_steps=steps - warmup_steps, + num_cycles=(steps - warmup_steps) // self.data.stparams.get("training_steps_per_cycle", 56), + ) + + torch.cuda.empty_cache() + optimizer.state['step'] = step + cross_entropy_loss = CrossEntropyLoss() + + while step < steps: + model.train() + + total_loss = total_grad = total_grad_norm = 0 + + for i in range(self.data.gradient_accumulation_steps): + # Get the next sequence from the dataset + block = self.get_batch(step, self.data.gradient_accumulation_steps).to(model.transformer.wte.weight.device) + + # input_ids is the context to the model (without the soft prompt) and labels is what we 
expect the model to generate (the -100s represent soft prompt tokens for which loss is not calculated) + input_ids = block[:-1].unsqueeze(0).detach() + labels = torch.cat((torch.full((model.get_soft_params().size(0) - 1,), -100, device=block.device), block)).unsqueeze(0).cuda().detach() + + # Give the context to the model and compare the model's output logits with the labels to compute the loss + logits = model(input_ids=input_ids, labels=input_ids).logits + loss: torch.Tensor = cross_entropy_loss(logits.view(-1, model.transformer.wte.weight.size(1)), labels.view(-1)) + total_loss += loss.detach() + + # Compute the gradient of the loss function and add it to model.get_soft_params().grad (model.get_soft_params().grad += gradient) + loss.backward() + + total_grad_norm += torch.linalg.norm(model.get_soft_params().grad.detach() - total_grad) + total_grad = model.get_soft_params().grad.detach() + + del input_ids + del labels + del logits + torch.cuda.empty_cache() + + mean_loss = (total_loss / self.data.gradient_accumulation_steps).item() + mean_grad_norm = (total_grad_norm / self.data.gradient_accumulation_steps).item() + + # Apply the optimization algorithm using the accumulated gradients, which changes the contents of the soft prompt matrix very slightly to reduce the loss + optimizer.step() + lr = optimizer.param_groups[0]["lr"] + scheduler.step() + optimizer.zero_grad() + + # Save checkpoint every few steps + pass + + step += 1 From 728e19a7f078550703751b392c6a7c79c08b7137 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 16:29:39 -0400 Subject: [PATCH 04/26] Implement file saving in prompt_tuner.py --- prompt_tuner.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 5 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index a35b12f1..fbecb4c4 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -7,6 +7,12 @@ import termcolor import contextlib import traceback import random +import zipfile +import json +import uuid +import datetime +import base64 +import pickle import torch import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss @@ -244,12 +250,72 @@ class TrainerBase(abc.ABC): def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase: return get_tokenizer(self.ckpt_path) + + def save_data(self): + pass def export_to_kobold(self, output_file: str, name: str, author: str, supported: str, description: str): - pass + try: + z = torch.load(self.data.save_file) + assert z["step"] > 0 + assert z["tensor"].ndim == 2 and "opt_state" in z + assert z["tensor"].shape[0] < self.data.params["max_batch_size"] + self.data.soft_in_dim = z["tensor"].shape[0] + except AssertionError: + self.raise_configuration_error("MTJSP file is corrupted.", code=14) + + tensor = z["tensor"] + + meta = { + "name": name, + "author": author, + "supported": supported, + "description": description, + } + if len(meta["author"].strip()) == 0: + meta.pop("author") + meta["supported"] = list(map(lambda m: m.strip(), supported.split(","))) + + with zipfile.ZipFile(output_file, "w", compression=zipfile.ZIP_LZMA) as z: + with z.open("tensor.npy", "w") as f: + np.save(f, tensor, allow_pickle=False) + with zipfile.ZipFile(output_file, "a", compression=zipfile.ZIP_STORED) as z: + with z.open("meta.json", "w") as f: + f.write(json.dumps(meta, indent=2).encode("utf-8")) def export_to_mkultra(self, output_file: str, soft_prompt_name: str, soft_prompt_description: str): - pass + try: + z = torch.load(self.data.save_file) + assert z["step"] > 0 + 
assert z["tensor"].ndim == 2 and "opt_state" in z + assert z["tensor"].shape[0] < self.data.params["max_batch_size"] + self.data.soft_in_dim = z["tensor"].shape[0] + _step = z["step"] + except AssertionError: + self.raise_configuration_error("MTJSP file is corrupted.", code=14) + + tensor = z["tensor"] + + with open(output_file, "w") as f: + json.dump( + { + "metadata": { + "step": _step, + "loss": float(z["loss"].item()), + "uuid": str(uuid.uuid4()), + "name": soft_prompt_name, + "description": soft_prompt_description, + "epoch": datetime.datetime.now().timestamp(), + }, + "tensor": base64.b64encode( + pickle.dumps( + tensor, + protocol=4, + ), + ).decode("ascii"), + }, + f, + ) def tokenize_dataset( self, @@ -456,6 +522,23 @@ class TrainerBase(abc.ABC): optimizer.state['step'] = step cross_entropy_loss = CrossEntropyLoss() + def save_mkusp( + loss, + grad_norm, + ): + with open(self.data.save_file, "wb") as f: + torch.save( + { + "tensor": soft_embeddings.get_inputs_embeds(), + "opt_state": optimizer.state_dict(), + "step": step, + "loss": loss, + "grad_norm": grad_norm, + }, + f, + ) + self.save_data() + while step < steps: model.train() @@ -494,7 +577,8 @@ class TrainerBase(abc.ABC): scheduler.step() optimizer.zero_grad() - # Save checkpoint every few steps - pass - step += 1 + + # Save checkpoint every few steps + if step == 1 or step % self.data.stparams["save_every"] == 0: + save_mkusp(mean_loss, mean_grad_norm) From 05cf9b1dded8925e6b39bb81cc878e686c3befab Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 16:43:02 -0400 Subject: [PATCH 05/26] Upload BasicTrainer class --- prompt_tuner.py | 87 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/prompt_tuner.py b/prompt_tuner.py index fbecb4c4..f172c6a1 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -13,6 +13,8 @@ import uuid import datetime import base64 import pickle +import hashlib +import itertools import torch import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss @@ -582,3 +584,88 @@ class TrainerBase(abc.ABC): # Save checkpoint every few steps if step == 1 or step % self.data.stparams["save_every"] == 0: save_mkusp(mean_loss, mean_grad_norm) + + +class BasicTrainer(TrainerBase): + class TrainerData(TrainerBase.TrainerData): + def __init__(self): + super().__init__() + self.dataset_file: Optional[str] = None + self.initial_softprompt: Optional[List[int]] = None + + data: "BasicTrainer.TrainerData" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.dataset: Optional[np.ndarray] = None + + def startup(self, step: int) -> None: + if self.get_num_sequences() < self.data.gradient_accumulation_steps: + self.raise_configuration_error( + "Your dataset is too small! 
gradient_accumulation_steps must be less than or equal to the number of sequences.", + code=101, + ) + if ( + self.data.prompt_method == "tokens" + and step < 0 + and self.data.initial_softprompt is None + ): + self.raise_configuration_error( + "You have not set an initial soft prompt string.", code=103 + ) + if self.data.prompt_method == "tokens" and step < 0: + self.data.soft_in_dim = len(self.data.initial_softprompt) + + def get_batch(self, step: int, size: int) -> np.ndarray: + return self.dataset[(step - 1) * size : step * size] + + def get_num_sequences(self) -> int: + if self.dataset is None: + if self.data.dataset_file is None or not os.path.exists( + self.data.dataset_file + ): + self.raise_configuration_error( + f"Dataset file not found at {repr(self.data.dataset_file)}", + code=102, + ) + self.dataset = np.load(self.data.dataset_file, mmap_mode="r") + assert self.dataset.ndim >= 2 + assert self.dataset.shape[0] >= 2 + return self.dataset.shape[0] + + def get_initial_soft_embeddings(self, model: transformers.PreTrainedModel) -> SoftPrompt: + if self.data.prompt_method == "vocab_sample": + rng = np.random.Generator( + np.random.PCG64( + [ + self.data.prompt_seed, + int.from_bytes(hashlib.sha256(model.config.model_type.encode("utf8")).digest()[:4], "little"), + ] + ) + ) + tokenizer = self.get_tokenizer() + with tokenizer._kai_no_prefix(): + special_tokens = set( + itertools.chain.from_iterable( + tokenizer.encode(str(v)) + for v in tokenizer.special_tokens_map_extended.values() + ) + ) + sample_space = [ + k for k in range(self.data.params["n_vocab"]) if k not in special_tokens + ] + sample = rng.choice(sample_space, self.data.soft_in_dim, False) + return model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32)) + elif self.data.prompt_method == "tokens": + return model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32)) + self.raise_configuration_error( + f"Unknown prompt method {repr(self.data.prompt_method)}", code=104 + ) + + def tokenize_dataset_callback( + self, tokenizer: transformers.PreTrainedTokenizerBase, text: str + ) -> List[int]: + if self.data.newlinemode == "s": + text = text.replace("\n", "") + with tokenizer._kai_no_prefix(): + return tokenizer.encode(text) + self.data.params["eos_token"] From a49a63316489d6441e84678a44d7db2fd1581ebd Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 16:46:39 -0400 Subject: [PATCH 06/26] `self.ckpt_path` -> `self.data.ckpt_path` --- prompt_tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index f172c6a1..2fbd6ee2 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -251,7 +251,7 @@ class TrainerBase(abc.ABC): return True def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase: - return get_tokenizer(self.ckpt_path) + return get_tokenizer(self.data.ckpt_path) def save_data(self): pass From f79926b73d9a731208b5f67601e263aa848c41a9 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 16:51:09 -0400 Subject: [PATCH 07/26] Fix some more typos in prompt_tuner.py --- prompt_tuner.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 2fbd6ee2..ea0efd3b 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -127,28 +127,28 @@ def get_tokenizer(model_id, revision=None) -> transformers.PreTrainedTokenizerBa tokenizer = GPT2TokenizerFast.from_pretrained(model_id, revision=revision, cache_dir="cache") except Exception as e: tokenizer = 
GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") - elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))): + elif(os.path.isdir("models/{}".format(model_id.replace('/', '_')))): try: - tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache") + tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache") except Exception as e: pass try: - tokenizer = AutoTokenizer.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False) + tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False) except Exception as e: try: - tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=revision, cache_dir="cache") + tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache") except Exception as e: tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") else: try: - tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=revision, cache_dir="cache") + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") except Exception as e: pass try: - tokenizer = AutoTokenizer.from_pretrained(vars.model, revision=revision, cache_dir="cache", use_fast=False) + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False) except Exception as e: try: - tokenizer = GPT2TokenizerFast.from_pretrained(vars.model, revision=revision, cache_dir="cache") + tokenizer = GPT2TokenizerFast.from_pretrained(model_id, revision=revision, cache_dir="cache") except Exception as e: tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") @@ -474,20 +474,20 @@ class TrainerBase(abc.ABC): if("out of memory" in traceback.format_exc().lower()): raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") - elif(os.path.isdir("models/{}".format(vars.model.replace('/', '_')))): + elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))): try: - model = AutoPromptTuningLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=REVISION, cache_dir="cache") + model = AutoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") except Exception as e: if("out of memory" in traceback.format_exc().lower()): raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") - model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(vars.model.replace('/', '_')), revision=REVISION, cache_dir="cache") + model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") else: try: - model = AutoPromptTuningLM.from_pretrained(vars.model, revision=REVISION, cache_dir="cache") + model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") except Exception as e: if("out of memory" in traceback.format_exc().lower()): raise RuntimeError("One of your GPUs ran out of memory 
when KoboldAI tried to load your model.") - model = GPTNeoPromptTuningLM.from_pretrained(vars.model, revision=REVISION, cache_dir="cache") + model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") if step == 0: soft_embeddings = self.get_initial_soft_embeddings(model) From 584056b6d5375e250c8c400e7086f3157638a31c Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 17:30:49 -0400 Subject: [PATCH 08/26] Fix remaining problems in prompt_tuner.py --- prompt_tuner.py | 64 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index ea0efd3b..a958f882 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -15,11 +15,12 @@ import base64 import pickle import hashlib import itertools +from tqdm.auto import tqdm import torch import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss import transformers -from transformers import AutoTokenizer, GPT2TokenizerFast +from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM from mkultra.soft_prompt import SoftPrompt from typing import List, Optional, TextIO, Union @@ -27,6 +28,18 @@ from typing import List, Optional, TextIO, Union _PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel] +class _WTEDummy: + def __init__(self, model: transformers.PreTrainedModel): + self.model = model + + @property + def wte(self: "_WTEDummy"): + return self.model.get_input_embeddings() + + @wte.setter + def wte(self: "_WTEDummy", v): + self.model.set_input_embeddings(v) + class _WTEMixin: @property def wte(self: Union["_WTEMixin", transformers.PreTrainedModel]): @@ -43,7 +56,7 @@ class UniversalPromptTuningMixin: model: _PromptTuningPreTrainedModel = super().from_pretrained(pretrained_model_name_or_path, **kwargs) if not hasattr(model, "transformer"): - model.transformer = _WTEMixin() + model.transformer = _WTEDummy(model) elif not hasattr(model.transformer, "wte"): assert isinstance(model.transformer, type) model.transformer.__class__ = type("_UniversalPromptTuning" + model.transformer.__class__.__name__, (_WTEMixin, model.transformer.__class__), {}) @@ -248,6 +261,26 @@ class TrainerBase(abc.ABC): raise ConfigurationError(msg, **kwargs) def get_hf_checkpoint_metadata(self) -> bool: + REVISION = None + params = {} + if(os.path.isdir(self.data.ckpt_path)): + model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))): + model_config = AutoConfig.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") + else: + model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + params["tokenizer_id"] = self.data.ckpt_path + tokenizer = get_tokenizer(self.data.ckpt_path) + params["newlinemode"] = params.get( + "newlinemode", "s" if model_config.model_type == "xglm" else "n" + ) + params["max_batch_size"] = 2048 + with tokenizer._kai_no_prefix(): + params["eos_token"] = ( + [50259, 50259] if model_config.model_type == "xglm" and model_config.eos_token_id == 50259 else tokenizer.encode(model_config.eos_token_id) + ) + params["seq"] = 2048 + self.data.params = params return True def get_tokenizer(self) -> transformers.PreTrainedTokenizerBase: @@ -445,6 +478,7 @@ 
class TrainerBase(abc.ABC): self.raise_configuration_error( "You have not set a soft prompt size.", code=6 ) + step = 0 else: # If we're resuming a soft-tuning session, the soft prompt tensor is # already in the save file and we just have to decode it. @@ -502,7 +536,7 @@ class TrainerBase(abc.ABC): if beta1 == 0.0: beta1 = None optimizer = transformers.Adafactor( - params=model.get_soft_params(), + params=(model.get_soft_params(),), scale_parameter=False, relative_step=False, warmup_init=False, @@ -540,19 +574,22 @@ class TrainerBase(abc.ABC): f, ) self.save_data() + + bar1 = tqdm(initial=step + 1, total=steps, desc="CURRENT TRAINING STEP") while step < steps: + step += 1 model.train() total_loss = total_grad = total_grad_norm = 0 - for i in range(self.data.gradient_accumulation_steps): - # Get the next sequence from the dataset - block = self.get_batch(step, self.data.gradient_accumulation_steps).to(model.transformer.wte.weight.device) + # Get the next sequences from the dataset + block = torch.tensor(np.int32(self.get_batch(step, self.data.gradient_accumulation_steps))).to(model.transformer.wte.weight.device) + for sequence in tqdm(block, desc="GRADIENT ACCUMULATION", leave=False): # input_ids is the context to the model (without the soft prompt) and labels is what we expect the model to generate (the -100s represent soft prompt tokens for which loss is not calculated) - input_ids = block[:-1].unsqueeze(0).detach() - labels = torch.cat((torch.full((model.get_soft_params().size(0) - 1,), -100, device=block.device), block)).unsqueeze(0).cuda().detach() + input_ids = sequence[:-1].unsqueeze(0).detach() + labels = torch.cat((torch.full((model.get_soft_params().size(0) - 1,), -100, device=sequence.device), sequence), dim=-1).unsqueeze(0).detach() # Give the context to the model and compare the model's output logits with the labels to compute the loss logits = model(input_ids=input_ids, labels=input_ids).logits @@ -579,12 +616,13 @@ class TrainerBase(abc.ABC): scheduler.step() optimizer.zero_grad() - step += 1 - # Save checkpoint every few steps if step == 1 or step % self.data.stparams["save_every"] == 0: save_mkusp(mean_loss, mean_grad_norm) + bar1.set_postfix({"loss": mean_loss, "grad_norm": mean_grad_norm, "learning_rate": lr}) + bar1.update() + class BasicTrainer(TrainerBase): class TrainerData(TrainerBase.TrainerData): @@ -652,12 +690,12 @@ class BasicTrainer(TrainerBase): ) ) sample_space = [ - k for k in range(self.data.params["n_vocab"]) if k not in special_tokens + k for k in range(model.get_input_embeddings().weight.shape[-2]) if k not in special_tokens ] sample = rng.choice(sample_space, self.data.soft_in_dim, False) - return model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32)) + return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32))) elif self.data.prompt_method == "tokens": - return model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32)) + return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32))) self.raise_configuration_error( f"Unknown prompt method {repr(self.data.prompt_method)}", code=104 ) From 3d5c83fc2348584448eeb4d075e7fad481e5b63a Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 19:29:20 -0400 Subject: [PATCH 09/26] prompt_tuner.py now uses lazy loader and accelerate --- aiserver.py | 11 +- prompt_tuner.py | 437 +++++++++++++++++++++++++++++++++++++++++++++--- utils.py | 3 + 3 files 
changed, 424 insertions(+), 27 deletions(-) diff --git a/aiserver.py b/aiserver.py index ef785313..d6c3754f 100644 --- a/aiserver.py +++ b/aiserver.py @@ -429,6 +429,7 @@ def emit(*args, **kwargs): return _emit(*args, **kwargs) except AttributeError: return socketio.emit(*args, **kwargs) +utils.emit = emit # marshmallow/apispec setup from apispec import APISpec @@ -1311,6 +1312,8 @@ def general_startup(override_args=None): args = parser.parse_args(shlex.split(os.environ["KOBOLDAI_ARGS"])) else: args = parser.parse_args() + + utils.args = args if args.customsettings: f = open (args.customsettings) @@ -1648,7 +1651,9 @@ def patch_transformers(): if not args.no_aria2: utils.aria2_hook(pretrained_model_name_or_path, **kwargs) return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) - PreTrainedModel.from_pretrained = new_from_pretrained + if(not hasattr(PreTrainedModel, "_kai_patched")): + PreTrainedModel.from_pretrained = new_from_pretrained + PreTrainedModel._kai_patched = True if(hasattr(modeling_utils, "get_checkpoint_shard_files")): old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs): @@ -2490,7 +2495,9 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if not args.no_aria2: utils.aria2_hook(pretrained_model_name_or_path, **kwargs) return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) - PreTrainedModel.from_pretrained = new_from_pretrained + if(not hasattr(PreTrainedModel, "_kai_patched")): + PreTrainedModel.from_pretrained = new_from_pretrained + PreTrainedModel._kai_patched = True if(hasattr(modeling_utils, "get_checkpoint_shard_files")): old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs): diff --git a/prompt_tuner.py b/prompt_tuner.py index a958f882..48d3fcca 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -15,15 +15,249 @@ import base64 import pickle import hashlib import itertools +import functools +import bisect +import eventlet +import packaging +import gc +import time from tqdm.auto import tqdm import torch import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss import transformers -from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig +from transformers import __version__ as transformers_version +from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils, GPTNeoModel, GPTJModel +import accelerate +import accelerate.utils from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM from mkultra.soft_prompt import SoftPrompt -from typing import List, Optional, TextIO, Union +from typing import Dict, List, Optional, TextIO, Union + +try: + from transformers import XGLMModel +except: + pass +try: + from transformers.models.opt.modeling_opt import OPTDecoder +except: + pass + +import breakmodel +import torch_lazy_loader +import utils + +USE_BREAKMODEL = True + + +class Send_to_socketio(object): + def write(self, bar): + print(bar, end="") + time.sleep(0.01) + try: + if utils.emit is not None: + utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", " ")}, broadcast=True) + except: + pass + +def patch_transformers_download(): + global transformers + import copy, requests, tqdm, time + class 
Send_to_socketio(object): + def write(self, bar): + bar = bar.replace("\r", "").replace("\n", "") + if bar != "": + try: + print(bar, end="\r") + if utils.emit is not None: + utils.emit('from_server', {'cmd': 'model_load_status', 'data': bar.replace(" ", " ")}, broadcast=True) + eventlet.sleep(seconds=0) + except: + pass + def http_get( + url: str, + temp_file: transformers.utils.hub.BinaryIO, + proxies=None, + resume_size=0, + headers: transformers.utils.hub.Optional[transformers.utils.hub.Dict[str, str]] = None, + file_name: transformers.utils.hub.Optional[str] = None, + ): + """ + Download remote file. Do not gobble up errors. + """ + headers = copy.deepcopy(headers) + if resume_size > 0: + headers["Range"] = f"bytes={resume_size}-" + r = requests.get(url, stream=True, proxies=proxies, headers=headers) + transformers.utils.hub._raise_for_status(r) + content_length = r.headers.get("Content-Length") + total = resume_size + int(content_length) if content_length is not None else None + # `tqdm` behavior is determined by `utils.logging.is_progress_bar_enabled()` + # and can be set using `utils.logging.enable/disable_progress_bar()` + if url[-11:] != 'config.json': + progress = tqdm.tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + total=total, + initial=resume_size, + desc=f"Downloading {file_name}" if file_name is not None else "Downloading", + file=Send_to_socketio(), + ) + for chunk in r.iter_content(chunk_size=1024): + if chunk: # filter out keep-alive new chunks + if url[-11:] != 'config.json': + progress.update(len(chunk)) + temp_file.write(chunk) + if url[-11:] != 'config.json': + progress.close() + + transformers.utils.hub.http_get = http_get + + +def patch_transformers(): + global transformers + + patch_transformers_download() + + old_from_pretrained = PreTrainedModel.from_pretrained.__func__ + @classmethod + def new_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs): + utils.num_shards = None + utils.current_shard = 0 + utils.from_pretrained_model_name = pretrained_model_name_or_path + utils.from_pretrained_index_filename = None + utils.from_pretrained_kwargs = kwargs + utils.bar = None + if utils.args is None or not utils.args.no_aria2: + utils.aria2_hook(pretrained_model_name_or_path, **kwargs) + return old_from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs) + if(not hasattr(PreTrainedModel, "_kai_patched")): + PreTrainedModel.from_pretrained = new_from_pretrained + PreTrainedModel._kai_patched = True + if(hasattr(modeling_utils, "get_checkpoint_shard_files")): + old_get_checkpoint_shard_files = modeling_utils.get_checkpoint_shard_files + def new_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs): + utils.num_shards = utils.get_num_shards(index_filename) + utils.from_pretrained_index_filename = index_filename + return old_get_checkpoint_shard_files(pretrained_model_name_or_path, index_filename, *args, **kwargs) + modeling_utils.get_checkpoint_shard_files = new_get_checkpoint_shard_files + + # Some versions of transformers 4.17.0.dev0 are affected by + # https://github.com/huggingface/transformers/issues/15736 + # This is a workaround for those versions of transformers. 
+ if(transformers_version == "4.17.0.dev0"): + try: + from transformers.models.xglm.modeling_xglm import XGLMSinusoidalPositionalEmbedding + except ImportError: + pass + else: + @torch.no_grad() + def new_forward(self, input_ids: torch.Tensor = None, inputs_embeds: torch.Tensor = None, past_key_values_length: int = 0): + bsz, seq_len = inputs_embeds.size()[:-1] + input_shape = inputs_embeds.size()[:-1] + sequence_length = input_shape[1] + position_ids = torch.arange( + past_key_values_length + self.padding_idx + 1, past_key_values_length + sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device + ).unsqueeze(0).expand(input_shape).contiguous() + max_pos = self.padding_idx + 1 + seq_len + past_key_values_length + if max_pos > self.weights.size(0): + self.make_weights(max_pos + self.offset, self.embedding_dim, self.padding_idx) + return self.weights.index_select(0, position_ids.view(-1)).view(bsz, seq_len, -1).detach() + XGLMSinusoidalPositionalEmbedding.forward = new_forward + + + # Fix a bug in OPTForCausalLM where self.lm_head is the wrong size + if(packaging.version.parse("4.19.0.dev0") <= packaging.version.parse(transformers_version) < packaging.version.parse("4.20.0")): + try: + from transformers import OPTForCausalLM, OPTModel + except ImportError: + pass + else: + # This is the same as the original __init__ but with + # config.hidden_size + # replaced with + # config.word_embed_proj_dim + def new_init(self, config): + super(OPTForCausalLM, self).__init__(config) + self.model = OPTModel(config) + self.lm_head = torch.nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False) + self.post_init() + OPTForCausalLM.__init__ = new_init + + +def move_model_to_devices(model, usegpu, gpu_device): + global generator + + if(not utils.HAS_ACCELERATE and not USE_BREAKMODEL): + if(usegpu): + model = model.half().to(gpu_device) + else: + model = model.to('cpu').float() + generator = model.generate + return + + import breakmodel + + if(utils.HAS_ACCELERATE): + import accelerate.utils + for key, value in model.state_dict().items(): + target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16 + if(value.dtype is not target_dtype): + accelerate.utils.set_module_tensor_to_device(model, key, target_dtype) + disk_blocks = breakmodel.disk_blocks + gpu_blocks = breakmodel.gpu_blocks + ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + device_map = {} + for name in utils.layers_module_names: + layer = int(name.rsplit(".", 1)[1]) + device = ("disk" if layer < disk_blocks else "cpu") if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) + device_map[name] = device + for name in utils.get_missing_module_names(model, list(device_map.keys())): + device_map[name] = breakmodel.primary_device + breakmodel.dispatch_model_ex(model, device_map, main_device=breakmodel.primary_device, offload_buffers=True, offload_dir="accelerate-disk-cache") + gc.collect() + generator = model.generate + return + + model.half() + gc.collect() + + if(hasattr(model, "transformer")): + model.transformer.wte.to(breakmodel.primary_device) + model.transformer.ln_f.to(breakmodel.primary_device) + if(hasattr(model, 'lm_head')): + model.lm_head.to(breakmodel.primary_device) + if(hasattr(model.transformer, 'wpe')): + model.transformer.wpe.to(breakmodel.primary_device) + elif(not hasattr(model.model, "decoder")): + model.model.embed_tokens.to(breakmodel.primary_device) + 
model.model.layer_norm.to(breakmodel.primary_device) + model.lm_head.to(breakmodel.primary_device) + model.model.embed_positions.to(breakmodel.primary_device) + else: + model.model.decoder.embed_tokens.to(breakmodel.primary_device) + if(model.model.decoder.project_in is not None): + model.model.decoder.project_in.to(breakmodel.primary_device) + if(model.model.decoder.project_out is not None): + model.model.decoder.project_out.to(breakmodel.primary_device) + model.model.decoder.embed_positions.to(breakmodel.primary_device) + gc.collect() + GPTNeoModel.forward = breakmodel.new_forward_neo + if("GPTJModel" in globals()): + GPTJModel.forward = breakmodel.new_forward_neo # type: ignore + if("XGLMModel" in globals()): + XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore + if("OPTDecoder" in globals()): + OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore + generator = model.generate + if(hasattr(model, "transformer")): + breakmodel.move_hidden_layers(model.transformer) + elif(not hasattr(model.model, "decoder")): + breakmodel.move_hidden_layers(model.model, model.model.layers) + else: + breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers) _PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel] @@ -259,16 +493,20 @@ class TrainerBase(abc.ABC): if "quiet" not in kwargs: kwargs["quiet"] = self.quiet raise ConfigurationError(msg, **kwargs) - - def get_hf_checkpoint_metadata(self) -> bool: + + def _get_model_config(self) -> transformers.configuration_utils.PretrainedConfig: REVISION = None - params = {} if(os.path.isdir(self.data.ckpt_path)): model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))): model_config = AutoConfig.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") else: model_config = AutoConfig.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + return model_config + + def get_hf_checkpoint_metadata(self) -> bool: + params = {} + model_config = self._get_model_config() params["tokenizer_id"] = self.data.ckpt_path tokenizer = get_tokenizer(self.data.ckpt_path) params["newlinemode"] = params.get( @@ -467,7 +705,17 @@ class TrainerBase(abc.ABC): if isinstance(output_file, str): f.close() - def train(self): + def train( + self, + breakmodel_primary_device: Optional[Union[str, int, torch.device]] = None, + breakmodel_gpulayers: Optional[List[int]] = None, + breakmodel_disklayers = 0, + ): + if breakmodel_gpulayers is None: + breakmodel_gpulayers = [] + if breakmodel_primary_device is None: + breakmodel_primary_device = 0 if breakmodel_gpulayers else "cpu" + if self.data.params is not None and "max_batch_size" not in self.data.params: self.data.params["max_batch_size"] = 2048 @@ -498,30 +746,169 @@ class TrainerBase(abc.ABC): REVISION = None - tokenizer = self.get_tokenizer() + patch_transformers() + model: _PromptTuningPreTrainedModel - if(os.path.isdir(self.data.ckpt_path)): + model_config = self._get_model_config() + n_layers = utils.num_layers(model_config) + convert_to_float16 = True + hascuda = torch.cuda.is_available() + usegpu = not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers + gpu_device = breakmodel_primary_device + + breakmodel.disk_blocks = breakmodel_disklayers + disk_blocks = breakmodel.disk_blocks + gpu_blocks 
= breakmodel.gpu_blocks + ram_blocks = ram_blocks = n_layers - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + + def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_): + if lazy_load_callback.nested: + return + lazy_load_callback.nested = True + + device_map: Dict[str, Union[str, int]] = {} + + @functools.lru_cache(maxsize=None) + def get_original_key(key): + return max((original_key for original_key in utils.module_names if original_key.endswith(key)), key=len) + + for key, value in model_dict.items(): + original_key = get_original_key(key) + if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names): + device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not USE_BREAKMODEL else breakmodel.primary_device + else: + layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1]) + device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not USE_BREAKMODEL else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) + device_map[key] = device + + if utils.num_shards is None or utils.current_shard == 0: + utils.offload_index = {} + if utils.HAS_ACCELERATE: + if os.path.isdir("accelerate-disk-cache"): + # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder + # (the folder doesn't contain any subfolders so os.remove will do just fine) + for filename in os.listdir("accelerate-disk-cache"): + try: + os.remove(os.path.join("accelerate-disk-cache", filename)) + except OSError: + pass + os.makedirs("accelerate-disk-cache", exist_ok=True) + if utils.num_shards is not None: + num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs)) + else: + num_tensors = len(device_map) + print(flush=True) + utils.bar = tqdm(total=num_tensors, desc="Loading model tensors", file=Send_to_socketio()) + + with zipfile.ZipFile(f, "r") as z: + try: + last_storage_key = None + f = None + current_offset = 0 + able_to_pin_layers = True + if utils.num_shards is not None: + utils.current_shard += 1 + for key in sorted(device_map.keys(), key=lambda k: (model_dict[k].key, model_dict[k].seek_offset)): + storage_key = model_dict[key].key + if storage_key != last_storage_key or model_dict[key].seek_offset < current_offset: + last_storage_key = storage_key + if isinstance(f, zipfile.ZipExtFile): + f.close() + f = z.open(f"archive/data/{storage_key}") + current_offset = 0 + if current_offset != model_dict[key].seek_offset: + f.read(model_dict[key].seek_offset - current_offset) + current_offset = model_dict[key].seek_offset + device = device_map[key] + size = functools.reduce(lambda x, y: x * y, model_dict[key].shape, 1) + dtype = model_dict[key].dtype + nbytes = size if dtype is torch.bool else size * ((torch.finfo if dtype.is_floating_point else torch.iinfo)(dtype).bits >> 3) + #print(f"Transferring <{key}> to {f'({device.upper()})' if isinstance(device, str) else '[device ' + str(device) + ']'} ... 
", end="", flush=True) + model_dict[key] = model_dict[key].materialize(f, map_location="cpu") + # if model_dict[key].dtype is torch.float32: + # fp32_model = True + if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (USE_BREAKMODEL or usegpu) and model_dict[key].dtype is torch.float32: + model_dict[key] = model_dict[key].to(torch.float16) + if breakmodel.primary_device == "cpu" or (not usegpu and not USE_BREAKMODEL and model_dict[key].dtype is torch.float16): + model_dict[key] = model_dict[key].to(torch.float32) + if device == "shared": + model_dict[key] = model_dict[key].to("cpu").detach_() + if able_to_pin_layers and utils.HAS_ACCELERATE: + try: + model_dict[key] = model_dict[key].pin_memory() + except: + able_to_pin_layers = False + elif device == "disk": + accelerate.utils.offload_weight(model_dict[key], get_original_key(key), "accelerate-disk-cache", index=utils.offload_index) + model_dict[key] = model_dict[key].to("meta") + else: + model_dict[key] = model_dict[key].to(device) + #print("OK", flush=True) + current_offset += nbytes + utils.bar.update(1) + finally: + if utils.num_shards is None or utils.current_shard >= utils.num_shards: + if utils.offload_index: + for name, tensor in utils.named_buffers: + if name not in utils.offload_index: + accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index) + accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache") + utils.bar.close() + utils.bar = None + lazy_load_callback.nested = False + if isinstance(f, zipfile.ZipExtFile): + f.close() + + lazy_load_callback.nested = False + + # Since we're using lazy loader, we need to figure out what the model's hidden layers are called + with torch_lazy_loader.use_lazy_torch_load(dematerialized_modules=True, use_accelerate_init_empty_weights=True): try: - model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + metamodel = AutoModelForCausalLM.from_config(model_config) except Exception as e: - if("out of memory" in traceback.format_exc().lower()): - raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") - model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") - elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))): - try: - model = AutoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") - except Exception as e: - if("out of memory" in traceback.format_exc().lower()): - raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") - model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") + metamodel = GPTNeoForCausalLM.from_config(model_config) + utils.layers_module_names = utils.get_layers_module_names(metamodel) + utils.module_names = list(metamodel.state_dict().keys()) + utils.named_buffers = list(metamodel.named_buffers(recurse=True)) + + with torch_lazy_loader.use_lazy_torch_load(callback=lazy_load_callback, dematerialized_modules=True): + if(os.path.isdir(self.data.ckpt_path)): + try: + model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your 
model.") + model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + elif(os.path.isdir("models/{}".format(self.data.ckpt_path.replace('/', '_')))): + try: + model = AutoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") + model = GPTNeoPromptTuningLM.from_pretrained("models/{}".format(self.data.ckpt_path.replace('/', '_')), revision=REVISION, cache_dir="cache") + else: + try: + model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + except Exception as e: + if("out of memory" in traceback.format_exc().lower()): + raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") + model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + + if(hascuda): + if(usegpu): + model = model.half().to(gpu_device) + elif(breakmodel): # Use both RAM and VRAM (breakmodel) + move_model_to_devices(model, usegpu, gpu_device) + elif(__import__("breakmodel").disk_blocks > 0): + move_model_to_devices(model, usegpu, gpu_device) + else: + model = model.to('cpu').float() + elif(__import__("breakmodel").disk_blocks > 0): + move_model_to_devices(model, usegpu, gpu_device) else: - try: - model = AutoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") - except Exception as e: - if("out of memory" in traceback.format_exc().lower()): - raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") - model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") + model.to('cpu').float() if step == 0: soft_embeddings = self.get_initial_soft_embeddings(model) diff --git a/utils.py b/utils.py index 7fd82072..f3de998e 100644 --- a/utils.py +++ b/utils.py @@ -22,6 +22,7 @@ except ImportError: HAS_ACCELERATE = False vars = None +args = None num_shards: Optional[int] = None current_shard = 0 from_pretrained_model_name = "" @@ -35,6 +36,8 @@ named_buffers: Optional[List[tuple]] = None default_sampler_order = [0, 1, 2, 3, 4, 5] +emit = None + #==================================================================# # Decorator to prevent a function's actions from being run until # at least x seconds have passed without the function being called From 8da6893407c1cf2383394b3bad6dc33d620a22a1 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 19:29:56 -0400 Subject: [PATCH 10/26] Replace MTJSP with MKUSP in prompt_tuner.py --- prompt_tuner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 48d3fcca..6d0c907e 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -535,7 +535,7 @@ class TrainerBase(abc.ABC): assert z["tensor"].shape[0] < self.data.params["max_batch_size"] self.data.soft_in_dim = z["tensor"].shape[0] except AssertionError: - self.raise_configuration_error("MTJSP file is corrupted.", code=14) + self.raise_configuration_error("MKUSP file is corrupted.", code=14) tensor = z["tensor"] @@ -565,7 +565,7 @@ class TrainerBase(abc.ABC): self.data.soft_in_dim = z["tensor"].shape[0] _step = z["step"] except AssertionError: - self.raise_configuration_error("MTJSP file is corrupted.", code=14) + 
self.raise_configuration_error("MKUSP file is corrupted.", code=14) tensor = z["tensor"] @@ -739,7 +739,7 @@ class TrainerBase(abc.ABC): step = z["step"] opt_state = z["opt_state"] except AssertionError: - self.raise_configuration_error("MTJSP file is corrupted.", code=14) + self.raise_configuration_error("MKUSP file is corrupted.", code=14) print(f"We're resuming a previous soft-tuning session at step {step+1}.\n") self.startup(step=step + 1) soft_embeddings = z["tensor"] From b1c456ec181b36af756eea42108181d0b72fced4 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 19:52:47 -0400 Subject: [PATCH 11/26] prompt_tuner.py always has accelerate --- prompt_tuner.py | 110 +++++++++++++----------------------------------- 1 file changed, 29 insertions(+), 81 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 6d0c907e..c13a8a53 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -27,22 +27,13 @@ import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss import transformers from transformers import __version__ as transformers_version -from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils, GPTNeoModel, GPTJModel +from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils import accelerate import accelerate.utils from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM from mkultra.soft_prompt import SoftPrompt from typing import Dict, List, Optional, TextIO, Union -try: - from transformers import XGLMModel -except: - pass -try: - from transformers.models.opt.modeling_opt import OPTDecoder -except: - pass - import breakmodel import torch_lazy_loader import utils @@ -189,7 +180,7 @@ def patch_transformers(): def move_model_to_devices(model, usegpu, gpu_device): global generator - if(not utils.HAS_ACCELERATE and not USE_BREAKMODEL): + if(not USE_BREAKMODEL): if(usegpu): model = model.half().to(gpu_device) else: @@ -197,67 +188,25 @@ def move_model_to_devices(model, usegpu, gpu_device): generator = model.generate return - import breakmodel - - if(utils.HAS_ACCELERATE): - import accelerate.utils - for key, value in model.state_dict().items(): - target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16 - if(value.dtype is not target_dtype): - accelerate.utils.set_module_tensor_to_device(model, key, target_dtype) - disk_blocks = breakmodel.disk_blocks - gpu_blocks = breakmodel.gpu_blocks - ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks) - cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) - device_map = {} - for name in utils.layers_module_names: - layer = int(name.rsplit(".", 1)[1]) - device = ("disk" if layer < disk_blocks else "cpu") if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) - device_map[name] = device - for name in utils.get_missing_module_names(model, list(device_map.keys())): - device_map[name] = breakmodel.primary_device - breakmodel.dispatch_model_ex(model, device_map, main_device=breakmodel.primary_device, offload_buffers=True, offload_dir="accelerate-disk-cache") - gc.collect() - generator = model.generate - return - - model.half() + for key, value in model.state_dict().items(): + target_dtype = torch.float32 if breakmodel.primary_device == "cpu" else torch.float16 + if(value.dtype is not target_dtype): + accelerate.utils.set_module_tensor_to_device(model, key, 
target_dtype) + disk_blocks = breakmodel.disk_blocks + gpu_blocks = breakmodel.gpu_blocks + ram_blocks = len(utils.layers_module_names) - sum(gpu_blocks) + cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + device_map = {} + for name in utils.layers_module_names: + layer = int(name.rsplit(".", 1)[1]) + device = ("disk" if layer < disk_blocks else "cpu") if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) + device_map[name] = device + for name in utils.get_missing_module_names(model, list(device_map.keys())): + device_map[name] = breakmodel.primary_device + breakmodel.dispatch_model_ex(model, device_map, main_device=breakmodel.primary_device, offload_buffers=True, offload_dir="accelerate-disk-cache") gc.collect() - - if(hasattr(model, "transformer")): - model.transformer.wte.to(breakmodel.primary_device) - model.transformer.ln_f.to(breakmodel.primary_device) - if(hasattr(model, 'lm_head')): - model.lm_head.to(breakmodel.primary_device) - if(hasattr(model.transformer, 'wpe')): - model.transformer.wpe.to(breakmodel.primary_device) - elif(not hasattr(model.model, "decoder")): - model.model.embed_tokens.to(breakmodel.primary_device) - model.model.layer_norm.to(breakmodel.primary_device) - model.lm_head.to(breakmodel.primary_device) - model.model.embed_positions.to(breakmodel.primary_device) - else: - model.model.decoder.embed_tokens.to(breakmodel.primary_device) - if(model.model.decoder.project_in is not None): - model.model.decoder.project_in.to(breakmodel.primary_device) - if(model.model.decoder.project_out is not None): - model.model.decoder.project_out.to(breakmodel.primary_device) - model.model.decoder.embed_positions.to(breakmodel.primary_device) - gc.collect() - GPTNeoModel.forward = breakmodel.new_forward_neo - if("GPTJModel" in globals()): - GPTJModel.forward = breakmodel.new_forward_neo # type: ignore - if("XGLMModel" in globals()): - XGLMModel.forward = breakmodel.new_forward_xglm # type: ignore - if("OPTDecoder" in globals()): - OPTDecoder.forward = breakmodel.new_forward_opt # type: ignore generator = model.generate - if(hasattr(model, "transformer")): - breakmodel.move_hidden_layers(model.transformer) - elif(not hasattr(model.model, "decoder")): - breakmodel.move_hidden_layers(model.model, model.model.layers) - else: - breakmodel.move_hidden_layers(model.model.decoder, model.model.decoder.layers) + return _PromptTuningPreTrainedModel = Union["UniversalPromptTuningMixin", GPTPromptTuningMixin, transformers.PreTrainedModel] @@ -785,16 +734,15 @@ class TrainerBase(abc.ABC): if utils.num_shards is None or utils.current_shard == 0: utils.offload_index = {} - if utils.HAS_ACCELERATE: - if os.path.isdir("accelerate-disk-cache"): - # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder - # (the folder doesn't contain any subfolders so os.remove will do just fine) - for filename in os.listdir("accelerate-disk-cache"): - try: - os.remove(os.path.join("accelerate-disk-cache", filename)) - except OSError: - pass - os.makedirs("accelerate-disk-cache", exist_ok=True) + if os.path.isdir("accelerate-disk-cache"): + # Delete all of the files in the disk cache folder without deleting the folder itself to allow people to create symbolic links for this folder + # (the folder doesn't contain any subfolders so os.remove will do just fine) + for filename in os.listdir("accelerate-disk-cache"): + try: + os.remove(os.path.join("accelerate-disk-cache", filename)) + 
except OSError: + pass + os.makedirs("accelerate-disk-cache", exist_ok=True) if utils.num_shards is not None: num_tensors = len(utils.get_sharded_checkpoint_num_tensors(utils.from_pretrained_model_name, utils.from_pretrained_index_filename, **utils.from_pretrained_kwargs)) else: @@ -835,7 +783,7 @@ class TrainerBase(abc.ABC): model_dict[key] = model_dict[key].to(torch.float32) if device == "shared": model_dict[key] = model_dict[key].to("cpu").detach_() - if able_to_pin_layers and utils.HAS_ACCELERATE: + if able_to_pin_layers: try: model_dict[key] = model_dict[key].pin_memory() except: From 09750acfa0d6b2dbb4400133746a3022bc042f4a Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 20:02:21 -0400 Subject: [PATCH 12/26] prompt_tuner.py now shows layer configuration --- aiserver.py | 2 +- prompt_tuner.py | 49 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/aiserver.py b/aiserver.py index d6c3754f..9814a037 100644 --- a/aiserver.py +++ b/aiserver.py @@ -848,7 +848,7 @@ def device_config(config): print(f"{colors.RED}Please enter an integer between -1 and {n_layers}.{colors.END}") print(colors.PURPLE + "\nFinal device configuration:") - device_list(n_layers) + device_list(n_layers, primary=breakmodel.primary_device) # If all layers are on the same device, use the old GPU generation mode while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0): diff --git a/prompt_tuner.py b/prompt_tuner.py index c13a8a53..af9e5443 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -38,9 +38,19 @@ import breakmodel import torch_lazy_loader import utils -USE_BREAKMODEL = True +use_breakmodel = True +class colors: + PURPLE = '\033[95m' + BLUE = '\033[94m' + CYAN = '\033[96m' + GREEN = '\033[92m' + YELLOW = '\033[93m' + RED = '\033[91m' + END = '\033[0m' + UNDERLINE = '\033[4m' + class Send_to_socketio(object): def write(self, bar): print(bar, end="") @@ -177,10 +187,29 @@ def patch_transformers(): OPTForCausalLM.__init__ = new_init +def device_list(n_layers, primary=None, selected=None): + device_count = torch.cuda.device_count() + if(device_count < 2): + primary = None + gpu_blocks = breakmodel.gpu_blocks + (device_count - len(breakmodel.gpu_blocks))*[0] + print(f"{colors.YELLOW} DEVICE ID | LAYERS | DEVICE NAME{colors.END}") + for i in range(device_count): + name = torch.cuda.get_device_name(i) + if(len(name) > 47): + name = "..." 
+ name[-44:] + row_color = colors.END + sep_color = colors.YELLOW + print(f"{row_color}{colors.YELLOW + '->' + row_color if i == selected else ' '} {'(primary)' if i == primary else ' '*9} {i:3} {sep_color}|{row_color} {gpu_blocks[i]:3} {sep_color}|{row_color} {name}{colors.END}") + row_color = colors.END + sep_color = colors.YELLOW + print(f"{row_color}{colors.YELLOW + '->' + row_color if -1 == selected else ' '} {' '*9} N/A {sep_color}|{row_color} {breakmodel.disk_blocks:3} {sep_color}|{row_color} (Disk cache){colors.END}") + print(f"{row_color} {' '*9} N/A {sep_color}|{row_color} {n_layers:3} {sep_color}|{row_color} (CPU){colors.END}") + + def move_model_to_devices(model, usegpu, gpu_device): global generator - if(not USE_BREAKMODEL): + if(not use_breakmodel): if(usegpu): model = model.half().to(gpu_device) else: @@ -703,8 +732,12 @@ class TrainerBase(abc.ABC): n_layers = utils.num_layers(model_config) convert_to_float16 = True hascuda = torch.cuda.is_available() - usegpu = not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers + usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers gpu_device = breakmodel_primary_device + use_breakmodel = bool(hascuda or breakmodel_disklayers or sum(breakmodel_gpulayers)) + + assert len(breakmodel_gpulayers) <= torch.cuda.device_count() + assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers breakmodel.disk_blocks = breakmodel_disklayers disk_blocks = breakmodel.disk_blocks @@ -712,6 +745,8 @@ class TrainerBase(abc.ABC): ram_blocks = ram_blocks = n_layers - sum(gpu_blocks) cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) + device_list(n_layers, primary=breakmodel.primary_device) + def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_): if lazy_load_callback.nested: return @@ -726,10 +761,10 @@ class TrainerBase(abc.ABC): for key, value in model_dict.items(): original_key = get_original_key(key) if isinstance(value, torch_lazy_loader.LazyTensor) and not any(original_key.startswith(n) for n in utils.layers_module_names): - device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not USE_BREAKMODEL else breakmodel.primary_device + device_map[key] = gpu_device if hascuda and usegpu else "cpu" if not hascuda or not use_breakmodel else breakmodel.primary_device else: layer = int(max((n for n in utils.layers_module_names if original_key.startswith(n)), key=len).rsplit(".", 1)[1]) - device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not USE_BREAKMODEL else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) + device = gpu_device if hascuda and usegpu else "disk" if layer < disk_blocks and layer < ram_blocks else "cpu" if not hascuda or not use_breakmodel else "shared" if layer < ram_blocks else bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks) device_map[key] = device if utils.num_shards is None or utils.current_shard == 0: @@ -777,9 +812,9 @@ class TrainerBase(abc.ABC): model_dict[key] = model_dict[key].materialize(f, map_location="cpu") # if model_dict[key].dtype is torch.float32: # fp32_model = True - if convert_to_float16 and breakmodel.primary_device != "cpu" and hascuda and (USE_BREAKMODEL or usegpu) and model_dict[key].dtype is torch.float32: + if convert_to_float16 and breakmodel.primary_device != "cpu" 
and hascuda and (use_breakmodel or usegpu) and model_dict[key].dtype is torch.float32: model_dict[key] = model_dict[key].to(torch.float16) - if breakmodel.primary_device == "cpu" or (not usegpu and not USE_BREAKMODEL and model_dict[key].dtype is torch.float16): + if breakmodel.primary_device == "cpu" or (not usegpu and not use_breakmodel and model_dict[key].dtype is torch.float16): model_dict[key] = model_dict[key].to(torch.float32) if device == "shared": model_dict[key] = model_dict[key].to("cpu").detach_() From b60d14e3bf220958f7158f60abd0e7091ce17960 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 21:25:07 -0400 Subject: [PATCH 13/26] Handle -1's in prompt_tuner.py breakmodel_gpulayers --- prompt_tuner.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index af9e5443..c6db1bfb 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -692,7 +692,7 @@ class TrainerBase(abc.ABC): if breakmodel_gpulayers is None: breakmodel_gpulayers = [] if breakmodel_primary_device is None: - breakmodel_primary_device = 0 if breakmodel_gpulayers else "cpu" + breakmodel_primary_device = 0 if sum(x if x >= 0 else 0 for x in breakmodel_gpulayers) else "cpu" if self.data.params is not None and "max_batch_size" not in self.data.params: self.data.params["max_batch_size"] = 2048 @@ -730,6 +730,8 @@ class TrainerBase(abc.ABC): model_config = self._get_model_config() n_layers = utils.num_layers(model_config) + breakmodel_gpulayers = [x if x >= 0 else n_layers for x in breakmodel_gpulayers] + convert_to_float16 = True hascuda = torch.cuda.is_available() usegpu = hascuda and not breakmodel_disklayers and len(breakmodel_gpulayers) == 1 and breakmodel_gpulayers[0] == n_layers From aede7ef192a99e3ad6381342a63f3b7b3c653eca Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 21:38:13 -0400 Subject: [PATCH 14/26] Fix typo in training routine of prompt_tuner.py --- prompt_tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index c6db1bfb..1c381f2b 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -965,7 +965,7 @@ class TrainerBase(abc.ABC): # Give the context to the model and compare the model's output logits with the labels to compute the loss logits = model(input_ids=input_ids, labels=input_ids).logits - loss: torch.Tensor = cross_entropy_loss(logits.view(-1, model.transformer.wte.weight.size(1)), labels.view(-1)) + loss: torch.Tensor = cross_entropy_loss(logits.view(-1, model.transformer.wte.weight.size(0)), labels.view(-1)) total_loss += loss.detach() # Compute the gradient of the loss function and add it to model.get_soft_params().grad (model.get_soft_params().grad += gradient) From bae8d88651a071b472c79ac963726d4ad089dd4e Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 21:50:06 -0400 Subject: [PATCH 15/26] Fix typo in get_hf_checkpoint_metadata --- prompt_tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 1c381f2b..b0886741 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -493,7 +493,7 @@ class TrainerBase(abc.ABC): params["max_batch_size"] = 2048 with tokenizer._kai_no_prefix(): params["eos_token"] = ( - [50259, 50259] if model_config.model_type == "xglm" and model_config.eos_token_id == 50259 else tokenizer.encode(model_config.eos_token_id) + [50259, 50259] if model_config.model_type == "xglm" and model_config.eos_token_id == 50259 else [model_config.eos_token_id] ) params["seq"] = 2048 self.data.params = 
params From a51e4f06519bfe0ac7a01db0ebda5522d82179d8 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 21:52:40 -0400 Subject: [PATCH 16/26] aria2_hook now handles properly when vars is None --- utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index f3de998e..8f4ec607 100644 --- a/utils.py +++ b/utils.py @@ -205,6 +205,7 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d token = HfFolder.get_token() if token is None: raise EnvironmentError("You specified use_auth_token=True, but a huggingface token was not found.") + aria2_port = 6799 if vars is None else vars.aria2_port _cache_dir = str(cache_dir) if cache_dir is not None else transformers.TRANSFORMERS_CACHE sharded = False headers = {"user-agent": transformers.file_utils.http_user_agent(user_agent)} @@ -269,9 +270,9 @@ def aria2_hook(pretrained_model_name_or_path: str, force_download=False, cache_d with tempfile.NamedTemporaryFile("w+b", delete=False) as f: f.write(aria2_config) f.flush() - p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(vars.aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + p = subprocess.Popen(["aria2c", "-x", "10", "-s", "10", "-j", "10", "--enable-rpc=true", f"--rpc-secret={secret}", "--rpc-listen-port", str(aria2_port), "--disable-ipv6", "--file-allocation=trunc", "--allow-overwrite", "--auto-file-renaming=false", "-d", _cache_dir, "-i", f.name, "-U", transformers.file_utils.http_user_agent(user_agent)] + (["-c"] if not force_download else []) + ([f"--header='Authorization: Bearer {token}'"] if use_auth_token else []), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) while p.poll() is None: - r = s.post(f"http://localhost:{vars.aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"] + r = s.post(f"http://localhost:{aria2_port}/jsonrpc", json={"jsonrpc": "2.0", "id": "kai", "method": "aria2.tellActive", "params": [f"token:{secret}"]}).json()["result"] if not r: s.close() if bar is not None: From 07eb2b5c4f27a83b6b9888b5e2d1c7c556fc46af Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 21:57:46 -0400 Subject: [PATCH 17/26] Disable urllib3 logger in prompt_tuner.py to disable aria2 warnings --- prompt_tuner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prompt_tuner.py b/prompt_tuner.py index b0886741..4b7f5ee1 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -34,6 +34,9 @@ from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM from mkultra.soft_prompt import SoftPrompt from typing import Dict, List, Optional, TextIO, Union +import logging +logging.getLogger("urllib3").setLevel(logging.ERROR) + import breakmodel import torch_lazy_loader import utils From 624f916dc639a8be757ec38a888cfad7087fdde9 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 22 Aug 2022 22:57:30 -0400 Subject: [PATCH 18/26] Fix some remaining problems in prompt_tuner.py --- prompt_tuner.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 
4b7f5ee1..46092eac 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -532,7 +532,7 @@ class TrainerBase(abc.ABC): with zipfile.ZipFile(output_file, "w", compression=zipfile.ZIP_LZMA) as z: with z.open("tensor.npy", "w") as f: - np.save(f, tensor, allow_pickle=False) + np.save(f, tensor.detach().cpu().numpy(), allow_pickle=False) with zipfile.ZipFile(output_file, "a", compression=zipfile.ZIP_STORED) as z: with z.open("meta.json", "w") as f: f.write(json.dumps(meta, indent=2).encode("utf-8")) @@ -555,7 +555,7 @@ class TrainerBase(abc.ABC): { "metadata": { "step": _step, - "loss": float(z["loss"].item()), + "loss": float(z["loss"]), "uuid": str(uuid.uuid4()), "name": soft_prompt_name, "description": soft_prompt_description, @@ -563,7 +563,7 @@ class TrainerBase(abc.ABC): }, "tensor": base64.b64encode( pickle.dumps( - tensor, + tensor.detach().cpu(), protocol=4, ), ).decode("ascii"), @@ -695,7 +695,7 @@ class TrainerBase(abc.ABC): if breakmodel_gpulayers is None: breakmodel_gpulayers = [] if breakmodel_primary_device is None: - breakmodel_primary_device = 0 if sum(x if x >= 0 else 0 for x in breakmodel_gpulayers) else "cpu" + breakmodel_primary_device = 0 if sum(x if x >= 0 else 1 for x in breakmodel_gpulayers) else "cpu" if self.data.params is not None and "max_batch_size" not in self.data.params: self.data.params["max_batch_size"] = 2048 @@ -744,13 +744,14 @@ class TrainerBase(abc.ABC): assert len(breakmodel_gpulayers) <= torch.cuda.device_count() assert sum(breakmodel_gpulayers) + breakmodel_disklayers <= n_layers + breakmodel.gpu_blocks = breakmodel_gpulayers breakmodel.disk_blocks = breakmodel_disklayers disk_blocks = breakmodel.disk_blocks gpu_blocks = breakmodel.gpu_blocks ram_blocks = ram_blocks = n_layers - sum(gpu_blocks) cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks)) - device_list(n_layers, primary=breakmodel.primary_device) + device_list(ram_blocks, primary=breakmodel.primary_device) def lazy_load_callback(model_dict: Dict[str, Union[torch_lazy_loader.LazyTensor, torch.Tensor]], f, **_): if lazy_load_callback.nested: @@ -883,11 +884,11 @@ class TrainerBase(abc.ABC): if("out of memory" in traceback.format_exc().lower()): raise RuntimeError("One of your GPUs ran out of memory when KoboldAI tried to load your model.") model = GPTNeoPromptTuningLM.from_pretrained(self.data.ckpt_path, revision=REVISION, cache_dir="cache") - + if(hascuda): if(usegpu): model = model.half().to(gpu_device) - elif(breakmodel): # Use both RAM and VRAM (breakmodel) + elif(use_breakmodel): # Use both RAM and VRAM (breakmodel) move_model_to_devices(model, usegpu, gpu_device) elif(__import__("breakmodel").disk_blocks > 0): move_model_to_devices(model, usegpu, gpu_device) @@ -1068,9 +1069,9 @@ class BasicTrainer(TrainerBase): k for k in range(model.get_input_embeddings().weight.shape[-2]) if k not in special_tokens ] sample = rng.choice(sample_space, self.data.soft_in_dim, False) - return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32))) + return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(sample, dtype=torch.int32, device=model.get_input_embeddings().weight.device))) elif self.data.prompt_method == "tokens": - return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32))) + return SoftPrompt.from_inputs_embeds(model.get_input_embeddings()(torch.tensor(self.data.initial_softprompt, dtype=torch.int32, 
device=model.get_input_embeddings().weight.device))) self.raise_configuration_error( f"Unknown prompt method {repr(self.data.prompt_method)}", code=104 ) From 73865ba0667aa0d1d53dbf7fef11996838743179 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 19 Oct 2022 11:05:17 -0400 Subject: [PATCH 19/26] Add parameter to Colab for not using google drive (data would be ephemeral) --- colab/GPU.ipynb | 8 +++++++- colab/TPU.ipynb | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index ba171275..d8971127 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -70,10 +70,16 @@ "Model = \"Nerys 2.7B\" #@param [\"Nerys 2.7B\", \"AID 2.7B\", \"Erebus 2.7B\", \"Janeway 2.7B\", \"Picard 2.7B\", \"Horni LN 2.7B\", \"Horni 2.7B\", \"Shinen 2.7B\", \"OPT 2.7B\", \"Fairseq Dense 2.7B\", \"Neo 2.7B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", + "use_google_drive = True #@param {type:\"boolean\"}\n", "\n", "!nvidia-smi\n", "from google.colab import drive\n", - "drive.mount('/content/drive/')\n", + "if use_google_drive:\n", + " drive.mount('/content/drive/')\n", + "else:\n", + " import os\n", + " os.mkdir(\"/content/drive\")\n", + " os.mkdir(\"/content/drive/MyDrive/\")\n", "\n", "if Model == \"Nerys 2.7B\":\n", " Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\"\n", diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 6ec346fc..6e640e32 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -69,6 +69,7 @@ "Model = \"Nerys 13B V2\" #@param [\"Nerys 13B V2\", \"Erebus 13B\", \"Janeway 13B\", \"Shinen 13B\", \"Skein 20B\", \"Erebus 20B\", \"Skein 6B\", \"Janeway 6B\", \"Adventure 6B\", \"Shinen 6B\", \"Lit V2 6B\", \"Lit 6B\", \"NeoX 20B\", \"OPT 13B\", \"Fairseq Dense 13B\", \"GPT-J-6B\"] {allow-input: true}\n", "Version = \"Official\" #@param [\"Official\", \"United\"] {allow-input: true}\n", "Provider = \"Localtunnel\" #@param [\"Localtunnel\", \"Cloudflare\"]\n", + "use_google_drive = True #@param {type:\"boolean\"}\n", "\n", "import os\n", "try:\n", @@ -79,7 +80,12 @@ " raise RuntimeError(\"⚠️You can not run this notebook without the TPU accelerator, go to Runtime->Sessions, terminate your session and then try again.⚠️\")\n", "print('Now we will need your Google Drive to store settings and saves, you must login with the same account you used for Colab.')\n", "from google.colab import drive\n", - "drive.mount('/content/drive/')\n", + "if use_google_drive:\n", + " drive.mount('/content/drive/')\n", + "else:\n", + " import os\n", + " os.mkdir(\"/content/drive\")\n", + " os.mkdir(\"/content/drive/MyDrive/\")\n", "\n", "if Model == \"Janeway 13B\":\n", " Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n", From d588dc0096473240a839e2ade2a3da28b81d6062 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 19 Oct 2022 11:19:04 -0400 Subject: [PATCH 20/26] Check if dir exists before creating --- colab/GPU.ipynb | 6 ++++-- colab/TPU.ipynb | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/colab/GPU.ipynb b/colab/GPU.ipynb index d8971127..8c76b5ed 100644 --- a/colab/GPU.ipynb +++ b/colab/GPU.ipynb @@ -78,8 +78,10 @@ " drive.mount('/content/drive/')\n", "else:\n", " import os\n", - " os.mkdir(\"/content/drive\")\n", - " os.mkdir(\"/content/drive/MyDrive/\")\n", + " if not os.path.exists(\"/content/drive\"):\n", + " os.mkdir(\"/content/drive\")\n", + " if not os.path.exists(\"/content/drive/MyDrive/\"):\n", + " 
os.mkdir(\"/content/drive/MyDrive/\")\n", "\n", "if Model == \"Nerys 2.7B\":\n", " Model = \"KoboldAI/fairseq-dense-2.7B-Nerys\"\n", diff --git a/colab/TPU.ipynb b/colab/TPU.ipynb index 6e640e32..efdfa5c4 100644 --- a/colab/TPU.ipynb +++ b/colab/TPU.ipynb @@ -84,8 +84,10 @@ " drive.mount('/content/drive/')\n", "else:\n", " import os\n", - " os.mkdir(\"/content/drive\")\n", - " os.mkdir(\"/content/drive/MyDrive/\")\n", + " if not os.path.exists(\"/content/drive\"):\n", + " os.mkdir(\"/content/drive\")\n", + " if not os.path.exists(\"/content/drive/MyDrive/\"):\n", + " os.mkdir(\"/content/drive/MyDrive/\")\n", "\n", "if Model == \"Janeway 13B\":\n", " Model = \"KoboldAI/fairseq-dense-13B-Janeway\"\n", From f7b799be567292931f9b1683c9d55124d3462054 Mon Sep 17 00:00:00 2001 From: vfbd Date: Fri, 21 Oct 2022 17:06:17 -0400 Subject: [PATCH 21/26] Apply tokenizer fixes to prompt_tuner.py --- prompt_tuner.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/prompt_tuner.py b/prompt_tuner.py index 46092eac..f37a8718 100644 --- a/prompt_tuner.py +++ b/prompt_tuner.py @@ -27,7 +27,7 @@ import torch.nn.functional as F from torch.nn import Embedding, CrossEntropyLoss import transformers from transformers import __version__ as transformers_version -from transformers import AutoTokenizer, GPT2TokenizerFast, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils +from transformers import AutoTokenizer, GPT2Tokenizer, AutoConfig, AutoModelForCausalLM, GPTNeoForCausalLM, PreTrainedModel, modeling_utils import accelerate import accelerate.utils from mkultra.tuning import GPTPromptTuningMixin, GPTNeoPromptTuningLM @@ -344,41 +344,38 @@ default_quiet = False def get_tokenizer(model_id, revision=None) -> transformers.PreTrainedTokenizerBase: if(os.path.isdir(model_id)): - try: - tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") - except Exception as e: - pass try: tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False) except Exception as e: try: - tokenizer = GPT2TokenizerFast.from_pretrained(model_id, revision=revision, cache_dir="cache") + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") except Exception as e: - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + try: + tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache") elif(os.path.isdir("models/{}".format(model_id.replace('/', '_')))): - try: - tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache") - except Exception as e: - pass try: tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache", use_fast=False) except Exception as e: try: - tokenizer = GPT2TokenizerFast.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache") + tokenizer = AutoTokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), revision=revision, cache_dir="cache") except Exception as e: - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + try: + tokenizer = GPT2Tokenizer.from_pretrained("models/{}".format(model_id.replace('/', '_')), 
revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache") else: - try: - tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") - except Exception as e: - pass try: tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache", use_fast=False) except Exception as e: try: - tokenizer = GPT2TokenizerFast.from_pretrained(model_id, revision=revision, cache_dir="cache") + tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") except Exception as e: - tokenizer = GPT2TokenizerFast.from_pretrained("gpt2", revision=revision, cache_dir="cache") + try: + tokenizer = GPT2Tokenizer.from_pretrained(model_id, revision=revision, cache_dir="cache") + except Exception as e: + tokenizer = GPT2Tokenizer.from_pretrained("gpt2", revision=revision, cache_dir="cache") @contextlib.contextmanager def _kai_no_prefix(): From 4699ded3ce4bc23992cb3c8f0ec02c25594d2c37 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 22 Oct 2022 19:00:06 +0200 Subject: [PATCH 22/26] Tuner Dependencies --- environments/huggingface.yml | 2 ++ environments/rocm.yml | 2 ++ requirements.txt | 2 ++ 3 files changed, 6 insertions(+) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 7abceefa..9957f1c5 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -20,9 +20,11 @@ dependencies: - marshmallow>=3.13 - apispec-webframeworks - loguru + - termcolor - pip: - flask-cloudflared - flask-ngrok - lupa==1.10 - transformers>=4.20.1 - accelerate + - git+https://github.com/VE-FORBRYDERNE/mkultra diff --git a/environments/rocm.yml b/environments/rocm.yml index e885f4df..c6fea0b3 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -17,6 +17,7 @@ dependencies: - marshmallow>=3.13 - apispec-webframeworks - loguru + - termcolor - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.1.1 - torch @@ -26,3 +27,4 @@ dependencies: - lupa==1.10 - transformers>=4.20.1 - accelerate + - git+https://github.com/VE-FORBRYDERNE/mkultra diff --git a/requirements.txt b/requirements.txt index b1e2247c..0d2bcd6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,5 @@ flask-session marshmallow>=3.13 apispec-webframeworks loguru +termcolor +git+https://github.com/VE-FORBRYDERNE/mkultra \ No newline at end of file From 0da404d4f8cc4b0323db74a725558dceb002e502 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 23 Oct 2022 14:10:44 +0200 Subject: [PATCH 23/26] Conda conflict fix --- commandline.bat | 2 ++ install_requirements.bat | 1 + play.bat | 2 ++ 3 files changed, 5 insertions(+) diff --git a/commandline.bat b/commandline.bat index 001c0bae..d25da157 100644 --- a/commandline.bat +++ b/commandline.bat @@ -1,5 +1,7 @@ @echo off cd /D %~dp0 +SET CONDA_SHLVL= + TITLE CMD for KoboldAI Runtime SET /P M=nul cd /D %~dp0 +SET CONDA_SHLVL= if exist miniconda3\ ( echo Delete existing installation? 
diff --git a/play.bat b/play.bat index 4e54fbba..a44f0afa 100644 --- a/play.bat +++ b/play.bat @@ -1,5 +1,7 @@ @echo off cd /D %~dp0 +SET CONDA_SHLVL= + TITLE KoboldAI - Server SET /P M= Date: Sun, 23 Oct 2022 16:00:18 +0200 Subject: [PATCH 24/26] Conda fix for update script --- update-koboldai.bat | 2 ++ 1 file changed, 2 insertions(+) diff --git a/update-koboldai.bat b/update-koboldai.bat index 5ce40985..5e7339a3 100644 --- a/update-koboldai.bat +++ b/update-koboldai.bat @@ -1,5 +1,7 @@ @echo off cd /d %~dp0 +SET CONDA_SHLVL= + TITLE KoboldAI - Updater SET /P M= Date: Sun, 23 Oct 2022 18:28:50 +0200 Subject: [PATCH 25/26] Force compatible HF Hub --- environments/huggingface.yml | 1 + environments/rocm.yml | 1 + requirements.txt | 1 + requirements_mtj.txt | 1 + 4 files changed, 4 insertions(+) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 7abceefa..6654b470 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -25,4 +25,5 @@ dependencies: - flask-ngrok - lupa==1.10 - transformers>=4.20.1 + - huggingface_hub>=0.10.1 - accelerate diff --git a/environments/rocm.yml b/environments/rocm.yml index e885f4df..87fac8bb 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -25,4 +25,5 @@ dependencies: - flask-ngrok - lupa==1.10 - transformers>=4.20.1 + - huggingface_hub>=0.10.1 - accelerate diff --git a/requirements.txt b/requirements.txt index b1e2247c..9412771d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ transformers>=4.20.1 +huggingface_hub>=0.10.1 Flask Flask-SocketIO requests diff --git a/requirements_mtj.txt b/requirements_mtj.txt index 743c9c1d..90011fda 100644 --- a/requirements_mtj.txt +++ b/requirements_mtj.txt @@ -6,6 +6,7 @@ dm-haiku == 0.0.5 jax == 0.2.21 jaxlib >= 0.1.69, <= 0.3.7 transformers >= 4.20.1 +huggingface_hub >= 0.10.1 progressbar2 git+https://github.com/VE-FORBRYDERNE/mesh-transformer-jax@ck flask From 442a9760b8416ddfa49c5b928e6831eb00042e91 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 23 Oct 2022 19:03:18 +0200 Subject: [PATCH 26/26] Hide V2 Saves --- fileops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fileops.py b/fileops.py index a416f24d..32fdd8b8 100644 --- a/fileops.py +++ b/fileops.py @@ -86,7 +86,7 @@ def uspath(filename): def getstoryfiles(): list = [] for file in listdir("stories"): - if file.endswith(".json"): + if file.endswith(".json") and not file.endswith(".v2.json"): ob = {} ob["name"] = file.replace(".json", "") f = open("stories/"+file, "r")
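The breakmodel layer-placement rule that recurs in the prompt_tuner.py patches above (the device maps built in move_model_to_devices and in the lazy-load callback) can be summarized with a short standalone sketch. This is an illustration only, not code from the patches: the helper name build_device_map and the example block counts are assumptions made for demonstration, but the disk/CPU/GPU split follows the same bisect-based expressions.

    import bisect
    import itertools

    def build_device_map(layer_names, n_layers, disk_blocks, gpu_blocks):
        # Layers that do not fit on any GPU stay in "RAM": the first disk_blocks of
        # those are offloaded to disk, the rest go to the CPU. The remaining layers
        # are spread across the GPUs according to the per-GPU block counts.
        ram_blocks = n_layers - sum(gpu_blocks)
        cumulative_gpu_blocks = tuple(itertools.accumulate(gpu_blocks))
        device_map = {}
        for name in layer_names:
            layer = int(name.rsplit(".", 1)[1])
            if layer < ram_blocks:
                device = "disk" if layer < disk_blocks else "cpu"
            else:
                device = bisect.bisect_right(cumulative_gpu_blocks, layer - ram_blocks)
            device_map[name] = device
        return device_map

    # 12 hidden layers: 2 offloaded to disk, 4 kept on the CPU, 4 on GPU 0, 2 on GPU 1.
    layer_names = ["transformer.h.{}".format(i) for i in range(12)]
    print(build_device_map(layer_names, n_layers=12, disk_blocks=2, gpu_blocks=[4, 2]))

Using the cumulative GPU block counts with bisect_right picks the first GPU whose quota is not yet filled, which is why the patches only need the per-device block counts (plus the -1 shorthand handled in the breakmodel_gpulayers patch) to place every hidden layer.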
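The soft-prompt initialization touched by the "Fix some remaining problems in prompt_tuner.py" patch can likewise be sketched in isolation. The helper below is an assumption-labeled illustration (sample_initial_soft_embedding and the toy vocabulary size are invented for the example), but it follows the same approach as BasicTrainer.get_initial_soft_embeddings: sample random non-special token ids without replacement and take their input embeddings, building the index tensor on the embedding matrix's own device.

    import numpy as np
    import torch
    import torch.nn as nn

    def sample_initial_soft_embedding(embedding, n_tokens, special_ids=(), seed=0):
        # Sample n_tokens distinct, non-special vocabulary ids and return their
        # embeddings as a (n_tokens, embedding_dim) tensor to seed a soft prompt.
        rng = np.random.Generator(np.random.PCG64(seed))
        sample_space = [k for k in range(embedding.weight.shape[0]) if k not in set(special_ids)]
        sample = rng.choice(sample_space, n_tokens, replace=False)
        ids = torch.tensor(sample, dtype=torch.int32, device=embedding.weight.device)
        return embedding(ids).detach().clone()

    emb = nn.Embedding(50257, 768)  # toy embedding matrix standing in for the model's wte
    soft = sample_initial_soft_embedding(emb, n_tokens=20, special_ids=[50256])
    print(soft.shape)  # torch.Size([20, 768])

Building the id tensor directly on embedding.weight.device is what the patch adds: it avoids a device mismatch once the embedding matrix has already been moved to a GPU by the breakmodel dispatch.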