diff --git a/aiserver.py b/aiserver.py
index 3c54beda..8d481b75 100644
--- a/aiserver.py
+++ b/aiserver.py
@@ -3509,7 +3509,7 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False,
             botname = (koboldai_vars.botname + ":")
         else:
             botname = ""
-        data = re.sub(r'\n+', ' ', data)
+        data = re.sub(r'\n+\Z', '', data)
         if(len(data)):
             data = f"\n{koboldai_vars.chatname}: {data}\n{botname}"
@@ -6308,7 +6308,7 @@ def UI_2_download_story():
 @logger.catch
 def UI_2_Set_Selected_Text(data):
     if not koboldai_vars.quiet:
-        print("Updating Selected Text: {}".format(data))
+        logger.info("Updating Selected Text: {}".format(data))
     action_id = int(data["id"])
     if not koboldai_vars.actions.actions[action_id].get("Original Text"):
diff --git a/koboldai_settings.py b/koboldai_settings.py
index b2cc720e..dfccd4ef 100644
--- a/koboldai_settings.py
+++ b/koboldai_settings.py
@@ -397,8 +397,8 @@ class koboldai_vars(object):
         ######################################### Setup Author's Note Data ########################################################
         authors_note_text = self.authornotetemplate.replace("<|>", self.authornote)
-        if len(authors_note_text) > 0 and authors_note_text[-1] not in [" ", "\n"]:
-            authors_note_text += " "
+        if len(authors_note_text) > 0 and authors_note_text[0] not in [" ", "\n"]:
+            authors_note_text = " " + authors_note_text
         authors_note_data = [[x, self.tokenizer.decode(x)] for x in self.tokenizer.encode(authors_note_text)]
         if used_tokens + len(authors_note_data) <= token_budget:
             used_tokens += len(authors_note_data)
@@ -1384,7 +1384,11 @@ class KoboldStoryRegister(object):
         self.action_count = -1
         # The id of the last submission action, or 0 if the last append was not a submission
         self.submission_id = 0
-        self.sentence_re = re.compile(r"[^.!?]*[.!?]+\"?\s*", re.S)
+        # A regular expression used to break the story into sentences so that the author's
+        # note can be inserted with minimal disruption. Avoid ending a sentence with
+        # whitespace, because most tokenizers deal better with whitespace at the beginning of text.
+        # Search for sentence-end delimiters (i.e. .!?) and also capture closing parentheses and quotes.
+        self.sentence_re = re.compile(r".*?[.!?]+(?=[.!?\]\)}>'\"›»\s])[.!?\]\)}>'\"›»]*", re.S)
         self.story_settings = story_settings
         self.tts_model = None
         self.tortoise = None
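The new sentence_re is easiest to see on an example. A minimal sketch (the sample text is invented here, not part of the patch) showing that each match ends on the sentence's closing punctuation or quote, with the separating whitespace carried at the head of the following sentence, where sentencepiece-style tokenizers handle it best:

import re

sentence_re = re.compile(r".*?[.!?]+(?=[.!?\]\)}>'\"›»\s])[.!?\]\)}>'\"›»]*", re.S)

story = 'He stopped. "Really?!" she asked (quietly.) Then they moved on.\n'
print(sentence_re.findall(story))
# ['He stopped.', ' "Really?!"', ' she asked (quietly.)', ' Then they moved on.']
# No match ends in whitespace; the trailing '\n' is left unconsumed. Note that
# the lookahead requires a character after the final punctuation, so a sentence
# flush against the very end of the string is left for the caller to pick up.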
diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py
index 013590ef..cd609fed 100644
--- a/modeling/inference_models/hf.py
+++ b/modeling/inference_models/hf.py
@@ -20,8 +20,89 @@ class HFInferenceModel(InferenceModel):
     def _post_load(self) -> None:
         # These are model specific tokenizer overrides if a model has bad defaults
         if utils.koboldai_vars.model_type == "llama":
-            self.tokenizer.decode_with_prefix_space = True
+            # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
             self.tokenizer.add_bos_token = False
+
+            # HF transformers no longer supports decode_with_prefix_space.
+            # We work around this by wrapping decode, encode, and __call__
+            # with versions that compensate for the 'prefix space' misfeature
+            # of sentencepiece.
+            vocab = self.tokenizer.convert_ids_to_tokens(range(self.tokenizer.vocab_size))
+            has_prefix_space = {i for i, tok in enumerate(vocab) if tok.startswith("▁")}
+
+            # Wrap 'decode' with a method that always returns text starting with a space
+            # when the head token starts with a space. This is what 'decode_with_prefix_space'
+            # used to do, and we implement it using the same technique (building a cache of
+            # tokens that should have a prefix space, and then prepending a space if the first
+            # token is in this set). We also work around a bizarre behavior in which decoding
+            # the single token 13 behaves differently than decoding a sequence containing only [13].
+            original_decode = type(self.tokenizer.tokenizer).decode
+            def decode_wrapper(self, token_ids, *args, **kwargs):
+                first = None
+                # Note: the code below that wraps single-value token_ids in a list
+                # is to work around this wonky behavior:
+                # >>> t.decode(13)
+                # '<0x0A>'
+                # >>> t.decode([13])
+                # '\n'
+                # Not doing this causes token streaming to receive <0x0A> characters
+                # instead of newlines.
+                if isinstance(token_ids, int):
+                    first = token_ids
+                    token_ids = [first]
+                elif hasattr(token_ids, 'dim'):  # Check for e.g. torch.Tensor
+                    # Tensors don't support the Python standard of 'empty is False',
+                    # and the special case of dimension-0 tensors also needs to be
+                    # handled separately.
+                    if token_ids.dim() == 0:
+                        first = int(token_ids.item())
+                        token_ids = [first]
+                    elif len(token_ids) > 0:
+                        first = int(token_ids[0])
+                elif token_ids:
+                    first = token_ids[0]
+                result = original_decode(self, token_ids, *args, **kwargs)
+                if first is not None and first in has_prefix_space:
+                    result = " " + result
+                return result
+            # GenericTokenizer overrides __setattr__, so we need to use object.__setattr__ to bypass it
+            object.__setattr__(self.tokenizer, 'decode', decode_wrapper.__get__(self.tokenizer))
+
+            # Wrap encode and __call__ to work around the 'prefix space' misfeature as well.
+            # The problem is that "Bob" at the start of text is encoded as if it were
+            # " Bob". This creates a problem because it means you can't split text, encode
+            # the pieces, concatenate the tokens, decode them, and get the original text back.
+            # The workaround is to prepend a known token that (1) starts with a space and
+            # (2) is not the prefix of any other token. After searching through the vocab,
+            # " ," (space comma) is the only token containing only printable ASCII characters
+            # that fits this bill. By prepending ',' to the text, the original encode
+            # method always returns [1919, ...], where the tail of the sequence is the
+            # actual encoded result we want, without the prefix-space behavior.
+            original_encode = type(self.tokenizer.tokenizer).encode
+            def encode_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_encode(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_encode(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, 'encode', encode_wrapper.__get__(self.tokenizer))
+
+            # Since 'encode' is documented as being deprecated, also override __call__.
+            # This doesn't appear to be used by KoboldAI currently, but we do so
+            # in case someone uses it in the future.
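A self-contained sketch of the monkey-patching mechanism relied on above: a plain function becomes a bound method via __get__, and object.__setattr__ installs it on the instance because GenericTokenizer overrides __setattr__. ToyTokenizer and its tiny vocabulary are hypothetical stand-ins for the real LlamaTokenizer:

class ToyTokenizer:
    # '▁' is sentencepiece's prefix-space marker; these ids/tokens are made up.
    vocab = {0: "\n", 1: "▁Bob", 2: "▁waved"}

    def __setattr__(self, name, value):  # mimics GenericTokenizer's override
        raise AttributeError("use object.__setattr__")

    def decode(self, ids):
        # Like sentencepiece, the prefix space of the head token is dropped.
        return "".join(self.vocab[i] for i in ids).replace("▁", " ").lstrip(" ")

has_prefix_space = {i for i, tok in ToyTokenizer.vocab.items() if tok.startswith("▁")}
original_decode = ToyTokenizer.decode

def decode_wrapper(self, ids):
    # Restore the space that decode() strips from a space-prefixed head token.
    result = original_decode(self, ids)
    if ids and ids[0] in has_prefix_space:
        result = " " + result
    return result

tok = ToyTokenizer()
object.__setattr__(tok, "decode", decode_wrapper.__get__(tok))
print(repr(tok.decode([1, 2])))  # ' Bob waved' -- head space preserved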
+            original_call = type(self.tokenizer.tokenizer).__call__
+            def call_wrapper(self, text, *args, **kwargs):
+                if type(text) is str:
+                    text = ',' + text
+                    result = original_call(self, text, *args, **kwargs)
+                    result = result[1:]
+                else:
+                    result = original_call(self, text, *args, **kwargs)
+                return result
+            object.__setattr__(self.tokenizer, '__call__', call_wrapper.__get__(self.tokenizer))
+
         elif utils.koboldai_vars.model_type == "opt":
             self.tokenizer._koboldai_header = self.tokenizer.encode("")
             self.tokenizer.add_bos_token = False
diff --git a/static/klite.html b/static/klite.html
index 211b623b..0893ebbe 100644
--- a/static/klite.html
+++ b/static/klite.html
@@ -1 +1,7667 @@
-Coming Soon?
\ No newline at end of file
+Connecting...
[... the remaining ~7,666 added lines of the Kobold Lite HTML page were lost to extraction (markup stripped); only the visible "Connecting..." placeholder text survived ...]
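Finally, the chat-mode change in aiserver.py at the top of this diff is easy to misread, so here is a quick illustration with an invented sample message. The old substitution flattened every newline inside the user's input into a space; the new one only trims trailing newlines:

import re

data = "Hello there.\nHow are you?\n\n"
print(repr(re.sub(r'\n+', ' ', data)))   # old: 'Hello there. How are you? '
print(repr(re.sub(r'\n+\Z', '', data)))  # new: 'Hello there.\nHow are you?'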