From 93890b9034a5c43ad0f05f3e987691e1aa5b59cc Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 6 Dec 2022 10:06:24 -0500 Subject: [PATCH] Alt generate mode is enabled. Each generation will be sequential allowing for lower VRAM usage at potentially slower generation time --- aiserver.py | 5 ++--- koboldai_settings.py | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/aiserver.py b/aiserver.py index 18a1aa20..85aeed31 100644 --- a/aiserver.py +++ b/aiserver.py @@ -2358,7 +2358,7 @@ def patch_transformers(): self.halt = not koboldai_vars.lua_koboldbridge.generating koboldai_vars.lua_koboldbridge.regeneration_required = False - for i in range(koboldai_vars.numseqs): + for i in range(koboldai_vars.numseqs) if not koboldai_vars.alt_multi_gen else range(1): koboldai_vars.lua_koboldbridge.generated[i+1][koboldai_vars.generated_tkns] = int(input_ids[i, -1].item()) return self.regeneration_required or self.halt @@ -5281,7 +5281,7 @@ def core_generate(text: list, _min: int, _max: int, found_entries: set, is_core: already_generated += len(genout[0]) try: - assert already_generated <= koboldai_vars.genamt + assert already_generated <= koboldai_vars.genamt * koboldai_vars.numseqs if koboldai_vars.alt_multi_gen else 1 except AssertionError: print("AlreadyGenerated", already_generated) print("genamt", koboldai_vars.genamt) @@ -6059,7 +6059,6 @@ def generate(txt, minimum, maximum, found_entries=None): try: start_time = time.time() genout, already_generated = tpool.execute(core_generate, txt, minimum, maximum, found_entries) - print(genout) logger.debug("Generate: core_generate time {}s".format(time.time()-start_time)) except Exception as e: if(issubclass(type(e), lupa.LuaError)): diff --git a/koboldai_settings.py b/koboldai_settings.py index 628cfcdc..121559c1 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -636,7 +636,8 @@ class model_settings(settings): local_only_variables = ['badwordsids', 'apikey', 'tqdm', 'socketio', 'default_preset', 'koboldai_vars'] no_save_variables = ['tqdm', 'tqdm_progress', 'tqdm_rem_time', 'socketio', 'modelconfig', 'custmodpth', 'generated_tkns', 'loaded_layers', 'total_layers', 'total_download_chunks', 'downloaded_chunks', 'presets', 'default_preset', - 'koboldai_vars', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition'] + 'koboldai_vars', 'welcome', 'welcome_default', 'simple_randomness', 'simple_creativity', 'simple_repitition', + 'badwordsids', 'uid_presets'] settings_name = "model" default_settings = {"rep_pen" : 1.1, "rep_pen_slope": 0.7, "rep_pen_range": 1024, "temp": 0.5, "top_p": 0.9, "top_k": 0.0, "top_a": 0.0, "tfs": 1.0, "typical": 1.0, "sampler_order": [6,0,1,2,3,4,5]} @@ -764,13 +765,13 @@ class model_settings(settings): #Setup TQDP for token generation elif name == "generated_tkns" and 'tqdm' in self.__dict__: if value == 0: - self.tqdm.reset(total=self.genamt) + self.tqdm.reset(total=self.genamt * self.numseqs if self.alt_multi_gen else 1 ) self.tqdm_progress = 0 else: self.tqdm.update(value-self.tqdm.n) - self.tqdm_progress = int(float(self.generated_tkns)/float(self.genamt)*100) + self.tqdm_progress = int(float(self.generated_tkns)/float(self.genamt * self.numseqs if self.alt_multi_gen else 1)*100) if self.tqdm.format_dict['rate'] is not None: - self.tqdm_rem_time = str(datetime.timedelta(seconds=int(float(self.genamt-self.generated_tkns)/self.tqdm.format_dict['rate']))) + self.tqdm_rem_time = str(datetime.timedelta(seconds=int(float((self.genamt * self.numseqs if self.alt_multi_gen else 1)-self.generated_tkns)/self.tqdm.format_dict['rate']))) #Setup TQDP for model loading elif name == "loaded_layers" and 'tqdm' in self.__dict__: if value == 0: @@ -1713,21 +1714,26 @@ class KoboldStoryRegister(object): self.story_settings.gamesaved = False def stream_tokens(self, text_list): - if len(text_list) > 1: + if len(text_list) > 1 or (self.koboldai_vars.alt_multi_gen and self.koboldai_vars.numseqs > 1): + if self.koboldai_vars.alt_multi_gen: + #since alt_multi_gen is really just several single gens the text list is always 1 deep, so we need some + #other way to figure out wich spot in our options list we're on. We'll figure it out by seeing how many + #tokens we generated vs how many each option should take + stream_offset = int((self.koboldai_vars.generated_tkns-1) / self.koboldai_vars.genamt) if self.action_count+1 in self.actions: for i in range(len(text_list)): found = False for j in range(len(self.actions[self.action_count+1]['Options'])): if 'stream_id' in self.actions[self.action_count+1]['Options'][j]: - if self.actions[self.action_count+1]['Options'][j]['stream_id'] == i: + if self.actions[self.action_count+1]['Options'][j]['stream_id'] == i+stream_offset: found = True self.actions[self.action_count+1]['Options'][j]['text'] = "{}{}".format(self.actions[self.action_count+1]['Options'][j]['text'], text_list[i]) if not found: - self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i}) + self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i+stream_offset}) else: self.actions[self.action_count+1] = {"Selected Text": "", "Selected Text Length": 0, "Options": [], "Time": int(time.time())} for i in range(len(text_list)): - self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i}) + self.actions[self.action_count+1]['Options'].append({"text": text_list[i], "Pinned": False, "Previous Selection": False, "Edited": False, "Probabilities": [], "stream_id": i+stream_offset}) #We need to see if this is the last token being streamed. If so due to the rely it will come in AFTER the actual trimmed final text overwriting it in the UI if self.koboldai_vars.tokenizer is not None: