From cbacfbdfac372d42fd5138783e1add3b84586e89 Mon Sep 17 00:00:00 2001 From: vfbd Date: Sat, 27 Aug 2022 17:42:49 -0400 Subject: [PATCH 001/118] Fix error that occurs when using dynamic TPU backend --- tpu_mtj_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tpu_mtj_backend.py b/tpu_mtj_backend.py index effb3de0..29ac4b42 100644 --- a/tpu_mtj_backend.py +++ b/tpu_mtj_backend.py @@ -533,7 +533,7 @@ def sample_func(data, key, numseqs_aux, badwords, repetition_penalty, generated_ gen_length, rpslope, rprange, - ) + ), **sampler_options, ) # Remember what token was picked From 171effc29b72f82e446797f1f134ce1b6803873d Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 27 Aug 2022 18:25:56 -0400 Subject: [PATCH 002/118] Bug fix for saves putting actions metadata as a dict instead of a list when not used yet --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index ef785313..edb75a65 100644 --- a/aiserver.py +++ b/aiserver.py @@ -270,7 +270,7 @@ class vars: setauthornotetemplate = authornotetemplate # Saved author's note template in settings andepth = 3 # How far back in history to append author's note actions = structures.KoboldStoryRegister() # Actions submitted by user and AI - actions_metadata = {} # List of dictonaries, one dictonary for every action that contains information about the action like alternative options. + actions_metadata = [] # List of dictonaries, one dictonary for every action that contains information about the action like alternative options. # Contains at least the same number of items as actions. Back action will remove an item from actions, but not actions_metadata # Dictonary keys are: # Selected Text: (text the user had selected. None when this is a newly generated action) From b5a6b44582f7cdcb059d4f5ed7ac8cb88a3ab406 Mon Sep 17 00:00:00 2001 From: ebolam Date: Sat, 27 Aug 2022 18:47:57 -0400 Subject: [PATCH 003/118] Revert "Bug fix for saves putting actions metadata as a dict instead of a list when not used yet" This reverts commit 171effc29b72f82e446797f1f134ce1b6803873d. --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index edb75a65..ef785313 100644 --- a/aiserver.py +++ b/aiserver.py @@ -270,7 +270,7 @@ class vars: setauthornotetemplate = authornotetemplate # Saved author's note template in settings andepth = 3 # How far back in history to append author's note actions = structures.KoboldStoryRegister() # Actions submitted by user and AI - actions_metadata = [] # List of dictonaries, one dictonary for every action that contains information about the action like alternative options. + actions_metadata = {} # List of dictonaries, one dictonary for every action that contains information about the action like alternative options. # Contains at least the same number of items as actions. Back action will remove an item from actions, but not actions_metadata # Dictonary keys are: # Selected Text: (text the user had selected. 
None when this is a newly generated action) From 807ddf6f2633cefcca149590f7f8a839bda59bfb Mon Sep 17 00:00:00 2001 From: vfbd Date: Sun, 28 Aug 2022 15:53:15 -0400 Subject: [PATCH 004/118] Add PUT /model endpoint --- aiserver.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0eaddfca..14ee1801 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1952,18 +1952,20 @@ def reset_model_settings(): vars.newlinemode = "n" vars.revision = None -def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model=""): +def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=False, online_model="", use_breakmodel_args=False, breakmodel_args_default_to_cpu=False): global model global generator global torch global model_config global GPT2TokenizerFast global tokenizer + if(initial_load): + use_breakmodel_args = True reset_model_settings() if not utils.HAS_ACCELERATE: disk_layers = None vars.noai = False - if not initial_load: + if not use_breakmodel_args: set_aibusy(True) if vars.model != 'ReadOnly': emit('from_server', {'cmd': 'model_load_status', 'data': "Loading {}".format(vars.model)}, broadcast=True) @@ -1971,12 +1973,16 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal time.sleep(0.1) if gpu_layers is not None: args.breakmodel_gpulayers = gpu_layers - elif initial_load: + elif use_breakmodel_args: gpu_layers = args.breakmodel_gpulayers + if breakmodel_args_default_to_cpu and gpu_layers is None: + gpu_layers = args.breakmodel_gpulayers = [] if disk_layers is not None: args.breakmodel_disklayers = int(disk_layers) - elif initial_load: + elif use_breakmodel_args: disk_layers = args.breakmodel_disklayers + if breakmodel_args_default_to_cpu and disk_layers is None: + disk_layers = args.breakmodel_disklayers = 0 #We need to wipe out the existing model and refresh the cuda cache model = None @@ -2070,6 +2076,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): loadmodelsettings() loadsettings() + print(2) print("{0}Looking for GPU support...{1}".format(colors.PURPLE, colors.END), end="") vars.hascuda = torch.cuda.is_available() vars.bmsupported = (utils.HAS_ACCELERATE or vars.model_type in ("gpt_neo", "gptj", "xglm", "opt")) and not vars.nobreakmodel @@ -2319,7 +2326,6 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal # If we're using torch_lazy_loader, we need to get breakmodel config # early so that it knows where to load the individual model tensors if (utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel) and not vars.nobreakmodel: - print(1) device_config(model_config) # Download model from Huggingface if it does not exist, otherwise load locally @@ -7256,6 +7262,9 @@ class WorldInfoFoldersUIDsSchema(KoboldSchema): class WorldInfoUIDsSchema(WorldInfoEntriesUIDsSchema): folders: List[WorldInfoFolderSchema] = fields.List(fields.Nested(WorldInfoFolderUIDsSchema), required=True) +class ModelSelectionSchema(KoboldSchema): + model: str = fields.String(required=True, validate=validate.Regexp(r"^(?!\s*NeoCustom)(?!\s*GPT2Custom)(?!\s*TPUMeshTransformerGPTJ)(?!\s*TPUMeshTransformerGPTNeoX)(?!\s*GooseAI)(?!\s*OAI)(?!\s*InferKit)(?!\s*Colab)(?!\s*API).*$"), 
metadata={"description": 'Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model'}) + def _generate_text(body: GenerationInputSchema): if vars.aibusy or vars.genseqs: abort(Response(json.dumps({"detail": { @@ -7467,6 +7476,44 @@ def get_model(): return {"result": vars.model} +@api_v1.put("/model") +@api_schema_wrap +def put_model(body: ModelSelectionSchema): + """--- + put: + summary: Load a model + description: |-2 + Loads a model given its Hugging Face model ID, the path to a model folder (relative to the "models" folder in the KoboldAI root folder) or "ReadOnly" for no model. + tags: + - model + requestBody: + required: true + content: + application/json: + schema: ModelSelectionSchema + example: + model: ReadOnly + responses: + 200: + description: Successful request + content: + application/json: + schema: EmptySchema + {api_validation_error_response} + {api_server_busy_response} + """ + set_aibusy(1) + old_model = vars.model + vars.model = body.model.strip() + try: + load_model(use_breakmodel_args=True, breakmodel_args_default_to_cpu=True) + except Exception as e: + vars.model = old_model + raise e + set_aibusy(0) + return {} + + def prompt_validator(prompt: str): if len(prompt.strip()) == 0: raise ValidationError("String does not match expected pattern.") From 8292f17ab09e067d2f4562e67a461e3e90d089f1 Mon Sep 17 00:00:00 2001 From: vfbd Date: Mon, 29 Aug 2022 13:23:19 -0400 Subject: [PATCH 005/118] Don't allow changing model during generation --- aiserver.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/aiserver.py b/aiserver.py index 14ee1801..2ce2afe5 100644 --- a/aiserver.py +++ b/aiserver.py @@ -7502,6 +7502,11 @@ def put_model(body: ModelSelectionSchema): {api_validation_error_response} {api_server_busy_response} """ + if vars.aibusy or vars.genseqs: + abort(Response(json.dumps({"detail": { + "msg": "Server is busy; please try again later.", + "type": "service_unavailable", + }}), mimetype="application/json", status=503)) set_aibusy(1) old_model = vars.model vars.model = body.model.strip() From 181c93424c92a1073f21486b86d792cd5732ce62 Mon Sep 17 00:00:00 2001 From: ebolam Date: Tue, 30 Aug 2022 15:10:11 -0400 Subject: [PATCH 006/118] Fix for KoboldAI API as a model option --- aiserver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index 2ce2afe5..bf4cfac7 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1426,7 +1426,9 @@ def get_model_info(model, directory=""): gpu_names = [] for i in range(gpu_count): gpu_names.append(torch.cuda.get_device_name(i)) - if model in [x[1] for x in model_menu['apilist']]: + if model in ['Colab', 'API']: + url = True + elif model in [x[1] for x in model_menu['apilist']]: if path.exists("settings/{}.settings".format(model)): with open("settings/{}.settings".format(model), "r") as file: # Check if API key exists @@ -1439,8 +1441,6 @@ def get_model_info(model, directory=""): key = True elif model == 'ReadOnly': pass - elif model == 'Colab': - url = True elif not utils.HAS_ACCELERATE and not torch.cuda.is_available(): pass elif args.cpu: From 42e04afc83fc744c1eb40b45eff19b57c0edf035 Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Tue, 30 Aug 2022 21:11:54 +0200 Subject: [PATCH 007/118] init --- aiserver.py | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/aiserver.py b/aiserver.py index bf4cfac7..81b0aaec 100644 --- a/aiserver.py +++ b/aiserver.py @@ 
-217,6 +217,7 @@ model_menu = { ["InferKit API (requires API key)", "InferKit", "", False], # ["KoboldAI Server API (Old Google Colab)", "Colab", "", False], ["KoboldAI API", "API", "", False], + ["KoboldAI Cluster", "CLUSTER", "", False], ["Return to Main Menu", "mainmenu", "", True], ] } @@ -1479,7 +1480,7 @@ def get_model_info(model, directory=""): def get_layer_count(model, directory=""): - if(model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): + if(model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ"]): if(model == "GPT2Custom"): with open(os.path.join(directory, "config.json"), "r") as f: model_config = json.load(f) @@ -2034,7 +2035,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal # If transformers model was selected & GPU available, ask to use CPU or GPU - if(vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): vars.allowsp = True # Test for GPU support @@ -2073,7 +2074,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal print("WARNING: No model type detected, assuming Neo (If this is a GPT2 model use the other menu option or --model GPT2Custom)") vars.model_type = "gpt_neo" - if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): loadmodelsettings() loadsettings() print(2) @@ -2127,7 +2128,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal vars.noai = True # Start transformers and create pipeline - if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(not vars.use_colab_tpu and vars.model not in ["InferKit", "Colab", "API", "CLUSTER", "OAI", "GooseAI" , "ReadOnly", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): if(not vars.noai): print("{0}Initializing transformers, please wait...{1}".format(colors.PURPLE, colors.END)) for m in ("GPTJModel", "XGLMModel"): @@ -2582,7 +2583,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal } # If we're running Colab or OAI, we still need a tokenizer. 
- if(vars.model in ("Colab", "API")): + if(vars.model in ("Colab", "API", "CLUSTER")): from transformers import GPT2TokenizerFast tokenizer = GPT2TokenizerFast.from_pretrained("EleutherAI/gpt-neo-2.7B", revision=vars.revision, cache_dir="cache") loadsettings() @@ -3228,7 +3229,7 @@ def lua_set_chunk(k, v): def lua_get_modeltype(): if(vars.noai): return "readonly" - if(vars.model in ("Colab", "API", "OAI", "InferKit")): + if(vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")): return "api" if(not vars.use_colab_tpu and vars.model not in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX") and (vars.model in ("GPT2Custom", "NeoCustom") or vars.model_type in ("gpt2", "gpt_neo", "gptj"))): hidden_size = get_hidden_size_from_model(model) @@ -3257,7 +3258,7 @@ def lua_get_modeltype(): def lua_get_modelbackend(): if(vars.noai): return "readonly" - if(vars.model in ("Colab", "API", "OAI", "InferKit")): + if(vars.model in ("Colab", "API", "CLUSTER", "OAI", "InferKit")): return "api" if(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): return "mtj" @@ -4228,6 +4229,8 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals raise NotImplementedError("API generation is not supported in old Colab API mode.") elif(vars.model == "API"): raise NotImplementedError("API generation is not supported in API mode.") + elif(vars.model == "CLUSTER"): + raise NotImplementedError("API generation is not supported in API mode.") elif(vars.model == "OAI"): raise NotImplementedError("API generation is not supported in OpenAI/GooseAI mode.") elif(vars.model == "ReadOnly"): @@ -4278,7 +4281,7 @@ def apiactionsubmit(data, use_memory=False, use_world_info=False, use_story=Fals minimum = len(tokens) + 1 maximum = len(tokens) + vars.genamt - if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): genout = apiactionsubmit_generate(tokens, minimum, maximum) elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): genout = apiactionsubmit_tpumtjgenerate(tokens, minimum, maximum) @@ -4446,7 +4449,7 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, if(actionlen == 0): # First/Prompt action - tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction ln = len(tokens) + lnsp return tokens, ln+1, ln+vars.genamt @@ -4494,12 +4497,12 @@ def calcsubmitbudget(actionlen, winfo, mem, anotetxt, actions, submission=None, # Did we get to add the A.N.? 
If not, do it here if(anotetxt != ""): if((not anoteadded) or forceanote): - tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens + tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + anotetkns + prompttkns + tokens else: - tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens + tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens else: # Prepend Memory, WI, and Prompt before action tokens - tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "OAI") else []) + memtokens + witokens + prompttkns + tokens + tokens = (tokenizer._koboldai_header if vars.model not in ("Colab", "API", "CLUSTER", "OAI") else []) + memtokens + witokens + prompttkns + tokens # Send completed bundle to generator assert len(tokens) <= vars.max_length - lnsp - vars.genamt - budget_deduction @@ -4521,23 +4524,27 @@ def calcsubmit(txt): if(vars.model != "InferKit"): subtxt, min, max = calcsubmitbudget(actionlen, winfo, mem, anotetxt, vars.actions, submission=txt) if(actionlen == 0): - if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): generate(subtxt, min, max, found_entries=found_entries) elif(vars.model == "Colab"): sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "API"): sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) + elif(vars.model == "CLUSTER"): + sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "OAI"): oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): tpumtjgenerate(subtxt, min, max, found_entries=found_entries) else: - if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): + if(not vars.use_colab_tpu and vars.model not in ["Colab", "API", "CLUSTER", "OAI", "TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX"]): generate(subtxt, min, max, found_entries=found_entries) elif(vars.model == "Colab"): sendtocolab(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "API"): sendtoapi(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) + elif(vars.model == "CLUSTER"): + sendtocluster(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.model == "OAI"): oairequest(utils.decodenewlines(tokenizer.decode(subtxt)), min, max) elif(vars.use_colab_tpu or vars.model in ("TPUMeshTransformerGPTJ", "TPUMeshTransformerGPTNeoX")): From 496ef1472d6638d488a3928ac5e6b360e1fad5f6 Mon Sep 17 00:00:00 2001 From: Divided by Zer0 Date: Tue, 30 Aug 2022 21:35:17 +0200 Subject: [PATCH 008/118] updated --- aiserver.py | 102 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 98 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 81b0aaec..a09a0714 100644 --- a/aiserver.py +++ b/aiserver.py @@ -319,6 +319,7 @@ class vars: colaburl = "" # Ngrok url for Google Colab mode apikey = "" # API key to 
use for InferKit API calls oaiapikey = "" # API key to use for OpenAI API calls + cluster_requested_models = [] # The models which we allow to generate during cluster mode savedir = getcwd()+"\\stories" hascuda = False # Whether torch has detected CUDA on the system usegpu = False # Whether to launch pipeline with GPU support @@ -1288,6 +1289,8 @@ def general_startup(override_args=None): parser.add_argument("--aria2_port", type=int, help="Specify the port on which aria2's RPC interface will be open if aria2 is installed (defaults to 6799)") parser.add_argument("--model", help="Specify the Model Type to skip the Menu") parser.add_argument("--path", help="Specify the Path for local models (For model NeoCustom or GPT2Custom)") + parser.add_argument("--apikey", help="Specify the API key to use for online services") + parser.add_argument("--req_model", type=str, action='append', required=False, help="Which models which we allow to generate for us during cluster mode. Can be specified multiple times.") parser.add_argument("--revision", help="Specify the model revision for huggingface models (can be a git branch/tag name or a git commit hash)") parser.add_argument("--cpu", action='store_true', help="By default unattended launches are on the GPU use this option to force CPU usage.") parser.add_argument("--breakmodel", action='store_true', help=argparse.SUPPRESS) @@ -1336,6 +1339,11 @@ def general_startup(override_args=None): vars.model = args.model; vars.revision = args.revision + if args.apikey: + vars.apikey = args.apikey + if args.req_model: + vars.cluster_requested_models = args.req_model + if args.colab: args.remote = True; args.override_rename = True; @@ -3979,11 +3987,19 @@ def actionsubmit(data, actionmode=0, force_submit=False, force_prompt_gen=False, while(True): set_aibusy(1) - if(vars.model == "API"): + if(vars.model in ["API","CLUSTER"]): global tokenizer - tokenizer_id = requests.get( - vars.colaburl[:-8] + "/api/v1/model", - ).json()["result"] + if vars.model == "API": + tokenizer_id = requests.get( + vars.colaburl[:-8] + "/api/v1/model", + ).json()["result"] + elif len(vars.cluster_requested_models) >= 1: + # If the player has requested one or more models, we use the first one for the tokenizer + tokenizer_id = vars.cluster_requested_models[0] + # The cluster can return any number of possible models for each gen, but this happens after this step + # So at this point, this is unknown + else: + tokenizer_id = "" if tokenizer_id != vars.api_tokenizer_id: try: if(os.path.isdir(tokenizer_id)): @@ -5024,6 +5040,84 @@ def sendtoapi(txt, min, max): set_aibusy(0) return +#==================================================================# +# Send transformers-style request to KoboldAI Cluster +#==================================================================# +def sendtocluster(txt, min, max): + # Log request to console + if not vars.quiet: + print("{0}Tokens:{1}, Txt:{2}{3}".format(colors.YELLOW, min-1, txt, colors.END)) + + # Store context in memory to use it for comparison with generated content + vars.lastctx = txt + + # Build request JSON data + reqdata = { + 'max_length': max - min + 1, + 'max_context_length': vars.max_length, + 'rep_pen': vars.rep_pen, + 'rep_pen_slope': vars.rep_pen_slope, + 'rep_pen_range': vars.rep_pen_range, + 'temperature': vars.temp, + 'top_p': vars.top_p, + 'top_k': vars.top_k, + 'top_a': vars.top_a, + 'tfs': vars.tfs, + 'typical': vars.typical, + 'n': vars.numseqs, + } + cluster_metadata = { + 'prompt': txt, + 'params': reqdata, + 'username': vars.apikey, + 
'models': vars.cluster_requested_models, + } + + # Create request + req = requests.post( + vars.colaburl[:-8] + "/generate/sync", + json=cluster_metadata, + ) + js = req.json() + if(req.status_code == 503): + errmsg = "KoboldAI API Error: No available KoboldAI servers found in cluster to fulfil this request using the selected models and requested lengths." + print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) + set_aibusy(0) + return + if(req.status_code != 200): + errmsg = "KoboldAI API Error: Failed to get a reply from the server. Please check the console." + print("{0}{1}{2}".format(colors.RED, json.dumps(js, indent=2), colors.END)) + emit('from_server', {'cmd': 'errmsg', 'data': errmsg}, broadcast=True) + set_aibusy(0) + return + genout = js + + for i in range(vars.numseqs): + vars.lua_koboldbridge.outputs[i+1] = genout[i] + + execute_outmod() + if(vars.lua_koboldbridge.regeneration_required): + vars.lua_koboldbridge.regeneration_required = False + genout = [] + for i in range(vars.numseqs): + genout.append(vars.lua_koboldbridge.outputs[i+1]) + assert type(genout[-1]) is str + + if(len(genout) == 1): + genresult(genout[0]) + else: + # Convert torch output format to transformers + seqs = [] + for seq in genout: + seqs.append({"generated_text": seq}) + if(vars.lua_koboldbridge.restart_sequence is not None and vars.lua_koboldbridge.restart_sequence > 0): + genresult(genout[vars.lua_koboldbridge.restart_sequence-1]["generated_text"]) + else: + genselect(genout) + + set_aibusy(0) + return #==================================================================# # Send text to TPU mesh transformer backend From f7b720b127444f7124988757e6dfa9f8966e9c9f Mon Sep 17 00:00:00 2001 From: henk717 Date: Wed, 31 Aug 2022 02:16:35 +0200 Subject: [PATCH 009/118] Clarify umamba People wanted the source code of umamba.exe so we clarify where it was taken from and where its source code can be found. --- readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 968aa995..0d69934b 100644 --- a/readme.md +++ b/readme.md @@ -216,4 +216,4 @@ Did we miss your contribution? Feel free to issue a commit adding your name to t KoboldAI is licensed with a AGPL license, in short this means that it can be used by anyone for any purpose. However, if you decide to make a publicly available instance your users are entitled to a copy of the source code including all modifications that you have made (which needs to be available trough an interface such as a button on your website), you may also not distribute this project in a form that does not contain the source code (Such as compiling / encrypting the code and distributing this version without also distributing the source code that includes the changes that you made. You are allowed to distribute this in a closed form if you also provide a separate archive with the source code.). -umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. +umamba.exe is bundled for convenience because we observed that many of our users had trouble with command line download methods, it is not part of our project and does not fall under the AGPL license. 
It is licensed under the BSD-3-Clause license. Other files with differing licenses will have a reference or embedded version of this license within the file. It has been sourced from https://anaconda.org/conda-forge/micromamba/files and its source code can be found here : https://github.com/mamba-org/mamba/tree/master/micromamba From 1031b70731456386076184fc714e601df8ffeca5 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 31 Aug 2022 09:34:14 -0400 Subject: [PATCH 010/118] Starts of adding cluster to UI --- aiserver.py | 57 ++++++++++++++++++++++++++++++++++++++++++- static/application.js | 5 ++++ 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index bf4cfac7..ab4fe521 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1422,12 +1422,17 @@ def get_model_info(model, directory=""): key_value = "" break_values = [] url = False + models_on_url = False gpu_count = torch.cuda.device_count() gpu_names = [] for i in range(gpu_count): gpu_names.append(torch.cuda.get_device_name(i)) if model in ['Colab', 'API']: url = True + elif model == 'CLUSTER': + models_on_url = True + url = True + key = True elif model in [x[1] for x in model_menu['apilist']]: if path.exists("settings/{}.settings".format(model)): with open("settings/{}.settings".format(model), "r") as file: @@ -1473,7 +1478,7 @@ def get_model_info(model, directory=""): 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE, 'break_values': break_values, 'gpu_count': gpu_count, - 'url': url, 'gpu_names': gpu_names}, broadcast=True) + 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url}, broadcast=True) if key_value != "": get_oai_models(key_value) @@ -1554,6 +1559,54 @@ def get_oai_models(key): print(req.json()) emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) +def get_cluster_models(msg): + vars.oaiapikey = msg['key'] + url = msg['url'] + + + # Get list of models from OAI + print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") + req = requests.get( + url, + headers = { + 'Authorization': 'Bearer '+key + } + ) + if(req.status_code == 200): + engines = req.json()["data"] + try: + engines = [[en["id"], "{} ({})".format(en['id'], "Ready" if en["ready"] == True else "Not Ready")] for en in engines] + except: + print(engines) + raise + + online_model = "" + changed=False + + #Save the key + if not path.exists("settings"): + # If the client settings file doesn't exist, create it + # Write API key to file + os.makedirs('settings', exist_ok=True) + if path.exists("settings/{}.settings".format(vars.model_selected)): + with open("settings/{}.settings".format(vars.model_selected), "r") as file: + js = json.load(file) + if 'online_model' in js: + online_model = js['online_model'] + if "apikey" in js: + if js['apikey'] != key: + changed=True + if changed: + with open("settings/{}.settings".format(vars.model_selected), "w") as file: + js["apikey"] = key + file.write(json.dumps(js, indent=3)) + + emit('from_server', {'cmd': 'oai_engines', 'data': engines, 'online_model': online_model}, broadcast=True) + else: + # Something went wrong, print the message and quit since we can't initialize an engine + print("{0}ERROR!{1}".format(colors.RED, colors.END)) + print(req.json()) + emit('from_server', {'cmd': 'errmsg', 'data': req.json()}) # Function to patch transformers to use our soft prompt def patch_causallm(model): @@ -3777,6 +3830,8 @@ def get_message(msg): print(colors.RED + "WARNING!!: Someone 
maliciously attempted to delete " + msg['data'] + " the attempt has been blocked.") elif(msg['cmd'] == 'OAI_Key_Update'): get_oai_models(msg['key']) + elif(msg['cmd'] == 'Cluster_Key_Update'): + get_cluster_models(msg) elif(msg['cmd'] == 'loadselect'): vars.loadselect = msg["data"] elif(msg['cmd'] == 'spselect'): diff --git a/static/application.js b/static/application.js index 9107e161..48bf595a 100644 --- a/static/application.js +++ b/static/application.js @@ -2918,6 +2918,11 @@ $(document).ready(function(){ if (msg.key) { $("#modelkey").removeClass("hidden"); $("#modelkey")[0].value = msg.key_value; + if (msg.models_on_url) { + $("#modelkey").onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': this.value, 'url': ${'modelurl')[].value});}; + } else { + $("#modelkey").onblur = function () {socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});}; + } //if we're in the API list, disable to load button until the model is selected (after the API Key is entered) disableButtons([load_model_accept]); } else { From 24ac6f3db87eb4ee9439e2af80b13b2c65bcbc74 Mon Sep 17 00:00:00 2001 From: ebolam Date: Wed, 31 Aug 2022 10:46:16 -0400 Subject: [PATCH 011/118] First working CLUSTER ui. Might need change when multiple models selected. --- aiserver.py | 33 +++++++++++++++++++++++---------- static/application.js | 17 +++++++++++++---- templates/index.html | 4 ++-- 3 files changed, 38 insertions(+), 16 deletions(-) diff --git a/aiserver.py b/aiserver.py index 6187bfaf..c47dac89 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1432,6 +1432,7 @@ def get_model_info(model, directory=""): break_values = [] url = False models_on_url = False + multi_online_models = False gpu_count = torch.cuda.device_count() gpu_names = [] for i in range(gpu_count): @@ -1442,6 +1443,16 @@ def get_model_info(model, directory=""): models_on_url = True url = True key = True + multi_online_models = True + if path.exists("settings/{}.settings".format(model)): + with open("settings/{}.settings".format(model), "r") as file: + # Check if API key exists + js = json.load(file) + if("apikey" in js and js["apikey"] != ""): + # API key exists, grab it and close the file + key_value = js["apikey"] + elif 'oaiapikey' in js and js['oaiapikey'] != "": + key_value = js["oaiapikey"] elif model in [x[1] for x in model_menu['apilist']]: if path.exists("settings/{}.settings".format(model)): with open("settings/{}.settings".format(model), "r") as file: @@ -1486,7 +1497,7 @@ def get_model_info(model, directory=""): emit('from_server', {'cmd': 'selected_model_info', 'key_value': key_value, 'key':key, 'gpu':gpu, 'layer_count':layer_count, 'breakmodel':breakmodel, 'disk_break_value': disk_blocks, 'accelerate': utils.HAS_ACCELERATE, - 'break_values': break_values, 'gpu_count': gpu_count, + 'break_values': break_values, 'gpu_count': gpu_count, 'multi_online_models': multi_online_models, 'url': url, 'gpu_names': gpu_names, 'models_on_url': models_on_url}, broadcast=True) if key_value != "": get_oai_models(key_value) @@ -1573,21 +1584,18 @@ def get_cluster_models(msg): url = msg['url'] - # Get list of models from OAI + # Get list of models from public cluster print("{0}Retrieving engine list...{1}".format(colors.PURPLE, colors.END), end="") - req = requests.get( - url, - headers = { - 'Authorization': 'Bearer '+key - } - ) + req = requests.get("{}/models".format(url)) if(req.status_code == 200): - engines = req.json()["data"] + engines = req.json() + print(engines) try: - engines = [[en["id"], "{} ({})".format(en['id'], "Ready" 
if en["ready"] == True else "Not Ready")] for en in engines] + engines = [[en, en] for en in engines] except: print(engines) raise + print(engines) online_model = "" changed=False @@ -3753,6 +3761,8 @@ def get_message(msg): elif(msg['cmd'] == 'list_model'): sendModelSelection(menu=msg['data']) elif(msg['cmd'] == 'load_model'): + print(msg) + print(vars.model_selected) if not os.path.exists("settings/"): os.mkdir("settings") changed = True @@ -3776,6 +3786,9 @@ def get_message(msg): f.close() vars.colaburl = msg['url'] + "/request" vars.model = vars.model_selected + if vars.model == "CLUSTER": + vars.cluster_requested_models = msg['online_model'] + print(vars.cluster_requested_models) load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model']) elif(msg['cmd'] == 'show_model'): print("Model Name: {}".format(getmodelname())) diff --git a/static/application.js b/static/application.js index 48bf595a..dc881968 100644 --- a/static/application.js +++ b/static/application.js @@ -2914,21 +2914,30 @@ $(document).ready(function(){ } else if(msg.cmd == 'selected_model_info') { enableButtons([load_model_accept]); $("#oaimodel").addClass("hidden") - $("#oaimodel")[0].options[0].selected = true; if (msg.key) { $("#modelkey").removeClass("hidden"); $("#modelkey")[0].value = msg.key_value; if (msg.models_on_url) { - $("#modelkey").onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': this.value, 'url': ${'modelurl')[].value});}; + $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'Cluster_Key_Update', 'key': this.value, 'url': document.getElementById("modelurl").value});}; } else { - $("#modelkey").onblur = function () {socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});}; + $("#modelkey")[0].onblur = function () {socket.send({'cmd': 'OAI_Key_Update', 'key': $('#modelkey')[0].value});}; } //if we're in the API list, disable to load button until the model is selected (after the API Key is entered) disableButtons([load_model_accept]); } else { $("#modelkey").addClass("hidden"); - } + + console.log(msg.multi_online_models); + if (msg.multi_online_models) { + $("#oaimodel")[0].setAttribute("multiple", ""); + console.log($("#oaimodel")[0]) + } else { + $("#oaimodel")[0].removeAttribute("multiple"); + } + + + if (msg.url) { $("#modelurl").removeClass("hidden"); } else { diff --git a/templates/index.html b/templates/index.html index 27b50b78..9720b286 100644 --- a/templates/index.html +++ b/templates/index.html @@ -295,12 +295,12 @@
- +
- +
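
A minimal client-side sketch of the PUT /model endpoint introduced in PATCH 004/118, including the 503 "service_unavailable" guard added in PATCH 005/118. The endpoint path, request body and status codes follow the patches; the base URL, port and helper function below are assumptions for illustration only.

```python
# Client-side sketch (not part of the patch series) for the PUT /model endpoint
# from PATCH 004/118. The base URL is an assumption; the JSON body follows
# ModelSelectionSchema and the 503 handling follows PATCH 005/118.
import requests

BASE_URL = "http://localhost:5000/api/v1"  # assumed default local KoboldAI instance

def switch_model(model: str) -> None:
    resp = requests.put(f"{BASE_URL}/model", json={"model": model})
    if resp.status_code == 503:
        # PATCH 005 makes the endpoint refuse to load a model while generation is running.
        detail = resp.json().get("detail", {})
        raise RuntimeError(f"Server busy: {detail.get('msg', 'please try again later')}")
    resp.raise_for_status()  # a 422 here means ModelSelectionSchema validation failed

if __name__ == "__main__":
    switch_model("ReadOnly")                         # "ReadOnly" runs the server with no model
    print(requests.get(f"{BASE_URL}/model").json())  # expected: {"result": "ReadOnly"}
```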
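Similarly, a standalone sketch of the synchronous cluster request that sendtocluster() builds in PATCH 008/118. The payload keys mirror the reqdata and cluster_metadata dictionaries in that patch; the cluster URL, API key, sampler values and model name are placeholder assumptions.

```python
# Standalone sketch (not part of the patch series) of the POST that sendtocluster()
# from PATCH 008/118 sends to a KoboldAI cluster's /generate/sync endpoint.
# The URL, API key, sampler values and model name below are placeholder assumptions.
import requests

CLUSTER_URL = "https://cluster.example.invalid"  # assumed cluster base URL
API_KEY = "your-cluster-key"                     # sent as "username", as in the patch

payload = {
    "prompt": "You are standing in an open field west of a white house.",
    "params": {
        "max_length": 80,            # number of tokens to generate
        "max_context_length": 1024,  # vars.max_length in the patch
        "rep_pen": 1.1,
        "rep_pen_slope": 0.7,
        "rep_pen_range": 1024,
        "temperature": 0.5,
        "top_p": 0.9,
        "top_k": 0,
        "top_a": 0.0,
        "tfs": 1.0,
        "typical": 1.0,
        "n": 1,                      # vars.numseqs
    },
    "username": API_KEY,
    "models": ["KoboldAI/fairseq-dense-13B"],  # illustrative; matches --req_model entries
}

resp = requests.post(f"{CLUSTER_URL}/generate/sync", json=payload)
if resp.status_code == 503:
    # No cluster worker can serve the requested models and lengths; the patch handles this the same way.
    print("No available KoboldAI servers found in cluster:", resp.json())
else:
    resp.raise_for_status()
    print(resp.json())  # the patch reads the JSON body as a list of generated sequences
```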