From b9da974eb739f6b8efa69df9f8e33983fae24a76 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 14 Aug 2023 00:56:40 -0500 Subject: [PATCH 01/52] GenericHFTorch: Change use_4_bit to quantization in __init__ --- modeling/inference_models/generic_hf_torch/class.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index a059ebb0..a7734e7d 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -29,7 +29,7 @@ model_backend_type = "Huggingface" #This should be a generic name in case multip class model_backend(HFTorchInferenceModel): def __init__(self) -> None: super().__init__() - self.use_4_bit = False + self.quantization = False def is_valid(self, model_name, model_path, menu_path): base_is_valid = super().is_valid(model_name, model_path, menu_path) @@ -72,7 +72,7 @@ class model_backend(HFTorchInferenceModel): else: logger.warning("Bitsandbytes is not installed, you can not use Quantization for Huggingface models") return requested_parameters - + def set_input_parameters(self, parameters): super().set_input_parameters(parameters) self.quantization = parameters['quantization'] if 'quantization' in parameters else False From 4bd04d02ab4db16f231ac789c06dbeac9256d025 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 14 Aug 2023 01:36:27 -0500 Subject: [PATCH 02/52] Try to fix wi --- aiserver.py | 55 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0552eb60..18ba0592 100644 --- a/aiserver.py +++ b/aiserver.py @@ -1114,7 +1114,7 @@ def loadmodelsettings(): try: js = json.load(open(koboldai_vars.custmodpth + "/config.json", "r")) except Exception as e: - js = json.load(open(koboldai_vars.custmodpth.replace('/', '_') + "/config.json", "r")) + js = json.load(open(koboldai_vars.custmodpth.replace('/', '_') + "/config.json", "r")) except Exception as e: js = {} koboldai_vars.default_preset = koboldai_settings.default_preset @@ -2839,7 +2839,7 @@ def get_message(msg): emit('from_server', {'cmd': 'wiupdate', 'num': msg['num'], 'data': {field: koboldai_vars.worldinfo[num][field] for field in fields}}, broadcast=True, room="UI_1") elif(msg['cmd'] == 'wifolderupdate'): setgamesaved(False) - uid = str(msg['uid']) + uid = msg['uid'] fields = ("name", "collapsed") for field in fields: if(field in msg['data'] and type(msg['data'][field]) is (str if field != "collapsed" else bool)): @@ -4287,17 +4287,17 @@ def togglewimode(): # #==================================================================# def addwiitem(folder_uid=None): - assert folder_uid is None or str(folder_uid) in koboldai_vars.wifolders_d + assert folder_uid is None or folder_uid in koboldai_vars.wifolders_d ob = {"key": "", "keysecondary": "", "content": "", "comment": "", "folder": folder_uid, "num": len(koboldai_vars.worldinfo), "init": False, "selective": False, "constant": False} koboldai_vars.worldinfo.append(ob) while(True): - uid = str(int.from_bytes(os.urandom(4), "little", signed=True)) + uid = int.from_bytes(os.urandom(4), "little", signed=True) if(uid not in koboldai_vars.worldinfo_u): break koboldai_vars.worldinfo_u[uid] = koboldai_vars.worldinfo[-1] koboldai_vars.worldinfo[-1]["uid"] = uid if(folder_uid is not None): - koboldai_vars.wifolders_u[str(folder_uid)].append(koboldai_vars.worldinfo[-1]) + 
koboldai_vars.wifolders_u[folder_uid].append(koboldai_vars.worldinfo[-1]) emit('from_server', {'cmd': 'addwiitem', 'data': ob}, broadcast=True, room="UI_1") #==================================================================# @@ -4305,7 +4305,7 @@ def addwiitem(folder_uid=None): #==================================================================# def addwifolder(): while(True): - uid = str(int.from_bytes(os.urandom(4), "little", signed=True)) + uid = int.from_bytes(os.urandom(4), "little", signed=True) if(uid not in koboldai_vars.wifolders_d): break ob = {"name": "", "collapsed": False} @@ -4321,18 +4321,18 @@ def addwifolder(): #==================================================================# def movewiitem(dst, src): setgamesaved(False) - if(koboldai_vars.worldinfo_u[str(src)]["folder"] is not None): - for i, e in enumerate(koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(src)]["folder"])]): - if(e["uid"] == koboldai_vars.worldinfo_u[str(src)]["uid"]): - koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(src)]["folder"])].pop(i) + if(koboldai_vars.worldinfo_u[src]["folder"] is not None): + for i, e in enumerate(koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[src]["folder"]]): + if(e["uid"] == koboldai_vars.worldinfo_u[src]["uid"]): + koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[src]["folder"]].pop(i) break - if(koboldai_vars.worldinfo_u[str(dst)]["folder"] is not None): - koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(dst)]["folder"])].append(koboldai_vars.worldinfo_u[str(src)]) - koboldai_vars.worldinfo_u[str(src)]["folder"] = koboldai_vars.worldinfo_u[str(dst)]["folder"] + if(koboldai_vars.worldinfo_u[dst]["folder"] is not None): + koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[dst]["folder"]].append(koboldai_vars.worldinfo_u[src]) + koboldai_vars.worldinfo_u[src]["folder"] = koboldai_vars.worldinfo_u[dst]["folder"] for i, e in enumerate(koboldai_vars.worldinfo): - if(e["uid"] == koboldai_vars.worldinfo_u[str(src)]["uid"]): + if(e["uid"] == koboldai_vars.worldinfo_u[src]["uid"]): _src = i - elif(e["uid"] == koboldai_vars.worldinfo_u[str(dst)]["uid"]): + elif(e["uid"] == koboldai_vars.worldinfo_u[dst]["uid"]): _dst = i koboldai_vars.worldinfo[_src]["folder"] = koboldai_vars.worldinfo[_dst]["folder"] koboldai_vars.worldinfo.insert(_dst - (_dst >= _src), koboldai_vars.worldinfo.pop(_src)) @@ -4344,12 +4344,12 @@ def movewiitem(dst, src): #==================================================================# def movewifolder(dst, src): setgamesaved(False) - koboldai_vars.wifolders_l.remove(str(src)) + koboldai_vars.wifolders_l.remove(src) if(dst is None): # If dst is None, that means we should move src to be the last folder - koboldai_vars.wifolders_l.append(str(src)) + koboldai_vars.wifolders_l.append(src) else: - koboldai_vars.wifolders_l.insert(koboldai_vars.wifolders_l.index(str(dst)), str(src)) + koboldai_vars.wifolders_l.insert(koboldai_vars.wifolders_l.index(dst), src) sendwi() #==================================================================# @@ -4375,7 +4375,7 @@ def sendwi(): last_folder = ... 
for wi in koboldai_vars.worldinfo: if(wi["folder"] != last_folder): - emit('from_server', {'cmd': 'addwifolder', 'uid': wi["folder"], 'data': koboldai_vars.wifolders_d[str(wi["folder"])] if wi["folder"] is not None else None}, broadcast=True, room="UI_1") + emit('from_server', {'cmd': 'addwifolder', 'uid': wi["folder"], 'data': koboldai_vars.wifolders_d[wi["folder"]] if wi["folder"] is not None else None}, broadcast=True, room="UI_1") last_folder = wi["folder"] ob = wi emit('from_server', {'cmd': 'addwiitem', 'data': ob}, broadcast=True, room="UI_1") @@ -4418,7 +4418,6 @@ def stablesortwi(): #==================================================================# def commitwi(ar): for ob in ar: - ob["uid"] = str(ob["uid"]) koboldai_vars.worldinfo_u[ob["uid"]]["key"] = ob["key"] koboldai_vars.worldinfo_u[ob["uid"]]["keysecondary"] = ob["keysecondary"] koboldai_vars.worldinfo_u[ob["uid"]]["content"] = ob["content"] @@ -4441,9 +4440,9 @@ def deletewi(uid): koboldai_vars.deletewi = uid if(koboldai_vars.deletewi is not None): if(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"] is not None): - for i, e in enumerate(koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"])]): + for i, e in enumerate(koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"]]): if(e["uid"] == koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["uid"]): - koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"])].pop(i) + koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"]].pop(i) break for i, e in enumerate(koboldai_vars.worldinfo): if(e["uid"] == koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["uid"]): @@ -4459,18 +4458,17 @@ def deletewi(uid): # #==================================================================# def deletewifolder(uid): - uid = str(uid) del koboldai_vars.wifolders_u[uid] del koboldai_vars.wifolders_d[uid] del koboldai_vars.wifolders_l[koboldai_vars.wifolders_l.index(uid)] setgamesaved(False) # Delete uninitialized entries in the folder we're going to delete - koboldai_vars.worldinfo = [wi for wi in koboldai_vars.worldinfo if str(wi["folder"]) != uid or wi["init"]] + koboldai_vars.worldinfo = [wi for wi in koboldai_vars.worldinfo if wi["folder"] != uid or wi["init"]] koboldai_vars.worldinfo_i = [wi for wi in koboldai_vars.worldinfo if wi["init"]] # Move WI entries that are inside of the folder we're going to delete # so that they're outside of all folders for wi in koboldai_vars.worldinfo: - if(str(wi["folder"]) == uid): + if(wi["folder"] == uid): wi["folder"] = None sendwi() @@ -6605,7 +6603,7 @@ def UI_2_import_world_info(): for child in children: # Child is index if child not in uids: - entry_data = wi_data["entries"][str(child)] + entry_data = wi_data["entries"][child] uids[child] = koboldai_vars.worldinfo_v2.add_item( title=entry_data["title"], key=entry_data["key"], @@ -9209,7 +9207,7 @@ def get_world_info(): if wi["folder"] != last_folder: folder = [] if wi["folder"] is not None: - folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder}) + folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder}) last_folder = wi["folder"] (folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")}) return {"folders": folders, "entries": entries} @@ 
-10256,7 +10254,8 @@ def post_world_info_folders_none(body: EmptySchema): stablesortwi() koboldai_vars.worldinfo_i = [wi for wi in koboldai_vars.worldinfo if wi["init"]] setgamesaved(False) - emit('from_server', {'cmd': 'wiexpand', 'data': koboldai_vars.worldinfo[-1]["num"]}, broadcast=True) + if koboldai_vars.worldinfo: + emit('from_server', {'cmd': 'wiexpand', 'data': koboldai_vars.worldinfo[-1]["num"]}, broadcast=True) koboldai_vars.worldinfo[-1]["init"] = True addwiitem(folder_uid=None) return {"uid": koboldai_vars.worldinfo[-2]["uid"]} From 19029939c29071b4531122755a5e19cd13f92074 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 14 Aug 2023 02:04:49 -0500 Subject: [PATCH 03/52] API: somewhat-thoroughly automatically test WI api --- aiserver.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 18ba0592..1e7e8035 100644 --- a/aiserver.py +++ b/aiserver.py @@ -10254,8 +10254,7 @@ def post_world_info_folders_none(body: EmptySchema): stablesortwi() koboldai_vars.worldinfo_i = [wi for wi in koboldai_vars.worldinfo if wi["init"]] setgamesaved(False) - if koboldai_vars.worldinfo: - emit('from_server', {'cmd': 'wiexpand', 'data': koboldai_vars.worldinfo[-1]["num"]}, broadcast=True) + emit('from_server', {'cmd': 'wiexpand', 'data': koboldai_vars.worldinfo[-1]["num"]}, broadcast=True) koboldai_vars.worldinfo[-1]["init"] = True addwiitem(folder_uid=None) return {"uid": koboldai_vars.worldinfo[-2]["uid"]} From 213d7a55d43c457eaf3f33228b87d468f8af1322 Mon Sep 17 00:00:00 2001 From: somebody Date: Mon, 14 Aug 2023 13:30:22 -0500 Subject: [PATCH 04/52] Fixup --- aiserver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/aiserver.py b/aiserver.py index 1e7e8035..ee86be91 100644 --- a/aiserver.py +++ b/aiserver.py @@ -4396,8 +4396,8 @@ def requestwi(): # and items in different folders are sorted based on the order of the folders #==================================================================# def stablesortwi(): - mapping = {int(uid): index for index, uid in enumerate(koboldai_vars.wifolders_l)} - koboldai_vars.worldinfo.sort(key=lambda x: mapping[int(x["folder"])] if x["folder"] is not None else float("inf")) + mapping = {uid: index for index, uid in enumerate(koboldai_vars.wifolders_l)} + koboldai_vars.worldinfo.sort(key=lambda x: mapping[x["folder"]] if x["folder"] is not None else float("inf")) last_folder = ... 
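
Patches 02 through 04 (and patch 10 further down) normalize world-info UIDs to plain integers: a UID is drawn as a random signed 32-bit int and then used directly as a dictionary key, with no str() round-trips. The one place strings still appear is JSON, whose object keys are always strings, so anything read back from disk has to be re-keyed. A minimal sketch of both halves of that pattern; the helper names are illustrative, the expressions are taken from the diffs:

    import json
    import os

    def new_wi_uid(existing: dict) -> int:
        # Random signed 32-bit integer, retried until unused, as addwiitem/addwifolder do.
        while True:
            uid = int.from_bytes(os.urandom(4), "little", signed=True)
            if uid not in existing:
                return uid

    def load_wi_dict(path: str) -> dict:
        # JSON object keys are always strings, so re-key to int on load,
        # the same fix patch 10 applies to worldinfo_u and wifolders_d.
        with open(path, "r") as f:
            raw = json.load(f)
        return {int(k): v for k, v in raw.items()}
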
last_wi = None for i, wi in enumerate(koboldai_vars.worldinfo): @@ -6548,7 +6548,7 @@ def UI_2_create_world_info_folder(data): @socketio.on('delete_world_info') @logger.catch def UI_2_delete_world_info(uid): - koboldai_vars.worldinfo_v2.delete(int(uid)) + koboldai_vars.worldinfo_v2.delete(uid) #==================================================================# @@ -7706,7 +7706,6 @@ def maybe_review_story() -> None: for uid, wi in koboldai_vars.worldinfo_v2.world_info.items(): if wi["type"] == "commentator": continue - uid = int(uid) allowed_wi_uids.append(uid) prompt = f"\n\n{speaker_name}'s thoughts on what just happened in this story: \"" From ff999657d26857853fe66adb11ec2fbfe63adad9 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 18 Aug 2023 22:42:15 +0200 Subject: [PATCH 05/52] Chat List Update --- aiserver.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index 0552eb60..b3f0abf0 100644 --- a/aiserver.py +++ b/aiserver.py @@ -304,9 +304,11 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'chatlist': [ - MenuModel("Pygmalion 6B", "PygmalionAI/pygmalion-6b", "16GB"), - MenuModel("Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "8GB"), - MenuModel("Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "6GB"), + MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"), + MenuModel("Huginn 13B", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"), + MenuModel("Pygmalion 6B", "PygmalionAI/pygmalion-6b", "8GB*"), + MenuModel("Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "6GB"), + MenuModel("Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "4GB*"), MenuModel("Pygmalion 350M", "PygmalionAI/pygmalion-350m", "2GB"), MenuFolder("Return to Main Menu", "mainmenu"), ], From 79bc1d6610410d67b842e9aa7aef461354aa61b9 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 18 Aug 2023 23:15:21 +0200 Subject: [PATCH 06/52] First instruct batch --- aiserver.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/aiserver.py b/aiserver.py index b3f0abf0..88730314 100644 --- a/aiserver.py +++ b/aiserver.py @@ -243,10 +243,11 @@ model_menu = { MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), - MenuFolder("Adventure Models", "adventurelist"), + MenuFolder("Instruct Models", "instructlist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), MenuFolder("NSFW Models", "nsfwlist"), + MenuFolder("Adventure Models", "adventurelist"), MenuFolder("Untuned OPT", "optlist"), MenuFolder("Untuned GPT-Neo/J", "gptneolist"), MenuFolder("Untuned Pythia", "pythialist"), @@ -258,9 +259,20 @@ model_menu = { MenuFolder("Online Services", "apilist"), MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"), ], + 'instructlist': [ + MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"), + MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"), + MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"), + MenuModel("Legerdemain 13B", "CalderaAI/13B-Legerdemain-L2", "12GB*"), + MenuModel("Chronos 13b v2", "elinas/chronos-13b-v2", "12GB*"), + MenuModel("Huginn 13B", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"), + MenuFolder("Return to Main Menu", "mainmenu"), + ], 'adventurelist': [ + MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", 
"instructlist"), MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"), + MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "32GB"), MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"), MenuModel("Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"), MenuModel("Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"), @@ -304,8 +316,8 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'chatlist': [ - MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"), - MenuModel("Huginn 13B", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"), + MenuModel("Mythomax 13B (Instruct)", "Gryphe/MythoMax-L2-13b", "12GB*"), + MenuModel("Huginn 13B (Instruct)", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"), MenuModel("Pygmalion 6B", "PygmalionAI/pygmalion-6b", "8GB*"), MenuModel("Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "6GB"), MenuModel("Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "4GB*"), From f8987cb2f0981f61b65abd944b4bf57c7dae2a0e Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 00:21:22 +0200 Subject: [PATCH 07/52] Adventure ram update --- aiserver.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/aiserver.py b/aiserver.py index 88730314..e18864c9 100644 --- a/aiserver.py +++ b/aiserver.py @@ -270,17 +270,17 @@ model_menu = { ], 'adventurelist': [ MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"), - MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"), - MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"), - MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "32GB"), - MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"), - MenuModel("Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"), - MenuModel("Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"), - MenuModel("OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB"), - MenuModel("Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"), - MenuModel("Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"), - MenuModel("Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"), - MenuModel("Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"), + MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"), + MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"), + MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"), + MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "12GB"), + MenuModel("Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "12GB"), + MenuModel("Skein 6B", "KoboldAI/GPT-J-6B-Skein", "8GB*"), + MenuModel("OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "8GB"), + MenuModel("Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "8GB*"), + MenuModel("Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "6GB"), + MenuModel("Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "6GB"), + MenuModel("Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "4GB*"), MenuModel("Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"), MenuFolder("Return to Main Menu", "mainmenu"), ], From 5ae64354ee78f6cb452ad8725064afbf6a82e174 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 01:20:09 +0200 Subject: [PATCH 08/52] NSFW menu updates --- aiserver.py | 26 ++++++++++++++------------ 1 file changed, 14 
insertions(+), 12 deletions(-) diff --git a/aiserver.py b/aiserver.py index e18864c9..57de7bf4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -301,18 +301,20 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'nsfwlist': [ - MenuModel("Erebus 20B (NSFW)", "KoboldAI/GPT-NeoX-20B-Erebus", "64GB"), - MenuModel("Nerybus 13B (NSFW)", "KoboldAI/OPT-13B-Nerybus-Mix", "32GB"), - MenuModel("Erebus 13B (NSFW)", "KoboldAI/OPT-13B-Erebus", "32GB"), - MenuModel("Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB"), - MenuModel("Erebus 6.7B (NSFW)", "KoboldAI/OPT-6.7B-Erebus", "16GB"), - MenuModel("Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB"), - MenuModel("Lit V2 6B (NSFW)", "hakurei/litv2-6B-rev3", "16GB"), - MenuModel("Lit 6B (NSFW)", "hakurei/lit-6B", "16GB"), - MenuModel("Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB"), - MenuModel("Erebus 2.7B (NSFW)", "KoboldAI/OPT-2.7B-Erebus", "8GB"), - MenuModel("Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"), - MenuModel("Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"), + MenuFolder("Looking to Chat RP? Use a chat model", "chatlist"), + MenuModel("Green Devil (Novel)", "Pirr/pythia-13b-deduped-green_devil", "14GB"), + MenuModel("Erebus 20B (Novel)", "KoboldAI/GPT-NeoX-20B-Erebus", "20GB*"), + MenuModel("Nerybus 13B (Novel)", "KoboldAI/OPT-13B-Nerybus-Mix", "12GB"), + MenuModel("Erebus 13B (Novel)", "KoboldAI/OPT-13B-Erebus", "12GB"), + MenuModel("Shinen FSD 13B (Novel)", "KoboldAI/fairseq-dense-13B-Shinen", "12GB"), + MenuModel("Erebus 6.7B (Novel)", "KoboldAI/OPT-6.7B-Erebus", "8GB"), + MenuModel("Shinen FSD 6.7B (Novel)", "KoboldAI/fairseq-dense-6.7B-Shinen", "8GB"), + MenuModel("Lit V2 6B (Novel)", "hakurei/litv2-6B-rev3", "8GB*"), + MenuModel("Lit 6B (Novel)", "hakurei/lit-6B", "8GB*"), + MenuModel("Shinen 6B (Novel)", "KoboldAI/GPT-J-6B-Shinen", "6GB"), + MenuModel("Erebus 2.7B (Novel)", "KoboldAI/OPT-2.7B-Erebus", "6GB"), + MenuModel("Horni 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni", "6GB"), + MenuModel("Shinen 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Shinen", "6GB"), MenuFolder("Return to Main Menu", "mainmenu"), ], 'chatlist': [ From 80e784d3eae783ae221b6c252e887099aa7a40fa Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 01:39:31 +0200 Subject: [PATCH 09/52] Polish --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index 57de7bf4..8e89722c 100644 --- a/aiserver.py +++ b/aiserver.py @@ -301,7 +301,7 @@ model_menu = { MenuFolder("Return to Main Menu", "mainmenu"), ], 'nsfwlist': [ - MenuFolder("Looking to Chat RP? Use a chat model", "chatlist"), + MenuFolder("Looking for NSFW Chat RP? 
Most chat models give better replies", "chatlist"), MenuModel("Green Devil (Novel)", "Pirr/pythia-13b-deduped-green_devil", "14GB"), MenuModel("Erebus 20B (Novel)", "KoboldAI/GPT-NeoX-20B-Erebus", "20GB*"), MenuModel("Nerybus 13B (Novel)", "KoboldAI/OPT-13B-Nerybus-Mix", "12GB"), From 45486a47b01c9f199264ac130530d179dcbd64af Mon Sep 17 00:00:00 2001 From: somebody Date: Fri, 18 Aug 2023 19:27:02 -0500 Subject: [PATCH 10/52] WI: Fix UID keys being str ...again --- koboldai_settings.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 62e4918d..3b839d26 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -608,11 +608,16 @@ class settings(object): if key == 'sampler_order': if(len(value) < 7): value = [6] + value - if key == 'autosave': + elif key == 'autosave': autosave = value + elif key in ['worldinfo_u', 'wifolders_d']: + # Fix UID keys to be ints + value = {int(k): v for k, v in value.items()} + if isinstance(value, str): if value[:7] == 'base64:': value = pickle.loads(base64.b64decode(value[7:])) + #Need to fix the data type of value to match the module if type(getattr(self, key)) == int: setattr(self, key, int(value)) @@ -1010,7 +1015,7 @@ class story_settings(settings): new_world_info.add_item([x.strip() for x in wi["key"].split(",")][0], wi["key"], wi.get("keysecondary", ""), - "root" if wi["folder"] is None else self.wifolders_d[str(wi['folder'])]['name'], + "root" if wi["folder"] is None else self.wifolders_d[wi['folder']]['name'], wi.get("constant", False), wi["content"], wi.get("comment", ""), @@ -2551,7 +2556,7 @@ class KoboldWorldInfo(object): with open(image_path, "wb") as file: file.write(base64.b64decode(image_b64)) - data["entries"] = {k: self.upgrade_entry(v) for k,v in data["entries"].items()} + data["entries"] = {int(k): self.upgrade_entry(v) for k,v in data["entries"].items()} #Add the item start_time = time.time() @@ -2632,13 +2637,13 @@ class KoboldWorldInfo(object): self.story_settings.worldinfo.sort(key=lambda x: mapping[x["folder"]] if x["folder"] is not None else float("inf")) #self.wifolders_d = {} # Dictionary of World Info folder UID-info pairs - self.story_settings.wifolders_d = {str(folder_entries[x]): {'name': x, 'collapsed': False} for x in folder_entries if x != "root"} + self.story_settings.wifolders_d = {folder_entries[x]: {'name': x, 'collapsed': False} for x in folder_entries if x != "root"} #self.worldinfo_u = {} # Dictionary of World Info UID - key/value pairs - self.story_settings.worldinfo_u = {str(y["uid"]): y for x in folder_entries for y in self.story_settings.worldinfo if y["folder"] == (folder_entries[x] if x != "root" else None)} + self.story_settings.worldinfo_u = {y["uid"]: y for x in folder_entries for y in self.story_settings.worldinfo if y["folder"] == (folder_entries[x] if x != "root" else None)} #self.wifolders_u = {} # Dictionary of pairs of folder UID - list of WI UID - self.story_settings.wifolders_u = {str(folder_entries[x]): [y for y in self.story_settings.worldinfo if y['folder'] == folder_entries[x]] for x in folder_entries if x != "root"} + self.story_settings.wifolders_u = {folder_entries[x]: [y for y in self.story_settings.worldinfo if y['folder'] == folder_entries[x]] for x in folder_entries if x != "root"} def reset_used_in_game(self): for key in self.world_info: From 13b68c67d122192db163ec2cd9b4727e83927c04 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 13:02:50 +0200 Subject: [PATCH 11/52] Basic GPTQ Downloader --- 
.../inference_models/gptq_hf_torch/class.py | 45 +++++++++---------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index b48f1d56..b6c9b944 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -228,12 +228,14 @@ class model_backend(HFTorchInferenceModel): logger.warning(f"Gave up on lazy loading due to {e}") self.lazy_load = False - if self.get_local_model_path(): - # Model is stored locally, load it. - self.model = self._get_model(self.get_local_model_path()) - self.tokenizer = self._get_tokenizer(self.get_local_model_path()) - else: - raise NotImplementedError("GPTQ Model downloading not implemented") + if not self.get_local_model_path(): + print(self.get_local_model_path()) + from huggingface_hub import snapshot_download + target_dir = "models/" + self.model_name.replace("/", "_") + snapshot_download(self.model_name, local_dir=target_dir, local_dir_use_symlinks=False, cache_dir="cache/") + + self.model = self._get_model(self.get_local_model_path()) + self.tokenizer = self._get_tokenizer(self.get_local_model_path()) if ( utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default @@ -350,23 +352,20 @@ class model_backend(HFTorchInferenceModel): dematerialized_modules=False, ): if self.implementation == "occam": - try: - if model_type == "gptj": - model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "gpt_neox": - model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "llama": - model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "opt": - model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_tseype == "mpt": - model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "gpt_bigcode": - model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half() - else: - raise RuntimeError("Model not supported by Occam's GPTQ") - except: - self.implementation = "AutoGPTQ" + if model_type == "gptj": + model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "gpt_neox": + model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "llama": + model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "opt": + model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_tseype == "mpt": + model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "gpt_bigcode": + model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, 
device_map, force_bias=v2_bias).half() + else: + raise RuntimeError("Model not supported by Occam's GPTQ") if self.implementation == "AutoGPTQ": try: import auto_gptq From d93631c8896b4ce02c6f76f8b573f2ffe5efb866 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 14:45:45 +0200 Subject: [PATCH 12/52] GPTQ improvements --- aiserver.py | 3 +- .../inference_models/gptq_hf_torch/class.py | 39 +++++++++++-------- modeling/inference_models/hf.py | 4 +- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/aiserver.py b/aiserver.py index 79d10ba3..6c0456b2 100644 --- a/aiserver.py +++ b/aiserver.py @@ -242,7 +242,8 @@ model_menu = { "mainmenu": [ MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), - MenuModel("Load custom model from Hugging Face", "customhuggingface", ""), + MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""), + MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"), MenuFolder("Instruct Models", "instructlist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index b6c9b944..edcd8c49 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -155,7 +155,7 @@ class model_backend(HFTorchInferenceModel): def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters) - if model_name != 'customhuggingface' or "custom_model_name" in parameters: + if model_name != 'customgptq' or "custom_model_name" in parameters: if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self): with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f: temp = json.load(f) @@ -232,6 +232,7 @@ class model_backend(HFTorchInferenceModel): print(self.get_local_model_path()) from huggingface_hub import snapshot_download target_dir = "models/" + self.model_name.replace("/", "_") + print(self.model_name) snapshot_download(self.model_name, local_dir=target_dir, local_dir_use_symlinks=False, cache_dir="cache/") self.model = self._get_model(self.get_local_model_path()) @@ -352,20 +353,24 @@ class model_backend(HFTorchInferenceModel): dematerialized_modules=False, ): if self.implementation == "occam": - if model_type == "gptj": - model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "gpt_neox": - model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "llama": - model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "opt": - model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_tseype == "mpt": - model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) - elif model_type == "gpt_bigcode": - model = 
load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half() - else: - raise RuntimeError("Model not supported by Occam's GPTQ") + try: + if model_type == "gptj": + model = load_quant_offload_device_map(gptj_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "gpt_neox": + model = load_quant_offload_device_map(gptneox_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "llama": + model = load_quant_offload_device_map(llama_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "opt": + model = load_quant_offload_device_map(opt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_tseype == "mpt": + model = load_quant_offload_device_map(mpt_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias) + elif model_type == "gpt_bigcode": + model = load_quant_offload_device_map(bigcode_load_quant, location, gptq_file, gptq_bits, gptq_groupsize, device_map, force_bias=v2_bias).half() + else: + raise RuntimeError("Model not supported by Occam's GPTQ") + except: + self.implementation = "AutoGPTQ" + if self.implementation == "AutoGPTQ": try: import auto_gptq @@ -378,11 +383,13 @@ class model_backend(HFTorchInferenceModel): auto_gptq.modeling._base.AutoConfig = hf_bleeding_edge.AutoConfig auto_gptq.modeling._base.AutoModelForCausalLM = hf_bleeding_edge.AutoModelForCausalLM + autogptq_failed = False try: model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map) except: + autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk + if autogptq_failed: model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True) - # Patch in embeddings function def get_input_embeddings(self): return self.model.get_input_embeddings() diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index 167716d4..e50d87ff 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -47,7 +47,7 @@ class HFInferenceModel(InferenceModel): requested_parameters = [] if not self.hf_torch: return [] - if model_name == 'customhuggingface': + if model_name in ('customhuggingface', 'customgptq'): requested_parameters.append({ "uitype": "text", "unit": "text", @@ -61,7 +61,7 @@ class HFInferenceModel(InferenceModel): "extra_classes": "" }) - if model_name != 'customhuggingface' or "custom_model_name" in parameters: + if model_name not in ('customhuggingface', 'customgptq') or "custom_model_name" in parameters: model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name if model_path is not None and os.path.exists(model_path): self.model_config = AutoConfig.from_pretrained(model_path) From 6f557befa96a741c1a38221b92dd8e308fe36184 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 19 Aug 2023 15:17:29 +0200 Subject: [PATCH 13/52] GPTQ --revision support --- modeling/inference_models/gptq_hf_torch/class.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
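
Patch 11 replaces the earlier NotImplementedError with an actual download path for GPTQ checkpoints: when no local copy exists, the whole repository is pulled with huggingface_hub's snapshot_download before loading, and patch 13 just below threads the requested revision through the same call. A rough sketch of that flow; ensure_local_gptq is an illustrative helper, the snapshot_download arguments mirror the diff:

    import os
    from huggingface_hub import snapshot_download

    def ensure_local_gptq(model_name: str, revision=None) -> str:
        # "Foo/Bar" is stored as models/Foo_Bar, matching the backend's naming.
        target_dir = "models/" + model_name.replace("/", "_")
        if not os.path.isdir(target_dir):
            snapshot_download(
                model_name,
                local_dir=target_dir,
                local_dir_use_symlinks=False,  # real files, so the folder is self-contained
                cache_dir="cache/",
                revision=revision,             # passthrough added by patch 13
            )
        return target_dir
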
a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index edcd8c49..aa65a295 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -233,7 +233,7 @@ class model_backend(HFTorchInferenceModel): from huggingface_hub import snapshot_download target_dir = "models/" + self.model_name.replace("/", "_") print(self.model_name) - snapshot_download(self.model_name, local_dir=target_dir, local_dir_use_symlinks=False, cache_dir="cache/") + snapshot_download(self.model_name, local_dir=target_dir, local_dir_use_symlinks=False, cache_dir="cache/", revision=utils.koboldai_vars.revision) self.model = self._get_model(self.get_local_model_path()) self.tokenizer = self._get_tokenizer(self.get_local_model_path()) From 3dd0e91fbb82a7fd16091abbfbb6447492f08d9a Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 01:58:52 +0200 Subject: [PATCH 14/52] Preliminary HF GPTQ changes --- .../generic_hf_torch/class.py | 60 ++++++++++--------- .../inference_models/gptq_hf_torch/class.py | 2 +- 2 files changed, 32 insertions(+), 30 deletions(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index a7734e7d..9b1049cf 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -57,18 +57,19 @@ class model_backend(HFTorchInferenceModel): temp = json.load(f) else: temp = {} - requested_parameters.append({ - "uitype": "dropdown", - "unit": "text", - "label": "Quantization", - "id": "quantization", - "default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit', - "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode", - "menu_path": "Layers", - "children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}], - "extra_classes": "", - "refresh_model_inputs": False - }) + if not hasattr(self.model_config, 'quantization_config'): + requested_parameters.append({ + "uitype": "dropdown", + "unit": "text", + "label": "Quantization", + "id": "quantization", + "default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit', + "tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode", + "menu_path": "Layers", + "children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}], + "extra_classes": "", + "refresh_model_inputs": False + }) else: logger.warning("Bitsandbytes is not installed, you can not use Quantization for Huggingface models") return requested_parameters @@ -105,24 +106,25 @@ class model_backend(HFTorchInferenceModel): "low_cpu_mem_usage": True, } - if self.quantization == "8bit": - tf_kwargs.update({ - "quantization_config":BitsAndBytesConfig( - load_in_8bit=True, - llm_int8_enable_fp32_cpu_offload=True - ), - }) + if not hasattr(self.model_config, 'quantization_config'): + if self.quantization == "8bit": + tf_kwargs.update({ + "quantization_config":BitsAndBytesConfig( + load_in_8bit=True, + llm_int8_enable_fp32_cpu_offload=True + ), + }) - if self.quantization == "4bit" or utils.koboldai_vars.colab_arg: - tf_kwargs.update({ - "quantization_config":BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_compute_dtype=torch.float16, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type='nf4', - llm_int8_enable_fp32_cpu_offload=True - ), - }) + if self.quantization == 
"4bit" or utils.koboldai_vars.colab_arg: + tf_kwargs.update({ + "quantization_config":BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type='nf4', + llm_int8_enable_fp32_cpu_offload=True + ), + }) if self.model_type == "gpt2": # We must disable low_cpu_mem_usage and if using a GPT-2 model diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index aa65a295..3d044b6f 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -21,7 +21,7 @@ from pathlib import Path model_backend_type = "GPTQ" -model_backend_name = "Huggingface GPTQ" +model_backend_name = "Legacy GPTQ" def load_model_gptq_settings(path): From 8daa2f1adc0abe172bb7cbf88c5af17cfc76d6f3 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 02:01:34 +0200 Subject: [PATCH 15/52] Update Optimum on Git HF --- install_git_transformers.bat | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/install_git_transformers.bat b/install_git_transformers.bat index 34194459..7154e2a1 100644 --- a/install_git_transformers.bat +++ b/install_git_transformers.bat @@ -24,6 +24,8 @@ SET TEMP=%~DP0MINICONDA3 SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate pip install git+https://github.com/huggingface/transformers +pip install git+https://github.com/huggingface/optimum +pip install git+https://github.com/huggingface/accelerate cmd /k :drivemap @@ -34,6 +36,8 @@ SET TEMP=K:\ SET TMP=K:\ call K:\python\condabin\activate pip install git+https://github.com/huggingface/transformers +pip install git+https://github.com/huggingface/optimum +pip install git+https://github.com/huggingface/accelerate cmd /k :drivemap_B @@ -44,4 +48,6 @@ SET TEMP=B:\ SET TMP=B:\ call B:\python\condabin\activate pip install git+https://github.com/huggingface/transformers +pip install git+https://github.com/huggingface/optimum +pip install git+https://github.com/huggingface/accelerate cmd /k \ No newline at end of file From 5917737676a786b1ca43551a11d7012f2b4455f2 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 13:17:30 +0200 Subject: [PATCH 16/52] Don't disable exllama --- modeling/inference_models/gptq_hf_torch/class.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/gptq_hf_torch/class.py b/modeling/inference_models/gptq_hf_torch/class.py index 3d044b6f..3094dc33 100644 --- a/modeling/inference_models/gptq_hf_torch/class.py +++ b/modeling/inference_models/gptq_hf_torch/class.py @@ -389,7 +389,7 @@ class model_backend(HFTorchInferenceModel): except: autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk if autogptq_failed: - model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True) + model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False) # Patch in embeddings function def get_input_embeddings(self): return self.model.get_input_embeddings() From 57e5f51d63f1c84e6d993ff5ad8fb75a048034c6 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 14:08:14 +0200 Subject: [PATCH 17/52] AutoGPTQ for Colab --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git 
a/requirements.txt b/requirements.txt index 8dc7f9a2..b90db7d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,4 @@ git+https://github.com/0cc4m/hf_bleeding_edge/ einops peft==0.3.0 scipy +auto-gptq \ No newline at end of file From 955db1567e941b4e733f7f3648a00c5115cb299b Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 14:29:37 +0200 Subject: [PATCH 18/52] Keep the usual temp folder instead of ours --- commandline.bat | 6 ------ install_git_transformers.bat | 6 ------ install_requirements.bat | 4 ---- play.bat | 6 ------ update-koboldai.bat | 6 ------ 5 files changed, 28 deletions(-) diff --git a/commandline.bat b/commandline.bat index 94e61608..2575c849 100644 --- a/commandline.bat +++ b/commandline.bat @@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder IF %M%==3 GOTO drivemap_B :subfolder -SET TEMP=%~DP0MINICONDA3 -SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate cmd /k "%*" :drivemap subst K: miniconda3 >nul -SET TEMP=K:\ -SET TMP=K:\ call K:\python\condabin\activate cmd /k "%*" :drivemap_B subst B: miniconda3 >nul -SET TEMP=B:\ -SET TMP=B:\ call B:\python\condabin\activate cmd /k "%*" \ No newline at end of file diff --git a/install_git_transformers.bat b/install_git_transformers.bat index 7154e2a1..09f80547 100644 --- a/install_git_transformers.bat +++ b/install_git_transformers.bat @@ -20,8 +20,6 @@ IF %M%==3 GOTO drivemap_B :subfolder ECHO Runtime launching in subfolder mode -SET TEMP=%~DP0MINICONDA3 -SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate pip install git+https://github.com/huggingface/transformers pip install git+https://github.com/huggingface/optimum @@ -32,8 +30,6 @@ cmd /k ECHO Runtime launching in K: drive mode subst /D K: >nul subst K: miniconda3 >nul -SET TEMP=K:\ -SET TMP=K:\ call K:\python\condabin\activate pip install git+https://github.com/huggingface/transformers pip install git+https://github.com/huggingface/optimum @@ -44,8 +40,6 @@ cmd /k ECHO Runtime launching in B: drive mode subst /D B: >nul subst B: miniconda3 >nul -SET TEMP=B:\ -SET TMP=B:\ call B:\python\condabin\activate pip install git+https://github.com/huggingface/transformers pip install git+https://github.com/huggingface/optimum diff --git a/install_requirements.bat b/install_requirements.bat index 9756a18f..496917c0 100644 --- a/install_requirements.bat +++ b/install_requirements.bat @@ -44,8 +44,6 @@ echo 3 > loader.settings subst B: /D >nul mkdir miniconda3 subst B: miniconda3 -SET TEMP=B:\ -SET TMP=B:\ copy umamba.exe B:\umamba.exe copy loader.settings B:\loader.settings copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat @@ -60,8 +58,6 @@ exit :subfolder echo 2 > loader.settings -SET TEMP=%~DP0MINICONDA3 -SET TMP=%~DP0MINICONDA3 umamba.exe create -r miniconda3\ -n base umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy umamba.exe clean -a -y diff --git a/play.bat b/play.bat index c9e82b83..ebf9e8b6 100644 --- a/play.bat +++ b/play.bat @@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B :subfolder ECHO Runtime launching in subfolder mode -SET TEMP=%~DP0MINICONDA3 -SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate python aiserver.py %* cmd /k @@ -28,8 +26,6 @@ cmd /k ECHO Runtime launching in K: drive mode subst /D K: >nul subst K: miniconda3 >nul -SET TEMP=K:\ -SET TMP=K:\ call K:\python\condabin\activate python aiserver.py %* cmd /k @@ -38,8 +34,6 @@ cmd /k ECHO Runtime launching in B: drive mode subst /D B: >nul subst B: miniconda3 >nul -SET TEMP=B:\ -SET TMP=B:\ call 
B:\python\condabin\activate python aiserver.py %* cmd /k \ No newline at end of file diff --git a/update-koboldai.bat b/update-koboldai.bat index f2e642ee..b59f4d8f 100644 --- a/update-koboldai.bat +++ b/update-koboldai.bat @@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder IF %M%==3 GOTO drivemap_B :subfolder -SET TEMP=%~DP0MINICONDA3 -SET TMP=%~DP0MINICONDA3 call miniconda3\condabin\activate GOTO GIT :drivemap subst /D K: >nul subst K: miniconda3 >nul -SET TEMP=K:\ -SET TMP=K:\ call K:\python\condabin\activate GOTO GIT :drivemap_B subst /D B: >nul subst B: miniconda3 >nul -SET TEMP=B:\ -SET TMP=B:\ call B:\python\condabin\activate GOTO GIT From 45661ddc7563ad513766e64cf4ef68732af09352 Mon Sep 17 00:00:00 2001 From: db0 Date: Mon, 21 Aug 2023 15:52:17 +0200 Subject: [PATCH 19/52] switch to AI Horde Worker --- .gitmodules | 4 ++-- koboldai_settings.py | 51 +++++++++++++++++--------------------------- 2 files changed, 22 insertions(+), 33 deletions(-) diff --git a/.gitmodules b/.gitmodules index 0107a8c3..d95fbcae 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "KoboldAI-Horde"] path = KoboldAI-Horde - url = https://github.com/db0/KoboldAI-Horde-Bridge + url = https://github.com/Haidra-Org/AI-Horde-Worker [submodule "KoboldAI-Horde-Bridge"] path = KoboldAI-Horde-Bridge - url = https://github.com/db0/KoboldAI-Horde-Bridge + url = https://github.com/Haidra-Org/AI-Horde-Worker diff --git a/koboldai_settings.py b/koboldai_settings.py index 3b839d26..b2c9ef9d 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1350,38 +1350,27 @@ class system_settings(settings): self._koboldai_var.calc_ai_text() if name == 'horde_share': - if self.on_colab == False: - if os.path.exists("./KoboldAI-Horde-Bridge"): - if value == True: - if self._horde_pid is None: - logger.info("Starting Horde bridge") - bridge = importlib.import_module("KoboldAI-Horde-Bridge.bridge") - self._horde_pid = bridge.kai_bridge() - try: - bridge_cd = importlib.import_module("KoboldAI-Horde-Bridge.clientData") - cluster_url = bridge_cd.cluster_url - kai_name = bridge_cd.kai_name - if kai_name == "My Awesome Instance": - kai_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" - api_key = bridge_cd.api_key - priority_usernames = bridge_cd.priority_usernames - except: - cluster_url = "https://horde.koboldai.net" - kai_name = self._koboldai_var.horde_worker_name - if kai_name == "My Awesome Instance": - kai_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" - api_key = self._koboldai_var.horde_api_key - priority_usernames = [] - # Always use the local URL & port - kai_url = f'http://127.0.0.1:{self.port}' + if self.on_colab is True: + return + if not os.path.exists("./KoboldAI-Horde-Bridge"): + return + if value is True: + if self._horde_pid is None: + logger.info("Starting Horde bridge") + bd_module = importlib.import_module("KoboldAI-Horde-Bridge.worker.bridge_data.scribe") + bridge_data = bd_module.KoboldAIBridgeData() + bridge_data.reload_data() + bridge_data.kai_url = f'http://127.0.0.1:{self.port}' + logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") + worker_module = importlib.import_module("KoboldAI-Horde-Bridge.worker.workers.scribe") + self._horde_pid = worker_module.ScribeWorker(bridge_data) + threading.Thread(target=self._horde_pid.start).run() + else: + if self._horde_pid is not None: + logger.info("Killing Horde bridge") + self._horde_pid.stop() + self._horde_pid = None - logger.info(f"Name: {kai_name} on {kai_url}") - 
threading.Thread(target=self._horde_pid.bridge, args=(1, api_key, kai_name, kai_url, cluster_url, priority_usernames)).run() - else: - if self._horde_pid is not None: - logger.info("Killing Horde bridge") - self._horde_pid.stop() - self._horde_pid = None class KoboldStoryRegister(object): def __init__(self, socketio, story_settings, koboldai_vars, tokenizer=None, sequence=[]): From a655f8f066e07970a49c8b41fe161eaa60700fde Mon Sep 17 00:00:00 2001 From: db0 Date: Mon, 21 Aug 2023 15:56:27 +0200 Subject: [PATCH 20/52] adjust for stop mechanism --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index b2c9ef9d..423ab5a7 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1368,7 +1368,7 @@ class system_settings(settings): else: if self._horde_pid is not None: logger.info("Killing Horde bridge") - self._horde_pid.stop() + self._horde_pid.should_stop = True self._horde_pid = None From e2d56db195ad8aac800c992990df5436323bfed3 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 16:27:53 +0200 Subject: [PATCH 21/52] Fix bridge reference --- KoboldAI-Horde-Bridge | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KoboldAI-Horde-Bridge b/KoboldAI-Horde-Bridge index 20e8701d..fc9946e0 160000 --- a/KoboldAI-Horde-Bridge +++ b/KoboldAI-Horde-Bridge @@ -1 +1 @@ -Subproject commit 20e8701dd27d478ff405f4ac6e2042edf06174df +Subproject commit fc9946e02c4bc3d25c1b1cf30ed8348ace0f9f6f From a7251fa599ba6419e810608efcf4a96ccbd5f5bc Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 16:44:09 +0200 Subject: [PATCH 22/52] Bridge settings --- colabkobold.sh | 1 + koboldai_settings.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/colabkobold.sh b/colabkobold.sh index 80711541..6a8133ea 100644 --- a/colabkobold.sh +++ b/colabkobold.sh @@ -152,6 +152,7 @@ if [ "$init" != "skip" ]; then cp -rn softprompts/* /content/drive/MyDrive/KoboldAI/softprompts/ cp -rn presets/* /content/drive/MyDrive/KoboldAI/presets/ cp -rn themes/* /content/drive/MyDrive/KoboldAI/themes/ + rm -rf AI-Horde-Worker/ rm -rf KoboldAI-Horde-Bridge/ rm stories rm -rf stories/ diff --git a/koboldai_settings.py b/koboldai_settings.py index 423ab5a7..98160ffb 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1352,17 +1352,22 @@ class system_settings(settings): if name == 'horde_share': if self.on_colab is True: return - if not os.path.exists("./KoboldAI-Horde-Bridge"): + if not os.path.exists("./AI-Horde-Worker"): return if value is True: if self._horde_pid is None: logger.info("Starting Horde bridge") - bd_module = importlib.import_module("KoboldAI-Horde-Bridge.worker.bridge_data.scribe") + bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") bridge_data = bd_module.KoboldAIBridgeData() bridge_data.reload_data() bridge_data.kai_url = f'http://127.0.0.1:{self.port}' + bridge_data.horde_url = self._koboldai_var.horde_url + bridge_data.api_key = self._koboldai_var.horde_api_key + bridge_data.worker_name = self._koboldai_var.horde_worker_name + if bridge_data.worker_name == "My Awesome Instance": + bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") - worker_module = importlib.import_module("KoboldAI-Horde-Bridge.worker.workers.scribe") + worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe") self._horde_pid = 
worker_module.ScribeWorker(bridge_data) threading.Thread(target=self._horde_pid.start).run() else: From be8f5279114c2b0df6bc23aa258784a2ddd95f42 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 16:44:58 +0200 Subject: [PATCH 23/52] Horde URL fixes --- KoboldAI-Horde-Bridge => AI-Horde-Worker | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename KoboldAI-Horde-Bridge => AI-Horde-Worker (100%) diff --git a/KoboldAI-Horde-Bridge b/AI-Horde-Worker similarity index 100% rename from KoboldAI-Horde-Bridge rename to AI-Horde-Worker From 7b8fba31f735054be29a730e29fc1f0d3effb15e Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 16:46:02 +0200 Subject: [PATCH 24/52] Git is stubborn --- AI-Horde-Worker | 1 - 1 file changed, 1 deletion(-) delete mode 160000 AI-Horde-Worker diff --git a/AI-Horde-Worker b/AI-Horde-Worker deleted file mode 160000 index fc9946e0..00000000 --- a/AI-Horde-Worker +++ /dev/null @@ -1 +0,0 @@ -Subproject commit fc9946e02c4bc3d25c1b1cf30ed8348ace0f9f6f From 8abb5746f86b9c02a7e42a453c5cbcd21c948edf Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 16:50:17 +0200 Subject: [PATCH 25/52] Add bridge back --- .gitmodules | 9 +++------ AI-Horde-Worker | 1 + 2 files changed, 4 insertions(+), 6 deletions(-) create mode 160000 AI-Horde-Worker diff --git a/.gitmodules b/.gitmodules index d95fbcae..7a77eff9 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "KoboldAI-Horde"] - path = KoboldAI-Horde - url = https://github.com/Haidra-Org/AI-Horde-Worker -[submodule "KoboldAI-Horde-Bridge"] - path = KoboldAI-Horde-Bridge - url = https://github.com/Haidra-Org/AI-Horde-Worker +[submodule "AI-Horde-Worker"] + path = AI-Horde-Worker + url = https://github.com/Haidra-Org/AI-Horde-Worker/ diff --git a/AI-Horde-Worker b/AI-Horde-Worker new file mode 160000 index 00000000..fc9946e0 --- /dev/null +++ b/AI-Horde-Worker @@ -0,0 +1 @@ +Subproject commit fc9946e02c4bc3d25c1b1cf30ed8348ace0f9f6f From d9815d4b1f374d0f1947316c55f9d609e6b01b3f Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 17:52:21 +0200 Subject: [PATCH 26/52] New worker fixes --- aiserver.py | 6 ++++++ environments/huggingface.yml | 2 ++ environments/rocm.yml | 2 ++ logger.py | 13 +++++++++++++ requirements.txt | 2 ++ 5 files changed, 25 insertions(+) diff --git a/aiserver.py b/aiserver.py index 6c0456b2..24fb9146 100644 --- a/aiserver.py +++ b/aiserver.py @@ -61,6 +61,12 @@ import gc import traceback import lupa +# Hack to make the new Horde worker understand its imports... 
+try: + sys.path.append(os.path.abspath("AI-Horde-Worker")) + print(os.path.abspath("AI-Horde-Worker/")) +except: + pass # KoboldAI import fileops diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 004c7ecc..0ceb43ec 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -54,3 +54,5 @@ dependencies: - einops - peft==0.3.0 - scipy + - windows-curses; sys_platform == 'win32' + - pynvml diff --git a/environments/rocm.yml b/environments/rocm.yml index 9538a615..9bf2813e 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -44,3 +44,5 @@ dependencies: - git+https://github.com/0cc4m/hf_bleeding_edge/ - einops - peft==0.3.0 + - windows-curses; sys_platform == 'win32' + - pynvml \ No newline at end of file diff --git a/logger.py b/logger.py index c0b8b8b0..6d37f780 100644 --- a/logger.py +++ b/logger.py @@ -17,6 +17,8 @@ class Colors: STDOUT_LEVELS = ["GENERATION", "PROMPT"] INIT_LEVELS = ["INIT", "INIT_OK", "INIT_WARN", "INIT_ERR"] MESSAGE_LEVELS = ["MESSAGE"] +STATS_LEVELS = ["STATS"] + # By default we're at error level or higher verbosity = 20 quiet = 0 @@ -54,6 +56,16 @@ def is_msg_log(record): return(False) return(True) +def is_stats_log(record): + if record["level"].name not in STATS_LEVELS: + return False + return True + +def is_not_stats_log(record): + if record["level"].name in STATS_LEVELS: + return False + return True + def is_stderr_log(record): if record["level"].name in STDOUT_LEVELS + INIT_LEVELS + MESSAGE_LEVELS: return(False) @@ -91,6 +103,7 @@ logger.level("INIT_ERR", no=31, color="") # Messages contain important information without which this application might not be able to be used # As such, they have the highest priority logger.level("MESSAGE", no=61, color="") +logger.level("STATS", no=19, color="") logger.__class__.generation = partialmethod(logger.__class__.log, "GENERATION") logger.__class__.prompt = partialmethod(logger.__class__.log, "PROMPT") diff --git a/requirements.txt b/requirements.txt index 8dc7f9a2..dff40042 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,3 +42,5 @@ git+https://github.com/0cc4m/hf_bleeding_edge/ einops peft==0.3.0 scipy +windows-curses; sys_platform == 'win32' +pynvml From 148a7c21b8c738a7c3aef0260aa785c307a2a4e9 Mon Sep 17 00:00:00 2001 From: db0 Date: Mon, 21 Aug 2023 19:02:15 +0200 Subject: [PATCH 27/52] using stop() --- koboldai_settings.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 98160ffb..5db79502 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1369,11 +1369,14 @@ class system_settings(settings): logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe") self._horde_pid = worker_module.ScribeWorker(bridge_data) - threading.Thread(target=self._horde_pid.start).run() + new_thread = threading.Thread(target=self._horde_pid.start) + new_thread.daemon = True + new_thread.start() + else: if self._horde_pid is not None: logger.info("Killing Horde bridge") - self._horde_pid.should_stop = True + self._horde_pid.stop() self._horde_pid = None From 401cc1609ac3f678c93f0b05e03bd0a1b4284aaa Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 21 Aug 2023 19:05:35 +0200 Subject: [PATCH 28/52] Kaiemb branch --- AI-Horde-Worker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AI-Horde-Worker b/AI-Horde-Worker index fc9946e0..7e728219 160000 --- a/AI-Horde-Worker +++ 
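
Patch 26's logger.py hunk adds a STATS level (severity 19) plus filter helpers so the Horde worker's throughput reporting can be routed independently of normal output. With loguru, a custom level only takes a logger.level() registration and the same partialmethod shim the file already uses for GENERATION and PROMPT; a small sketch, with the color argument left out because its original value is not visible here:

    from functools import partialmethod
    from loguru import logger

    # Register the level once; 19 matches the severity used in the patch.
    logger.level("STATS", no=19)
    logger.__class__.stats = partialmethod(logger.__class__.log, "STATS")

    def is_stats_log(record) -> bool:
        # Usable as logger.add(sink, filter=is_stats_log) to route STATS lines separately.
        return record["level"].name == "STATS"

    logger.stats("worker processed 10 jobs")
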
b/AI-Horde-Worker @@ -1 +1 @@ -Subproject commit fc9946e02c4bc3d25c1b1cf30ed8348ace0f9f6f +Subproject commit 7e72821919343c7dff29cc616b5e25b94f6df85c From f570787077fdf07bd3cd2d50a9c4b05c31cc0a52 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 13:38:28 +0200 Subject: [PATCH 29/52] Allow worker to stop --- AI-Horde-Worker | 2 +- koboldai_settings.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AI-Horde-Worker b/AI-Horde-Worker index 7e728219..b006ce4a 160000 --- a/AI-Horde-Worker +++ b/AI-Horde-Worker @@ -1 +1 @@ -Subproject commit 7e72821919343c7dff29cc616b5e25b94f6df85c +Subproject commit b006ce4a6100de18140934da79f6fa1f30234844 diff --git a/koboldai_settings.py b/koboldai_settings.py index 5db79502..3105ea0e 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1364,6 +1364,7 @@ class system_settings(settings): bridge_data.horde_url = self._koboldai_var.horde_url bridge_data.api_key = self._koboldai_var.horde_api_key bridge_data.worker_name = self._koboldai_var.horde_worker_name + bridge_data.disable_terminal_ui = True # I know people love it, but it prevents stopping the worker at the moment. Feel free to flip the switch if you prefer to have it, you must then terminate your worker with Q. - Henk if bridge_data.worker_name == "My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") From 179c4ad07f1401ee801ab46a362fee4d2f578751 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 13:58:02 +0200 Subject: [PATCH 30/52] Restore UI --- AI-Horde-Worker | 2 +- koboldai_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/AI-Horde-Worker b/AI-Horde-Worker index b006ce4a..60f09aa8 160000 --- a/AI-Horde-Worker +++ b/AI-Horde-Worker @@ -1 +1 @@ -Subproject commit b006ce4a6100de18140934da79f6fa1f30234844 +Subproject commit 60f09aa8b017bead5305e7bfd921e65b05ba7fae diff --git a/koboldai_settings.py b/koboldai_settings.py index 3105ea0e..9f460804 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1364,7 +1364,7 @@ class system_settings(settings): bridge_data.horde_url = self._koboldai_var.horde_url bridge_data.api_key = self._koboldai_var.horde_api_key bridge_data.worker_name = self._koboldai_var.horde_worker_name - bridge_data.disable_terminal_ui = True # I know people love it, but it prevents stopping the worker at the moment. Feel free to flip the switch if you prefer to have it, you must then terminate your worker with Q. - Henk + bridge_data.disable_terminal_ui = False if bridge_data.worker_name == "My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") From b41bf99b5512a47f8dd49f3a2d341594ba226164 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 14:00:05 +0200 Subject: [PATCH 31/52] Cleanup --- aiserver.py | 1 - koboldai_settings.py | 1 - 2 files changed, 2 deletions(-) diff --git a/aiserver.py b/aiserver.py index 24fb9146..e1d74172 100644 --- a/aiserver.py +++ b/aiserver.py @@ -64,7 +64,6 @@ import lupa # Hack to make the new Horde worker understand its imports... 
try: sys.path.append(os.path.abspath("AI-Horde-Worker")) - print(os.path.abspath("AI-Horde-Worker/")) except: pass diff --git a/koboldai_settings.py b/koboldai_settings.py index 9f460804..a009f3c4 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1367,7 +1367,6 @@ class system_settings(settings): bridge_data.disable_terminal_ui = False if bridge_data.worker_name == "My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" - logger.info(f"Name: {bridge_data.worker_name} on {bridge_data.kai_url}") worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe") self._horde_pid = worker_module.ScribeWorker(bridge_data) new_thread = threading.Thread(target=self._horde_pid.start) From 4b482a061928f790f4ac649fd030480bb2ea2284 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 14:58:44 +0200 Subject: [PATCH 32/52] Pending trick --- koboldai_settings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/koboldai_settings.py b/koboldai_settings.py index a009f3c4..ce2bcbc1 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1356,6 +1356,7 @@ class system_settings(settings): return if value is True: if self._horde_pid is None: + self._horde_pid = "Pending" # Hack to make sure we don't launch twice while it loads logger.info("Starting Horde bridge") bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") bridge_data = bd_module.KoboldAIBridgeData() From 69c794506bdcb1775bb01d36e2339bfe4929c58e Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 17:48:00 +0200 Subject: [PATCH 33/52] HF 4.32 --- environments/huggingface.yml | 10 +++++----- environments/rocm.yml | 8 ++++---- requirements.txt | 7 ++++--- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 004c7ecc..a1a788d8 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,10 +32,10 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.31.0 - - huggingface_hub==0.15.1 - - safetensors==0.3.1 - - accelerate==0.20.3 + - transformers==4.32.* + - huggingface_hub==0.16.4 + - git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 + - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session - ansi2html @@ -50,7 +50,7 @@ dependencies: - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' - - https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' + - https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' - einops - peft==0.3.0 - scipy diff --git a/environments/rocm.yml b/environments/rocm.yml index 9538a615..35f29c3e 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,10 +30,10 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.31.0 - - huggingface_hub==0.15.1 - - safetensors==0.3.1 - - accelerate==0.20.3 + - transformers==4.32.* + - huggingface_hub==0.16.4 + - 
git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 + - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html - flask_compress diff --git a/requirements.txt b/requirements.txt index b90db7d9..f3b2ac94 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ -transformers==4.31.* -huggingface_hub==0.15.1 +transformers==4.32.* +huggingface_hub==0.16.4 +git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 @@ -15,7 +16,7 @@ markdown bleach==4.1.0 sentencepiece protobuf -accelerate==0.20.3 +accelerate==0.21.0 flask-session==0.4.0 marshmallow>=3.13 apispec-webframeworks From e5aca6fdadfa39506644b71032395a507245c186 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 18:43:29 +0200 Subject: [PATCH 34/52] Cleaned horde --- AI-Horde-Worker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AI-Horde-Worker b/AI-Horde-Worker index 60f09aa8..960723f3 160000 --- a/AI-Horde-Worker +++ b/AI-Horde-Worker @@ -1 +1 @@ -Subproject commit 60f09aa8b017bead5305e7bfd921e65b05ba7fae +Subproject commit 960723f39a2b51a1e24d59d3c46121e7d59618ca From 3f438fda53b98ae9811f87fb8bb9d0df660c236d Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 18:56:23 +0200 Subject: [PATCH 35/52] Scribe name instead of worker name --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index ce2bcbc1..e0cbe619 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1364,7 +1364,7 @@ class system_settings(settings): bridge_data.kai_url = f'http://127.0.0.1:{self.port}' bridge_data.horde_url = self._koboldai_var.horde_url bridge_data.api_key = self._koboldai_var.horde_api_key - bridge_data.worker_name = self._koboldai_var.horde_worker_name + bridge_data.scribe_name = self._koboldai_var.horde_worker_name bridge_data.disable_terminal_ui = False if bridge_data.worker_name == "My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" From f66173f2a0ab05025b2702897bc59024ee36dddb Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 22 Aug 2023 20:43:44 +0200 Subject: [PATCH 36/52] Git gonna git --- AI-Horde-Worker | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AI-Horde-Worker b/AI-Horde-Worker index 960723f3..755696b9 160000 --- a/AI-Horde-Worker +++ b/AI-Horde-Worker @@ -1 +1 @@ -Subproject commit 960723f39a2b51a1e24d59d3c46121e7d59618ca +Subproject commit 755696b9d4464e4167bfea5fd426686420015038 From 91155ed2f303704b9cd77ff71a5aa60339bf832e Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 23 Aug 2023 20:34:40 +0200 Subject: [PATCH 37/52] HF dependencies --- environments/huggingface.yml | 3 ++- environments/rocm.yml | 3 ++- requirements.txt | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index db6b931c..1e54011b 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -34,7 +34,8 @@ dependencies: - lupa==1.10 - transformers==4.32.* - huggingface_hub==0.16.4 - - git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 + - optimum==1.12.0 + - safetensors==0.3.2 - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session diff --git a/environments/rocm.yml b/environments/rocm.yml index bf494799..22aff85c 100644 --- a/environments/rocm.yml +++ 
b/environments/rocm.yml @@ -32,7 +32,8 @@ dependencies: - lupa==1.10 - transformers==4.32.* - huggingface_hub==0.16.4 - - git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 + - optimum==1.12.0 + - safetensors==0.3.2 - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html diff --git a/requirements.txt b/requirements.txt index b3ba1d3f..4f0494c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ transformers==4.32.* huggingface_hub==0.16.4 -git+https://github.com/huggingface/optimum@2c1eaf6c0242badb86f3d626811ee3d7d220ec06 +optimum==1.12.0 +safetensors==0.3.2 Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 From c20ea949d77c34a007b873faf61bf6f5d168689b Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 23 Aug 2023 21:02:11 +0200 Subject: [PATCH 38/52] Fix duplicate safetensors --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4f0494c8..7f92cf65 100644 --- a/requirements.txt +++ b/requirements.txt @@ -39,7 +39,6 @@ pytest==7.2.2 pytest-html==3.2.0 pytest-metadata==2.0.4 requests-mock==1.10.0 -safetensors==0.3.1 git+https://github.com/0cc4m/hf_bleeding_edge/ einops peft==0.3.0 From 85810cd3fd0b244ab770981a38185476d832e7c1 Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 23 Aug 2023 21:30:58 +0200 Subject: [PATCH 39/52] AutoGPTQ for Colab --- requirements.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7f92cf65..2be7e9a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ ansi2html flask_compress ijson bitsandbytes==0.40.0.post4; sys_platform == 'linux' -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.0.post4-py3-none-win_amd64.whl; sys_platform == 'win32' +https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.0.post4-py3-none-win_amd64.whl; sys_platform == 'win32'; python_version ftfy py==1.11.0 pydub @@ -43,6 +43,9 @@ git+https://github.com/0cc4m/hf_bleeding_edge/ einops peft==0.3.0 scipy -auto-gptq +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux'; python_version == '3.10' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32'; python_version == '3.10' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'; python_version == '3.8' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'; python_version == '3.8' windows-curses; sys_platform == 'win32' pynvml From 5d9f180489076abe1c550424e7afdaaa040c31fb Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 23 Aug 2023 21:36:26 +0200 Subject: [PATCH 40/52] Fix typo --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2be7e9a5..b10ad26c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,7 +31,7 @@ ansi2html flask_compress ijson bitsandbytes==0.40.0.post4; sys_platform == 'linux' -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.0.post4-py3-none-win_amd64.whl; sys_platform == 'win32'; python_version 
+https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.0.post4-py3-none-win_amd64.whl; sys_platform == 'win32' ftfy py==1.11.0 pydub From 39c1b39b4ab32366579f0cb2f56a46db42752ace Mon Sep 17 00:00:00 2001 From: Henk Date: Wed, 23 Aug 2023 21:42:06 +0200 Subject: [PATCH 41/52] Fix markers --- requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index b10ad26c..4c9c6681 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,9 +43,9 @@ git+https://github.com/0cc4m/hf_bleeding_edge/ einops peft==0.3.0 scipy -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux'; python_version == '3.10' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32'; python_version == '3.10' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'; python_version == '3.8' -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'; python_version == '3.8' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8' +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8' windows-curses; sys_platform == 'win32' pynvml From 2887467eecbbb493b10a4fa8ea7e63a1eaecf3b1 Mon Sep 17 00:00:00 2001 From: Henk Date: Thu, 24 Aug 2023 14:30:44 +0200 Subject: [PATCH 42/52] Safetensors 0.3.3 --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index 1e54011b..e053654a 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -35,7 +35,7 @@ dependencies: - transformers==4.32.* - huggingface_hub==0.16.4 - optimum==1.12.0 - - safetensors==0.3.2 + - safetensors==0.3.3 - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - flask-session diff --git a/environments/rocm.yml b/environments/rocm.yml index 22aff85c..bd468a6a 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -33,7 +33,7 @@ dependencies: - transformers==4.32.* - huggingface_hub==0.16.4 - optimum==1.12.0 - - safetensors==0.3.2 + - safetensors==0.3.3 - accelerate==0.21.0 - git+https://github.com/VE-FORBRYDERNE/mkultra - ansi2html diff --git a/requirements.txt b/requirements.txt index 4c9c6681..b7abbed1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ transformers==4.32.* huggingface_hub==0.16.4 optimum==1.12.0 -safetensors==0.3.2 +safetensors==0.3.3 Flask==2.2.3 Flask-SocketIO==5.3.2 python-socketio==5.7.2 From f40236c04a69663834e04e7b39e4f5d86193d741 Mon Sep 17 00:00:00 2001 From: Henk Date: Fri, 25 Aug 2023 14:27:44 +0200 Subject: [PATCH 43/52] Modern llama tokenizer --- 
modeling/inference_models/hf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modeling/inference_models/hf.py b/modeling/inference_models/hf.py index e50d87ff..7b005c9e 100644 --- a/modeling/inference_models/hf.py +++ b/modeling/inference_models/hf.py @@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel): if self.model_type == "llama": # Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer self.tokenizer.add_bos_token = False - + self.tokenizer.legacy = False # HF transformers no longer supports decode_with_prefix_space # We work around this by wrapping decode, encode, and __call__ # with versions that work around the 'prefix space' misfeature From 4b2d591354b0d2626e9193445514b38ca5dbe445 Mon Sep 17 00:00:00 2001 From: db0 Date: Fri, 25 Aug 2023 15:05:36 +0200 Subject: [PATCH 44/52] avoid conflictinng sys args --- koboldai_settings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/koboldai_settings.py b/koboldai_settings.py index e0cbe619..c9acdfcf 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1359,7 +1359,11 @@ class system_settings(settings): self._horde_pid = "Pending" # Hack to make sure we don't launch twice while it loads logger.info("Starting Horde bridge") bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") + logger.debug("Clearing command line args in sys.argv before AI Horde Scribe load") + sys_arg_bkp = sys.argv.copy() + sys.argv = sys.argv[:1] bridge_data = bd_module.KoboldAIBridgeData() + sys.argv = sys_arg_bkp bridge_data.reload_data() bridge_data.kai_url = f'http://127.0.0.1:{self.port}' bridge_data.horde_url = self._koboldai_var.horde_url From 290f2ce05e9f56e04ca4b430d8348b201fe37789 Mon Sep 17 00:00:00 2001 From: Henk Date: Sat, 26 Aug 2023 00:03:28 +0200 Subject: [PATCH 45/52] CPU only warning --- modeling/inference_models/generic_hf_torch/class.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modeling/inference_models/generic_hf_torch/class.py b/modeling/inference_models/generic_hf_torch/class.py index 9b1049cf..f95bb24a 100644 --- a/modeling/inference_models/generic_hf_torch/class.py +++ b/modeling/inference_models/generic_hf_torch/class.py @@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel): def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}): requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters) + if not utils.koboldai_vars.hascuda: + logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.") + dependency_exists = importlib.util.find_spec("bitsandbytes") if dependency_exists: if model_name != 'customhuggingface' or "custom_model_name" in parameters: @@ -57,7 +60,7 @@ class model_backend(HFTorchInferenceModel): temp = json.load(f) else: temp = {} - if not hasattr(self.model_config, 'quantization_config'): + if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda: requested_parameters.append({ "uitype": "dropdown", "unit": "text", From 3e0b8279f2c5d03657298474b310d331014161d0 Mon Sep 17 00:00:00 2001 From: Henk Date: Sun, 27 Aug 
2023 20:51:14 +0200 Subject: [PATCH 46/52] Rename GPTQ loading --- aiserver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiserver.py b/aiserver.py index e1d74172..ba3be3d4 100644 --- a/aiserver.py +++ b/aiserver.py @@ -248,7 +248,7 @@ model_menu = { MenuPath("Load a model from its directory", "NeoCustom"), MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"), MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""), - MenuModel("Load custom GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"), + MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"), MenuFolder("Instruct Models", "instructlist"), MenuFolder("Novel Models", "novellist"), MenuFolder("Chat Models", "chatlist"), From 31161409f664d25be61fd6e41dc2164e18a63016 Mon Sep 17 00:00:00 2001 From: db0 Date: Mon, 28 Aug 2023 17:47:52 +0200 Subject: [PATCH 47/52] fix argsv placement --- koboldai_settings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index c9acdfcf..b75ed63a 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1358,10 +1358,11 @@ class system_settings(settings): if self._horde_pid is None: self._horde_pid = "Pending" # Hack to make sure we don't launch twice while it loads logger.info("Starting Horde bridge") - bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") logger.debug("Clearing command line args in sys.argv before AI Horde Scribe load") + logger.info(args) sys_arg_bkp = sys.argv.copy() sys.argv = sys.argv[:1] + bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") bridge_data = bd_module.KoboldAIBridgeData() sys.argv = sys_arg_bkp bridge_data.reload_data() From b20b910568e656d40dc9d5737fa7ad0d11b0856d Mon Sep 17 00:00:00 2001 From: db0 Date: Mon, 28 Aug 2023 17:49:34 +0200 Subject: [PATCH 48/52] remove debug --- koboldai_settings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index b75ed63a..ccaadfbc 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1359,7 +1359,6 @@ class system_settings(settings): self._horde_pid = "Pending" # Hack to make sure we don't launch twice while it loads logger.info("Starting Horde bridge") logger.debug("Clearing command line args in sys.argv before AI Horde Scribe load") - logger.info(args) sys_arg_bkp = sys.argv.copy() sys.argv = sys.argv[:1] bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe") From c29a5019b3b9b19f1bf19f54c5d330402c1110ea Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 28 Aug 2023 20:02:29 +0200 Subject: [PATCH 49/52] Disable Horde UI due to lockups --- GPU0.cmd | 2 ++ koboldai_settings.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 GPU0.cmd diff --git a/GPU0.cmd b/GPU0.cmd new file mode 100644 index 00000000..610ffe06 --- /dev/null +++ b/GPU0.cmd @@ -0,0 +1,2 @@ +set CUDA_VISIBLE_DEVICES=0 +play \ No newline at end of file diff --git a/koboldai_settings.py b/koboldai_settings.py index ccaadfbc..2732ac33 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1369,7 +1369,7 @@ class system_settings(settings): bridge_data.horde_url = self._koboldai_var.horde_url bridge_data.api_key = self._koboldai_var.horde_api_key bridge_data.scribe_name = self._koboldai_var.horde_worker_name - bridge_data.disable_terminal_ui = False + bridge_data.disable_terminal_ui = True if bridge_data.worker_name == 
"My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe") From ec4cecf2f87b85fa096f3ce319df82234b589304 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 28 Aug 2023 21:46:30 +0200 Subject: [PATCH 50/52] Restore Horde UI for local users --- koboldai_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/koboldai_settings.py b/koboldai_settings.py index 2732ac33..30d7f0f7 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -1369,7 +1369,7 @@ class system_settings(settings): bridge_data.horde_url = self._koboldai_var.horde_url bridge_data.api_key = self._koboldai_var.horde_api_key bridge_data.scribe_name = self._koboldai_var.horde_worker_name - bridge_data.disable_terminal_ui = True + bridge_data.disable_terminal_ui = self._koboldai_var.host if bridge_data.worker_name == "My Awesome Instance": bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}" worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe") From d77acf17eba78737b379f5bba5b5874a184d6339 Mon Sep 17 00:00:00 2001 From: Henk Date: Mon, 28 Aug 2023 22:55:33 +0200 Subject: [PATCH 51/52] Transformers 4.32.1 --- environments/huggingface.yml | 2 +- environments/rocm.yml | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/environments/huggingface.yml b/environments/huggingface.yml index e053654a..fdf82e15 100644 --- a/environments/huggingface.yml +++ b/environments/huggingface.yml @@ -32,7 +32,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.32.* + - transformers==4.32.1 - huggingface_hub==0.16.4 - optimum==1.12.0 - safetensors==0.3.3 diff --git a/environments/rocm.yml b/environments/rocm.yml index bd468a6a..00f50929 100644 --- a/environments/rocm.yml +++ b/environments/rocm.yml @@ -30,7 +30,7 @@ dependencies: - flask-ngrok - flask-cors - lupa==1.10 - - transformers==4.32.* + - transformers==4.32.1 - huggingface_hub==0.16.4 - optimum==1.12.0 - safetensors==0.3.3 diff --git a/requirements.txt b/requirements.txt index b7abbed1..323239c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -transformers==4.32.* +transformers==4.32.1 huggingface_hub==0.16.4 optimum==1.12.0 safetensors==0.3.3 From 49fa63052f22ae1a0dd6470d9ea5afdce89b0269 Mon Sep 17 00:00:00 2001 From: Henk Date: Tue, 29 Aug 2023 20:51:09 +0200 Subject: [PATCH 52/52] Allow EOS unbanning --- aiserver.py | 5 +++- gensettings.py | 16 ++++++++++ koboldai_settings.py | 1 + modeling/inference_models/hf_torch.py | 42 ++++++++++++++++++++------- 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/aiserver.py b/aiserver.py index ba3be3d4..40ff9c5a 100644 --- a/aiserver.py +++ b/aiserver.py @@ -930,7 +930,7 @@ tags = [ api_version = None # This gets set automatically so don't change this value api_v1 = KoboldAPISpec( - version="1.2.3", + version="1.2.4", prefixes=["/api/v1", "/api/latest"], tags=tags, ) @@ -8161,6 +8161,7 @@ class GenerationInputSchema(SamplerSettingsSchema): frmtrmblln: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. 
When enabled, replaces all occurrences of two or more consecutive newlines in the output with one newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) frmtrmspch: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes `#/@%{}+=~|\^<>` from the output.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) singleline: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes everything after the first line of the output, including the newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) + use_default_badwordids: bool = fields.Boolean(load_default=True, metadata={"description": "Ban tokens that commonly worsen the writing experience for continuous story writing"}) disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"}) frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."}) quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."}) @@ -8169,6 +8170,7 @@ class GenerationInputSchema(SamplerSettingsSchema): sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."}) stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. 
The returned text WILL contain the stop sequence."}, validate=[validate.Length(max=10)]) + class GenerationResultSchema(KoboldSchema): text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."}) @@ -8311,6 +8313,7 @@ def _generate_text(body: GenerationInputSchema): "sampler_order": ("koboldai_vars", "sampler_order", None), "sampler_full_determinism": ("koboldai_vars", "full_determinism", None), "stop_sequence": ("koboldai_vars", "stop_sequence", None), + "use_default_badwordids": ("koboldai_vars", "use_default_badwordids", None), } saved_settings = {} set_aibusy(1) diff --git a/gensettings.py b/gensettings.py index 4b395266..8bb28513 100644 --- a/gensettings.py +++ b/gensettings.py @@ -396,6 +396,22 @@ gensettingstf = [ "name": "output_streaming", "ui_level": 1 }, + { + "uitype": "toggle", + "unit": "bool", + "label": "Ban Bad Tokens", + "id": "setusedefaultbadwordids", + "min": 0, + "max": 1, + "step": 1, + "default": 1, + "tooltip": "Ban tokens that commonly worsen the writing experience for continuous story writing.", + "menu_path": "Settings", + "sub_path": "Sampling", + "classname": "model", + "name": "use_default_badwordids", + "ui_level": 0 + }, { "uitype": "toggle", "unit": "bool", diff --git a/koboldai_settings.py b/koboldai_settings.py index 30d7f0f7..5598eb62 100644 --- a/koboldai_settings.py +++ b/koboldai_settings.py @@ -693,6 +693,7 @@ class model_settings(settings): self._koboldai_vars = koboldai_vars self.alt_multi_gen = False self.bit_8_available = None + self.use_default_badwordids = True self.supported_gen_modes = [] def reset_for_model_load(self): diff --git a/modeling/inference_models/hf_torch.py b/modeling/inference_models/hf_torch.py index 82e60304..5e6e0a95 100644 --- a/modeling/inference_models/hf_torch.py +++ b/modeling/inference_models/hf_torch.py @@ -330,19 +330,39 @@ class HFTorchInferenceModel(HFInferenceModel): if seed is not None: torch.manual_seed(seed) + if utils.koboldai_vars.use_default_badwordids: + self.active_badwordids = self.badwordsids + additional_bad_words_ids + else: + if additional_bad_words_ids: + self.active_badwordids = additional_bad_words_ids + else: + self.active_badwordids = None + with torch.no_grad(): start_time = time.time() - genout = self.model.generate( - input_ids=gen_in, - do_sample=True, - max_length=min( - len(prompt_tokens) + max_new, utils.koboldai_vars.max_length - ), - repetition_penalty=1.0, - bad_words_ids=self.badwordsids + additional_bad_words_ids, - use_cache=True, - num_return_sequences=batch_count, - ) + if self.active_badwordids: ## I know duplicating this is ugly, but HF checks if its present and accepts nothing but actual token bans if its there (Which I can't guarantee would be universal enough).... - Henk + genout = self.model.generate( + input_ids=gen_in, + do_sample=True, + max_length=min( + len(prompt_tokens) + max_new, utils.koboldai_vars.max_length + ), + repetition_penalty=1.0, + bad_words_ids=self.active_badwordids, + use_cache=True, + num_return_sequences=batch_count, + ) + else: + genout = self.model.generate( + input_ids=gen_in, + do_sample=True, + max_length=min( + len(prompt_tokens) + max_new, utils.koboldai_vars.max_length + ), + repetition_penalty=1.0, + use_cache=True, + num_return_sequences=batch_count, + ) logger.debug( "torch_raw_generate: run generator {}s".format(time.time() - start_time) )
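The duplicated model.generate() call in the final hunk works around the behaviour noted in the patch's own comment: when bad_words_ids is supplied, transformers expects it to contain actual token bans, so the argument is only passed on the branch where a real ban list exists. Below is a minimal sketch, not the patched KoboldAI code, of the same branching expressed through a kwargs dict so generate() is written only once; the function name is hypothetical, and its parameters stand in for the values built in hf_torch.py (self.model, gen_in, prompt_tokens, max_new, utils.koboldai_vars.max_length, batch_count, self.active_badwordids).

    def generate_with_optional_bans(model, gen_in, prompt_tokens, max_new,
                                    max_length, batch_count, active_badwordids):
        """Sketch: call generate() once, adding bad_words_ids only when bans exist."""
        gen_kwargs = dict(
            input_ids=gen_in,
            do_sample=True,
            max_length=min(len(prompt_tokens) + max_new, max_length),
            repetition_penalty=1.0,
            use_cache=True,
            num_return_sequences=batch_count,
        )
        # Only supply bad_words_ids when there are real token ids to ban;
        # leaving the key out keeps generate() at its default (no ban list),
        # which matches the else branch of the patch.
        if active_badwordids:
            gen_kwargs["bad_words_ids"] = active_badwordids
        return model.generate(**gen_kwargs)

For API callers, the new use_default_badwordids field in GenerationInputSchema loads as true by default, so existing clients keep the previous banned-token behaviour unless they explicitly set it to false.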