Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)
Merge pull request #69 from pi6am/merge/united-exllama
Merge branch henk717/united into exllama
.gitmodules (vendored): 9 changed lines
@@ -1,6 +1,3 @@
[submodule "KoboldAI-Horde"]
path = KoboldAI-Horde
url = https://github.com/db0/KoboldAI-Horde-Bridge
[submodule "KoboldAI-Horde-Bridge"]
path = KoboldAI-Horde-Bridge
url = https://github.com/db0/KoboldAI-Horde-Bridge
[submodule "AI-Horde-Worker"]
path = AI-Horde-Worker
url = https://github.com/Haidra-Org/AI-Horde-Worker/
AI-Horde-Worker (submodule): 1 change
Submodule AI-Horde-Worker added at 755696b9d4
Submodule KoboldAI-Horde-Bridge deleted from 20e8701dd2
aiserver.py: 140 changed lines
@@ -61,6 +61,11 @@ import gc
import traceback

import lupa
# Hack to make the new Horde worker understand its imports...
try:
sys.path.append(os.path.abspath("AI-Horde-Worker"))
except:
pass

# KoboldAI
import fileops
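The sys.path append above exists because the new AI Horde worker ships as a plain checkout named AI-Horde-Worker rather than an installed package, so its modules only resolve once that directory is reachable. A minimal, hypothetical sketch of the same idea (not code from this diff):

import os
import sys

# Same append as in the hunk above; harmless if the directory is missing.
sys.path.append(os.path.abspath("AI-Horde-Worker"))

# Later in this change the worker is resolved dynamically, for example:
#   importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe")
# (module path taken from the koboldai_settings.py hunk further down).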
@@ -242,11 +247,13 @@ model_menu = {
"mainmenu": [
MenuPath("Load a model from its directory", "NeoCustom"),
MenuPath("Load an old GPT-2 model (eg CloverEdition)", "GPT2Custom"),
MenuModel("Load custom model from Hugging Face", "customhuggingface", ""),
MenuFolder("Adventure Models", "adventurelist"),
MenuModel("Load custom Pytorch model from Hugging Face", "customhuggingface", ""),
MenuModel("Load old GPTQ model from Hugging Face", "customgptq", "", model_backend="GPTQ"),
MenuFolder("Instruct Models", "instructlist"),
MenuFolder("Novel Models", "novellist"),
MenuFolder("Chat Models", "chatlist"),
MenuFolder("NSFW Models", "nsfwlist"),
MenuFolder("Adventure Models", "adventurelist"),
MenuFolder("Untuned OPT", "optlist"),
MenuFolder("Untuned GPT-Neo/J", "gptneolist"),
MenuFolder("Untuned Pythia", "pythialist"),
@@ -258,17 +265,28 @@ model_menu = {
MenuFolder("Online Services", "apilist"),
MenuModel("Read Only (No AI)", "ReadOnly", model_type=MenuModelType.OTHER, model_backend="Read Only"),
],
'instructlist': [
MenuModel("Holomax 13B", "KoboldAI/LLaMA2-13B-Holomax", "12GB*"),
MenuModel("Mythomax 13B", "Gryphe/MythoMax-L2-13b", "12GB*"),
MenuModel("Chronos-Hermes V2 13B", "Austism/chronos-hermes-13b-v2", "12GB*"),
MenuModel("Legerdemain 13B", "CalderaAI/13B-Legerdemain-L2", "12GB*"),
MenuModel("Chronos 13b v2", "elinas/chronos-13b-v2", "12GB*"),
MenuModel("Huginn 13B", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"),
MenuFolder("Return to Main Menu", "mainmenu"),
],
'adventurelist': [
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "64GB"),
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "32GB"),
MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "32GB"),
MenuModel("Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "32GB"),
MenuModel("Skein 6B", "KoboldAI/GPT-J-6B-Skein", "16GB"),
MenuModel("OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "16GB"),
MenuModel("Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "16GB"),
MenuModel("Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "8GB"),
MenuModel("Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "8GB"),
MenuModel("Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "6GB"),
MenuFolder("Instruct models may perform better than the models below (Using Instruct mode)", "instructlist"),
MenuModel("Skein 20B", "KoboldAI/GPT-NeoX-20B-Skein", "20GB*"),
MenuModel("Nerys OPT 13B V2 (Hybrid)", "KoboldAI/OPT-13B-Nerys-v2", "12GB"),
MenuModel("Spring Dragon 13B", "Henk717/spring-dragon", "12GB*"),
MenuModel("Nerys FSD 13B V2 (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys-v2", "12GB"),
MenuModel("Nerys FSD 13B (Hybrid)", "KoboldAI/fairseq-dense-13B-Nerys", "12GB"),
MenuModel("Skein 6B", "KoboldAI/GPT-J-6B-Skein", "8GB*"),
MenuModel("OPT Nerys 6B V2 (Hybrid)", "KoboldAI/OPT-6B-nerys-v2", "8GB"),
MenuModel("Adventure 6B", "KoboldAI/GPT-J-6B-Adventure", "8GB*"),
MenuModel("Nerys FSD 2.7B (Hybrid)", "KoboldAI/fairseq-dense-2.7B-Nerys", "6GB"),
MenuModel("Adventure 2.7B", "KoboldAI/GPT-Neo-2.7B-AID", "6GB"),
MenuModel("Adventure 1.3B", "KoboldAI/GPT-Neo-1.3B-Adventure", "4GB*"),
MenuModel("Adventure 125M (Mia)", "Merry/AID-Neo-125M", "2GB"),
MenuFolder("Return to Main Menu", "mainmenu"),
],
@@ -289,24 +307,28 @@ model_menu = {
MenuFolder("Return to Main Menu", "mainmenu"),
],
'nsfwlist': [
MenuModel("Erebus 20B (NSFW)", "KoboldAI/GPT-NeoX-20B-Erebus", "64GB"),
MenuModel("Nerybus 13B (NSFW)", "KoboldAI/OPT-13B-Nerybus-Mix", "32GB"),
MenuModel("Erebus 13B (NSFW)", "KoboldAI/OPT-13B-Erebus", "32GB"),
MenuModel("Shinen FSD 13B (NSFW)", "KoboldAI/fairseq-dense-13B-Shinen", "32GB"),
MenuModel("Erebus 6.7B (NSFW)", "KoboldAI/OPT-6.7B-Erebus", "16GB"),
MenuModel("Shinen FSD 6.7B (NSFW)", "KoboldAI/fairseq-dense-6.7B-Shinen", "16GB"),
MenuModel("Lit V2 6B (NSFW)", "hakurei/litv2-6B-rev3", "16GB"),
MenuModel("Lit 6B (NSFW)", "hakurei/lit-6B", "16GB"),
MenuModel("Shinen 6B (NSFW)", "KoboldAI/GPT-J-6B-Shinen", "16GB"),
MenuModel("Erebus 2.7B (NSFW)", "KoboldAI/OPT-2.7B-Erebus", "8GB"),
MenuModel("Horni 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Horni", "8GB"),
MenuModel("Shinen 2.7B (NSFW)", "KoboldAI/GPT-Neo-2.7B-Shinen", "8GB"),
MenuFolder("Looking for NSFW Chat RP? Most chat models give better replies", "chatlist"),
MenuModel("Green Devil (Novel)", "Pirr/pythia-13b-deduped-green_devil", "14GB"),
MenuModel("Erebus 20B (Novel)", "KoboldAI/GPT-NeoX-20B-Erebus", "20GB*"),
MenuModel("Nerybus 13B (Novel)", "KoboldAI/OPT-13B-Nerybus-Mix", "12GB"),
MenuModel("Erebus 13B (Novel)", "KoboldAI/OPT-13B-Erebus", "12GB"),
MenuModel("Shinen FSD 13B (Novel)", "KoboldAI/fairseq-dense-13B-Shinen", "12GB"),
MenuModel("Erebus 6.7B (Novel)", "KoboldAI/OPT-6.7B-Erebus", "8GB"),
MenuModel("Shinen FSD 6.7B (Novel)", "KoboldAI/fairseq-dense-6.7B-Shinen", "8GB"),
MenuModel("Lit V2 6B (Novel)", "hakurei/litv2-6B-rev3", "8GB*"),
MenuModel("Lit 6B (Novel)", "hakurei/lit-6B", "8GB*"),
MenuModel("Shinen 6B (Novel)", "KoboldAI/GPT-J-6B-Shinen", "6GB"),
MenuModel("Erebus 2.7B (Novel)", "KoboldAI/OPT-2.7B-Erebus", "6GB"),
MenuModel("Horni 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Horni", "6GB"),
MenuModel("Shinen 2.7B (Novel)", "KoboldAI/GPT-Neo-2.7B-Shinen", "6GB"),
MenuFolder("Return to Main Menu", "mainmenu"),
],
'chatlist': [
MenuModel("Pygmalion 6B", "PygmalionAI/pygmalion-6b", "16GB"),
MenuModel("Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "8GB"),
MenuModel("Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "6GB"),
MenuModel("Mythomax 13B (Instruct)", "Gryphe/MythoMax-L2-13b", "12GB*"),
MenuModel("Huginn 13B (Instruct)", "The-Face-Of-Goonery/Huginn-13b-FP16", "12GB*"),
MenuModel("Pygmalion 6B", "PygmalionAI/pygmalion-6b", "8GB*"),
MenuModel("Pygmalion 2.7B", "PygmalionAI/pygmalion-2.7b", "6GB"),
MenuModel("Pygmalion 1.3B", "PygmalionAI/pygmalion-1.3b", "4GB*"),
MenuModel("Pygmalion 350M", "PygmalionAI/pygmalion-350m", "2GB"),
MenuFolder("Return to Main Menu", "mainmenu"),
],
@@ -908,7 +930,7 @@ tags = [
api_version = None # This gets set automatically so don't change this value

api_v1 = KoboldAPISpec(
version="1.2.3",
version="1.2.4",
prefixes=["/api/v1", "/api/latest"],
tags=tags,
)
@@ -1114,7 +1136,7 @@ def loadmodelsettings():
try:
js = json.load(open(koboldai_vars.custmodpth + "/config.json", "r"))
except Exception as e:
js = json.load(open(koboldai_vars.custmodpth.replace('/', '_') + "/config.json", "r"))
js = json.load(open(koboldai_vars.custmodpth.replace('/', '_') + "/config.json", "r"))
except Exception as e:
js = {}
koboldai_vars.default_preset = koboldai_settings.default_preset
@@ -2839,7 +2861,7 @@ def get_message(msg):
emit('from_server', {'cmd': 'wiupdate', 'num': msg['num'], 'data': {field: koboldai_vars.worldinfo[num][field] for field in fields}}, broadcast=True, room="UI_1")
elif(msg['cmd'] == 'wifolderupdate'):
setgamesaved(False)
uid = str(msg['uid'])
uid = msg['uid']
fields = ("name", "collapsed")
for field in fields:
if(field in msg['data'] and type(msg['data'][field]) is (str if field != "collapsed" else bool)):
@@ -4287,17 +4309,17 @@ def togglewimode():
#
#==================================================================#
def addwiitem(folder_uid=None):
assert folder_uid is None or str(folder_uid) in koboldai_vars.wifolders_d
assert folder_uid is None or folder_uid in koboldai_vars.wifolders_d
ob = {"key": "", "keysecondary": "", "content": "", "comment": "", "folder": folder_uid, "num": len(koboldai_vars.worldinfo), "init": False, "selective": False, "constant": False}
koboldai_vars.worldinfo.append(ob)
while(True):
uid = str(int.from_bytes(os.urandom(4), "little", signed=True))
uid = int.from_bytes(os.urandom(4), "little", signed=True)
if(uid not in koboldai_vars.worldinfo_u):
break
koboldai_vars.worldinfo_u[uid] = koboldai_vars.worldinfo[-1]
koboldai_vars.worldinfo[-1]["uid"] = uid
if(folder_uid is not None):
koboldai_vars.wifolders_u[str(folder_uid)].append(koboldai_vars.worldinfo[-1])
koboldai_vars.wifolders_u[folder_uid].append(koboldai_vars.worldinfo[-1])
emit('from_server', {'cmd': 'addwiitem', 'data': ob}, broadcast=True, room="UI_1")

#==================================================================#
@@ -4305,7 +4327,7 @@ def addwiitem(folder_uid=None):
#==================================================================#
def addwifolder():
while(True):
uid = str(int.from_bytes(os.urandom(4), "little", signed=True))
uid = int.from_bytes(os.urandom(4), "little", signed=True)
if(uid not in koboldai_vars.wifolders_d):
break
ob = {"name": "", "collapsed": False}
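The two hunks above switch world info UIDs from stringified integers to plain ints. A small self-contained sketch of the scheme (illustrative only, not code from aiserver.py):

import os

def new_uid(existing):
    """Draw random signed 32-bit ints until one is not already a key."""
    while True:
        uid = int.from_bytes(os.urandom(4), "little", signed=True)
        if uid not in existing:
            return uid

worldinfo_u = {}
uid = new_uid(worldinfo_u)  # e.g. -1204567
worldinfo_u[uid] = {"uid": uid, "key": "", "content": ""}  # int key, no str() round trip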
@@ -4321,18 +4343,18 @@ def addwifolder():
#==================================================================#
def movewiitem(dst, src):
setgamesaved(False)
if(koboldai_vars.worldinfo_u[str(src)]["folder"] is not None):
for i, e in enumerate(koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(src)]["folder"])]):
if(e["uid"] == koboldai_vars.worldinfo_u[str(src)]["uid"]):
koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(src)]["folder"])].pop(i)
if(koboldai_vars.worldinfo_u[src]["folder"] is not None):
for i, e in enumerate(koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[src]["folder"]]):
if(e["uid"] == koboldai_vars.worldinfo_u[src]["uid"]):
koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[src]["folder"]].pop(i)
break
if(koboldai_vars.worldinfo_u[str(dst)]["folder"] is not None):
koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[str(dst)]["folder"])].append(koboldai_vars.worldinfo_u[str(src)])
koboldai_vars.worldinfo_u[str(src)]["folder"] = koboldai_vars.worldinfo_u[str(dst)]["folder"]
if(koboldai_vars.worldinfo_u[dst]["folder"] is not None):
koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[dst]["folder"]].append(koboldai_vars.worldinfo_u[src])
koboldai_vars.worldinfo_u[src]["folder"] = koboldai_vars.worldinfo_u[dst]["folder"]
for i, e in enumerate(koboldai_vars.worldinfo):
if(e["uid"] == koboldai_vars.worldinfo_u[str(src)]["uid"]):
if(e["uid"] == koboldai_vars.worldinfo_u[src]["uid"]):
_src = i
elif(e["uid"] == koboldai_vars.worldinfo_u[str(dst)]["uid"]):
elif(e["uid"] == koboldai_vars.worldinfo_u[dst]["uid"]):
_dst = i
koboldai_vars.worldinfo[_src]["folder"] = koboldai_vars.worldinfo[_dst]["folder"]
koboldai_vars.worldinfo.insert(_dst - (_dst >= _src), koboldai_vars.worldinfo.pop(_src))
@@ -4344,12 +4366,12 @@ def movewiitem(dst, src):
#==================================================================#
def movewifolder(dst, src):
setgamesaved(False)
koboldai_vars.wifolders_l.remove(str(src))
koboldai_vars.wifolders_l.remove(src)
if(dst is None):
# If dst is None, that means we should move src to be the last folder
koboldai_vars.wifolders_l.append(str(src))
koboldai_vars.wifolders_l.append(src)
else:
koboldai_vars.wifolders_l.insert(koboldai_vars.wifolders_l.index(str(dst)), str(src))
koboldai_vars.wifolders_l.insert(koboldai_vars.wifolders_l.index(dst), src)
sendwi()

#==================================================================#
@@ -4375,7 +4397,7 @@ def sendwi():
last_folder = ...
for wi in koboldai_vars.worldinfo:
if(wi["folder"] != last_folder):
emit('from_server', {'cmd': 'addwifolder', 'uid': wi["folder"], 'data': koboldai_vars.wifolders_d[str(wi["folder"])] if wi["folder"] is not None else None}, broadcast=True, room="UI_1")
emit('from_server', {'cmd': 'addwifolder', 'uid': wi["folder"], 'data': koboldai_vars.wifolders_d[wi["folder"]] if wi["folder"] is not None else None}, broadcast=True, room="UI_1")
last_folder = wi["folder"]
ob = wi
emit('from_server', {'cmd': 'addwiitem', 'data': ob}, broadcast=True, room="UI_1")
@@ -4396,8 +4418,8 @@ def requestwi():
# and items in different folders are sorted based on the order of the folders
#==================================================================#
def stablesortwi():
mapping = {int(uid): index for index, uid in enumerate(koboldai_vars.wifolders_l)}
koboldai_vars.worldinfo.sort(key=lambda x: mapping[int(x["folder"])] if x["folder"] is not None else float("inf"))
mapping = {uid: index for index, uid in enumerate(koboldai_vars.wifolders_l)}
koboldai_vars.worldinfo.sort(key=lambda x: mapping[x["folder"]] if x["folder"] is not None else float("inf"))
last_folder = ...
last_wi = None
for i, wi in enumerate(koboldai_vars.worldinfo):
@@ -4418,7 +4440,6 @@ def stablesortwi():
#==================================================================#
def commitwi(ar):
for ob in ar:
ob["uid"] = str(ob["uid"])
koboldai_vars.worldinfo_u[ob["uid"]]["key"] = ob["key"]
koboldai_vars.worldinfo_u[ob["uid"]]["keysecondary"] = ob["keysecondary"]
koboldai_vars.worldinfo_u[ob["uid"]]["content"] = ob["content"]
@@ -4441,9 +4462,9 @@ def deletewi(uid):
koboldai_vars.deletewi = uid
if(koboldai_vars.deletewi is not None):
if(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"] is not None):
for i, e in enumerate(koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"])]):
for i, e in enumerate(koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"]]):
if(e["uid"] == koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["uid"]):
koboldai_vars.wifolders_u[str(koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"])].pop(i)
koboldai_vars.wifolders_u[koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["folder"]].pop(i)
break
for i, e in enumerate(koboldai_vars.worldinfo):
if(e["uid"] == koboldai_vars.worldinfo_u[koboldai_vars.deletewi]["uid"]):
@@ -4459,18 +4480,17 @@ def deletewi(uid):
#
#==================================================================#
def deletewifolder(uid):
uid = str(uid)
del koboldai_vars.wifolders_u[uid]
del koboldai_vars.wifolders_d[uid]
del koboldai_vars.wifolders_l[koboldai_vars.wifolders_l.index(uid)]
setgamesaved(False)
# Delete uninitialized entries in the folder we're going to delete
koboldai_vars.worldinfo = [wi for wi in koboldai_vars.worldinfo if str(wi["folder"]) != uid or wi["init"]]
koboldai_vars.worldinfo = [wi for wi in koboldai_vars.worldinfo if wi["folder"] != uid or wi["init"]]
koboldai_vars.worldinfo_i = [wi for wi in koboldai_vars.worldinfo if wi["init"]]
# Move WI entries that are inside of the folder we're going to delete
# so that they're outside of all folders
for wi in koboldai_vars.worldinfo:
if(str(wi["folder"]) == uid):
if(wi["folder"] == uid):
wi["folder"] = None

sendwi()
@@ -6550,7 +6570,7 @@ def UI_2_create_world_info_folder(data):
@socketio.on('delete_world_info')
@logger.catch
def UI_2_delete_world_info(uid):
koboldai_vars.worldinfo_v2.delete(int(uid))
koboldai_vars.worldinfo_v2.delete(uid)


#==================================================================#
@@ -6605,7 +6625,7 @@ def UI_2_import_world_info():
for child in children:
# Child is index
if child not in uids:
entry_data = wi_data["entries"][str(child)]
entry_data = wi_data["entries"][child]
uids[child] = koboldai_vars.worldinfo_v2.add_item(
title=entry_data["title"],
key=entry_data["key"],
@@ -7708,7 +7728,6 @@ def maybe_review_story() -> None:
for uid, wi in koboldai_vars.worldinfo_v2.world_info.items():
if wi["type"] == "commentator":
continue
uid = int(uid)
allowed_wi_uids.append(uid)

prompt = f"\n\n{speaker_name}'s thoughts on what just happened in this story: \""
@@ -8142,6 +8161,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
frmtrmblln: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, replaces all occurrences of two or more consecutive newlines in the output with one newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
frmtrmspch: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes `#/@%{}+=~|\^<>` from the output.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
singleline: Optional[bool] = fields.Boolean(metadata={"description": "Output formatting option. When enabled, removes everything after the first line of the output, including the newline.\n\nIf `disable_output_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
use_default_badwordids: bool = fields.Boolean(load_default=True, metadata={"description": "Ban tokens that commonly worsen the writing experience for continuous story writing"})
disable_input_formatting: bool = fields.Boolean(load_default=True, metadata={"description": "When enabled, all input formatting options default to `false` instead of the value in the KoboldAI GUI"})
frmtadsnsp: Optional[bool] = fields.Boolean(metadata={"description": "Input formatting option. When enabled, adds a leading space to your input if there is no trailing whitespace at the end of the previous action.\n\nIf `disable_input_formatting` is `true`, this defaults to `false` instead of the value in the KoboldAI GUI."})
quiet: Optional[bool] = fields.Boolean(metadata={"description": "When enabled, Generated output will not be displayed in the console."})
@@ -8150,6 +8170,7 @@ class GenerationInputSchema(SamplerSettingsSchema):
sampler_full_determinism: Optional[bool] = fields.Boolean(metadata={"description": "If enabled, the generated text will always be the same as long as you use the same RNG seed, input and settings. If disabled, only the *sequence* of generated texts that you get when repeatedly generating text will be the same given the same RNG seed, input and settings."})
stop_sequence: Optional[List[str]] = fields.List(fields.String(),metadata={"description": "An array of string sequences where the API will stop generating further tokens. The returned text WILL contain the stop sequence."}, validate=[validate.Length(max=10)])


class GenerationResultSchema(KoboldSchema):
text: str = fields.String(required=True, metadata={"description": "Generated output as plain text."})

@@ -8292,6 +8313,7 @@ def _generate_text(body: GenerationInputSchema):
"sampler_order": ("koboldai_vars", "sampler_order", None),
"sampler_full_determinism": ("koboldai_vars", "full_determinism", None),
"stop_sequence": ("koboldai_vars", "stop_sequence", None),
"use_default_badwordids": ("koboldai_vars", "use_default_badwordids", None),
}
saved_settings = {}
set_aibusy(1)
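A hedged usage sketch of the new schema field through the existing /api/v1/generate endpoint; the host, port, prompt and response handling below are placeholders rather than part of this diff:

import requests

payload = {
    "prompt": "The knight drew her sword",
    "max_length": 80,
    "use_default_badwordids": False,  # new field introduced in this change
}
# Assumes a local KoboldAI instance on the default port and the usual
# {"results": [{"text": ...}]} response shape of the v1 API.
r = requests.post("http://127.0.0.1:5000/api/v1/generate", json=payload)
print(r.json()["results"][0]["text"])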
@@ -9209,7 +9231,7 @@ def get_world_info():
if wi["folder"] != last_folder:
folder = []
if wi["folder"] is not None:
folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[str(wi["folder"])]["name"], "entries": folder})
folders.append({"uid": wi["folder"], "name": koboldai_vars.wifolders_d[wi["folder"]]["name"], "entries": folder})
last_folder = wi["folder"]
(folder if wi["folder"] is not None else entries).append({k: v for k, v in wi.items() if k not in ("init", "folder", "num") and (wi["selective"] or k != "keysecondary")})
return {"folders": folders, "entries": entries}
@@ -152,6 +152,7 @@ if [ "$init" != "skip" ]; then
cp -rn softprompts/* /content/drive/MyDrive/KoboldAI/softprompts/
cp -rn presets/* /content/drive/MyDrive/KoboldAI/presets/
cp -rn themes/* /content/drive/MyDrive/KoboldAI/themes/
rm -rf AI-Horde-Worker/
rm -rf KoboldAI-Horde-Bridge/
rm stories
rm -rf stories/
@@ -15,21 +15,15 @@ IF %M%==2 GOTO subfolder
IF %M%==3 GOTO drivemap_B

:subfolder
SET TEMP=%~DP0MINICONDA3
SET TMP=%~DP0MINICONDA3
call miniconda3\condabin\activate
cmd /k "%*"

:drivemap
subst K: miniconda3 >nul
SET TEMP=K:\
SET TMP=K:\
call K:\python\condabin\activate
cmd /k "%*"

:drivemap_B
subst B: miniconda3 >nul
SET TEMP=B:\
SET TMP=B:\
call B:\python\condabin\activate
cmd /k "%*"
@@ -32,10 +32,11 @@ dependencies:
- flask-ngrok
- flask-cors
- lupa==1.10
- transformers==4.31.0
- huggingface_hub==0.15.1
- safetensors==0.3.1
- accelerate==0.20.3
- transformers==4.32.1
- huggingface_hub==0.16.4
- optimum==1.12.0
- safetensors==0.3.3
- accelerate==0.21.0
- git+https://github.com/VE-FORBRYDERNE/mkultra
- flask-session
- ansi2html
@@ -50,9 +51,11 @@ dependencies:
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/0cc4m/GPTQ-for-LLaMa/releases/download/0.0.6/gptq_koboldai-0.0.6-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux'
- https://github.com/henk717/KoboldAI/releases/download/Snapshot-11-08-23/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32'
- einops
- peft==0.3.0
- scipy
- --find-links=https://0cc4m.github.io/exllama/exllama-whl-links.html
- exllama==0.0.6
- windows-curses; sys_platform == 'win32'
- pynvml

@@ -30,10 +30,11 @@ dependencies:
- flask-ngrok
- flask-cors
- lupa==1.10
- transformers==4.31.0
- huggingface_hub==0.15.1
- safetensors==0.3.1
- accelerate==0.20.3
- transformers==4.32.1
- huggingface_hub==0.16.4
- optimum==1.12.0
- safetensors==0.3.3
- accelerate==0.21.0
- git+https://github.com/VE-FORBRYDERNE/mkultra
- ansi2html
- flask_compress
@@ -44,3 +45,5 @@ dependencies:
- git+https://github.com/0cc4m/hf_bleeding_edge/
- einops
- peft==0.3.0
- windows-curses; sys_platform == 'win32'
- pynvml
@@ -396,6 +396,22 @@ gensettingstf = [
"name": "output_streaming",
"ui_level": 1
},
{
"uitype": "toggle",
"unit": "bool",
"label": "Ban Bad Tokens",
"id": "setusedefaultbadwordids",
"min": 0,
"max": 1,
"step": 1,
"default": 1,
"tooltip": "Ban tokens that commonly worsen the writing experience for continuous story writing.",
"menu_path": "Settings",
"sub_path": "Sampling",
"classname": "model",
"name": "use_default_badwordids",
"ui_level": 0
},
{
"uitype": "toggle",
"unit": "bool",
@@ -20,28 +20,28 @@ IF %M%==3 GOTO drivemap_B

:subfolder
ECHO Runtime launching in subfolder mode
SET TEMP=%~DP0MINICONDA3
SET TMP=%~DP0MINICONDA3
call miniconda3\condabin\activate
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/optimum
pip install git+https://github.com/huggingface/accelerate
cmd /k

:drivemap
ECHO Runtime launching in K: drive mode
subst /D K: >nul
subst K: miniconda3 >nul
SET TEMP=K:\
SET TMP=K:\
call K:\python\condabin\activate
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/optimum
pip install git+https://github.com/huggingface/accelerate
cmd /k

:drivemap_B
ECHO Runtime launching in B: drive mode
subst /D B: >nul
subst B: miniconda3 >nul
SET TEMP=B:\
SET TMP=B:\
call B:\python\condabin\activate
pip install git+https://github.com/huggingface/transformers
pip install git+https://github.com/huggingface/optimum
pip install git+https://github.com/huggingface/accelerate
cmd /k
@@ -44,8 +44,6 @@ echo 3 > loader.settings
subst B: /D >nul
mkdir miniconda3
subst B: miniconda3
SET TEMP=B:\
SET TMP=B:\
copy umamba.exe B:\umamba.exe
copy loader.settings B:\loader.settings
copy disconnect-kobold-drive.bat B:\disconnect-kobold-drive.bat
@@ -60,8 +58,6 @@ exit

:subfolder
echo 2 > loader.settings
SET TEMP=%~DP0MINICONDA3
SET TMP=%~DP0MINICONDA3
umamba.exe create -r miniconda3\ -n base
umamba.exe install --no-shortcuts -r miniconda3 -n base -f environments\huggingface.yml -y --always-copy
umamba.exe clean -a -y
@@ -608,11 +608,16 @@ class settings(object):
if key == 'sampler_order':
if(len(value) < 7):
value = [6] + value
if key == 'autosave':
elif key == 'autosave':
autosave = value
elif key in ['worldinfo_u', 'wifolders_d']:
# Fix UID keys to be ints
value = {int(k): v for k, v in value.items()}

if isinstance(value, str):
if value[:7] == 'base64:':
value = pickle.loads(base64.b64decode(value[7:]))

#Need to fix the data type of value to match the module
if type(getattr(self, key)) == int:
setattr(self, key, int(value))
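The int() normalization above is needed because JSON object keys are always strings: UID keys written to a save file come back as "123" and would no longer match the int UIDs now used at runtime. A minimal illustration:

import json

saved = json.loads('{"-1204567": {"key": "dragon"}}')  # keys are strings after a round trip
worldinfo_u = {int(k): v for k, v in saved.items()}     # same fix as in the hunk above
assert -1204567 in worldinfo_u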
@@ -688,6 +693,7 @@ class model_settings(settings):
self._koboldai_vars = koboldai_vars
self.alt_multi_gen = False
self.bit_8_available = None
self.use_default_badwordids = True
self.supported_gen_modes = []

def reset_for_model_load(self):
@@ -1010,7 +1016,7 @@ class story_settings(settings):
new_world_info.add_item([x.strip() for x in wi["key"].split(",")][0],
wi["key"],
wi.get("keysecondary", ""),
"root" if wi["folder"] is None else self.wifolders_d[str(wi['folder'])]['name'],
"root" if wi["folder"] is None else self.wifolders_d[wi['folder']]['name'],
wi.get("constant", False),
wi["content"],
wi.get("comment", ""),
@@ -1345,38 +1351,40 @@ class system_settings(settings):
self._koboldai_var.calc_ai_text()

if name == 'horde_share':
if self.on_colab == False:
if os.path.exists("./KoboldAI-Horde-Bridge"):
if value == True:
if self._horde_pid is None:
logger.info("Starting Horde bridge")
bridge = importlib.import_module("KoboldAI-Horde-Bridge.bridge")
self._horde_pid = bridge.kai_bridge()
try:
bridge_cd = importlib.import_module("KoboldAI-Horde-Bridge.clientData")
cluster_url = bridge_cd.cluster_url
kai_name = bridge_cd.kai_name
if kai_name == "My Awesome Instance":
kai_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}"
api_key = bridge_cd.api_key
priority_usernames = bridge_cd.priority_usernames
except:
cluster_url = "https://horde.koboldai.net"
kai_name = self._koboldai_var.horde_worker_name
if kai_name == "My Awesome Instance":
kai_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}"
api_key = self._koboldai_var.horde_api_key
priority_usernames = []
# Always use the local URL & port
kai_url = f'http://127.0.0.1:{self.port}'
if self.on_colab is True:
return
if not os.path.exists("./AI-Horde-Worker"):
return
if value is True:
if self._horde_pid is None:
self._horde_pid = "Pending" # Hack to make sure we don't launch twice while it loads
logger.info("Starting Horde bridge")
logger.debug("Clearing command line args in sys.argv before AI Horde Scribe load")
sys_arg_bkp = sys.argv.copy()
sys.argv = sys.argv[:1]
bd_module = importlib.import_module("AI-Horde-Worker.worker.bridge_data.scribe")
bridge_data = bd_module.KoboldAIBridgeData()
sys.argv = sys_arg_bkp
bridge_data.reload_data()
bridge_data.kai_url = f'http://127.0.0.1:{self.port}'
bridge_data.horde_url = self._koboldai_var.horde_url
bridge_data.api_key = self._koboldai_var.horde_api_key
bridge_data.scribe_name = self._koboldai_var.horde_worker_name
bridge_data.disable_terminal_ui = self._koboldai_var.host
if bridge_data.worker_name == "My Awesome Instance":
bridge_data.worker_name = f"KoboldAI UI Instance #{random.randint(-100000000, 100000000)}"
worker_module = importlib.import_module("AI-Horde-Worker.worker.workers.scribe")
self._horde_pid = worker_module.ScribeWorker(bridge_data)
new_thread = threading.Thread(target=self._horde_pid.start)
new_thread.daemon = True
new_thread.start()

else:
if self._horde_pid is not None:
logger.info("Killing Horde bridge")
self._horde_pid.stop()
self._horde_pid = None

logger.info(f"Name: {kai_name} on {kai_url}")
threading.Thread(target=self._horde_pid.bridge, args=(1, api_key, kai_name, kai_url, cluster_url, priority_usernames)).run()
else:
if self._horde_pid is not None:
logger.info("Killing Horde bridge")
self._horde_pid.stop()
self._horde_pid = None

class KoboldStoryRegister(object):
def __init__(self, socketio, story_settings, koboldai_vars, tokenizer=None, sequence=[]):
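The worker start/stop pattern used above, reduced to a self-contained sketch with a stand-in worker class (ScribeWorker and its bridge data live in the AI-Horde-Worker checkout and are not reproduced here):

import threading
import time

class DummyWorker:
    """Stand-in for the Scribe worker; only the start/stop contract matters."""
    def __init__(self):
        self._stop = threading.Event()
    def start(self):
        while not self._stop.is_set():
            time.sleep(0.1)  # a real worker would poll the Horde for jobs here
    def stop(self):
        self._stop.set()

worker = DummyWorker()
thread = threading.Thread(target=worker.start)
thread.daemon = True   # do not keep the process alive on shutdown
thread.start()
# ... later, mirroring the horde_share = False branch:
worker.stop()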
@@ -2551,7 +2559,7 @@ class KoboldWorldInfo(object):
with open(image_path, "wb") as file:
file.write(base64.b64decode(image_b64))

data["entries"] = {k: self.upgrade_entry(v) for k,v in data["entries"].items()}
data["entries"] = {int(k): self.upgrade_entry(v) for k,v in data["entries"].items()}

#Add the item
start_time = time.time()
@@ -2632,13 +2640,13 @@ class KoboldWorldInfo(object):
self.story_settings.worldinfo.sort(key=lambda x: mapping[x["folder"]] if x["folder"] is not None else float("inf"))

#self.wifolders_d = {} # Dictionary of World Info folder UID-info pairs
self.story_settings.wifolders_d = {str(folder_entries[x]): {'name': x, 'collapsed': False} for x in folder_entries if x != "root"}
self.story_settings.wifolders_d = {folder_entries[x]: {'name': x, 'collapsed': False} for x in folder_entries if x != "root"}

#self.worldinfo_u = {} # Dictionary of World Info UID - key/value pairs
self.story_settings.worldinfo_u = {str(y["uid"]): y for x in folder_entries for y in self.story_settings.worldinfo if y["folder"] == (folder_entries[x] if x != "root" else None)}
self.story_settings.worldinfo_u = {y["uid"]: y for x in folder_entries for y in self.story_settings.worldinfo if y["folder"] == (folder_entries[x] if x != "root" else None)}

#self.wifolders_u = {} # Dictionary of pairs of folder UID - list of WI UID
self.story_settings.wifolders_u = {str(folder_entries[x]): [y for y in self.story_settings.worldinfo if y['folder'] == folder_entries[x]] for x in folder_entries if x != "root"}
self.story_settings.wifolders_u = {folder_entries[x]: [y for y in self.story_settings.worldinfo if y['folder'] == folder_entries[x]] for x in folder_entries if x != "root"}

def reset_used_in_game(self):
for key in self.world_info:
logger.py: 13 changed lines
@@ -17,6 +17,8 @@ class Colors:
STDOUT_LEVELS = ["GENERATION", "PROMPT"]
INIT_LEVELS = ["INIT", "INIT_OK", "INIT_WARN", "INIT_ERR"]
MESSAGE_LEVELS = ["MESSAGE"]
STATS_LEVELS = ["STATS"]

# By default we're at error level or higher
verbosity = 20
quiet = 0
@@ -54,6 +56,16 @@ def is_msg_log(record):
return(False)
return(True)

def is_stats_log(record):
if record["level"].name not in STATS_LEVELS:
return False
return True

def is_not_stats_log(record):
if record["level"].name in STATS_LEVELS:
return False
return True

def is_stderr_log(record):
if record["level"].name in STDOUT_LEVELS + INIT_LEVELS + MESSAGE_LEVELS:
return(False)
@@ -91,6 +103,7 @@ logger.level("INIT_ERR", no=31, color="<red>")
# Messages contain important information without which this application might not be able to be used
# As such, they have the highest priority
logger.level("MESSAGE", no=61, color="<green>")
logger.level("STATS", no=19, color="<blue>")

logger.__class__.generation = partialmethod(logger.__class__.log, "GENERATION")
logger.__class__.prompt = partialmethod(logger.__class__.log, "PROMPT")
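A hedged usage sketch for the new STATS level; it assumes logger.py keeps exporting its configured loguru logger object, and the message text is illustrative only:

from logger import logger  # assumption: the module exports the configured loguru logger

# Routed (or filtered out) by the new is_stats_log / is_not_stats_log helpers.
logger.log("STATS", "Generation took 1.23s")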
@@ -29,7 +29,7 @@ model_backend_type = "Huggingface" #This should be a generic name in case multip
class model_backend(HFTorchInferenceModel):
def __init__(self) -> None:
super().__init__()
self.use_4_bit = False
self.quantization = False

def is_valid(self, model_name, model_path, menu_path):
base_is_valid = super().is_valid(model_name, model_path, menu_path)
@@ -49,6 +49,9 @@ class model_backend(HFTorchInferenceModel):

def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
if not utils.koboldai_vars.hascuda:
logger.warning("Your GPU has not been detected and you can only make use of 32-bit inference, meaning the ram requirements are 8 times higher than specified on the menu and your generations will be slow.\nUnless this is an error and your GPU is known to be compatible with our software check out https://koboldai.org/cpp for a suitable alternative that has wider GPU support and has the ability to run models in 4-bit on the CPU.")

dependency_exists = importlib.util.find_spec("bitsandbytes")
if dependency_exists:
if model_name != 'customhuggingface' or "custom_model_name" in parameters:
@@ -57,22 +60,23 @@ class model_backend(HFTorchInferenceModel):
temp = json.load(f)
else:
temp = {}
requested_parameters.append({
"uitype": "dropdown",
"unit": "text",
"label": "Quantization",
"id": "quantization",
"default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit',
"tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
"menu_path": "Layers",
"children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}],
"extra_classes": "",
"refresh_model_inputs": False
})
if not hasattr(self.model_config, 'quantization_config') and utils.koboldai_vars.hascuda:
requested_parameters.append({
"uitype": "dropdown",
"unit": "text",
"label": "Quantization",
"id": "quantization",
"default": temp['quantization'] if 'quantization' in temp else '4bit' if dependency_exists else '16-bit',
"tooltip": "Whether or not to use BnB's 4-bit or 8-bit mode",
"menu_path": "Layers",
"children": [{'text': '4-bit', 'value': '4bit'}, {'text': '8-bit', 'value': '8bit'}, {'text': '16-bit', 'value':'16-bit'}],
"extra_classes": "",
"refresh_model_inputs": False
})
else:
logger.warning("Bitsandbytes is not installed, you can not use Quantization for Huggingface models")
return requested_parameters


def set_input_parameters(self, parameters):
super().set_input_parameters(parameters)
self.quantization = parameters['quantization'] if 'quantization' in parameters else False
@@ -105,24 +109,25 @@ class model_backend(HFTorchInferenceModel):
"low_cpu_mem_usage": True,
}

if self.quantization == "8bit":
tf_kwargs.update({
"quantization_config":BitsAndBytesConfig(
load_in_8bit=True,
llm_int8_enable_fp32_cpu_offload=True
),
})
if not hasattr(self.model_config, 'quantization_config'):
if self.quantization == "8bit":
tf_kwargs.update({
"quantization_config":BitsAndBytesConfig(
load_in_8bit=True,
llm_int8_enable_fp32_cpu_offload=True
),
})

if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
tf_kwargs.update({
"quantization_config":BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4',
llm_int8_enable_fp32_cpu_offload=True
),
})
if self.quantization == "4bit" or utils.koboldai_vars.colab_arg:
tf_kwargs.update({
"quantization_config":BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type='nf4',
llm_int8_enable_fp32_cpu_offload=True
),
})

if self.model_type == "gpt2":
# We must disable low_cpu_mem_usage and if using a GPT-2 model
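For reference, a hedged sketch of how a 4-bit BitsAndBytesConfig like the one above is consumed by transformers; the model id is a placeholder and the snippet assumes bitsandbytes plus a CUDA GPU are available:

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    llm_int8_enable_fp32_cpu_offload=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",              # placeholder model id
    quantization_config=bnb_config,   # same mechanism as tf_kwargs above
    device_map="auto",
)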
@@ -21,7 +21,7 @@ from pathlib import Path


model_backend_type = "GPTQ"
model_backend_name = "Huggingface GPTQ"
model_backend_name = "Legacy GPTQ"


def load_model_gptq_settings(path):
@@ -155,7 +155,7 @@ class model_backend(HFTorchInferenceModel):

def get_requested_parameters(self, model_name, model_path, menu_path, parameters = {}):
requested_parameters = super().get_requested_parameters(model_name, model_path, menu_path, parameters)
if model_name != 'customhuggingface' or "custom_model_name" in parameters:
if model_name != 'customgptq' or "custom_model_name" in parameters:
if os.path.exists("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_"))) and 'base_url' not in vars(self):
with open("settings/{}.generic_hf_torch.model_backend.settings".format(model_name.replace("/", "_")), "r") as f:
temp = json.load(f)
@@ -228,12 +228,15 @@ class model_backend(HFTorchInferenceModel):
logger.warning(f"Gave up on lazy loading due to {e}")
self.lazy_load = False

if self.get_local_model_path():
# Model is stored locally, load it.
self.model = self._get_model(self.get_local_model_path())
self.tokenizer = self._get_tokenizer(self.get_local_model_path())
else:
raise NotImplementedError("GPTQ Model downloading not implemented")
if not self.get_local_model_path():
print(self.get_local_model_path())
from huggingface_hub import snapshot_download
target_dir = "models/" + self.model_name.replace("/", "_")
print(self.model_name)
snapshot_download(self.model_name, local_dir=target_dir, local_dir_use_symlinks=False, cache_dir="cache/", revision=utils.koboldai_vars.revision)

self.model = self._get_model(self.get_local_model_path())
self.tokenizer = self._get_tokenizer(self.get_local_model_path())

if (
utils.koboldai_vars.badwordsids is koboldai_settings.badwordsids_default
@@ -367,6 +370,7 @@ class model_backend(HFTorchInferenceModel):
raise RuntimeError("Model not supported by Occam's GPTQ")
except:
self.implementation = "AutoGPTQ"

if self.implementation == "AutoGPTQ":
try:
import auto_gptq
@@ -379,11 +383,13 @@ class model_backend(HFTorchInferenceModel):
auto_gptq.modeling._base.AutoConfig = hf_bleeding_edge.AutoConfig
auto_gptq.modeling._base.AutoModelForCausalLM = hf_bleeding_edge.AutoModelForCausalLM

autogptq_failed = False
try:
model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map)
except:
model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, disable_exllama=True)

autogptq_failed = True # Ugly hack to get it to free the VRAM of the last attempt like we do above, better suggestions welcome - Henk
if autogptq_failed:
model = AutoGPTQForCausalLM.from_quantized(location, model_basename=Path(gptq_file).stem, use_safetensors=gptq_file.endswith(".safetensors"), device_map=device_map, inject_fused_attention=False)
# Patch in embeddings function
def get_input_embeddings(self):
return self.model.get_input_embeddings()
@@ -47,7 +47,7 @@ class HFInferenceModel(InferenceModel):
requested_parameters = []
if not self.hf_torch:
return []
if model_name == 'customhuggingface':
if model_name in ('customhuggingface', 'customgptq'):
requested_parameters.append({
"uitype": "text",
"unit": "text",
@@ -61,7 +61,7 @@ class HFInferenceModel(InferenceModel):
"extra_classes": ""
})

if model_name != 'customhuggingface' or "custom_model_name" in parameters:
if model_name not in ('customhuggingface', 'customgptq') or "custom_model_name" in parameters:
model_name = parameters["custom_model_name"] if "custom_model_name" in parameters and parameters["custom_model_name"] != "" else model_name
if model_path is not None and os.path.exists(model_path):
self.model_config = AutoConfig.from_pretrained(model_path)
@@ -234,7 +234,7 @@ class HFInferenceModel(InferenceModel):
if self.model_type == "llama":
# Note: self.tokenizer is a GenericTokenizer, and self.tokenizer.tokenizer is the actual LlamaTokenizer
self.tokenizer.add_bos_token = False

self.tokenizer.legacy = False
# HF transformers no longer supports decode_with_prefix_space
# We work around this by wrapping decode, encode, and __call__
# with versions that work around the 'prefix space' misfeature
@@ -330,19 +330,39 @@ class HFTorchInferenceModel(HFInferenceModel):
if seed is not None:
torch.manual_seed(seed)

if utils.koboldai_vars.use_default_badwordids:
self.active_badwordids = self.badwordsids + additional_bad_words_ids
else:
if additional_bad_words_ids:
self.active_badwordids = additional_bad_words_ids
else:
self.active_badwordids = None

with torch.no_grad():
start_time = time.time()
genout = self.model.generate(
input_ids=gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
),
repetition_penalty=1.0,
bad_words_ids=self.badwordsids + additional_bad_words_ids,
use_cache=True,
num_return_sequences=batch_count,
)
if self.active_badwordids: ## I know duplicating this is ugly, but HF checks if its present and accepts nothing but actual token bans if its there (Which I can't guarantee would be universal enough).... - Henk
genout = self.model.generate(
input_ids=gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
),
repetition_penalty=1.0,
bad_words_ids=self.active_badwordids,
use_cache=True,
num_return_sequences=batch_count,
)
else:
genout = self.model.generate(
input_ids=gen_in,
do_sample=True,
max_length=min(
len(prompt_tokens) + max_new, utils.koboldai_vars.max_length
),
repetition_penalty=1.0,
use_cache=True,
num_return_sequences=batch_count,
)
logger.debug(
"torch_raw_generate: run generator {}s".format(time.time() - start_time)
)
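As the inline comment above notes, transformers accepts bad_words_ids only when it holds actual token bans, which is why the generate() call is duplicated. A hedged alternative sketch that builds the keyword arguments once and adds the ban list only when it is non-empty:

def build_generate_kwargs(base_kwargs, bad_words_ids=None):
    """Return generate() kwargs, including bad_words_ids only when non-empty."""
    kwargs = dict(base_kwargs)
    if bad_words_ids:
        kwargs["bad_words_ids"] = bad_words_ids
    return kwargs

# Hypothetical use inside the generator above:
#   genout = self.model.generate(**build_generate_kwargs(base_kwargs, self.active_badwordids))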
play.bat: 6 changed lines
@@ -18,8 +18,6 @@ IF %M%==3 GOTO drivemap_B

:subfolder
ECHO Runtime launching in subfolder mode
SET TEMP=%~DP0MINICONDA3
SET TMP=%~DP0MINICONDA3
call miniconda3\condabin\activate
python aiserver.py %*
cmd /k
@@ -28,8 +26,6 @@ cmd /k
ECHO Runtime launching in K: drive mode
subst /D K: >nul
subst K: miniconda3 >nul
SET TEMP=K:\
SET TMP=K:\
call K:\python\condabin\activate
python aiserver.py %*
cmd /k
@@ -38,8 +34,6 @@ cmd /k
ECHO Runtime launching in B: drive mode
subst /D B: >nul
subst B: miniconda3 >nul
SET TEMP=B:\
SET TMP=B:\
call B:\python\condabin\activate
python aiserver.py %*
cmd /k
@@ -1,5 +1,7 @@
transformers==4.31.*
huggingface_hub==0.15.1
transformers==4.32.1
huggingface_hub==0.16.4
optimum==1.12.0
safetensors==0.3.3
Flask==2.2.3
Flask-SocketIO==5.3.2
python-socketio==5.7.2
@@ -15,7 +17,7 @@ markdown
bleach==4.1.0
sentencepiece
protobuf
accelerate==0.20.3
accelerate==0.21.0
flask-session==0.4.0
marshmallow>=3.13
apispec-webframeworks
@@ -37,8 +39,13 @@ pytest==7.2.2
pytest-html==3.2.0
pytest-metadata==2.0.4
requests-mock==1.10.0
safetensors==0.3.1
git+https://github.com/0cc4m/hf_bleeding_edge/
einops
peft==0.3.0
scipy
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.10'
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp310-cp310-win_amd64.whl; sys_platform == 'win32' and python_version == '3.10'
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-linux_x86_64.whl; sys_platform == 'linux' and python_version == '3.8'
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu118-cp38-cp38-win_amd64.whl; sys_platform == 'win32' and python_version == '3.8'
windows-curses; sys_platform == 'win32'
pynvml
@@ -15,24 +15,18 @@ IF %M%==2 GOTO subfolder
IF %M%==3 GOTO drivemap_B

:subfolder
SET TEMP=%~DP0MINICONDA3
SET TMP=%~DP0MINICONDA3
call miniconda3\condabin\activate
GOTO GIT

:drivemap
subst /D K: >nul
subst K: miniconda3 >nul
SET TEMP=K:\
SET TMP=K:\
call K:\python\condabin\activate
GOTO GIT

:drivemap_B
subst /D B: >nul
subst B: miniconda3 >nul
SET TEMP=B:\
SET TMP=B:\
call B:\python\condabin\activate
GOTO GIT