Mirror of https://github.com/KoboldAI/KoboldAI-Client.git (synced 2025-06-05 21:59:24 +02:00)

Merge branch 'united' into api

Changed file: aiserver.py (107 changed lines)
@@ -755,7 +755,10 @@ def device_config(config):
     global breakmodel, generator
     import breakmodel
     n_layers = utils.num_layers(config)
-    if(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
+    if args.cpu:
+        breakmodel.gpu_blocks = [0]*n_layers
+        return
+    elif(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
         try:
             if(not args.breakmodel_gpulayers):
                 breakmodel.gpu_blocks = []
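The new branch short-circuits device_config() when --cpu is passed: the per-GPU layer allocation is zeroed out and the interactive configuration below is skipped entirely. A minimal sketch of what that list assignment means, with a hypothetical layer count standing in for utils.num_layers(config) and a plain list standing in for breakmodel.gpu_blocks:

# Sketch: CPU-only layer allocation, assuming a 32-layer model.
n_layers = 32                   # hypothetical, normally utils.num_layers(config)
gpu_blocks = [0] * n_layers     # mirrors breakmodel.gpu_blocks in the hunk above

# Every entry is zero, so no transformer layer is routed to a GPU
# and the whole model stays in system RAM.
assert sum(gpu_blocks) == 0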
@@ -1428,6 +1431,8 @@ def get_model_info(model, directory=""):
         url = True
     elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
         pass
+    elif args.cpu:
+        pass
     else:
         layer_count = get_layer_count(model, directory=directory)
         if layer_count is None:
@@ -1952,8 +1957,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         time.sleep(0.1)
     if gpu_layers is not None:
         args.breakmodel_gpulayers = gpu_layers
+    elif initial_load:
+        gpu_layers = args.breakmodel_gpulayers
     if disk_layers is not None:
         args.breakmodel_disklayers = int(disk_layers)
+    elif initial_load:
+        disk_layers = args.breakmodel_disklayers

     #We need to wipe out the existing model and refresh the cuda cache
     model = None
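This hunk makes the function arguments and the CLI flags agree in both directions: an explicit gpu_layers/disk_layers argument overwrites the flag, while on the very first load the argument is filled in from the flag. A hedged sketch of that precedence rule, reusing the names from the hunk with a simulated args object and hypothetical values:

from types import SimpleNamespace

# Stand-in for the parsed CLI flags; the values are hypothetical.
args = SimpleNamespace(breakmodel_gpulayers="28,4")

def resolve_gpu_layers(gpu_layers=None, initial_load=False):
    # Explicit caller value wins and is persisted back onto the flags;
    # on the initial load, fall back to whatever the flags say.
    if gpu_layers is not None:
        args.breakmodel_gpulayers = gpu_layers
    elif initial_load:
        gpu_layers = args.breakmodel_gpulayers
    return gpu_layers

print(resolve_gpu_layers(initial_load=True))   # "28,4" (taken from the flag)
print(resolve_gpu_layers(gpu_layers="30,2"))   # "30,2" (flag updated as well)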
@@ -2064,41 +2073,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     else:
         print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))

     if args.model:
         if(vars.hascuda):
             genselected = True
+    if args.cpu:
+        vars.usegpu = False
+        gpu_layers = None
+        disk_layers = None
+        vars.breakmodel = False
+    elif vars.hascuda:
+        if(vars.bmsupported):
+            vars.usegpu = False
+            vars.breakmodel = True
+        else:
+            vars.breakmodel = False
+            vars.usegpu = True
-            vars.usegpu = True
-            vars.breakmodel = utils.HAS_ACCELERATE
-        if(vars.bmsupported):
-            vars.usegpu = False
-            vars.breakmodel = True
-        if(args.cpu):
-            vars.usegpu = False
-            vars.breakmodel = utils.HAS_ACCELERATE
-    elif(vars.hascuda):
-        if(vars.bmsupported):
-            genselected = True
-            vars.usegpu = False
-            vars.breakmodel = True
-        else:
-            genselected = False
-    else:
-        genselected = False
-
-    if(vars.hascuda):
-        if(use_gpu):
-            if(vars.bmsupported):
-                vars.breakmodel = True
-                vars.usegpu = False
-                genselected = True
-            else:
-                vars.breakmodel = False
-                vars.usegpu = True
-                genselected = True
-        else:
-            vars.breakmodel = utils.HAS_ACCELERATE
-            vars.usegpu = False
-            genselected = True

     # Ask for API key if InferKit was selected
     if(vars.model == "InferKit"):
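The rewrite collapses three overlapping if-chains into one decision: --cpu forces everything off the GPU, otherwise CUDA with breakmodel support prefers layer splitting, and plain CUDA falls back to whole-model GPU use. A standalone sketch of that decision table; the flag names are reused from the hunk, the function wrapper itself is hypothetical:

def pick_device_mode(cpu_flag, hascuda, bmsupported):
    # Returns (usegpu, breakmodel) following the new logic above.
    if cpu_flag:
        return (False, False)    # everything on the CPU
    elif hascuda:
        if bmsupported:
            return (False, True)  # split layers via breakmodel
        else:
            return (True, False)  # whole model on one GPU
    return (False, False)         # no CUDA available at all

assert pick_device_mode(True, True, True) == (False, False)   # --cpu wins
assert pick_device_mode(False, True, True) == (False, True)
assert pick_device_mode(False, True, False) == (True, False)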
@@ -2317,7 +2304,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal

     # If we're using torch_lazy_loader, we need to get breakmodel config
     # early so that it knows where to load the individual model tensors
-    if(utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel):
+    if (utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel) and not vars.nobreakmodel:
+        print(1)
         device_config(model_config)

     # Download model from Huggingface if it does not exist, otherwise load locally
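Worth noting when reading the new condition: Python's `and` binds tighter than `or`, so the parenthesized group evaluates as HAS_ACCELERATE or (lazy_load and hascuda and breakmodel), and only then is the new nobreakmodel guard applied to the whole thing. A small self-checking illustration of that precedence:

# Precedence check for: (A or B and C and D) and not E
A, B, C, D, E = True, False, False, False, False
assert (A or B and C and D) == (A or (B and C and D))  # 'and' binds tighter
assert ((A or B and C and D) and not E) is True

# With the guard set, the condition is suppressed regardless of A through D.
E = True
assert ((A or B and C and D) and not E) is False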
@@ -2448,6 +2436,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel)
         vars.modeldim = get_hidden_size_from_model(model)
         if(not vars.lazy_load):
+            print(2)
             device_config(model.config)
         move_model_to_devices(model)
     elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
@@ -3694,7 +3683,7 @@ def get_message(msg):
     else:
         filename = "settings/{}.breakmodel".format(vars.model.replace('/', '_'))
         f = open(filename, "w")
-        f.write(msg['gpu_layers'] + '\n' + msg['disk_layers'])
+        f.write(str(msg['gpu_layers']) + '\n' + str(msg['disk_layers']))
        f.close()
     vars.colaburl = msg['url'] + "/request"
     load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
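The one-line change guards against msg['gpu_layers'] arriving as an int (or None) rather than a string: concatenating a non-string to '\n' raises TypeError, while str() makes the write safe either way. A minimal reproduction with hypothetical values:

gpu_layers, disk_layers = 28, 0   # ints, as the client may send them

try:
    line = gpu_layers + '\n' + disk_layers        # old code path
except TypeError as e:
    print("old behaviour:", e)                    # unsupported operand type(s)

line = str(gpu_layers) + '\n' + str(disk_layers)  # fixed code path
print(repr(line))                                 # '28\n0'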
@@ -3842,6 +3831,40 @@ def get_message(msg):
         emit('from_server', {'cmd': 'set_debug', 'data': msg['data']}, broadcast=True)
         if vars.debug:
             send_debug()
+    elif(msg['cmd'] == 'getfieldbudget'):
+        unencoded = msg["data"]["unencoded"]
+        field = msg["data"]["field"]
+
+        # Tokenizer may be undefined here when a model has not been chosen.
+        if "tokenizer" not in globals():
+            # We don't have a tokenizer, just return nulls.
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": None, "max": None, "field": field}},
+                broadcast=True
+            )
+            return
+
+        header_length = len(tokenizer._koboldai_header)
+        max_tokens = vars.max_length - header_length - vars.sp_length - vars.genamt
+
+        if not unencoded:
+            # Unencoded is empty, just return 0
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": 0, "max": max_tokens, "field": field}},
+                broadcast=True
+            )
+        else:
+            if field == "anoteinput":
+                unencoded = buildauthorsnote(unencoded, msg["data"]["anotetemplate"])
+            tokens_length = len(tokenizer.encode(unencoded))
+
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": tokens_length, "max": max_tokens, "field": field}},
+                broadcast=True
+            )

#==================================================================#
#  Send userscripts list to client
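The budget reported back to the client is the context window minus everything that competes with the field for space: the model's header tokens, the soft-prompt length, and the amount reserved for generation. A sketch of the same arithmetic with hypothetical numbers standing in for vars.max_length, the tokenizer header, vars.sp_length, and vars.genamt:

# Hypothetical values for the budget computation in the hunk above.
max_length = 2048   # total context window in tokens
header_len = 1      # len(tokenizer._koboldai_header)
sp_length  = 20     # soft prompt tokens
genamt     = 80     # tokens reserved for the generation itself

max_tokens = max_length - header_len - sp_length - genamt
print(max_tokens)   # 1947 tokens left for memory, author's note, and story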
@@ -4326,6 +4349,12 @@ def actionredo():
#==================================================================#
#
#==================================================================#
+def buildauthorsnote(authorsnote, template):
+    # Build Author's Note if set
+    if authorsnote == "":
+        return ""
+    return ("\n" + template + "\n").replace("<|>", authorsnote)
+
def calcsubmitbudgetheader(txt, **kwargs):
    # Scan for WorldInfo matches
    winfo, found_entries = checkworldinfo(txt, **kwargs)
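The new helper centralizes the template substitution that calcsubmitbudgetheader previously inlined (see the following hunk): the `<|>` placeholder in the template is replaced by the note text, and an empty note yields an empty string. Usage, assuming KoboldAI's stock template string:

def buildauthorsnote(authorsnote, template):
    # Build Author's Note if set
    if authorsnote == "":
        return ""
    return ("\n" + template + "\n").replace("<|>", authorsnote)

print(repr(buildauthorsnote("a grim tone", "[Author's note: <|>]")))
# "\n[Author's note: a grim tone]\n"
print(repr(buildauthorsnote("", "[Author's note: <|>]")))
# ''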
@@ -4336,11 +4365,7 @@ def calcsubmitbudgetheader(txt, **kwargs):
     else:
         mem = vars.memory

-    # Build Author's Note if set
-    if(vars.authornote != ""):
-        anotetxt = ("\n" + vars.authornotetemplate + "\n").replace("<|>", vars.authornote)
-    else:
-        anotetxt = ""
+    anotetxt = buildauthorsnote(vars.authornote, vars.authornotetemplate)

     return winfo, mem, anotetxt, found_entries