Merge branch 'united' into api

vfbd
2022-08-12 13:57:50 -04:00
5 changed files with 185 additions and 129 deletions


@@ -755,7 +755,10 @@ def device_config(config):
     global breakmodel, generator
     import breakmodel
     n_layers = utils.num_layers(config)
-    if(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
+    if args.cpu:
+        breakmodel.gpu_blocks = [0]*n_layers
+        return
+    elif(args.breakmodel_gpulayers is not None or (utils.HAS_ACCELERATE and args.breakmodel_disklayers is not None)):
         try:
             if(not args.breakmodel_gpulayers):
                 breakmodel.gpu_blocks = []
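With --cpu set, device_config() now returns early after filling breakmodel.gpu_blocks with zeros, so that the total number of layers placed on GPUs is zero and the whole model stays in system RAM. A minimal standalone sketch of that idea (plan_layer_split, num_gpus, and the even-split fallback are illustrative stand-ins, not the project's actual breakmodel policy):

    def plan_layer_split(num_layers, num_gpus, force_cpu):
        # Returns one entry per GPU; each entry is how many transformer
        # layers that GPU receives. Layers assigned nowhere stay on CPU.
        if force_cpu:
            # All zeros: no layers on any GPU, everything stays in RAM.
            return [0] * num_gpus
        # Illustrative fallback: spread layers evenly across the GPUs.
        base, extra = divmod(num_layers, num_gpus)
        return [base + (1 if i < extra else 0) for i in range(num_gpus)]

    print(plan_layer_split(32, 2, force_cpu=True))   # [0, 0]
    print(plan_layer_split(32, 2, force_cpu=False))  # [16, 16]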
@@ -1428,6 +1431,8 @@ def get_model_info(model, directory=""):
         url = True
     elif not utils.HAS_ACCELERATE and not torch.cuda.is_available():
         pass
+    elif args.cpu:
+        pass
     else:
         layer_count = get_layer_count(model, directory=directory)
         if layer_count is None:
@@ -1952,8 +1957,12 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         time.sleep(0.1)
     if gpu_layers is not None:
         args.breakmodel_gpulayers = gpu_layers
+    elif initial_load:
+        gpu_layers = args.breakmodel_gpulayers
     if disk_layers is not None:
         args.breakmodel_disklayers = int(disk_layers)
+    elif initial_load:
+        disk_layers = args.breakmodel_disklayers

     #We need to wipe out the existing model and refresh the cuda cache
     model = None
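The two new elif arms make the saved command-line flags and the function arguments default to each other: an explicit gpu_layers/disk_layers argument overwrites the stored flag, while on the initial load the stored flag fills in the missing argument. A sketch of that resolution, where Args is a hypothetical stand-in for the parsed argparse namespace:

    from dataclasses import dataclass

    @dataclass
    class Args:
        breakmodel_gpulayers: str = "16,16"
        breakmodel_disklayers: int = 4

    def resolve_layers(args, gpu_layers=None, disk_layers=None, initial_load=False):
        if gpu_layers is not None:
            args.breakmodel_gpulayers = gpu_layers    # explicit request wins
        elif initial_load:
            gpu_layers = args.breakmodel_gpulayers    # first load reads the flag back
        if disk_layers is not None:
            args.breakmodel_disklayers = int(disk_layers)
        elif initial_load:
            disk_layers = args.breakmodel_disklayers
        return gpu_layers, disk_layers

    print(resolve_layers(Args(), initial_load=True))                # ('16,16', 4)
    print(resolve_layers(Args(), gpu_layers="8,8", disk_layers=0))  # ('8,8', 0)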
@@ -2064,41 +2073,19 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     else:
         print("{0}NOT FOUND!{1}".format(colors.YELLOW, colors.END))

     if args.model:
-        if(vars.hascuda):
-            genselected = True
-            vars.breakmodel = utils.HAS_ACCELERATE
-        if(vars.bmsupported):
-            vars.usegpu = False
-            vars.breakmodel = True
-        if(args.cpu):
-            vars.usegpu = False
-            vars.breakmodel = utils.HAS_ACCELERATE
-    elif(vars.hascuda):
-        if(vars.bmsupported):
-            genselected = True
-            vars.usegpu = False
-            vars.breakmodel = True
-        else:
-            genselected = False
-    else:
-        genselected = False
-
-    if(vars.hascuda):
-        if(use_gpu):
-            if(vars.bmsupported):
-                vars.breakmodel = True
-                vars.usegpu = False
-                genselected = True
-            else:
-                vars.breakmodel = False
-                vars.usegpu = True
-                genselected = True
-        else:
-            vars.breakmodel = utils.HAS_ACCELERATE
-            vars.usegpu = False
-            genselected = True
-
+        if args.cpu:
+            vars.usegpu = False
+            gpu_layers = None
+            disk_layers = None
+            vars.breakmodel = False
+        elif vars.hascuda:
+            if(vars.bmsupported):
+                vars.usegpu = False
+                vars.breakmodel = True
+            else:
+                vars.breakmodel = False
+                vars.usegpu = True

     # Ask for API key if InferKit was selected
     if(vars.model == "InferKit"):
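The rewritten block collapses the overlapping hascuda/bmsupported/args.cpu checks into a single precedence chain: --cpu wins unconditionally, then CUDA with breakmodel support splits layers across devices, then a plain single-GPU load. A standalone sketch of just that precedence (it deliberately ignores the genselected bookkeeping and the use_gpu argument that the real function also handles):

    def select_device_mode(cpu_flag, has_cuda, bm_supported):
        # Returns (usegpu, breakmodel) under the new precedence rules.
        if cpu_flag:              # --cpu overrides everything
            return False, False
        if has_cuda:
            if bm_supported:      # split layers across GPUs/CPU/disk
                return False, True
            return True, False    # whole model on a single GPU
        return False, False       # no CUDA available: plain CPU

    assert select_device_mode(True,  True,  True)  == (False, False)
    assert select_device_mode(False, True,  True)  == (False, True)
    assert select_device_mode(False, True,  False) == (True,  False)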
@@ -2317,7 +2304,8 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
     # If we're using torch_lazy_loader, we need to get breakmodel config
     # early so that it knows where to load the individual model tensors
-    if(utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel):
+    if (utils.HAS_ACCELERATE or vars.lazy_load and vars.hascuda and vars.breakmodel) and not vars.nobreakmodel:
+        print(1)
         device_config(model_config)

     # Download model from Huggingface if it does not exist, otherwise load locally
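One subtlety in this condition: Python's precedence (not, then and, then or) makes the old test read as HAS_ACCELERATE or (lazy_load and hascuda and breakmodel), so HAS_ACCELERATE alone could force the branch. The new version parenthesizes that whole expression so the trailing and not vars.nobreakmodel can veto it either way. A quick check:

    # `A or B and C and D` parses as `A or (B and C and D)`;
    # wrapping it lets `and not E` veto even when A alone is true.
    A, B, C, D, E = True, False, False, False, True
    old = A or B and C and D               # True: A decides by itself
    new = (A or B and C and D) and not E   # False: E vetoes the whole test
    assert old is True and new is False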
@@ -2448,6 +2436,7 @@ def load_model(use_gpu=True, gpu_layers=None, disk_layers=None, initial_load=Fal
         elif(vars.breakmodel):  # Use both RAM and VRAM (breakmodel)
             vars.modeldim = get_hidden_size_from_model(model)
             if(not vars.lazy_load):
+                print(2)
                 device_config(model.config)
             move_model_to_devices(model)
         elif(utils.HAS_ACCELERATE and __import__("breakmodel").disk_blocks > 0):
@@ -3694,7 +3683,7 @@ def get_message(msg):
         else:
             filename = "settings/{}.breakmodel".format(vars.model.replace('/', '_'))
             f = open(filename, "w")
-            f.write(msg['gpu_layers'] + '\n' + msg['disk_layers'])
+            f.write(str(msg['gpu_layers']) + '\n' + str(msg['disk_layers']))
             f.close()
         vars.colaburl = msg['url'] + "/request"
         load_model(use_gpu=msg['use_gpu'], gpu_layers=msg['gpu_layers'], disk_layers=msg['disk_layers'], online_model=msg['online_model'])
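The str() coercion guards the settings write: gpu_layers and disk_layers come straight out of the socket message and evidently can be integers (hence the fix), and concatenating an int with '\n' raises a TypeError. A small reproduction:

    msg = {'gpu_layers': 16, 'disk_layers': 0}
    try:
        payload = msg['gpu_layers'] + '\n' + msg['disk_layers']  # old form
    except TypeError as err:
        print(err)  # unsupported operand type(s) for +: 'int' and 'str'
    payload = str(msg['gpu_layers']) + '\n' + str(msg['disk_layers'])  # fixed form
    assert payload == '16\n0'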
@@ -3842,6 +3831,40 @@ def get_message(msg):
         emit('from_server', {'cmd': 'set_debug', 'data': msg['data']}, broadcast=True)
         if vars.debug:
             send_debug()
+    elif(msg['cmd'] == 'getfieldbudget'):
+        unencoded = msg["data"]["unencoded"]
+        field = msg["data"]["field"]
+
+        # Tokenizer may be undefined here when a model has not been chosen.
+        if "tokenizer" not in globals():
+            # We don't have a tokenizer, just return nulls.
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": None, "max": None, "field": field}},
+                broadcast=True
+            )
+            return
+
+        header_length = len(tokenizer._koboldai_header)
+        max_tokens = vars.max_length - header_length - vars.sp_length - vars.genamt
+
+        if not unencoded:
+            # Unencoded is empty, just return 0
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": 0, "max": max_tokens, "field": field}},
+                broadcast=True
+            )
+        else:
+            if field == "anoteinput":
+                unencoded = buildauthorsnote(unencoded, msg["data"]["anotetemplate"])
+            tokens_length = len(tokenizer.encode(unencoded))
+            emit(
+                'from_server',
+                {'cmd': 'showfieldbudget', 'data': {"length": tokens_length, "max": max_tokens, "field": field}},
+                broadcast=True
+            )
+

 #==================================================================#
 #  Send userscripts list to client
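The getfieldbudget handler above computes the remaining token budget as the context window minus the tokenizer header, the soft-prompt length, and the reserved generation amount, and reports None/None when no tokenizer is loaded yet. The arithmetic with illustrative numbers (the real values come from vars.max_length, tokenizer._koboldai_header, vars.sp_length, and vars.genamt):

    max_length = 2048     # model context window, in tokens
    header_length = 2     # tokens taken by the tokenizer's header
    sp_length = 20        # soft prompt length
    genamt = 80           # tokens reserved for the model's output

    max_tokens = max_length - header_length - sp_length - genamt
    print(max_tokens)     # 1946 tokens left for the input fields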
@@ -4326,6 +4349,12 @@ def actionredo():
 #==================================================================#
 #
 #==================================================================#
+def buildauthorsnote(authorsnote, template):
+    # Build Author's Note if set
+    if authorsnote == "":
+        return ""
+    return ("\n" + template + "\n").replace("<|>", authorsnote)
+
 def calcsubmitbudgetheader(txt, **kwargs):
     # Scan for WorldInfo matches
     winfo, found_entries = checkworldinfo(txt, **kwargs)
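buildauthorsnote() factors the template substitution out of calcsubmitbudgetheader so the submit path and the new getfieldbudget handler produce identical text: the note is spliced into the template at the <|> marker and padded with newlines, and an empty note yields an empty string. Usage, with the function copied from the diff above:

    def buildauthorsnote(authorsnote, template):
        # Build Author's Note if set
        if authorsnote == "":
            return ""
        return ("\n" + template + "\n").replace("<|>", authorsnote)

    print(repr(buildauthorsnote("a tense noir mystery", "[Author's note: <|>]")))
    # "\n[Author's note: a tense noir mystery]\n"
    print(repr(buildauthorsnote("", "[Author's note: <|>]")))
    # ''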
@@ -4336,11 +4365,7 @@ def calcsubmitbudgetheader(txt, **kwargs):
     else:
         mem = vars.memory

     # Build Author's Note if set
-    if(vars.authornote != ""):
-        anotetxt = ("\n" + vars.authornotetemplate + "\n").replace("<|>", vars.authornote)
-    else:
-        anotetxt = ""
-
+    anotetxt = buildauthorsnote(vars.authornote, vars.authornotetemplate)

     return winfo, mem, anotetxt, found_entries