Merge pull request #28 from VE-FORBRYDERNE/gpu

Use the old GPU generation mode when all layers are on one GPU
This commit is contained in:
henk717
2021-11-15 07:33:48 +01:00
committed by GitHub

View File

@@ -210,8 +210,6 @@ def device_config(model):
global breakmodel, generator
import breakmodel
n_layers = model.config.num_layers
model.half().to('cpu')
gc.collect()
if(args.breakmodel_gpulayers is not None):
try:
breakmodel.gpu_blocks = list(map(int, args.breakmodel_gpulayers.split(',')))
@@ -274,6 +272,18 @@ def device_config(model):
print(colors.PURPLE + "\nFinal device configuration:")
device_list(n_layers)
# If all layers are on the same device, use the old GPU generation mode
while(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] == 0):
breakmodel.gpu_blocks.pop()
if(len(breakmodel.gpu_blocks) and breakmodel.gpu_blocks[-1] in (-1, model.config.num_layers)):
vars.breakmodel = False
vars.usegpu = True
model = model.to(len(breakmodel.gpu_blocks)-1)
generator = model.generate
return
model.half().to('cpu')
gc.collect()
model.transformer.wte.to(breakmodel.primary_device)
model.transformer.ln_f.to(breakmodel.primary_device)
if(hasattr(model, 'lm_head')):